org.jwat.common.ISO8859_1 Maven / Gradle / Ivy
/**
* Java Web Archive Toolkit - Software to read and validate ARC, WARC
* and GZip files. (http://jwat.org/)
* Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.jwat.common;
import java.io.ByteArrayOutputStream;
/**
* Small class to decode and encode ISO-8859-1 strings while also validating
* them. Invalid characters are removed in the conversion.
* Non static because not all information can be returned by the methods.
*
* @author nicl
*/
public class ISO8859_1 {
/** Array of valid bytes according to the ISO-8859-1 specification. */
public static final byte[] validBytes = new byte[256];
/** Populate array of valid ISO-8859-1 bytes. */
static {
int i;
for (i=0; i<32; ++i) {
validBytes[i] = 0;
}
for (i=32; i<127; ++i) {
validBytes[i] = (byte)i;
}
validBytes[127] = 0;
for (i=128; i<256; ++i) {
validBytes[i] = (byte)i;
}
}
/**
* Construct an instance that can be used to convert and validate.
*/
public ISO8859_1() {
}
/** Encoded string after call to encode method.
* Invalid chars will have been removed if the method returns false. */
public byte[] encoded;
/**
* Converts a string to a byte array removing invalid characters in the
* process and returning the validity status. The converted byte array
* is accessible through a separate field.
* @param inStr string to be converted and validated
* @param exceptions invalid characters which are allowed
* @return validity status of string to byte array conversion
*/
public boolean encode(String inStr, String exceptions) {
boolean valid = true;
StringBuffer sb = new StringBuffer();
ByteArrayOutputStream out = new ByteArrayOutputStream();
char c;
for (int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy