// liquibase.resource.UtfBomAwareReader (Maven / Gradle / Ivy)
package liquibase.resource;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.io.Reader;
import java.nio.charset.Charset;
/**
 * Reader that tries to identify the encoding of a byte stream by looking at
 * its byte order mark (BOM). If no BOM is found it falls back to the encoding
 * given at construction time. Original ideas from Thomas Weidenfeller and
 * Aki Nieminen.
 *
 * <p>Recognized BOMs: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE and UTF-32LE. A
 * recognized BOM is consumed and is not returned to the caller as character
 * data. Detection is lazy: the stream is first touched when the reader is
 * read from, asked for its encoding, or closed.
 *
 * @author Dominique Broeglin
 */
public class UtfBomAwareReader extends Reader {

    /** Length in bytes of the longest supported BOM (the UTF-32 ones). */
    private static final int MAX_BOM_LENGTH = 4;

    private static final byte[] UTF_8_BOM = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
    private static final byte[] UTF_16BE_BOM = {(byte) 0xFE, (byte) 0xFF};
    private static final byte[] UTF_16LE_BOM = {(byte) 0xFF, (byte) 0xFE};
    private static final byte[] UTF_32BE_BOM = {(byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF};
    private static final byte[] UTF_32LE_BOM = {(byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00};

    /** Wraps the caller's stream so that non-BOM bytes can be pushed back. */
    private final PushbackInputStream pis;

    /** Decoding reader; stays {@code null} until {@link #init()} has run. */
    private InputStreamReader is;

    /** Charset used when the stream does not start with a known BOM; never null. */
    private final String defaultCharsetName;

    /**
     * Creates a reader that falls back to UTF-8 when no BOM is present.
     *
     * @param in the byte stream to decode
     */
    public UtfBomAwareReader(InputStream in) {
        this(in, "UTF-8");
    }

    /**
     * Creates a reader that falls back to the given charset when no BOM is
     * present.
     *
     * @param in                 the byte stream to decode
     * @param defaultCharsetName name of the charset to use when no BOM is found
     * @throws NullPointerException if {@code defaultCharsetName} is null
     */
    public UtfBomAwareReader(InputStream in, String defaultCharsetName) {
        if (defaultCharsetName == null) {
            throw new NullPointerException("defaultCharsetName");
        }
        this.pis = new PushbackInputStream(in, MAX_BOM_LENGTH);
        this.defaultCharsetName = defaultCharsetName;
    }

    /**
     * Returns the fallback charset name supplied at construction time.
     *
     * @return the charset name used when the stream carries no BOM
     */
    public String getDefaultEncoding() {
        return defaultCharsetName;
    }

    /**
     * Returns the encoding actually in use, running BOM detection first if it
     * has not happened yet. The value is whatever
     * {@link InputStreamReader#getEncoding()} reports for the underlying reader.
     *
     * @return the encoding name reported by the underlying reader
     * @throws IllegalStateException if the stream cannot be read to detect the BOM
     */
    public String getEncoding() {
        if (is == null) {
            try {
                init();
            } catch (IOException e) {
                throw new IllegalStateException("Unable to determine encoding",
                        e);
            }
        }
        return is.getEncoding();
    }

    /**
     * Reads up to 4 bytes to determine the BOM. Bytes that are not part of a
     * BOM are pushed back to the input stream; a recognized BOM is discarded.
     * If no BOM is found, the default charset name is used to initialize the
     * reader.
     *
     * @throws IOException if reading the stream fails or the selected charset
     *                     is not supported
     */
    protected void init() throws IOException {
        byte[] head = new byte[MAX_BOM_LENGTH];
        int n = readHead(head);

        String charsetName;
        int bomLength;
        if (startsWith(head, n, UTF_8_BOM)) {
            charsetName = "UTF-8";
            bomLength = UTF_8_BOM.length;
        } else if (startsWith(head, n, UTF_32LE_BOM)) {
            // Must be tested before UTF-16LE: both BOMs begin with FF FE.
            charsetName = "UTF-32LE";
            bomLength = UTF_32LE_BOM.length;
        } else if (startsWith(head, n, UTF_32BE_BOM)) {
            charsetName = "UTF-32BE";
            bomLength = UTF_32BE_BOM.length;
        } else if (startsWith(head, n, UTF_16BE_BOM)) {
            charsetName = "UTF-16BE";
            bomLength = UTF_16BE_BOM.length;
        } else if (startsWith(head, n, UTF_16LE_BOM)) {
            charsetName = "UTF-16LE";
            bomLength = UTF_16LE_BOM.length;
        } else {
            charsetName = defaultCharsetName;
            bomLength = 0;
        }

        // Everything read past the BOM is real content and goes back to the stream.
        int unread = n - bomLength;
        if (unread > 0) {
            pis.unread(head, bomLength, unread);
        }
        is = new InputStreamReader(pis, charsetName);
    }

    /**
     * Fills {@code buf} from the pushback stream, looping because a single
     * {@code read} call is allowed to return fewer bytes than requested.
     *
     * @return the number of bytes stored in {@code buf}; 0 on an empty stream
     */
    private int readHead(byte[] buf) throws IOException {
        int total = 0;
        while (total < buf.length) {
            int r = pis.read(buf, total, buf.length - total);
            if (r <= 0) {
                // -1 is end of stream; 0 would be a misbehaving stream, so stop rather than spin.
                break;
            }
            total += r;
        }
        return total;
    }

    /** Tells whether the first {@code n} valid bytes of {@code buf} begin with {@code prefix}. */
    private static boolean startsWith(byte[] buf, int n, byte[] prefix) {
        if (n < prefix.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if (buf[i] != prefix[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the reader and the wrapped stream. If BOM detection has not run
     * yet it is attempted first; should that attempt fail, the wrapped stream
     * is still closed before the exception propagates.
     */
    @Override
    public void close() throws IOException {
        if (is == null) {
            try {
                init();
            } finally {
                if (is == null) {
                    // init() failed: close the raw stream so it is not leaked.
                    pis.close();
                }
            }
        }
        is.close();
    }

    /**
     * Reads decoded characters into {@code cbuf}, running BOM detection on
     * the first call.
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        if (is == null) {
            init();
        }
        return is.read(cbuf, off, len);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy