// jetbrick.io.stream.UnicodeInputStream (Maven / Gradle / Ivy)
/**
* Copyright 2013-2016 Guoqiang Chen, Shanghai, China. All rights reserved.
*
* Author: Guoqiang Chen
* Email: [email protected]
* WebURL: https://github.com/subchen
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package jetbrick.io.stream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
/**
 * An {@link InputStream} wrapper that recognises a Unicode byte order mark (BOM)
 * at the very beginning of the wrapped stream and removes it from the data that
 * callers read.
 *
 * <p>The stream works in one of two modes, chosen by the {@code targetEncoding}
 * constructor argument:
 * <ul>
 * <li><b>Detection mode</b> ({@code targetEncoding == null}): the first bytes are
 * compared against all known BOMs. When one matches it is skipped and the
 * corresponding encoding name is available from {@link #getDetectedEncoding()}.</li>
 * <li><b>Removal mode</b> ({@code targetEncoding != null}): only the BOM that belongs
 * to the given encoding is looked for and, when present, skipped. No encoding is
 * detected in this mode, so {@link #getDetectedEncoding()} returns {@code null}.</li>
 * </ul>
 *
 * <p>Bytes that were inspected but are not part of a BOM are pushed back and are
 * returned by subsequent reads. The inspection happens lazily on the first read or
 * on the first call to {@link #getDetectedEncoding()}.
 *
 * <p>This class performs no synchronization of its own.
 */
public class UnicodeInputStream extends InputStream {
    /** Length in bytes of the longest supported BOM (UTF-32). */
    public static final int MAX_BOM_SIZE = 4;

    /** UTF-8 BOM: {@code EF BB BF}. */
    public static final byte[] BOM_UTF8 = { -17, -69, -65 };
    /** UTF-16 big-endian BOM: {@code FE FF}. */
    public static final byte[] BOM_UTF16_BE = { -2, -1 };
    /** UTF-16 little-endian BOM: {@code FF FE}. */
    public static final byte[] BOM_UTF16_LE = { -1, -2 };
    /** UTF-32 big-endian BOM: {@code 00 00 FE FF}. */
    public static final byte[] BOM_UTF32_BE = { 0, 0, -2, -1 };
    /** UTF-32 little-endian BOM: {@code FF FE 00 00}. */
    public static final byte[] BOM_UTF32_LE = { -1, -2, 0, 0 };

    private final PushbackInputStream internalInputStream;
    private final String targetEncoding;
    private boolean initialized;
    private int bomSize = -1;
    private String encoding;

    /**
     * Creates a BOM-aware stream on top of {@code in}.
     *
     * @param in the stream to wrap
     * @param targetEncoding {@code null} to detect the encoding from the BOM, or an
     *        encoding name ({@code UTF-8}, {@code UTF-16}, {@code UTF-16LE},
     *        {@code UTF-16BE}, {@code UTF-32}, {@code UTF-32LE}, {@code UTF-32BE})
     *        whose BOM should be stripped when present; any other name disables
     *        BOM handling
     */
    public UnicodeInputStream(InputStream in, String targetEncoding) {
        // The pushback buffer must be able to hold every byte inspected by init().
        this.internalInputStream = new PushbackInputStream(in, MAX_BOM_SIZE);
        this.targetEncoding = targetEncoding;
    }

    /**
     * Returns the encoding identified from the BOM, inspecting the stream first if
     * that has not happened yet.
     *
     * @return the detected encoding name, or {@code null} when no BOM was found or
     *         when the stream was created with an explicit target encoding
     * @throws IllegalStateException if reading the beginning of the stream fails;
     *         the original {@link IOException} is the cause
     */
    public String getDetectedEncoding() {
        if (!initialized) {
            try {
                init();
            } catch (IOException ioex) {
                throw new IllegalStateException(ioex);
            }
        }
        return encoding;
    }

    /**
     * Inspects the first bytes of the wrapped stream, skips a matching BOM and
     * pushes back everything else. Does nothing when called again after a
     * successful run.
     *
     * @throws IOException if reading from or pushing back to the wrapped stream fails
     */
    protected void init() throws IOException {
        if (initialized) {
            return;
        }
        if (targetEncoding == null) {
            detectBom();
        } else {
            removeBom(bomFor(targetEncoding));
        }
        initialized = true;
    }

    /** Detection mode: tries every known BOM and records the matching encoding. */
    private void detectBom() throws IOException {
        byte[] head = new byte[MAX_BOM_SIZE];
        int available = readFully(head);
        int skipped = 0;

        // UTF-32LE must be tested before UTF-16LE because its BOM starts with
        // the UTF-16LE BOM (FF FE).
        if (startsWith(head, available, BOM_UTF32_BE)) {
            encoding = "UTF-32BE";
            skipped = BOM_UTF32_BE.length;
        } else if (startsWith(head, available, BOM_UTF32_LE)) {
            encoding = "UTF-32LE";
            skipped = BOM_UTF32_LE.length;
        } else if (startsWith(head, available, BOM_UTF8)) {
            encoding = "UTF-8";
            skipped = BOM_UTF8.length;
        } else if (startsWith(head, available, BOM_UTF16_BE)) {
            encoding = "UTF-16BE";
            skipped = BOM_UTF16_BE.length;
        } else if (startsWith(head, available, BOM_UTF16_LE)) {
            encoding = "UTF-16LE";
            skipped = BOM_UTF16_LE.length;
        }

        bomSize = skipped;
        // Give back only bytes that were really read and are not part of the BOM.
        if (available > skipped) {
            internalInputStream.unread(head, skipped, available - skipped);
        }
    }

    /**
     * Removal mode: skips {@code bom} when the stream starts with it.
     *
     * @param bom the expected BOM, or {@code null} when the target encoding has none
     */
    private void removeBom(byte[] bom) throws IOException {
        bomSize = 0;
        if (bom == null) {
            return;
        }
        byte[] head = new byte[bom.length];
        int available = readFully(head);
        if (startsWith(head, available, bom)) {
            bomSize = bom.length;
        } else if (available > 0) {
            // Push back exactly what was read; never the unused tail of the buffer.
            internalInputStream.unread(head, 0, available);
        }
    }

    /** Maps a target encoding name to its BOM; {@code null} for unsupported names. */
    private static byte[] bomFor(String encodingName) {
        if (encodingName.equals("UTF-8")) {
            return BOM_UTF8;
        }
        if (encodingName.equals("UTF-16LE")) {
            return BOM_UTF16_LE;
        }
        if (encodingName.equals("UTF-16BE") || encodingName.equals("UTF-16")) {
            return BOM_UTF16_BE;
        }
        if (encodingName.equals("UTF-32LE")) {
            return BOM_UTF32_LE;
        }
        if (encodingName.equals("UTF-32BE") || encodingName.equals("UTF-32")) {
            return BOM_UTF32_BE;
        }
        return null;
    }

    /**
     * Reads until {@code buffer} is full or the stream ends, because a single
     * {@code read} call is allowed to return fewer bytes than requested.
     *
     * @return the number of bytes stored in {@code buffer}, {@code 0} at end of stream
     */
    private int readFully(byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = internalInputStream.read(buffer, total, buffer.length - total);
            if (count < 0) {
                break;
            }
            total += count;
        }
        return total;
    }

    /** Tells whether the first {@code length} valid bytes of {@code data} begin with {@code bom}. */
    private static boolean startsWith(byte[] data, int length, byte[] bom) {
        if (length < bom.length) {
            return false;
        }
        for (int i = 0; i < bom.length; i++) {
            if (data[i] != bom[i]) {
                return false;
            }
        }
        return true;
    }

    /** Closes the wrapped stream. */
    @Override
    public void close() throws IOException {
        internalInputStream.close();
    }

    /**
     * Reads the next byte after the (possibly skipped) BOM.
     *
     * @return the byte as a value in {@code 0..255}, or {@code -1} at end of stream
     */
    @Override
    public int read() throws IOException {
        init();
        return internalInputStream.read();
    }

    /**
     * Bulk read that delegates to the wrapped stream instead of falling back to
     * the byte-at-a-time default of {@link InputStream}.
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        init();
        return internalInputStream.read(b, off, len);
    }

    /**
     * Returns the number of BOM bytes that were skipped.
     *
     * @return the BOM length in bytes, {@code 0} when no BOM was skipped, or
     *         {@code -1} when the stream has not been inspected yet
     */
    public int getBOMSize() {
        return bomSize;
    }
}