org.apache.abdera.i18n.text.io.CharsetSniffingInputStream Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.abdera.i18n.text.io;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
* Will attempt to autodetect the character encoding from the stream By default, this will preserve the BOM if it exists
*
* @deprecated This API is deprecated as Apache Abdera is a retired project since 2017.
*/
@Deprecated(since = "2021-07-29")
public class CharsetSniffingInputStream extends FilterInputStream {
public static enum Encoding {
UTF32be("UTF-32", true, new byte[] { 0x00, 0x00, 0xFFFFFFFE, 0xFFFFFFFF }),
UTF32le("UTF-32", true, new byte[] { 0xFFFFFFFF, 0xFFFFFFFE, 0x00, 0x00 }),
INVALID(null, true, new byte[] { 0xFFFFFFFE, 0xFFFFFFFF, 0x00, 0x00 }, new byte[] { 0x00, 0x00, 0xFFFFFFFF, 0xFFFFFFFE }),
UTF16be("UTF-16", true, new byte[] { 0xFFFFFFFE, 0xFFFFFFFF }),
UTF16le("UTF-16", true, new byte[] { 0xFFFFFFFF, 0xFFFFFFFE }),
UTF8("UTF-8", true, new byte[] { 0xFFFFFFEF, 0xFFFFFFBB, 0xFFFFFFBF }),
UTF32be2("UTF-32be", false, new byte[] { 0x00, 0x00, 0x00, 0x3C }),
UTF32le2("UTF-32le", false, new byte[] { 0x3C, 0x00, 0x00, 0x00 }),
UTF16be2("UTF-16be", false, new byte[] { 0x00, 0x3C, 0x00, 0x3F }),
UTF16le2("UTF-16le", false, new byte[] { 0x3C, 0x00, 0x3F, 0x00 });
private final String enc;
private final byte[][] checks;
private final boolean bom;
Encoding(String name, boolean bom, byte[]... checks) {
this.enc = name;
this.checks = checks;
this.bom = bom;
}
public String getEncoding() {
return enc;
}
public boolean getBom() {
return bom;
}
public int equals(byte[] bom) {
for (byte[] check : checks) {
if (CharsetSniffingInputStream.equals(bom, check.length, check))
return check.length;
}
return 0;
}
}
protected String encoding;
protected boolean bomset = false;
protected final boolean preserve;
public CharsetSniffingInputStream(InputStream in) {
this(in, true);
}
public CharsetSniffingInputStream(InputStream in, boolean preserveBom) {
super(!(in instanceof PeekAheadInputStream) ? new PeekAheadInputStream(in, 4) : in);
this.preserve = preserveBom;
try {
encoding = detectEncoding();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public boolean isBomSet() {
return bomset;
}
public String getEncoding() {
return encoding;
}
protected PeekAheadInputStream getInternal() {
return (PeekAheadInputStream) in;
}
private static boolean equals(byte[] a1, int len, byte[] a2) {
for (int n = 0, i = 0; n < len; n++, i++) {
if (a1[n] != a2[i])
return false;
}
return true;
}
protected String detectEncoding() throws IOException {
PeekAheadInputStream pin = (PeekAheadInputStream) this.in;
byte[] bom = new byte[4];
pin.peek(bom);
bomset = false;
for (Encoding enc : Encoding.values()) {
int bomlen = enc.equals(bom);
if (bomlen > 0) {
bomset = enc.getBom();
if (// consume the bom
bomset && !preserve)
pin.read(new byte[bomlen]);
return enc.getEncoding();
}
}
return null;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy