
// fi.evolver.utils.CharsetUtils Maven / Gradle / Ivy
package fi.evolver.utils;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helpers for resolving charsets by name and for stripping a leading
 * Unicode byte order mark (BOM) from already-decoded text.
 *
 * <p>All methods are static; the class cannot be instantiated.</p>
 */
public final class CharsetUtils {

    private static final Logger LOG = LoggerFactory.getLogger(CharsetUtils.class);

    /**
     * Character sequences a BOM can show up as at the start of decoded text.
     * Ordered longest first so that a longer form is never shadowed by a
     * shorter entry that happens to be its prefix.
     */
    private static final List<String> UTF_BYTE_ORDER_MARKS;

    /** Length of the longest entry in {@link #UTF_BYTE_ORDER_MARKS}; the lookahead needed to detect any of them. */
    private static final int MAX_BOM_LENGTH;

    static {
        List<String> utfByteOrderMarks = new ArrayList<>();
        // UTF-8 BOM bytes (EF BB BF) decoded as ISO-8859-1 / windows-1252.
        utfByteOrderMarks.add("\u00EF\u00BB\u00BF");
        // UTF-32LE BOM bytes (FF FE 00 00) decoded as big-endian UTF-16.
        utfByteOrderMarks.add("\uFFFE\u0000");
        // UTF-32BE BOM bytes (00 00 FE FF) decoded as big-endian UTF-16.
        utfByteOrderMarks.add("\u0000\uFEFF");
        // The BOM itself (U+FEFF), as produced by a decoder that keeps it.
        utfByteOrderMarks.add("\uFEFF");
        // U+FEFF read with the wrong UTF-16 byte order.
        utfByteOrderMarks.add("\uFFFE");
        UTF_BYTE_ORDER_MARKS = Collections.unmodifiableList(utfByteOrderMarks);

        int longest = 0;
        for (String bom : utfByteOrderMarks)
            longest = Math.max(longest, bom.length());
        MAX_BOM_LENGTH = longest;
    }


    private CharsetUtils() { }


    /**
     * Resolves a charset by name, falling back to a default instead of failing.
     *
     * @param name the charset name to look up, may be {@code null}
     * @param defaultValue the value returned when {@code name} is {@code null}
     *        or does not denote a legal, supported charset; may be {@code null}
     * @return the charset named by {@code name}, or {@code defaultValue}
     */
    public static Charset parse(String name, Charset defaultValue) {
        if (name == null)
            return defaultValue;

        try {
            return Charset.forName(name);
        }
        catch (IllegalArgumentException e) {
            // IllegalCharsetNameException and UnsupportedCharsetException are both
            // IllegalArgumentExceptions; either way the name is unusable.
            LOG.warn("Invalid charset {}, defaulting to {}", name, defaultValue);
            return defaultValue;
        }
    }


    /**
     * Removes a leading byte order mark from the given text.
     *
     * @param text the text to clean, may be {@code null}
     * @return {@code text} without its leading BOM, {@code text} unchanged in
     *         content if it has none, or {@code null} if {@code text} is {@code null}
     */
    public static String removeUtfBom(String text) {
        if (text == null)
            return null;

        return text.substring(getUtfBom(text).length());
    }


    /**
     * Advances the reader past a leading byte order mark, if there is one.
     *
     * <p>The reader must support {@link Reader#mark(int)}; otherwise a warning
     * is logged and the reader is left untouched. When no BOM is found the
     * reader is reset to the position it had on entry.</p>
     *
     * @param reader the reader to inspect, positioned at the start of the text
     * @throws IOException if marking, reading, resetting or skipping fails
     */
    public static void skipUtfBom(Reader reader) throws IOException {
        if (!reader.markSupported()) {
            LOG.warn("Trying to skip UTF BOM for reader without mark support: not checking for BOM");
            return;
        }

        reader.mark(MAX_BOM_LENGTH);
        char[] buffer = new char[MAX_BOM_LENGTH];
        int filled = 0;
        // A single read may deliver fewer chars than requested, so keep going
        // until the lookahead is full or the stream ends.
        while (filled < buffer.length) {
            int count = reader.read(buffer, filled, buffer.length - filled);
            if (count <= 0)
                break;
            filled += count;
        }
        reader.reset();

        // Only the chars actually read take part in the comparison; unread
        // buffer slots must not be mistaken for NUL characters in the input.
        long remaining = getUtfBom(new String(buffer, 0, filled)).length();
        while (remaining > 0) {
            long skipped = reader.skip(remaining);
            if (skipped <= 0)
                break;
            remaining -= skipped;
        }
    }


    /**
     * Finds the byte order mark the given text starts with.
     *
     * @param text the text to inspect, must not be {@code null}
     * @return the matching entry of {@link #UTF_BYTE_ORDER_MARKS}, or an empty
     *         string if the text does not start with any of them
     */
    private static String getUtfBom(String text) {
        for (String bom : UTF_BYTE_ORDER_MARKS) {
            if (text.startsWith(bom))
                return bom;
        }
        return "";
    }

}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy