All downloads are free. The search and download functionality uses the official Maven repository.

fi.evolver.utils.CharsetUtils Maven / Gradle / Ivy

There is a newer version: 3.5.0
Show newest version
package fi.evolver.utils;

import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Static helpers for resolving charsets by name and for stripping a leading
 * UTF byte order mark (BOM) from already-decoded text.
 *
 * <p>The class is not instantiable; all members are static.</p>
 */
public class CharsetUtils {
	private static final Logger LOG = LoggerFactory.getLogger(CharsetUtils.class);

	/**
	 * Byte order marks as they appear in decoded text.
	 *
	 * <p>Regardless of the source encoding (UTF-8, UTF-16, UTF-32), a correctly
	 * decoded BOM is the single character U+FEFF. U+FFFE is what the same mark
	 * looks like when UTF-16 data was decoded with the wrong byte order.</p>
	 */
	private static final List<String> UTF_BYTE_ORDER_MARKS;

	/** Length in chars of the longest entry in {@link #UTF_BYTE_ORDER_MARKS}. */
	private static final int MAX_BOM_LENGTH;

	static {
		// A Java Unicode escape consumes exactly four hex digits, so byte
		// sequences such as EF BB BF cannot be written as one escape: any
		// additional digits would become literal text and never match a real BOM.
		List<String> utfByteOrderMarks = new ArrayList<>();
		utfByteOrderMarks.add("\uFEFF");
		utfByteOrderMarks.add("\uFFFE");
		UTF_BYTE_ORDER_MARKS = Collections.unmodifiableList(utfByteOrderMarks);

		int maxLength = 0;
		for (String bom : utfByteOrderMarks) {
			maxLength = Math.max(maxLength, bom.length());
		}
		MAX_BOM_LENGTH = maxLength;
	}


	private CharsetUtils() { }


	/**
	 * Resolves a charset by name, falling back to a default instead of throwing.
	 *
	 * @param name the charset name to look up, may be {@code null}
	 * @param defaultValue the value returned when {@code name} is {@code null}
	 *        or cannot be resolved; may itself be {@code null}
	 * @return the charset named by {@code name}, or {@code defaultValue}
	 */
	public static Charset parse(String name, Charset defaultValue) {
		if (name == null) {
			return defaultValue;
		}

		try {
			return Charset.forName(name);
		}
		catch (RuntimeException e) {
			// Deliberately best-effort: an illegal or unsupported name is
			// reported and replaced by the caller-supplied default.
			LOG.warn("Invalid charset {}, defaulting to {}", name, defaultValue);
			return defaultValue;
		}
	}


	/**
	 * Removes a leading byte order mark from the given text.
	 *
	 * @param text the text to clean, may be {@code null}
	 * @return {@code text} without its leading BOM, the unchanged content when
	 *         there is no BOM, or {@code null} when {@code text} is {@code null}
	 */
	public static String removeUtfBom(String text) {
		if (text == null) {
			return null;
		}

		return text.substring(getUtfBom(text).length());
	}


	/**
	 * Advances the reader past a leading byte order mark, if there is one.
	 *
	 * <p>The reader must support {@link Reader#mark(int)}; otherwise a warning
	 * is logged and the reader is left untouched. When no BOM is present the
	 * reader is reset to the position it had on entry.</p>
	 *
	 * @param reader the reader positioned at the start of the text
	 * @throws IOException if reading from, resetting or skipping the reader fails
	 * @throws NullPointerException if {@code reader} is {@code null}
	 */
	public static void skipUtfBom(Reader reader) throws IOException {
		if (!reader.markSupported()) {
			LOG.warn("Trying to skip UTF BOM for reader without mark support: not checking for BOM");
			return;
		}

		reader.mark(MAX_BOM_LENGTH);
		char[] buffer = new char[MAX_BOM_LENGTH];
		int count = reader.read(buffer);
		reader.reset();
		if (count <= 0) {
			// End of stream (or nothing read): there is no BOM to skip.
			return;
		}

		// Only the chars actually read take part in the match, not buffer padding.
		String bom = getUtfBom(new String(buffer, 0, count));

		// Reader.skip may skip fewer chars than requested, so keep going until
		// the whole BOM is consumed or the reader stops making progress.
		long remaining = bom.length();
		while (remaining > 0) {
			long skipped = reader.skip(remaining);
			if (skipped <= 0) {
				break;
			}
			remaining -= skipped;
		}
	}


	/**
	 * Finds the byte order mark the text starts with.
	 *
	 * @param text the text to inspect, must not be {@code null}
	 * @return the matching BOM, or the empty string when the text has none
	 */
	private static String getUtfBom(String text) {
		for (String bom : UTF_BYTE_ORDER_MARKS) {
			if (text.startsWith(bom)) {
				return bom;
			}
		}
		return "";
	}

}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy