net.jawr.web.util.bom.UnicodeBOMReader Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jawr-core Show documentation
Javascript/CSS bundling and compressing tool for java web apps. By using jawr resources are automatically bundled together and optionally minified and gzipped. Jawr provides tag libraries to reference a generated bundle either by id or by using the name of any of its members.
The newest version!
/**
 * Copyright 2012-2016 Ibrahim Chaehoi
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 * 
 * 	http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package net.jawr.web.util.bom;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.security.InvalidParameterException;

/**
 * The UnicodeBOMInputReader class wraps any Reader
 * and detects the presence of any Unicode BOM (Byte Order Mark) at its
 * beginning, as defined by RFC
 * 3629 - UTF-8, a transformation format of ISO 10646
 * 
 * 
 * The Unicode FAQ
 * defines 5 types of BOMs:
 * 

 * 
 * 
 *  * 00 00 FE FF  = UTF-32, big-endian
 * 
 * 
 * 
 * 
 * 
 *  * FF FE 00 00  = UTF-32, little-endian
 * 
 * 
 * 
 * 
 * 
 *  * FE FF        = UTF-16, big-endian
 * 
 * 
 * 
 * 
 * 
 *  * FF FE        = UTF-16, little-endian
 * 
 * 
 * 
 * 
 * 
 *  * EF BB BF     = UTF-8
 * 
 * 
 * 
 * 
 * 
 * 
 * 
 * Use the {@link #getBOM()} method to know whether a BOM has been detected or
 * not.
 * 
 * 
 * Use the {@link #skipBOM()} method to remove the detected BOM from the wrapped
 * Reader object.
 * 
 * 
 * @author Ibrahim CHAEHOI inspired from UnicodeBOMInputStream from Gregory
 *         Pakosz
 */
public class UnicodeBOMReader extends Reader {

	private final PushbackReader in;
	private final Charset charset;
	private final BOM bom;
	private boolean skipped = false;

	/**
	 * Constructs a new UnicodeBOMInputStream that wraps the
	 * specified InputStream.
	 * 
	 * @param reader
	 *            a reader.
	 * 
	 * @param strCharset
	 *            a charset.
	 * 
	 * @throws IOException
	 *             on reading from the specified InputStream when
	 *             trying to detect the Unicode BOM.
	 */
	public UnicodeBOMReader(final Reader reader, final String strCharset) throws IOException

	{
		this(reader, Charset.forName(strCharset));
	}

	/**
	 * Constructs a new UnicodeBOMInputStream that wraps the
	 * specified InputStream.
	 * 
	 * @param reader
	 *            a reader.
	 * 
	 * @param pCharset
	 *            a charset.
	 * 
	 * @throws IOException
	 *             on reading from the specified InputStream when
	 *             trying to detect the Unicode BOM.
	 */
	public UnicodeBOMReader(final Reader reader, final Charset pCharset) throws IOException

	{
		if (reader == null)
			throw new InvalidParameterException("invalid reader: null is not allowed");

		if (pCharset == null)
			throw new InvalidParameterException("invalid charset: null is not allowed");

		in = new PushbackReader(reader, 4);
		charset = pCharset;

		final char[] chBom = new char[4];
		final int read = in.read(chBom);

		CharBuffer cbuf = CharBuffer.wrap(chBom);
		ByteBuffer bbuf = charset.encode(cbuf);
		final byte[] bom = bbuf.array();

		switch (read) {
		case 4:
			if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00)
					&& (bom[3] == (byte) 0x00)) {
				this.bom = BOM.UTF_32_LE;
				break;
			} else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE)
					&& (bom[3] == (byte) 0xFF)) {
				this.bom = BOM.UTF_32_BE;
				break;
			}

		case 3:
			if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
				this.bom = BOM.UTF_8;
				break;
			}

		case 2:
			if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
				this.bom = BOM.UTF_16_LE;
				break;
			} else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
				this.bom = BOM.UTF_16_BE;
				break;
			}

		default:
			this.bom = BOM.NONE;
			break;
		}

		if (read > 0)
			in.unread(chBom, 0, read);
	}

	/**
	 * Returns the BOM that was detected in the wrapped
	 * InputStream object.
	 * 
	 * @return a BOM value.
	 */
	public final BOM getBOM() {
		// BOM type is immutable.
		return bom;
	}

	/**
	 * Skips the BOM that was found in the wrapped
	 * InputStream object.
	 * 
	 * @return this UnicodeBOMInputStream.
	 * 
	 * @throws IOException
	 *             when trying to skip the BOM from the wrapped
	 *             InputStream object.
	 */
	public final synchronized UnicodeBOMReader skipBOM() throws IOException {
		if (!skipped) {

			ByteBuffer bbuf = ByteBuffer.wrap(bom.getBytes());
			CharBuffer cbuf = charset.decode(bbuf);
			char[] bom = cbuf.array();
			int length = 0;
			for (int i = 0; i < bom.length; i++) {
				if (bom[i] == 0) {
					break;
				}
				length++;
			}
			in.skip(length);
			skipped = true;
		}
		return this;
	}

	/**
	 * Returns true if a BOM has been detected
	 * 
	 * @return true if a BOM has been detected
	 */
	public boolean hasBOM() {

		return !bom.equals(BOM.NONE);
	}

	@Override
	public int read(char[] cbuf, int off, int len) throws IOException {
		return in.read(cbuf, off, len);
	}

	@Override
	public int read(CharBuffer target) throws IOException {
		return in.read(target);
	}

	@Override
	public int read() throws IOException {

		return in.read();
	}

	@Override
	public int read(char[] cbuf) throws IOException {
		return in.read(cbuf);
	}

	@Override
	public long skip(long n) throws IOException {
		return in.skip(n);
	}

	@Override
	public boolean ready() throws IOException {
		return in.ready();
	}

	@Override
	public boolean markSupported() {
		return in.markSupported();
	}

	@Override
	public void mark(int readAheadLimit) throws IOException {
		in.mark(readAheadLimit);
	}

	@Override
	public void reset() throws IOException {
		in.reset();
	}

	@Override
	public void close() throws IOException {
		in.close();
	}

}