All downloads are free. The search and download functionality uses the official Maven repository.

net.jawr.web.util.bom.UnicodeBOMReader Maven / Gradle / Ivy

Go to download

JavaScript/CSS bundling and compression tool for Java web apps. With Jawr, resources are automatically bundled together and optionally minified and gzipped. Jawr provides tag libraries to reference a generated bundle either by id or by the name of any of its members.

The newest version!
/**
 * Copyright 2012-2016 Ibrahim Chaehoi
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 * 
 * 	http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package net.jawr.web.util.bom;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.security.InvalidParameterException;

/**
 * The UnicodeBOMInputReader class wraps any Reader
 * and detects the presence of any Unicode BOM (Byte Order Mark) at its
 * beginning, as defined by RFC
 * 3629 - UTF-8, a transformation format of ISO 10646
 * 
 * 

* The Unicode FAQ * defines 5 types of BOMs: *

    *
  • * *
     * 00 00 FE FF  = UTF-32, big-endian
     * 
    * *
  • *
  • * *
     * FF FE 00 00  = UTF-32, little-endian
     * 
    * *
  • *
  • * *
     * FE FF        = UTF-16, big-endian
     * 
    * *
  • *
  • * *
     * FF FE        = UTF-16, little-endian
     * 
    * *
  • *
  • * *
     * EF BB BF     = UTF-8
     * 
    * *
  • *
*

* *

* Use the {@link #getBOM()} method to know whether a BOM has been detected or * not. *

*

* Use the {@link #skipBOM()} method to remove the detected BOM from the wrapped * Reader object. *

* * @author Ibrahim CHAEHOI inspired from UnicodeBOMInputStream from Gregory * Pakosz */ public class UnicodeBOMReader extends Reader { private final PushbackReader in; private final Charset charset; private final BOM bom; private boolean skipped = false; /** * Constructs a new UnicodeBOMInputStream that wraps the * specified InputStream. * * @param reader * a reader. * * @param strCharset * a charset. * * @throws IOException * on reading from the specified InputStream when * trying to detect the Unicode BOM. */ public UnicodeBOMReader(final Reader reader, final String strCharset) throws IOException { this(reader, Charset.forName(strCharset)); } /** * Constructs a new UnicodeBOMInputStream that wraps the * specified InputStream. * * @param reader * a reader. * * @param pCharset * a charset. * * @throws IOException * on reading from the specified InputStream when * trying to detect the Unicode BOM. */ public UnicodeBOMReader(final Reader reader, final Charset pCharset) throws IOException { if (reader == null) throw new InvalidParameterException("invalid reader: null is not allowed"); if (pCharset == null) throw new InvalidParameterException("invalid charset: null is not allowed"); in = new PushbackReader(reader, 4); charset = pCharset; final char[] chBom = new char[4]; final int read = in.read(chBom); CharBuffer cbuf = CharBuffer.wrap(chBom); ByteBuffer bbuf = charset.encode(cbuf); final byte[] bom = bbuf.array(); switch (read) { case 4: if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { this.bom = BOM.UTF_32_LE; break; } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { this.bom = BOM.UTF_32_BE; break; } case 3: if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { this.bom = BOM.UTF_8; break; } case 2: if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { this.bom = BOM.UTF_16_LE; break; } else if 
((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { this.bom = BOM.UTF_16_BE; break; } default: this.bom = BOM.NONE; break; } if (read > 0) in.unread(chBom, 0, read); } /** * Returns the BOM that was detected in the wrapped * InputStream object. * * @return a BOM value. */ public final BOM getBOM() { // BOM type is immutable. return bom; } /** * Skips the BOM that was found in the wrapped * InputStream object. * * @return this UnicodeBOMInputStream. * * @throws IOException * when trying to skip the BOM from the wrapped * InputStream object. */ public final synchronized UnicodeBOMReader skipBOM() throws IOException { if (!skipped) { ByteBuffer bbuf = ByteBuffer.wrap(bom.getBytes()); CharBuffer cbuf = charset.decode(bbuf); char[] bom = cbuf.array(); int length = 0; for (int i = 0; i < bom.length; i++) { if (bom[i] == 0) { break; } length++; } in.skip(length); skipped = true; } return this; } /** * Returns true if a BOM has been detected * * @return true if a BOM has been detected */ public boolean hasBOM() { return !bom.equals(BOM.NONE); } @Override public int read(char[] cbuf, int off, int len) throws IOException { return in.read(cbuf, off, len); } @Override public int read(CharBuffer target) throws IOException { return in.read(target); } @Override public int read() throws IOException { return in.read(); } @Override public int read(char[] cbuf) throws IOException { return in.read(cbuf); } @Override public long skip(long n) throws IOException { return in.skip(n); } @Override public boolean ready() throws IOException { return in.ready(); } @Override public boolean markSupported() { return in.markSupported(); } @Override public void mark(int readAheadLimit) throws IOException { in.mark(readAheadLimit); } @Override public void reset() throws IOException { in.reset(); } @Override public void close() throws IOException { in.close(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy