All downloads are free. Search and download functionality uses the official Maven repository.

org.glassfish.json.UnicodeDetectingInputStream Maven / Gradle / Ivy

There is a newer version: 1.1.4
Show newest version
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 2012-2017 Oracle and/or its affiliates. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License.  You can
 * obtain a copy of the License at
 * https://oss.oracle.com/licenses/CDDL+GPL-1.1
 * or LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at LICENSE.txt.
 *
 * GPL Classpath Exception:
 * Oracle designates this particular file as subject to the "Classpath"
 * exception as provided by Oracle in the GPL Version 2 section of the License
 * file that accompanied this code.
 *
 * Modifications:
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyright [year] [name of copyright owner]"
 *
 * Contributor(s):
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 */

package org.glassfish.json;

import javax.json.JsonException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * A filter stream that detects the unicode encoding for the original
 * stream.
 *
 * <p>On construction, up to four bytes are read from the wrapped stream and
 * inspected, first for a byte order mark (BOM) and then, if no BOM is
 * present, for the NUL-byte pattern described by the JSON RFC's encoding
 * detection algorithm. The inspected bytes are kept in a small internal
 * buffer and replayed by the {@code read} methods (with any BOM skipped)
 * before reading continues from the wrapped stream.
 *
 * @author Jitendra Kotamraju
 */
class UnicodeDetectingInputStream extends FilterInputStream {

    private static final Charset UTF_32LE = Charset.forName("UTF-32LE");
    private static final Charset UTF_32BE = Charset.forName("UTF-32BE");

    // Byte values that make up the supported byte order marks.
    private static final byte FF = (byte)0xFF;
    private static final byte FE = (byte)0xFE;
    private static final byte EF = (byte)0xEF;
    private static final byte BB = (byte)0xBB;
    private static final byte BF = (byte)0xBF;
    private static final byte NUL = (byte)0x00;

    /** Look-ahead bytes consumed from the wrapped stream during detection. */
    private final byte[] buf = new byte[4];
    /** Number of valid bytes in {@link #buf}. */
    private int bufLen;
    /** Index of the next byte in {@link #buf} to hand out; starts past any BOM. */
    private int curIndex;
    private final Charset charset;

    /**
     * Wraps the given stream and immediately detects its encoding.
     *
     * @param is the stream whose encoding is to be detected
     * @throws JsonException if reading the leading bytes fails, or if fewer
     *         than two bytes are available
     */
    UnicodeDetectingInputStream(InputStream is) {
        super(is);
        charset = detectEncoding();
    }

    /**
     * Returns the encoding that was detected when this stream was created.
     *
     * @return the detected charset
     */
    Charset getCharset() {
        return charset;
    }

    /**
     * Reads up to {@code buf.length} bytes from the wrapped stream into
     * {@link #buf}, stopping early at end of stream, and records the number
     * of bytes read in {@link #bufLen}.
     *
     * @throws JsonException wrapping any {@link IOException} raised by the
     *         wrapped stream
     */
    private void fillBuf() {
        try {
            while (bufLen < buf.length) {
                int next = in.read();
                if (next == -1) {
                    return;
                }
                buf[bufLen++] = (byte)next;
            }
        } catch (IOException ioe) {
            throw new JsonException(JsonMessages.PARSER_INPUT_ENC_DETECT_IOERR(), ioe);
        }
    }

    /**
     * Fills the look-ahead buffer and determines the encoding from it.
     *
     * <p>When four bytes are available, a BOM is looked for first; if one is
     * found, {@link #curIndex} is advanced so the BOM is not replayed.
     * Otherwise the positions of NUL bytes are used to tell UTF-32 and UTF-16
     * (big or little endian) apart. Input of two or three bytes, and any
     * four-byte prefix matching none of the patterns, is treated as UTF-8.
     *
     * @return the detected charset
     * @throws JsonException if fewer than two bytes could be read
     */
    private Charset detectEncoding() {
        fillBuf();
        if (bufLen < 2) {
            throw new JsonException(JsonMessages.PARSER_INPUT_ENC_DETECT_FAILED());
        } else if (bufLen == 4) {
            // Use BOM to detect encoding. The UTF-32 checks must come before
            // the UTF-16 ones because the UTF-32LE BOM starts with the
            // UTF-16LE BOM (FF FE).
            if (buf[0] == NUL && buf[1] == NUL && buf[2] == FE && buf[3] == FF) {
                curIndex = 4;
                return UTF_32BE;
            } else if (buf[0] == FF && buf[1] == FE && buf[2] == NUL && buf[3] == NUL) {
                curIndex = 4;
                return UTF_32LE;
            } else if (buf[0] == FE && buf[1] == FF) {
                curIndex = 2;
                return StandardCharsets.UTF_16BE;
            } else if (buf[0] == FF && buf[1] == FE) {
                curIndex = 2;
                return StandardCharsets.UTF_16LE;
            } else if (buf[0] == EF && buf[1] == BB && buf[2] == BF) {
                curIndex = 3;
                return StandardCharsets.UTF_8;
            }
            // No BOM, just use JSON RFC's encoding algo to auto-detect
            if (buf[0] == NUL && buf[1] == NUL && buf[2] == NUL) {
                return UTF_32BE;
            } else if (buf[0] == NUL && buf[2] == NUL) {
                return StandardCharsets.UTF_16BE;
            } else if (buf[1] == NUL && buf[2] == NUL && buf[3] == NUL) {
                return UTF_32LE;
            } else if (buf[1] == NUL && buf[3] == NUL) {
                return StandardCharsets.UTF_16LE;
            }
        }
        return StandardCharsets.UTF_8;
    }

    /**
     * Returns the next byte, first draining the look-ahead buffer and then
     * delegating to the wrapped stream.
     *
     * @return the next byte as a value in the range 0..255, or -1 at end of
     *         stream
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int read() throws IOException {
        if (curIndex < bufLen) {
            // Mask to an unsigned value: a raw byte >= 0x80 would otherwise be
            // sign-extended to a negative int, and 0xFF would become -1, which
            // callers interpret as end of stream.
            return buf[curIndex++] & 0xFF;
        }
        return in.read();
    }

    /**
     * Reads bytes into {@code b}, first draining the look-ahead buffer. While
     * buffered bytes remain, only those are returned (possibly fewer than
     * {@code len}); once the buffer is empty the call is delegated to the
     * wrapped stream.
     *
     * @param b destination array
     * @param off offset in {@code b} at which to start storing bytes
     * @param len maximum number of bytes to read
     * @return the number of bytes stored, or whatever the wrapped stream
     *         returns once the buffer is exhausted
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} do not
     *         describe a valid range of {@code b}
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int read(byte b[], int off, int len) throws IOException {
        if (curIndex < bufLen) {
            if (len == 0) {
                return 0;
            }
            if (off < 0 || len < 0 || len > b.length -off) {
                throw new IndexOutOfBoundsException();
            }
            int min = Math.min(bufLen-curIndex, len);
            System.arraycopy(buf, curIndex, b, off, min);
            curIndex += min;
            return min;
        }
        return in.read(b, off, len);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy