All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.java.org.codehaus.jackson.impl.ByteSourceBootstrapper Maven / Gradle / Ivy

Go to download

Jackson is a high-performance JSON processor (parser, generator)

There is a newer version: 0.9.5
Show newest version
package org.codehaus.jackson.impl;

import java.io.*;

import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.io.*;
import org.codehaus.jackson.sym.NameCanonicalizer;

/**
 * This class is used to determine the encoding of byte stream
 * that is to contain JSON content. Rules are fairly simple, and
 * defined in JSON specification (RFC-4627 or newer), except
 * for BOM handling, which is a property of underlying
 * streams.
 */
public final class ByteSourceBootstrapper
{
    /*
    ////////////////////////////////////////
    // Configuration
    ////////////////////////////////////////
    */

    final IOContext mContext;

    final InputStream mIn;

    /*
    ///////////////////////////////////////////////////////////////
    // Input buffering
    ///////////////////////////////////////////////////////////////
    */

    final byte[] mInputBuffer;

    private int mInputPtr;

    private int mInputEnd;

    /**
     * Flag that indicates whether buffer above is to be recycled
     * after being used or not.
     */
    private final boolean mBufferRecyclable;

    /*
    ///////////////////////////////////////////////////////////////
    // Input location
    ///////////////////////////////////////////////////////////////
    */

    /**
     * Current number of input units (bytes or chars) that were processed in
     * previous blocks,
     * before contents of current input buffer.
     *

* Note: includes possible BOMs, if those were part of the input. */ protected int mInputProcessed; /* /////////////////////////////////////////////////////////////// // Data gathered /////////////////////////////////////////////////////////////// */ boolean mBigEndian = true; int mBytesPerChar = 0; // 0 means "dunno yet" /* //////////////////////////////////////////////////// // Life-cycle //////////////////////////////////////////////////// */ public ByteSourceBootstrapper(IOContext ctxt, InputStream in) { mContext = ctxt; mIn = in; mInputBuffer = ctxt.allocReadIOBuffer(); mInputEnd = mInputPtr = 0; mInputProcessed = 0; mBufferRecyclable = true; } public ByteSourceBootstrapper(IOContext ctxt, byte[] inputBuffer, int inputStart, int inputLen) { mContext = ctxt; mIn = null; mInputBuffer = inputBuffer; mInputPtr = inputStart; mInputEnd = (inputStart + inputLen); // Need to offset this for correct location info mInputProcessed = -inputStart; mBufferRecyclable = false; } /** * Method that should be called after constructing an instace. * It will figure out encoding that content uses, to allow * for instantiating a proper scanner object. */ public JsonEncoding detectEncoding() throws IOException, JsonParseException { boolean foundEncoding = false; // First things first: BOM handling /* Note: we can require 4 bytes to be read, since no * combination of BOM + valid JSON content can have * shorter length (shortest valid JSON content is single * digit char, but BOMs are chosen such that combination * is always at least 4 chars long) */ if (ensureLoaded(4)) { int quad = (mInputBuffer[mInputPtr] << 24) | ((mInputBuffer[mInputPtr+1] & 0xFF) << 16) | ((mInputBuffer[mInputPtr+2] & 0xFF) << 8) | (mInputBuffer[mInputPtr+3] & 0xFF); if (handleBOM(quad)) { foundEncoding = true; } else { /* If no BOM, need to auto-detect based on first char; * this works since it must be 7-bit ascii (wrt. unicode * compatible encodings, only ones JSON can be transferred * over) */ // UTF-32? if (checkUTF32(quad)) { foundEncoding = true; } else if (checkUTF16(quad >>> 16)) { foundEncoding = true; } } } else if (ensureLoaded(2)) { int i16 = ((mInputBuffer[mInputPtr] & 0xFF) << 8) | (mInputBuffer[mInputPtr+1] & 0xFF); if (checkUTF16(i16)) { foundEncoding = true; } } JsonEncoding enc; /* Not found yet? As per specs, this means it must be UTF-8. */ if (!foundEncoding) { enc = JsonEncoding.UTF8; } else if (mBytesPerChar == 2) { enc = mBigEndian ? JsonEncoding.UTF16_BE : JsonEncoding.UTF16_LE; } else if (mBytesPerChar == 4) { enc = mBigEndian ? JsonEncoding.UTF32_BE : JsonEncoding.UTF32_LE; } else { throw new RuntimeException("Internal error"); // should never get here } mContext.setEncoding(enc); return enc; } public Reader constructReader() throws IOException { JsonEncoding enc = mContext.getEncoding(); switch (enc) { case UTF32_BE: case UTF32_LE: return new UTF32Reader(mContext, mIn, mInputBuffer, mInputPtr, mInputEnd, mContext.getEncoding().isBigEndian()); case UTF16_BE: case UTF16_LE: { // First: do we have a Stream? If not, need to create one: InputStream in = mIn; if (in == null) { in = new ByteArrayInputStream(mInputBuffer, mInputPtr, mInputEnd); } else { /* Also, if we have any read but unused input (usually true), * need to merge that input in: */ if (mInputPtr < mInputEnd) { in = new MergedStream(mContext, in, mInputBuffer, mInputPtr, mInputEnd); } } return new InputStreamReader(in, enc.getJavaName()); } case UTF8: return new UTF8Reader(mContext, mIn, mInputBuffer, mInputPtr, mInputEnd); default: throw new RuntimeException("Internal error"); // should never get here } //return new ReaderBasedParser(mContext, r, basicSymbols.makeChild()); } public Utf8StreamParser createFastUtf8Parser(NameCanonicalizer nc) { return new Utf8StreamParser(mContext, mIn, nc, mInputBuffer, mInputPtr, mInputEnd, mBufferRecyclable); } /* ///////////////////////////////////////////////////////////////// // Internal methods, parsing ///////////////////////////////////////////////////////////////// */ /** * @return True if a BOM was succesfully found, and encoding * thereby recognized. */ private boolean handleBOM(int quad) throws IOException { /* Handling of (usually) optional BOM (required for * multi-byte formats); first 32-bit charsets: */ switch (quad) { case 0x0000FEFF: mBigEndian = true; mInputPtr += 4; mBytesPerChar = 4; return true; case 0xFFFE0000: // UCS-4, LE? mInputPtr += 4; mBytesPerChar = 4; mBigEndian = false; return true; case 0x0000FFFE: // UCS-4, in-order... reportWeirdUCS4("2143"); // throws exception case 0xFEFF0000: // UCS-4, in-order... reportWeirdUCS4("3412"); // throws exception } // Ok, if not, how about 16-bit encoding BOMs? int msw = quad >>> 16; if (msw == 0xFEFF) { // UTF-16, BE mInputPtr += 2; mBytesPerChar = 2; mBigEndian = true; return true; } if (msw == 0xFFFE) { // UTF-16, LE mInputPtr += 2; mBytesPerChar = 2; mBigEndian = false; return true; } // And if not, then UTF-8 BOM? if ((quad >>> 8) == 0xEFBBBF) { // UTF-8 mInputPtr += 3; mBytesPerChar = 1; mBigEndian = true; // doesn't really matter return true; } return false; } private boolean checkUTF32(int quad) throws IOException { /* Handling of (usually) optional BOM (required for * multi-byte formats); first 32-bit charsets: */ if ((quad >> 8) == 0) { // 0x000000?? -> UTF32-BE mBigEndian = true; } else if ((quad & 0x00FFFFFF) == 0) { // 0x??000000 -> UTF32-LE mBigEndian = false; } else if ((quad & ~0x00FF0000) == 0) { // 0x00??0000 -> UTF32-in-order reportWeirdUCS4("3412"); } else if ((quad & ~0x0000FF00) == 0) { // 0x0000??00 -> UTF32-in-order reportWeirdUCS4("2143"); } else { // Can not be valid UTF-32 encoded JSON... return false; } // Not BOM (just regular content), nothing to skip past: //mInputPtr += 4; mBytesPerChar = 4; return true; } private boolean checkUTF16(int i16) { if ((i16 & 0xFF00) == 0) { // UTF-16BE mBigEndian = true; } else if ((i16 & 0x00FF) == 0) { // UTF-16LE mBigEndian = false; } else { // nope, not UTF-16 return false; } // Not BOM (just regular content), nothing to skip past: //mInputPtr += 2; mBytesPerChar = 2; return true; } /* ///////////////////////////////////////////////////////////////// // Internal methods, problem reporting ///////////////////////////////////////////////////////////////// */ private void reportWeirdUCS4(String type) throws IOException { throw new CharConversionException("Unsupported UCS-4 endianness ("+type+") detected"); } /* ///////////////////////////////////////////////////////////////// // Internal methods, raw input access ///////////////////////////////////////////////////////////////// */ protected boolean ensureLoaded(int minimum) throws IOException { /* Let's assume here buffer has enough room -- this will always * be true for the limited used this method gets */ int gotten = (mInputEnd - mInputPtr); while (gotten < minimum) { int count; if (mIn == null) { // block source count = -1; } else { count = mIn.read(mInputBuffer, mInputEnd, mInputBuffer.length - mInputEnd); } if (count < 1) { return false; } mInputEnd += count; gotten += count; } return true; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy