All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fasterxml.jackson.dataformat.smile.SmileParser Maven / Gradle / Ivy

Go to download

Support for reading and writing Smile ("binary JSON") encoded data using Jackson abstractions (streaming API, data binding, tree model)

There is a newer version: 2.18.2
Show newest version
package com.fasterxml.jackson.dataformat.smile;

import java.io.*;
import java.lang.ref.SoftReference;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Arrays;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.base.ParserBase;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;

import static com.fasterxml.jackson.dataformat.smile.SmileConstants.BYTE_MARKER_END_OF_STRING;

public class SmileParser extends ParserBase
{
    /**
     * Enumeration that defines all togglable features for Smile generators.
     */
    public enum Feature implements FormatFeature
    {
        /**
         * Feature that determines whether 4-byte Smile header is mandatory in input,
         * or optional. If enabled, it means that only input that starts with the header
         * is accepted as valid; if disabled, header is optional. In latter case,r 
         * settings for content are assumed to be defaults.
         */
        REQUIRE_HEADER(true)
        ;

        final boolean _defaultState;
        final int _mask;

        /**
         * Method that calculates bit set (flags) of all features that
         * are enabled by default.
         */
        public static int collectDefaults()
        {
            int flags = 0;
            for (Feature f : values()) {
                if (f.enabledByDefault()) {
                    flags |= f.getMask();
                }
            }
            return flags;
        }

        private Feature(boolean defaultState) {
            _defaultState = defaultState;
            _mask = (1 << ordinal());
        }

        @Override public boolean enabledByDefault() { return _defaultState; }
        @Override public int getMask() { return _mask; }
        @Override public boolean enabledIn(int flags) { return (flags & getMask()) != 0; }    
    }

    private final static int[] NO_INTS = new int[0];

    private final static String[] NO_STRINGS = new String[0];

    /*
    /**********************************************************
    /* Configuration
    /**********************************************************
     */
    
    /**
     * Codec used for data binding when (if) requested.
     */
    protected ObjectCodec _objectCodec;

    /**
     * Flag that indicates whether content can legally have raw (unquoted)
     * binary data. Since this information is included both in header and
     * in actual binary data blocks there is redundancy, and we want to
     * ensure settings are compliant. Using application may also want to
     * know this setting in case it does some direct (random) access.
     */
    protected boolean _mayContainRawBinary;

    /**
     * Helper object used for low-level recycling of Smile-generator
     * specific buffers.
     */
    final protected SmileBufferRecycler _smileBufferRecycler;

    /*
    /**********************************************************
    /* Input source config, state (from ex StreamBasedParserBase)
    /**********************************************************
     */

    /**
     * Input stream that can be used for reading more content, if one
     * in use. May be null, if input comes just as a full buffer,
     * or if the stream has been closed.
     */
    protected InputStream _inputStream;

    /**
     * Current buffer from which data is read; generally data is read into
     * buffer from input source, but in some cases pre-loaded buffer
     * is handed to the parser.
     */
    protected byte[] _inputBuffer;

    /**
     * Bit flag composed of bits that indicate which
     * {@link SmileParser.Feature}s are enabled.
     *

* NOTE: currently the only feature ({@link SmileParser.Feature#REQUIRE_HEADER} * takes effect during bootstrapping. */ protected int _formatFeatures; /** * Flag that indicates whether the input buffer is recycable (and * needs to be returned to recycler once we are done) or not. *

* If it is not, it also means that parser can NOT modify underlying * buffer. */ protected boolean _bufferRecyclable; /* /********************************************************** /* Additional parsing state /********************************************************** */ /** * Flag that indicates that the current token has not yet * been fully processed, and needs to be finished for * some access (or skipped to obtain the next token) */ protected boolean _tokenIncomplete = false; /** * Type byte of the current token (as in) */ protected int _typeAsInt; /** * Specific flag that is set when we encountered a 32-bit * floating point value; needed since numeric super classes do * not track distinction between float and double, but Smile * format does, and we want to retain that separation. */ protected boolean _got32BitFloat; /** * Alternative to {@link #_tokenInputTotal} that will only contain * offset within input buffer, as int. */ protected int _tokenOffsetForTotal; /* /********************************************************** /* Symbol handling, decoding /********************************************************** */ /** * Symbol table that contains field names encountered so far */ final protected ByteQuadsCanonicalizer _symbols; /** * Temporary buffer used for name parsing. */ protected int[] _quadBuffer = NO_INTS; /** * Quads used for hash calculation */ protected int _quad1, _quad2, _quad3; /** * Array of recently seen field names, which may be back referenced * by later fields. * Defaults set to enable handling even if no header found. */ protected String[] _seenNames = NO_STRINGS; protected int _seenNameCount = 0; /** * Array of recently seen field names, which may be back referenced * by later fields * Defaults set to disable handling if no header found. */ protected String[] _seenStringValues = null; protected int _seenStringValueCount = -1; /* /********************************************************** /* Thread-local recycling /********************************************************** */ /** * ThreadLocal contains a {@link java.lang.ref.SoftReference} * to a buffer recycler used to provide a low-cost * buffer recycling for Smile-specific buffers. */ final protected static ThreadLocal>> _smileRecyclerRef = new ThreadLocal>>(); /* /********************************************************** /* Life-cycle /********************************************************** */ public SmileParser(IOContext ctxt, int parserFeatures, int smileFeatures, ObjectCodec codec, ByteQuadsCanonicalizer sym, InputStream in, byte[] inputBuffer, int start, int end, boolean bufferRecyclable) { super(ctxt, parserFeatures); _objectCodec = codec; _symbols = sym; _formatFeatures = smileFeatures; _inputStream = in; _inputBuffer = inputBuffer; _inputPtr = start; _inputEnd = end; _bufferRecyclable = bufferRecyclable; _tokenInputRow = -1; _tokenInputCol = -1; _smileBufferRecycler = _smileBufferRecycler(); } @Override public ObjectCodec getCodec() { return _objectCodec; } @Override public void setCodec(ObjectCodec c) { _objectCodec = c; } /** * Helper method called when it looks like input might contain the signature; * and it is necessary to detect and handle signature to get configuration * information it might have. * * @return True if valid signature was found and handled; false if not */ protected boolean handleSignature(boolean consumeFirstByte, boolean throwException) throws IOException { if (consumeFirstByte) { ++_inputPtr; } if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } if (_inputBuffer[_inputPtr] != SmileConstants.HEADER_BYTE_2) { if (throwException) { _reportError("Malformed content: signature not valid, starts with 0x3a but followed by 0x" +Integer.toHexString(_inputBuffer[_inputPtr])+", not 0x29"); } return false; } if (++_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } if (_inputBuffer[_inputPtr] != SmileConstants.HEADER_BYTE_3) { if (throwException) { _reportError("Malformed content: signature not valid, starts with 0x3a, 0x29, but followed by 0x" +Integer.toHexString(_inputBuffer[_inputPtr])+", not 0xA"); } return false; } // Good enough; just need version info from 4th byte... if (++_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int ch = _inputBuffer[_inputPtr++]; int versionBits = (ch >> 4) & 0x0F; // but failure with version number is fatal, can not ignore if (versionBits != SmileConstants.HEADER_VERSION_0) { _reportError("Header version number bits (0x"+Integer.toHexString(versionBits)+") indicate unrecognized version; only 0x0 handled by parser"); } // can avoid tracking names, if explicitly disabled if ((ch & SmileConstants.HEADER_BIT_HAS_SHARED_NAMES) == 0) { _seenNames = null; _seenNameCount = -1; } // conversely, shared string values must be explicitly enabled if ((ch & SmileConstants.HEADER_BIT_HAS_SHARED_STRING_VALUES) != 0) { _seenStringValues = NO_STRINGS; _seenStringValueCount = 0; } _mayContainRawBinary = ((ch & SmileConstants.HEADER_BIT_HAS_RAW_BINARY) != 0); return true; } protected final static SmileBufferRecycler _smileBufferRecycler() { SoftReference> ref = _smileRecyclerRef.get(); SmileBufferRecycler br = (ref == null) ? null : ref.get(); if (br == null) { br = new SmileBufferRecycler(); _smileRecyclerRef.set(new SoftReference>(br)); } return br; } /* /********************************************************** /* Versioned /********************************************************** */ @Override public Version version() { return PackageVersion.VERSION; } /* /********************************************************** /* FormatFeature support /********************************************************** */ @Override public int getFormatFeatures() { return _formatFeatures; } @Override public JsonParser overrideFormatFeatures(int values, int mask) { _formatFeatures = (_formatFeatures & ~mask) | (values & mask); return this; } /* /********************************************************** /* Former StreamBasedParserBase methods /********************************************************** */ @Override public int releaseBuffered(OutputStream out) throws IOException { int count = _inputEnd - _inputPtr; if (count < 1) { return 0; } // let's just advance ptr to end int origPtr = _inputPtr; out.write(_inputBuffer, origPtr, count); return count; } @Override public Object getInputSource() { return _inputStream; } /** * Overridden since we do not really have character-based locations, * but we do have byte offset to specify. */ @Override public JsonLocation getTokenLocation() { // token location is correctly managed... long total = _currInputProcessed + _tokenOffsetForTotal; // 2.4: used to be: _tokenInputTotal return new JsonLocation(_ioContext.getSourceReference(), total, // bytes -1, -1, (int) total); // char offset, line, column } /** * Overridden since we do not really have character-based locations, * but we do have byte offset to specify. */ @Override public JsonLocation getCurrentLocation() { final long offset = _currInputProcessed + _inputPtr; return new JsonLocation(_ioContext.getSourceReference(), offset, // bytes -1, -1, (int) offset); // char offset, line, column } /* /********************************************************** /* Low-level reading, other /********************************************************** */ @Override protected final boolean loadMore() throws IOException { //_currInputRowStart -= _inputEnd; if (_inputStream != null) { int count = _inputStream.read(_inputBuffer, 0, _inputBuffer.length); _currInputProcessed += _inputEnd; _inputPtr = 0; if (count > 0) { _inputEnd = count; return true; } // important: move pointer to same as end, to keep location accurate _inputEnd = 0; // End of input _closeInput(); // Should never return 0, so let's fail if (count == 0) { throw new IOException("InputStream.read() returned 0 characters when trying to read "+_inputBuffer.length+" bytes"); } } return false; } /** * Helper method that will try to load at least specified number bytes in * input buffer, possible moving existing data around if necessary */ protected final void _loadToHaveAtLeast(int minAvailable) throws IOException { // No input stream, no leading (either we are closed, or have non-stream input source) if (_inputStream == null) { throw _constructError("Needed to read "+minAvailable+" bytes, reached end-of-input"); } // Need to move remaining data in front? int amount = _inputEnd - _inputPtr; _currInputProcessed += _inputPtr; if (amount > 0 && _inputPtr > 0) { //_currInputRowStart -= _inputPtr; System.arraycopy(_inputBuffer, _inputPtr, _inputBuffer, 0, amount); _inputEnd = amount; } else { _inputEnd = 0; } _inputPtr = 0; while (_inputEnd < minAvailable) { int count = _inputStream.read(_inputBuffer, _inputEnd, _inputBuffer.length - _inputEnd); if (count < 1) { // End of input _closeInput(); // Should never return 0, so let's fail if (count == 0) { throw new IOException("InputStream.read() returned 0 characters when trying to read "+amount+" bytes"); } throw _constructError("Needed to read "+minAvailable+" bytes, missed "+minAvailable+" before end-of-input"); } _inputEnd += count; } } @Override protected void _closeInput() throws IOException { if (_inputStream != null) { if (_ioContext.isResourceManaged() || isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE)) { _inputStream.close(); } _inputStream = null; } } /* /********************************************************** /* Overridden methods /********************************************************** */ @Override protected void _finishString() throws IOException { // should never be called; but must be defined for superclass _throwInternal(); } @Override public void close() throws IOException { super.close(); // Merge found symbols, if any: _symbols.release(); } @Override public boolean hasTextCharacters() { if (_currToken == JsonToken.VALUE_STRING) { // yes; is or can be made available efficiently as char[] return _textBuffer.hasTextAsCharacters(); } if (_currToken == JsonToken.FIELD_NAME) { // not necessarily; possible but: return _nameCopied; } // other types, no benefit from accessing as char[] return false; } /** * Method called to release internal buffers owned by the base * reader. This may be called along with {@link #_closeInput} (for * example, when explicitly closing this reader instance), or * separately (if need be). */ @Override protected void _releaseBuffers() throws IOException { super._releaseBuffers(); if (_bufferRecyclable) { byte[] buf = _inputBuffer; if (buf != null) { _inputBuffer = null; _ioContext.releaseReadIOBuffer(buf); } } { String[] nameBuf = _seenNames; if (nameBuf != null && nameBuf.length > 0) { _seenNames = null; /* 28-Jun-2011, tatu: With 1.9, caller needs to clear the buffer; * but we only need to clear up to count as it is not a hash area */ if (_seenNameCount > 0) { Arrays.fill(nameBuf, 0, _seenNameCount, null); } _smileBufferRecycler.releaseSeenNamesBuffer(nameBuf); } } { String[] valueBuf = _seenStringValues; if (valueBuf != null && valueBuf.length > 0) { _seenStringValues = null; /* 28-Jun-2011, tatu: With 1.9, caller needs to clear the buffer; * but we only need to clear up to count as it is not a hash area */ if (_seenStringValueCount > 0) { Arrays.fill(valueBuf, 0, _seenStringValueCount, null); } _smileBufferRecycler.releaseSeenStringValuesBuffer(valueBuf); } } } /* /********************************************************** /* Extended API /********************************************************** */ public boolean mayContainRawBinary() { return _mayContainRawBinary; } /* /********************************************************** /* JsonParser impl /********************************************************** */ @Override public JsonToken nextToken() throws IOException { _numTypesValid = NR_UNKNOWN; // For longer tokens (text, binary), we'll only read when requested if (_tokenIncomplete) { _skipIncomplete(); } _tokenOffsetForTotal = _inputPtr; // _tokenInputTotal = _currInputProcessed + _inputPtr; // also: clear any data retained so far _binaryValue = null; // Two main modes: values, and field names. if ((_currToken != JsonToken.FIELD_NAME) && _parsingContext.inObject()) { return (_currToken = _handleFieldName()); } if (_inputPtr >= _inputEnd) { if (!loadMore()) { return _eofAsNextToken(); } } int ch = _inputBuffer[_inputPtr++] & 0xFF; _typeAsInt = ch; switch (ch >> 5) { case 0: // short shared string value reference if (ch != 0) { // 0x0 is invalid return _handleSharedString(ch-1); } break; case 1: // simple literals, numbers { int typeBits = ch & 0x1F; if (typeBits < 4) { switch (typeBits) { case 0x00: _textBuffer.resetWithEmpty(); return (_currToken = JsonToken.VALUE_STRING); case 0x01: return (_currToken = JsonToken.VALUE_NULL); case 0x02: // false return (_currToken = JsonToken.VALUE_FALSE); default: // 0x03 == true return (_currToken = JsonToken.VALUE_TRUE); } } if (typeBits == 4) { _finishInt(); return (_currToken = JsonToken.VALUE_NUMBER_INT); } // next 3 bytes define subtype if (typeBits <= 6) { // VInt (zigzag), BigInteger _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_NUMBER_INT); } if (typeBits < 11 && typeBits != 7) { // floating-point _got32BitFloat = (typeBits == 8); _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_NUMBER_FLOAT); } if (typeBits == 0x1A) { // == 0x3A == ':' -> possibly header signature for next chunk? if (handleSignature(false, false)) { /* Ok, now; end-marker and header both imply doc boundary and a * 'null token'; but if both are seen, they are collapsed. * We can check this by looking at current token; if it's null, * need to get non-null token */ if (_currToken == null) { return nextToken(); } return (_currToken = null); } _reportError("Unrecognized token byte 0x3A (malformed segment header?"); } } // and everything else is reserved, for now break; case 2: // tiny ASCII // fall through case 3: // short ASCII // fall through case 4: // tiny Unicode // fall through case 5: // short Unicode // No need to decode, unless we have to keep track of back-references (for shared string values) if (_seenStringValueCount >= 0) { // shared text values enabled return _addSeenStringValue(); } _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_STRING); case 6: // small integers; zigzag encoded _numberInt = SmileUtil.zigzagDecode(ch & 0x1F); _numTypesValid = NR_INT; return (_currToken = JsonToken.VALUE_NUMBER_INT); case 7: // binary/long-text/long-shared/start-end-markers switch (ch & 0x1F) { case 0x00: // long variable length ASCII case 0x04: // long variable length unicode _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_STRING); case 0x08: // binary, 7-bit (0xE8) _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_EMBEDDED_OBJECT); case 0x0C: // long shared string (0xEC) case 0x0D: case 0x0E: case 0x0F: if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } return _handleSharedString(((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF)); case 0x18: // START_ARRAY _parsingContext = _parsingContext.createChildArrayContext(-1, -1); return (_currToken = JsonToken.START_ARRAY); case 0x19: // END_ARRAY if (!_parsingContext.inArray()) { _reportMismatchedEndMarker(']', '}'); } _parsingContext = _parsingContext.getParent(); return (_currToken = JsonToken.END_ARRAY); case 0x1A: // START_OBJECT _parsingContext = _parsingContext.createChildObjectContext(-1, -1); return (_currToken = JsonToken.START_OBJECT); case 0x1B: // not used in this mode; would be END_OBJECT _reportError("Invalid type marker byte 0xFB in value mode (would be END_OBJECT in key mode)"); case 0x1D: // binary, raw _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_EMBEDDED_OBJECT); case 0x1F: // 0xFF, end of content return (_currToken = null); } break; } // If we get this far, type byte is corrupt _reportError("Invalid type marker byte 0x"+Integer.toHexString(ch & 0xFF)+" for expected value token"); return null; } private final JsonToken _handleSharedString(int index) throws IOException { if (index >= _seenStringValueCount) { _reportInvalidSharedStringValue(index); } _textBuffer.resetWithString(_seenStringValues[index]); return (_currToken = JsonToken.VALUE_STRING); } private final JsonToken _addSeenStringValue() throws IOException { _finishToken(); String v = _textBuffer.contentsAsString(); if (_seenStringValueCount < _seenStringValues.length) { // !!! TODO: actually only store char[], first time around? _seenStringValues[_seenStringValueCount++] = v; } else { _expandSeenStringValues(v); } return (_currToken = JsonToken.VALUE_STRING); } private final void _expandSeenStringValues(String newText) { String[] oldShared = _seenStringValues; int len = oldShared.length; String[] newShared; if (len == 0) { newShared = _smileBufferRecycler.allocSeenStringValuesBuffer(); if (newShared == null) { newShared = new String[SmileBufferRecycler.DEFAULT_STRING_VALUE_BUFFER_LENGTH]; } } else if (len == SmileConstants.MAX_SHARED_STRING_VALUES) { // too many? Just flush... newShared = oldShared; _seenStringValueCount = 0; // could also clear, but let's not yet bother } else { int newSize = (len == SmileBufferRecycler.DEFAULT_NAME_BUFFER_LENGTH) ? 256 : SmileConstants.MAX_SHARED_STRING_VALUES; newShared = Arrays.copyOf(oldShared, newSize); } _seenStringValues = newShared; _seenStringValues[_seenStringValueCount++] = newText; } // base impl is fine: //public String getCurrentName() throws IOException @Override public NumberType getNumberType() throws IOException { if (_got32BitFloat && _currToken == JsonToken.VALUE_NUMBER_FLOAT) { return NumberType.FLOAT; } return super.getNumberType(); } /* /********************************************************** /* Optimized accessors, isXxx, nextXxx (except for nextToken() /********************************************************** */ // Not (yet?) overridden, as of 2.6 /* public boolean hasTokenId(int id) { return super.hasTokenId(id); } */ //public boolean isExpectedStartArrayToken() { return getCurrentToken() == JsonToken.START_ARRAY; } //public boolean isExpectedStartObjectToken() { return getCurrentToken() == JsonToken.START_OBJECT; } @Override public boolean nextFieldName(SerializableString str) throws IOException { // Two parsing modes; can only succeed if expecting field name, so handle that first: if (_currToken != JsonToken.FIELD_NAME && _parsingContext.inObject()) { // first, clear up state _numTypesValid = NR_UNKNOWN; if (_tokenIncomplete) { _skipIncomplete(); } _tokenOffsetForTotal = _inputPtr; _binaryValue = null; byte[] nameBytes = str.asQuotedUTF8(); final int byteLen = nameBytes.length; // need room for type byte, name bytes, possibly end marker, so: if ((_inputPtr + byteLen + 1) < _inputEnd) { // maybe... int ptr = _inputPtr; int ch = _inputBuffer[ptr++] & 0xFF; _typeAsInt = ch; main_switch: switch (ch >> 6) { case 0: // misc, including end marker switch (ch) { case 0x20: // empty String as name, legal if unusual _currToken = JsonToken.FIELD_NAME; _inputPtr = ptr; _parsingContext.setCurrentName(""); return (byteLen == 0); case 0x30: // long shared case 0x31: case 0x32: case 0x33: { int index = ((ch & 0x3) << 8) + (_inputBuffer[ptr++] & 0xFF); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } String name = _seenNames[index]; _parsingContext.setCurrentName(name); _inputPtr = ptr; _currToken = JsonToken.FIELD_NAME; return (name.equals(str.getValue())); } //case 0x34: // long ASCII/Unicode name; let's not even try... } break; case 1: // short shared, can fully process { int index = (ch & 0x3F); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } _parsingContext.setCurrentName(_seenNames[index]); String name = _seenNames[index]; _parsingContext.setCurrentName(name); _inputPtr = ptr; _currToken = JsonToken.FIELD_NAME; return (name.equals(str.getValue())); } case 2: // short ASCII { int len = 1 + (ch & 0x3f); if (len == byteLen) { int i = 0; for (; i < len; ++i) { if (nameBytes[i] != _inputBuffer[ptr+i]) { break main_switch; } } // yes, does match... _inputPtr = ptr + len; final String name = str.getValue(); if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; return true; } } break; case 3: // short Unicode // all valid, except for 0xFF { int len = (ch & 0x3F); if (len > 0x37) { if (len == 0x3B) { _currToken = JsonToken.END_OBJECT; if (!_parsingContext.inObject()) { _reportMismatchedEndMarker('}', ']'); } _inputPtr = ptr; _parsingContext = _parsingContext.getParent(); return false; } // error, but let's not worry about that here break; } len += 2; // values from 2 to 57... if (len == byteLen) { int i = 0; for (; i < len; ++i) { if (nameBytes[i] != _inputBuffer[ptr+i]) { break main_switch; } } // yes, does match... _inputPtr = ptr + len; final String name = str.getValue(); if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; return true; } } break; } } // wouldn't fit in buffer, just fall back to default processing } // otherwise just fall back to default handling; should occur rarely return (nextToken() == JsonToken.FIELD_NAME) && str.getValue().equals(getCurrentName()); } @Override public String nextFieldName() throws IOException { // Two parsing modes; can only succeed if expecting field name, so handle that first: if (_currToken != JsonToken.FIELD_NAME && _parsingContext.inObject()) { // first, clear up state _numTypesValid = NR_UNKNOWN; if (_tokenIncomplete) { _skipIncomplete(); } _tokenOffsetForTotal = _inputPtr; _binaryValue = null; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int ch = _inputBuffer[_inputPtr++] & 0xFF; // is this needed? _typeAsInt = ch; switch (ch >> 6) { case 0: // misc, including end marker switch (ch) { case 0x20: // empty String as name, legal if unusual _parsingContext.setCurrentName(""); _currToken = JsonToken.FIELD_NAME; return ""; case 0x30: // long shared case 0x31: case 0x32: case 0x33: if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } { int index = ((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } String name = _seenNames[index]; _parsingContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; return name; } case 0x34: // long ASCII/Unicode name _handleLongFieldName(); return getCurrentName(); } break; case 1: // short shared, can fully process { int index = (ch & 0x3F); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } String name = _seenNames[index]; _parsingContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; return name; } case 2: // short ASCII { int len = 1 + (ch & 0x3f); String name = _findDecodedFromSymbols(len); if (name != null) { _inputPtr += len; } else { name = _decodeShortAsciiName(len); name = _addDecodedToSymbols(len, name); } if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; return name; } case 3: // short Unicode // all valid, except for 0xFF ch &= 0x3F; { if (ch > 0x37) { if (ch == 0x3B) { if (!_parsingContext.inObject()) { _reportMismatchedEndMarker('}', ']'); } _parsingContext = _parsingContext.getParent(); _currToken = JsonToken.END_OBJECT; return null; } } else { final int len = ch + 2; // values from 2 to 57... String name = _findDecodedFromSymbols(len); if (name != null) { _inputPtr += len; } else { name = _decodeShortUnicodeName(len); name = _addDecodedToSymbols(len, name); } if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); _currToken = JsonToken.FIELD_NAME; return name; } } break; } // Other byte values are illegal _reportError("Invalid type marker byte 0x"+Integer.toHexString(_typeAsInt)+" for expected field name (or END_OBJECT marker)"); return null; } // otherwise just fall back to default handling; should occur rarely return (nextToken() == JsonToken.FIELD_NAME) ? getCurrentName() : null; } @Override public String nextTextValue() throws IOException { // can't get text value if expecting name, so if (!_parsingContext.inObject() || _currToken == JsonToken.FIELD_NAME) { if (_tokenIncomplete) { _skipIncomplete(); } int ptr = _inputPtr; if (ptr >= _inputEnd) { if (!loadMore()) { _eofAsNextToken(); return null; } ptr = _inputPtr; } _tokenOffsetForTotal = ptr; // _tokenInputTotal = _currInputProcessed + _inputPtr; int ch = _inputBuffer[ptr++] & 0xFF; _typeAsInt = ch; // also: clear any data retained so far _binaryValue = null; switch (ch >> 5) { case 0: // short shared string value reference if (ch != 0) { // _handleSharedString... --ch; if (ch >= _seenStringValueCount) { _reportInvalidSharedStringValue(ch); } _inputPtr = ptr; String text = _seenStringValues[ch]; _textBuffer.resetWithString(text); _currToken = JsonToken.VALUE_STRING; return text; } else { // important: this is invalid, don't accept _reportError("Invalid token byte 0x00"); } case 1: // simple literals, numbers { int typeBits = ch & 0x1F; if (typeBits == 0x00) { _inputPtr = ptr; _textBuffer.resetWithEmpty(); _currToken = JsonToken.VALUE_STRING; return ""; } } break; case 2: // tiny ASCII // fall through case 3: // short ASCII _currToken = JsonToken.VALUE_STRING; _inputPtr = ptr; { final String text = _decodeShortAsciiValue(1 + (ch & 0x3F)); if (_seenStringValueCount >= 0) { // shared text values enabled if (_seenStringValueCount < _seenStringValues.length) { _seenStringValues[_seenStringValueCount++] = text; } else { _expandSeenStringValues(text); } } return text; } case 4: // tiny Unicode // fall through case 5: // short Unicode _currToken = JsonToken.VALUE_STRING; _inputPtr = ptr; { final String text = _decodeShortUnicodeValue(2 + (ch & 0x3F)); if (_seenStringValueCount >= 0) { // shared text values enabled if (_seenStringValueCount < _seenStringValues.length) { _seenStringValues[_seenStringValueCount++] = text; } else { _expandSeenStringValues(text); } } return text; } case 6: // small integers; zigzag encoded break; case 7: // binary/long-text/long-shared/start-end-markers // TODO: support longer strings too? /* switch (ch & 0x1F) { case 0x00: // long variable length ASCII case 0x04: // long variable length unicode _tokenIncomplete = true; return (_currToken = JsonToken.VALUE_STRING); case 0x08: // binary, 7-bit break main; case 0x0C: // long shared string case 0x0D: case 0x0E: case 0x0F: if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } return _handleSharedString(((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF)); } break; */ break; } } // otherwise fall back to generic handling (note: we do NOT assign 'ptr') return (nextToken() == JsonToken.VALUE_STRING) ? getText() : null; } @Override public int nextIntValue(int defaultValue) throws IOException { if (nextToken() == JsonToken.VALUE_NUMBER_INT) { return getIntValue(); } return defaultValue; } @Override public long nextLongValue(long defaultValue) throws IOException { if (nextToken() == JsonToken.VALUE_NUMBER_INT) { return getLongValue(); } return defaultValue; } @Override public Boolean nextBooleanValue() throws IOException { switch (nextToken()) { case VALUE_TRUE: return Boolean.TRUE; case VALUE_FALSE: return Boolean.FALSE; default: return null; } } /* /********************************************************** /* Public API, access to token information, text /********************************************************** */ /** * Method for accessing textual representation of the current event; * if no current event (before first call to {@link #nextToken}, or * after encountering end-of-input), returns null. * Method can be called for any event. */ @Override public String getText() throws IOException { if (_tokenIncomplete) { _tokenIncomplete = false; // Let's inline part of "_finishToken", common case int tb = _typeAsInt; int type = (tb >> 5); if (type == 2 || type == 3) { // tiny & short ASCII return _decodeShortAsciiValue(1 + (tb & 0x3F)); } if (type == 4 || type == 5) { // tiny & short Unicode // short unicode; note, lengths 2 - 65 (off-by-one compared to ASCII) return _decodeShortUnicodeValue(2 + (tb & 0x3F)); } _finishToken(); } if (_currToken == JsonToken.VALUE_STRING) { return _textBuffer.contentsAsString(); } JsonToken t = _currToken; if (t == null) { // null only before/after document return null; } if (t == JsonToken.FIELD_NAME) { return _parsingContext.getCurrentName(); } if (t.isNumeric()) { // TODO: optimize? return getNumberValue().toString(); } return _currToken.asString(); } @Override public char[] getTextCharacters() throws IOException { if (_currToken != null) { // null only before/after document if (_tokenIncomplete) { _finishToken(); } if (_currToken == JsonToken.VALUE_STRING) { return _textBuffer.getTextBuffer(); } if (_currToken == JsonToken.FIELD_NAME) { if (!_nameCopied) { String name = _parsingContext.getCurrentName(); int nameLen = name.length(); if (_nameCopyBuffer == null) { _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen); } else if (_nameCopyBuffer.length < nameLen) { _nameCopyBuffer = new char[nameLen]; } name.getChars(0, nameLen, _nameCopyBuffer, 0); _nameCopied = true; } return _nameCopyBuffer; } if (_currToken.isNumeric()) { // TODO: optimize? return getNumberValue().toString().toCharArray(); } return _currToken.asCharArray(); } return null; } @Override public int getTextLength() throws IOException { if (_currToken != null) { // null only before/after document if (_tokenIncomplete) { _finishToken(); } if (_currToken == JsonToken.VALUE_STRING) { return _textBuffer.size(); } switch (_currToken) { case FIELD_NAME: return _parsingContext.getCurrentName().length(); // fall through case VALUE_NUMBER_INT: case VALUE_NUMBER_FLOAT: // TODO: optimize return getNumberValue().toString().length(); default: return _currToken.asCharArray().length; } } return 0; } @Override public int getTextOffset() throws IOException { return 0; } @Override public String getValueAsString() throws IOException { // inlined 'getText()' for common case of having String if (_tokenIncomplete) { _tokenIncomplete = false; int tb = _typeAsInt; int type = (tb >> 5); if (type == 2 || type == 3) { // tiny & short ASCII return _decodeShortAsciiValue(1 + (tb & 0x3F)); } if (type == 4 || type == 5) { // tiny & short Unicode return _decodeShortUnicodeValue(2 + (tb & 0x3F)); } _finishToken(); } if (_currToken == JsonToken.VALUE_STRING) { return _textBuffer.contentsAsString(); } if (_currToken == null || _currToken == JsonToken.VALUE_NULL || !_currToken.isScalarValue()) { return null; } return getText(); } @Override public String getValueAsString(String defaultValue) throws IOException { if (_currToken != JsonToken.VALUE_STRING) { if (_currToken == null || _currToken == JsonToken.VALUE_NULL || !_currToken.isScalarValue()) { return defaultValue; } } return getText(); } /* /********************************************************** /* Public API, access to token information, binary /********************************************************** */ @Override public byte[] getBinaryValue(Base64Variant b64variant) throws IOException { if (_tokenIncomplete) { _finishToken(); } if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT) { // Todo, maybe: support base64 for text? _reportError("Current token ("+_currToken+") not VALUE_EMBEDDED_OBJECT, can not access as binary"); } return _binaryValue; } @Override public Object getEmbeddedObject() throws IOException { if (_tokenIncomplete) { _finishToken(); } if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT ) { return _binaryValue; } return null; } @Override public int readBinaryValue(Base64Variant b64variant, OutputStream out) throws IOException { if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) { // Todo, maybe: support base64 for text? _reportError("Current token ("+_currToken+") not VALUE_EMBEDDED_OBJECT, can not access as binary"); } // Ok, first, unlikely (but legal?) case where someone already requested binary data: if (!_tokenIncomplete) { if (_binaryValue == null) { // most likely already read... return 0; } final int len = _binaryValue.length; out.write(_binaryValue, 0, len); return len; } // otherwise, handle, mark as complete // first, raw inlined binary data (simple) if (_typeAsInt == SmileConstants.INT_MISC_BINARY_RAW) { final int totalCount = _readUnsignedVInt(); int left = totalCount; while (left > 0) { int avail = _inputEnd - _inputPtr; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); avail = _inputEnd - _inputPtr; } int count = Math.min(avail, left); out.write(_inputBuffer, _inputPtr, count); _inputPtr += count; left -= count; } _tokenIncomplete = false; return totalCount; } if (_typeAsInt != SmileConstants.INT_MISC_BINARY_7BIT) { _throwInternal(); } // or, alternative, 7-bit encoded stuff: final int totalCount = _readUnsignedVInt(); byte[] encodingBuffer = _ioContext.allocBase64Buffer(); try { _readBinaryEncoded(out, totalCount, encodingBuffer); } finally { _ioContext.releaseBase64Buffer(encodingBuffer); } _tokenIncomplete = false; return totalCount; } private void _readBinaryEncoded(OutputStream out, int length, byte[] buffer) throws IOException { int outPtr = 0; final int lastSafeOut = buffer.length - 7; // first handle all full 7/8 units while (length > 7) { if ((_inputEnd - _inputPtr) < 8) { _loadToHaveAtLeast(8); } int i1 = (_inputBuffer[_inputPtr++] << 25) + (_inputBuffer[_inputPtr++] << 18) + (_inputBuffer[_inputPtr++] << 11) + (_inputBuffer[_inputPtr++] << 4); int x = _inputBuffer[_inputPtr++]; i1 += x >> 3; int i2 = ((x & 0x7) << 21) + (_inputBuffer[_inputPtr++] << 14) + (_inputBuffer[_inputPtr++] << 7) + _inputBuffer[_inputPtr++]; // Ok: got our 7 bytes, just need to split, copy buffer[outPtr++] = (byte)(i1 >> 24); buffer[outPtr++] = (byte)(i1 >> 16); buffer[outPtr++] = (byte)(i1 >> 8); buffer[outPtr++] = (byte)i1; buffer[outPtr++] = (byte)(i2 >> 16); buffer[outPtr++] = (byte)(i2 >> 8); buffer[outPtr++] = (byte)i2; length -= 7; // ensure there's always room for at least 7 bytes more after looping: if (outPtr > lastSafeOut) { out.write(buffer, 0, outPtr); outPtr = 0; } } // and then leftovers: n+1 bytes to decode n bytes if (length > 0) { if ((_inputEnd - _inputPtr) < (length+1)) { _loadToHaveAtLeast(length+1); } int value = _inputBuffer[_inputPtr++]; for (int i = 1; i < length; ++i) { value = (value << 7) + _inputBuffer[_inputPtr++]; buffer[outPtr++] = (byte) (value >> (7 - i)); } // last byte is different, has remaining 1 - 6 bits, right-aligned value <<= length; buffer[outPtr++] = (byte) (value + _inputBuffer[_inputPtr++]); } if (outPtr > 0) { out.write(buffer, 0, outPtr); } } /* /********************************************************** /* Internal methods, field name parsing /********************************************************** */ /** * Method that handles initial token type recognition for token * that has to be either FIELD_NAME or END_OBJECT. */ protected final JsonToken _handleFieldName() throws IOException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int ch = _inputBuffer[_inputPtr++] & 0xFF; // is this needed? _typeAsInt = ch; switch (ch >> 6) { case 0: // misc, including end marker switch (ch) { case 0x20: // empty String as name, legal if unusual _parsingContext.setCurrentName(""); return JsonToken.FIELD_NAME; case 0x30: // long shared case 0x31: case 0x32: case 0x33: if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } { int index = ((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } _parsingContext.setCurrentName(_seenNames[index]); } return JsonToken.FIELD_NAME; case 0x34: // long ASCII/Unicode name _handleLongFieldName(); return JsonToken.FIELD_NAME; } break; case 1: // short shared, can fully process { int index = (ch & 0x3F); if (index >= _seenNameCount) { _reportInvalidSharedName(index); } _parsingContext.setCurrentName(_seenNames[index]); } return JsonToken.FIELD_NAME; case 2: // short ASCII { int len = 1 + (ch & 0x3f); String name = _findDecodedFromSymbols(len); if (name != null) { _inputPtr += len; } else { name = _decodeShortAsciiName(len); name = _addDecodedToSymbols(len, name); } if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); } return JsonToken.FIELD_NAME; case 3: // short Unicode // all valid, except for 0xFF ch &= 0x3F; { if (ch > 0x37) { if (ch == 0x3B) { if (!_parsingContext.inObject()) { _reportMismatchedEndMarker('}', ']'); } _parsingContext = _parsingContext.getParent(); return JsonToken.END_OBJECT; } } else { final int len = ch + 2; // values from 2 to 57... String name = _findDecodedFromSymbols(len); if (name != null) { _inputPtr += len; } else { name = _decodeShortUnicodeName(len); name = _addDecodedToSymbols(len, name); } if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); return JsonToken.FIELD_NAME; } } break; } // Other byte values are illegal _reportError("Invalid type marker byte 0x"+Integer.toHexString(_typeAsInt)+" for expected field name (or END_OBJECT marker)"); return null; } /** * Method called to try to expand shared name area to fit one more potentially * shared String. If area is already at its biggest size, will just clear * the area (by setting next-offset to 0) */ private final String[] _expandSeenNames(String[] oldShared) { int len = oldShared.length; String[] newShared; if (len == 0) { newShared = _smileBufferRecycler.allocSeenNamesBuffer(); if (newShared == null) { newShared = new String[SmileBufferRecycler.DEFAULT_NAME_BUFFER_LENGTH]; } } else if (len == SmileConstants.MAX_SHARED_NAMES) { // too many? Just flush... newShared = oldShared; _seenNameCount = 0; // could also clear, but let's not yet bother } else { int newSize = (len == SmileBufferRecycler.DEFAULT_STRING_VALUE_BUFFER_LENGTH) ? 256 : SmileConstants.MAX_SHARED_NAMES; newShared = Arrays.copyOf(oldShared, newSize); } return newShared; } private final String _addDecodedToSymbols(int len, String name) { if (len < 5) { return _symbols.addName(name, _quad1); } if (len < 9) { return _symbols.addName(name, _quad1, _quad2); } if (len < 13) { return _symbols.addName(name, _quad1, _quad2, _quad3); } int qlen = (len + 3) >> 2; return _symbols.addName(name, _quadBuffer, qlen); } private final String _decodeShortAsciiName(int len) throws IOException { // note: caller ensures we have enough bytes available // also note that since it's a short name (64 bytes), segment WILL have enough space char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int outPtr = 0; final byte[] inBuf = _inputBuffer; int inPtr = _inputPtr; /* 25-Jan-2014, tsaloranta: Micro-benchmarks suggest that unrolling * does NOT speed up things on JDK 7, let's not do it. */ // loop unrolling seems to help here: /* for (int inEnd = inPtr + len - 3; inPtr < inEnd; ) { outBuf[outPtr++] = (char) inBuf[inPtr++]; outBuf[outPtr++] = (char) inBuf[inPtr++]; outBuf[outPtr++] = (char) inBuf[inPtr++]; outBuf[outPtr++] = (char) inBuf[inPtr++]; } int left = (len & 3); if (left > 0) { outBuf[outPtr++] = (char) inBuf[inPtr++]; if (left > 1) { outBuf[outPtr++] = (char) inBuf[inPtr++]; if (left > 2) { outBuf[outPtr++] = (char) inBuf[inPtr++]; } } } */ for (int inEnd = inPtr + len; inPtr < inEnd; ++inPtr) { outBuf[outPtr++] = (char) inBuf[inPtr]; } _inputPtr = inPtr; return _textBuffer.setCurrentAndReturn(len); } /** * Helper method used to decode short Unicode string, length for which actual * length (in bytes) is known * * @param len Length between 1 and 64 */ private final String _decodeShortUnicodeName(int len) throws IOException { // note: caller ensures we have enough bytes available int outPtr = 0; char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int inPtr = _inputPtr; _inputPtr += len; final int[] codes = SmileConstants.sUtf8UnitLengths; final byte[] inBuf = _inputBuffer; for (int end = inPtr + len; inPtr < end; ) { int i = inBuf[inPtr++] & 0xFF; int code = codes[i]; if (code != 0) { // trickiest one, need surrogate handling switch (code) { case 1: i = ((i & 0x1F) << 6) | (inBuf[inPtr++] & 0x3F); break; case 2: i = ((i & 0x0F) << 12) | ((inBuf[inPtr++] & 0x3F) << 6) | (inBuf[inPtr++] & 0x3F); break; case 3: i = ((i & 0x07) << 18) | ((inBuf[inPtr++] & 0x3F) << 12) | ((inBuf[inPtr++] & 0x3F) << 6) | (inBuf[inPtr++] & 0x3F); // note: this is the codepoint value; need to split, too i -= 0x10000; outBuf[outPtr++] = (char) (0xD800 | (i >> 10)); i = 0xDC00 | (i & 0x3FF); break; default: // invalid _reportError("Invalid byte "+Integer.toHexString(i)+" in short Unicode text block"); } } outBuf[outPtr++] = (char) i; } return _textBuffer.setCurrentAndReturn(outPtr); } // note: slightly edited copy of UTF8StreamParser.addName() private final String _decodeLongUnicodeName(int[] quads, int byteLen, int quadLen) throws IOException { int lastQuadBytes = byteLen & 3; // Ok: must decode UTF-8 chars. No other validation SHOULD be needed (except bounds checks?) /* Note: last quad is not correctly aligned (leading zero bytes instead * need to shift a bit, instead of trailing). Only need to shift it * for UTF-8 decoding; need revert for storage (since key will not * be aligned, to optimize lookup speed) */ int lastQuad; if (lastQuadBytes < 4) { lastQuad = quads[quadLen-1]; // 8/16/24 bit left shift quads[quadLen-1] = (lastQuad << ((4 - lastQuadBytes) << 3)); } else { lastQuad = 0; } char[] cbuf = _textBuffer.emptyAndGetCurrentSegment(); int cix = 0; for (int ix = 0; ix < byteLen; ) { int ch = quads[ix >> 2]; // current quad, need to shift+mask int byteIx = (ix & 3); ch = (ch >> ((3 - byteIx) << 3)) & 0xFF; ++ix; if (ch > 127) { // multi-byte int needed; if ((ch & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF) ch &= 0x1F; needed = 1; } else if ((ch & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF) ch &= 0x0F; needed = 2; } else if ((ch & 0xF8) == 0xF0) { // 4 bytes; double-char with surrogates and all... ch &= 0x07; needed = 3; } else { // 5- and 6-byte chars not valid chars _reportInvalidInitial(ch); needed = ch = 1; // never really gets this far } if ((ix + needed) > byteLen) { _reportInvalidEOF(" in long field name"); } // Ok, always need at least one more: int ch2 = quads[ix >> 2]; // current quad, need to shift+mask byteIx = (ix & 3); ch2 = (ch2 >> ((3 - byteIx) << 3)); ++ix; if ((ch2 & 0xC0) != 0x080) { _reportInvalidOther(ch2); } ch = (ch << 6) | (ch2 & 0x3F); if (needed > 1) { ch2 = quads[ix >> 2]; byteIx = (ix & 3); ch2 = (ch2 >> ((3 - byteIx) << 3)); ++ix; if ((ch2 & 0xC0) != 0x080) { _reportInvalidOther(ch2); } ch = (ch << 6) | (ch2 & 0x3F); if (needed > 2) { // 4 bytes? (need surrogates on output) ch2 = quads[ix >> 2]; byteIx = (ix & 3); ch2 = (ch2 >> ((3 - byteIx) << 3)); ++ix; if ((ch2 & 0xC0) != 0x080) { _reportInvalidOther(ch2 & 0xFF); } ch = (ch << 6) | (ch2 & 0x3F); } } if (needed > 2) { // surrogate pair? once again, let's output one here, one later on ch -= 0x10000; // to normalize it starting with 0x0 if (cix >= cbuf.length) { cbuf = _textBuffer.expandCurrentSegment(); } cbuf[cix++] = (char) (0xD800 + (ch >> 10)); ch = 0xDC00 | (ch & 0x03FF); } } if (cix >= cbuf.length) { cbuf = _textBuffer.expandCurrentSegment(); } cbuf[cix++] = (char) ch; } // Ok. Now we have the character array, and can construct the String String baseName = new String(cbuf, 0, cix); // And finally, un-align if necessary if (lastQuadBytes < 4) { quads[quadLen-1] = lastQuad; } return _symbols.addName(baseName, quads, quadLen); } private final void _handleLongFieldName() throws IOException { // First: gather quads we need, looking for end marker final byte[] inBuf = _inputBuffer; int quads = 0; int bytes = 0; int q = 0; while (true) { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } byte b = inBuf[_inputPtr++]; if (BYTE_MARKER_END_OF_STRING == b) { bytes = 0; break; } q = ((int) b) & 0xFF; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } b = inBuf[_inputPtr++]; if (BYTE_MARKER_END_OF_STRING == b) { bytes = 1; break; } q = (q << 8) | (b & 0xFF); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } b = inBuf[_inputPtr++]; if (BYTE_MARKER_END_OF_STRING == b) { bytes = 2; break; } q = (q << 8) | (b & 0xFF); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } b = inBuf[_inputPtr++]; if (BYTE_MARKER_END_OF_STRING == b) { bytes = 3; break; } q = (q << 8) | (b & 0xFF); if (quads >= _quadBuffer.length) { _quadBuffer = _growArrayTo(_quadBuffer, _quadBuffer.length + 256); // grow by 1k } _quadBuffer[quads++] = q; } // and if we have more bytes, append those too int byteLen = (quads << 2); if (bytes > 0) { if (quads >= _quadBuffer.length) { _quadBuffer = _growArrayTo(_quadBuffer, _quadBuffer.length + 256); } _quadBuffer[quads++] = q; byteLen += bytes; } // Know this name already? String name = _symbols.findName(_quadBuffer, quads); if (name == null) { name = _decodeLongUnicodeName(_quadBuffer, byteLen, quads); } if (_seenNames != null) { if (_seenNameCount >= _seenNames.length) { _seenNames = _expandSeenNames(_seenNames); } _seenNames[_seenNameCount++] = name; } _parsingContext.setCurrentName(name); } /** * Helper method for trying to find specified encoded UTF-8 byte sequence * from symbol table; if successful avoids actual decoding to String */ private final String _findDecodedFromSymbols(final int len) throws IOException { if ((_inputEnd - _inputPtr) < len) { _loadToHaveAtLeast(len); } // First: maybe we already have this name decoded? if (len < 5) { int inPtr = _inputPtr; final byte[] inBuf = _inputBuffer; int q = inBuf[inPtr] & 0xFF; if (len > 1) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); if (len > 2) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); if (len > 3) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); } } } _quad1 = q; return _symbols.findName(q); } final byte[] inBuf = _inputBuffer; int inPtr = _inputPtr; // First quadbyte is easy int q1 = (inBuf[inPtr++] & 0xFF); q1 = (q1 << 8) | (inBuf[inPtr++] & 0xFF); q1 = (q1 << 8) | (inBuf[inPtr++] & 0xFF); q1 = (q1 << 8) | (inBuf[inPtr++] & 0xFF); if (len < 9) { int q2 = (inBuf[inPtr++] & 0xFF); int left = len - 5; if (left > 0) { q2 = (q2 << 8) + (inBuf[inPtr++] & 0xFF); if (left > 1) { q2 = (q2 << 8) + (inBuf[inPtr++] & 0xFF); if (left > 2) { q2 = (q2 << 8) + (inBuf[inPtr++] & 0xFF); } } } _quad1 = q1; _quad2 = q2; return _symbols.findName(q1, q2); } int q2 = (inBuf[inPtr++] & 0xFF); q2 = (q2 << 8) | (inBuf[inPtr++] & 0xFF); q2 = (q2 << 8) | (inBuf[inPtr++] & 0xFF); q2 = (q2 << 8) | (inBuf[inPtr++] & 0xFF); if (len < 13) { int q3 = (inBuf[inPtr++] & 0xFF); int left = len - 9; if (left > 0) { q3 = (q3 << 8) + (inBuf[inPtr++] & 0xFF); if (left > 1) { q3 = (q3 << 8) + (inBuf[inPtr++] & 0xFF); if (left > 2) { q3 = (q3 << 8) + (inBuf[inPtr++] & 0xFF); } } } _quad1 = q1; _quad2 = q2; _quad3 = q3; return _symbols.findName(q1, q2, q3); } return _findDecodedLong(len, q1, q2); } /** * Method for locating names longer than 8 bytes (in UTF-8) */ private final String _findDecodedLong(int len, int q1, int q2) throws IOException { // first, need enough buffer to store bytes as ints: { int bufLen = (len + 3) >> 2; if (bufLen > _quadBuffer.length) { _quadBuffer = _growArrayTo(_quadBuffer, bufLen); } } _quadBuffer[0] = q1; _quadBuffer[1] = q2; // then decode, full quads first int offset = 2; int inPtr = _inputPtr+8; len -= 8; final byte[] inBuf = _inputBuffer; do { int q = (inBuf[inPtr++] & 0xFF); q = (q << 8) | inBuf[inPtr++] & 0xFF; q = (q << 8) | inBuf[inPtr++] & 0xFF; q = (q << 8) | inBuf[inPtr++] & 0xFF; _quadBuffer[offset++] = q; } while ((len -= 4) > 3); // and then leftovers if (len > 0) { int q = inBuf[inPtr] & 0xFF; if (len > 1) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); if (len > 2) { q = (q << 8) + (inBuf[++inPtr] & 0xFF); } } _quadBuffer[offset++] = q; } return _symbols.findName(_quadBuffer, offset); } private static int[] _growArrayTo(int[] arr, int minSize) { final int size = minSize+4; if (arr == null) { return new int[size]; } return Arrays.copyOf(arr, size); } /* /********************************************************** /* Internal methods, secondary parsing /********************************************************** */ @Override protected void _parseNumericValue(int expType) throws IOException { if (_tokenIncomplete) { int tb = _typeAsInt; // ensure we got a numeric type with value that is lazily parsed if ((tb >> 5) != 1) { _reportError("Current token ("+_currToken+") not numeric, can not use numeric value accessors"); } _tokenIncomplete = false; _finishNumberToken(tb); } } @Override // since 2.6 protected int _parseIntValue() throws IOException { // Inlined variant of: _parseNumericValue(NR_INT) if (_tokenIncomplete) { _tokenIncomplete = false; if ((_typeAsInt & 0x1F) == 4) { _finishInt(); // vint return _numberInt; } _finishNumberToken(_typeAsInt); } if ((_numTypesValid & NR_INT) == 0) { convertNumberToInt(); } return _numberInt; } /** * Method called to finish parsing of a token so that token contents * are retrievable */ protected final void _finishToken() throws IOException { _tokenIncomplete = false; int tb = _typeAsInt; int type = (tb >> 5); if (type == 1) { // simple literals, numbers _finishNumberToken(tb); return; } if (type <= 3) { // tiny & short ASCII _decodeShortAsciiValue(1 + (tb & 0x3F)); return; } if (type <= 5) { // tiny & short Unicode // short unicode; note, lengths 2 - 65 (off-by-one compared to ASCII) _decodeShortUnicodeValue(2 + (tb & 0x3F)); return; } if (type == 7) { tb &= 0x1F; // next 3 bytes define subtype switch (tb >> 2) { case 0: // long variable length ASCII _decodeLongAscii(); return; case 1: // long variable length Unicode _decodeLongUnicode(); return; case 2: // binary, 7-bit _binaryValue = _read7BitBinaryWithLength(); return; case 7: // binary, raw _finishRawBinary(); return; } } // sanity check _throwInternal(); } protected final void _finishNumberToken(int tb) throws IOException { switch (tb & 0x1F) { case 4: _finishInt(); // vint return; case 5: // vlong _finishLong(); return; case 6: _finishBigInteger(); return; case 8: // float _finishFloat(); return; case 9: // double _finishDouble(); return; case 10: // big-decimal _finishBigDecimal(); return; } _throwInternal(); } /* /********************************************************** /* Internal methods, secondary Number parsing /********************************************************** */ private final void _finishInt() throws IOException { int ptr = _inputPtr; if ((ptr + 5) >= _inputEnd) { _finishIntSlow(); return; } int value = _inputBuffer[ptr++]; int i; if (value < 0) { // 6 bits value &= 0x3F; } else { i = _inputBuffer[ptr++]; if (i >= 0) { // 13 bits value = (value << 7) + i; i = _inputBuffer[ptr++]; if (i >= 0) { value = (value << 7) + i; i = _inputBuffer[ptr++]; if (i >= 0) { value = (value << 7) + i; // and then we must get negative i = _inputBuffer[ptr++]; if (i >= 0) { _reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes"); } } } } value = (value << 6) + (i & 0x3F); } _inputPtr = ptr; _numberInt = SmileUtil.zigzagDecode(value); _numTypesValid = NR_INT; } private final void _finishIntSlow() throws IOException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int value = _inputBuffer[_inputPtr++]; int i; if (value < 0) { // 6 bits value &= 0x3F; } else { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = _inputBuffer[_inputPtr++]; if (i >= 0) { // 13 bits value = (value << 7) + i; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = _inputBuffer[_inputPtr++]; if (i >= 0) { value = (value << 7) + i; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = _inputBuffer[_inputPtr++]; if (i >= 0) { value = (value << 7) + i; // and then we must get negative if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = _inputBuffer[_inputPtr++]; if (i >= 0) { _reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes"); } } } } value = (value << 6) + (i & 0x3F); } _numberInt = SmileUtil.zigzagDecode(value); _numTypesValid = NR_INT; } private final void _finishLong() throws IOException { int ptr = _inputPtr; final int maxEnd = ptr+11; if (maxEnd >= _inputEnd) { _finishLongSlow(); return; } int i = _inputBuffer[ptr++]; // first 7 bits i = (i << 7) + _inputBuffer[ptr++]; // 14 bits i = (i << 7) + _inputBuffer[ptr++]; // 21 i = (i << 7) + _inputBuffer[ptr++]; // Ok: couple of bytes more long l = i; do { int value = _inputBuffer[ptr++]; if (value < 0) { l = (l << 6) + (value & 0x3F); _inputPtr = ptr; _numberLong = SmileUtil.zigzagDecode(l); _numTypesValid = NR_LONG; return; } l = (l << 7) + value; } while (ptr < maxEnd); _reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes"); } private final void _finishLongSlow() throws IOException { // Ok, first, will always get 4 full data bytes first; 1 was already passed long l = (long) _fourBytesToInt(); // and loop for the rest while (true) { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int value = _inputBuffer[_inputPtr++]; if (value < 0) { l = (l << 6) + (value & 0x3F); _numberLong = SmileUtil.zigzagDecode(l); _numTypesValid = NR_LONG; return; } l = (l << 7) + value; } } private final int _fourBytesToInt() throws IOException { int ptr = _inputPtr; if ((ptr + 3) >= _inputEnd) { return _fourBytesToIntSlow(); } int i = _inputBuffer[ptr++]; // first 7 bits i = (i << 7) + _inputBuffer[ptr++]; // 14 bits i = (i << 7) + _inputBuffer[ptr++]; // 21 i = (i << 7) + _inputBuffer[ptr++]; _inputPtr = ptr; return i; } private final int _fourBytesToIntSlow() throws IOException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int i = _inputBuffer[_inputPtr++]; // first 7 bits if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = (i << 7) + _inputBuffer[_inputPtr++]; // 14 bits if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = (i << 7) + _inputBuffer[_inputPtr++]; // 21 if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } return (i << 7) + _inputBuffer[_inputPtr++]; } private final void _finishBigInteger() throws IOException { byte[] raw = _read7BitBinaryWithLength(); _numberBigInt = new BigInteger(raw); _numTypesValid = NR_BIGINT; } private final void _finishFloat() throws IOException { // just need 5 bytes to get int32 first; all are unsigned int i = _fourBytesToInt(); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } i = (i << 7) + _inputBuffer[_inputPtr++]; float f = Float.intBitsToFloat(i); _numberDouble = (double) f; _numTypesValid = NR_DOUBLE; } private final void _finishDouble() throws IOException { // ok; let's take two sets of 4 bytes (each is int) long hi = _fourBytesToInt(); long value = (hi << 28) + (long) _fourBytesToInt(); // and then remaining 2 bytes if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } value = (value << 7) + _inputBuffer[_inputPtr++]; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } value = (value << 7) + _inputBuffer[_inputPtr++]; _numberDouble = Double.longBitsToDouble(value); _numTypesValid = NR_DOUBLE; } private final void _finishBigDecimal() throws IOException { int scale = SmileUtil.zigzagDecode(_readUnsignedVInt()); byte[] raw = _read7BitBinaryWithLength(); _numberBigDecimal = new BigDecimal(new BigInteger(raw), scale); _numTypesValid = NR_BIGDECIMAL; } private final int _readUnsignedVInt() throws IOException { int value = 0; while (true) { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int i = _inputBuffer[_inputPtr++]; if (i < 0) { // last byte value = (value << 6) + (i & 0x3F); return value; } value = (value << 7) + i; } } private final byte[] _read7BitBinaryWithLength() throws IOException { int byteLen = _readUnsignedVInt(); byte[] result = new byte[byteLen]; int ptr = 0; int lastOkPtr = byteLen - 7; // first, read all 7-by-8 byte chunks while (ptr <= lastOkPtr) { if ((_inputEnd - _inputPtr) < 8) { _loadToHaveAtLeast(8); } int i1 = (_inputBuffer[_inputPtr++] << 25) + (_inputBuffer[_inputPtr++] << 18) + (_inputBuffer[_inputPtr++] << 11) + (_inputBuffer[_inputPtr++] << 4); int x = _inputBuffer[_inputPtr++]; i1 += x >> 3; int i2 = ((x & 0x7) << 21) + (_inputBuffer[_inputPtr++] << 14) + (_inputBuffer[_inputPtr++] << 7) + _inputBuffer[_inputPtr++]; // Ok: got our 7 bytes, just need to split, copy result[ptr++] = (byte)(i1 >> 24); result[ptr++] = (byte)(i1 >> 16); result[ptr++] = (byte)(i1 >> 8); result[ptr++] = (byte)i1; result[ptr++] = (byte)(i2 >> 16); result[ptr++] = (byte)(i2 >> 8); result[ptr++] = (byte)i2; } // and then leftovers: n+1 bytes to decode n bytes int toDecode = (result.length - ptr); if (toDecode > 0) { if ((_inputEnd - _inputPtr) < (toDecode+1)) { _loadToHaveAtLeast(toDecode+1); } int value = _inputBuffer[_inputPtr++]; for (int i = 1; i < toDecode; ++i) { value = (value << 7) + _inputBuffer[_inputPtr++]; result[ptr++] = (byte) (value >> (7 - i)); } // last byte is different, has remaining 1 - 6 bits, right-aligned value <<= toDecode; result[ptr] = (byte) (value + _inputBuffer[_inputPtr++]); } return result; } /* /********************************************************** /* Internal methods, secondary String parsing /********************************************************** */ protected final String _decodeShortAsciiValue(int len) throws IOException { if ((_inputEnd - _inputPtr) < len) { _loadToHaveAtLeast(len); } // Note: we count on fact that buffer must have at least 'len' (<= 64) empty char slots final char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int outPtr = 0; final byte[] inBuf = _inputBuffer; int inPtr = _inputPtr; // as with _decodeShortAsciiName, no unrolling for (final int end = inPtr + len; inPtr < end; ++inPtr) { outBuf[outPtr++] = (char) inBuf[inPtr]; } _inputPtr = inPtr; return _textBuffer.setCurrentAndReturn(len); } protected final String _decodeShortUnicodeValue(int len) throws IOException { if ((_inputEnd - _inputPtr) < len) { _loadToHaveAtLeast(len); } int outPtr = 0; char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); int inPtr = _inputPtr; _inputPtr += len; final int[] codes = SmileConstants.sUtf8UnitLengths; final byte[] inputBuf = _inputBuffer; for (int end = inPtr + len; inPtr < end; ) { int i = inputBuf[inPtr++] & 0xFF; int code = codes[i]; if (code != 0) { // trickiest one, need surrogate handling switch (code) { case 1: i = ((i & 0x1F) << 6) | (inputBuf[inPtr++] & 0x3F); break; case 2: i = ((i & 0x0F) << 12) | ((inputBuf[inPtr++] & 0x3F) << 6) | (inputBuf[inPtr++] & 0x3F); break; case 3: i = ((i & 0x07) << 18) | ((inputBuf[inPtr++] & 0x3F) << 12) | ((inputBuf[inPtr++] & 0x3F) << 6) | (inputBuf[inPtr++] & 0x3F); // note: this is the codepoint value; need to split, too i -= 0x10000; outBuf[outPtr++] = (char) (0xD800 | (i >> 10)); i = 0xDC00 | (i & 0x3FF); break; default: // invalid _reportError("Invalid byte "+Integer.toHexString(i)+" in short Unicode text block"); } } outBuf[outPtr++] = (char) i; } return _textBuffer.setCurrentAndReturn(outPtr); } private final void _decodeLongAscii() throws IOException { int outPtr = 0; char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); main_loop: while (true) { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int inPtr = _inputPtr; int left = _inputEnd - inPtr; if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } left = Math.min(left, outBuf.length - outPtr); do { byte b = _inputBuffer[inPtr++]; if (b == SmileConstants.BYTE_MARKER_END_OF_STRING) { _inputPtr = inPtr; break main_loop; } outBuf[outPtr++] = (char) b; } while (--left > 0); _inputPtr = inPtr; } _textBuffer.setCurrentLength(outPtr); } private final void _decodeLongUnicode() throws IOException { int outPtr = 0; char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); final int[] codes = SmileConstants.sUtf8UnitLengths; int c; final byte[] inputBuffer = _inputBuffer; main_loop: while (true) { // First the tight ASCII loop: ascii_loop: while (true) { int ptr = _inputPtr; if (ptr >= _inputEnd) { loadMoreGuaranteed(); ptr = _inputPtr; } if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } int max = _inputEnd; { int max2 = ptr + (outBuf.length - outPtr); if (max2 < max) { max = max2; } } while (ptr < max) { c = (int) inputBuffer[ptr++] & 0xFF; if (codes[c] != 0) { _inputPtr = ptr; break ascii_loop; } outBuf[outPtr++] = (char) c; } _inputPtr = ptr; } // Ok: end marker, escape or multi-byte? if (c == SmileConstants.INT_MARKER_END_OF_STRING) { break main_loop; } switch (codes[c]) { case 1: // 2-byte UTF c = _decodeUtf8_2(c); break; case 2: // 3-byte UTF if ((_inputEnd - _inputPtr) >= 2) { c = _decodeUtf8_3fast(c); } else { c = _decodeUtf8_3(c); } break; case 3: // 4-byte UTF c = _decodeUtf8_4(c); // Let's add first part right away: outBuf[outPtr++] = (char) (0xD800 | (c >> 10)); if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } c = 0xDC00 | (c & 0x3FF); // And let the other char output down below break; default: // Is this good enough error message? _reportInvalidChar(c); } // Need more room? if (outPtr >= outBuf.length) { outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; } // Ok, let's add char to output: outBuf[outPtr++] = (char) c; } _textBuffer.setCurrentLength(outPtr); } private final void _finishRawBinary() throws IOException { int byteLen = _readUnsignedVInt(); _binaryValue = new byte[byteLen]; if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int ptr = 0; while (true) { int toAdd = Math.min(byteLen, _inputEnd - _inputPtr); System.arraycopy(_inputBuffer, _inputPtr, _binaryValue, ptr, toAdd); _inputPtr += toAdd; ptr += toAdd; byteLen -= toAdd; if (byteLen <= 0) { return; } loadMoreGuaranteed(); } } /* /********************************************************** /* Internal methods, skipping /********************************************************** */ /** * Method called to skip remainders of an incomplete token, when * contents themselves will not be needed any more */ protected void _skipIncomplete() throws IOException { _tokenIncomplete = false; int tb = _typeAsInt; switch (tb >> 5) { case 1: // simple literals, numbers tb &= 0x1F; // next 3 bytes define subtype switch (tb >> 2) { case 1: // VInt (zigzag) // easy, just skip until we see sign bit... (should we try to limit damage?) switch (tb & 0x3) { case 1: // vlong _skipBytes(4); // min 5 bytes // fall through case 0: // vint while (true) { final int end = _inputEnd; final byte[] buf = _inputBuffer; while (_inputPtr < end) { if (buf[_inputPtr++] < 0) { return; } } loadMoreGuaranteed(); } case 2: // big-int // just has binary data _skip7BitBinary(); return; } break; case 2: // other numbers switch (tb & 0x3) { case 0: // float _skipBytes(5); return; case 1: // double _skipBytes(10); return; case 2: // big-decimal // first, skip scale _readUnsignedVInt(); // then length-prefixed binary serialization _skip7BitBinary(); return; } break; } break; case 2: // tiny ASCII // fall through case 3: // short ASCII _skipBytes(1 + (tb & 0x3F)); return; case 4: // tiny unicode // fall through case 5: // short unicode _skipBytes(2 + (tb & 0x3F)); return; case 7: tb &= 0x1F; // next 3 bytes define subtype switch (tb >> 2) { case 0: // long variable length ASCII case 1: // long variable length unicode /* Doesn't matter which one, just need to find the end marker * (note: can potentially skip invalid UTF-8 too) */ while (true) { final int end = _inputEnd; final byte[] buf = _inputBuffer; while (_inputPtr < end) { if (buf[_inputPtr++] == BYTE_MARKER_END_OF_STRING) { return; } } loadMoreGuaranteed(); } // never gets here case 2: // binary, 7-bit _skip7BitBinary(); return; case 7: // binary, raw _skipBytes(_readUnsignedVInt()); return; } } _throwInternal(); } protected void _skipBytes(int len) throws IOException { while (true) { int toAdd = Math.min(len, _inputEnd - _inputPtr); _inputPtr += toAdd; len -= toAdd; if (len <= 0) { return; } loadMoreGuaranteed(); } } /** * Helper method for skipping length-prefixed binary data * section */ protected void _skip7BitBinary() throws IOException { int origBytes = _readUnsignedVInt(); // Ok; 8 encoded bytes for 7 payload bytes first int chunks = origBytes / 7; int encBytes = chunks * 8; // and for last 0 - 6 bytes, last+1 (except none if no leftovers) origBytes -= 7 * chunks; if (origBytes > 0) { encBytes += 1 + origBytes; } _skipBytes(encBytes); } /* /********************************************************** /* Internal methods, UTF8 decoding /********************************************************** */ private final int _decodeUtf8_2(int c) throws IOException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } return ((c & 0x1F) << 6) | (d & 0x3F); } private final int _decodeUtf8_3(int c1) throws IOException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } c1 &= 0x0F; int d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } int c = (c1 << 6) | (d & 0x3F); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } c = (c << 6) | (d & 0x3F); return c; } private final int _decodeUtf8_3fast(int c1) throws IOException { c1 &= 0x0F; int d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } int c = (c1 << 6) | (d & 0x3F); d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } c = (c << 6) | (d & 0x3F); return c; } /** * @return Character value minus 0x10000; this so that caller * can readily expand it to actual surrogates */ private final int _decodeUtf8_4(int c) throws IOException { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } int d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } c = ((c & 0x07) << 6) | (d & 0x3F); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } c = (c << 6) | (d & 0x3F); if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); } d = (int) _inputBuffer[_inputPtr++]; if ((d & 0xC0) != 0x080) { _reportInvalidOther(d & 0xFF, _inputPtr); } /* note: won't change it to negative here, since caller * already knows it'll need a surrogate */ return ((c << 6) | (d & 0x3F)) - 0x10000; } /* /********************************************************** /* Internal methods, error reporting /********************************************************** */ protected void _reportInvalidSharedName(int index) throws IOException { if (_seenNames == null) { _reportError("Encountered shared name reference, even though document header explicitly declared no shared name references are included"); } _reportError("Invalid shared name reference "+index+"; only got "+_seenNameCount+" names in buffer (invalid content)"); } protected void _reportInvalidSharedStringValue(int index) throws IOException { if (_seenStringValues == null) { _reportError("Encountered shared text value reference, even though document header did not declared shared text value references may be included"); } _reportError("Invalid shared text value reference "+index+"; only got "+_seenStringValueCount+" names in buffer (invalid content)"); } protected void _reportInvalidChar(int c) throws JsonParseException { // Either invalid WS or illegal UTF-8 start char if (c < ' ') { _throwInvalidSpace(c); } _reportInvalidInitial(c); } protected void _reportInvalidInitial(int mask) throws JsonParseException { _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask)); } protected void _reportInvalidOther(int mask) throws JsonParseException { _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask)); } protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException { _inputPtr = ptr; _reportInvalidOther(mask); } /* /********************************************************** /* Internal methods, other /********************************************************** */ private final JsonToken _eofAsNextToken() throws IOException { if (!_parsingContext.inRoot()) { _handleEOF(); } close(); return (_currToken = null); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy