All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.codehaus.jackson.smile.SmileParser Maven / Gradle / Ivy

Go to download

Support for efficient binary data format that can be access same as textual JSON ("binary infoset for JSON"), using standard Jackson abstractions: specifically, extension of JsonFactory and implementations of JsonParser and JsonGenerator.

There is a newer version: 1.9.13
Show newest version
package org.codehaus.jackson.smile;

import static org.codehaus.jackson.smile.SmileConstants.BYTE_MARKER_END_OF_STRING;

import java.io.IOException;
import java.io.InputStream;
import java.lang.ref.SoftReference;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Arrays;

import org.codehaus.jackson.*;
import org.codehaus.jackson.impl.StreamBasedParserBase;
import org.codehaus.jackson.io.IOContext;
import org.codehaus.jackson.sym.BytesToNameCanonicalizer;
import org.codehaus.jackson.sym.Name;

public class SmileParser
    extends StreamBasedParserBase
{
    /**
     * Enumeration that defines all togglable features for Smile generators.
     */
    public enum Feature {
        /**
         * Feature that determines whether 4-byte Smile header is mandatory in input,
         * or optional. If enabled, it means that only input that starts with the header
         * is accepted as valid; if disabled, header is optional. In latter case,
         * settings for content are assumed to be defaults.
         */
        REQUIRE_HEADER(true)
        ;

        final boolean _defaultState;
        final int _mask;
        
        /**
         * Method that calculates bit set (flags) of all features that
         * are enabled by default.
         */
        public static int collectDefaults()
        {
            int flags = 0;
            for (Feature f : values()) {
                if (f.enabledByDefault()) {
                    flags |= f.getMask();
                }
            }
            return flags;
        }
        
        private Feature(boolean defaultState) {
            _defaultState = defaultState;
            _mask = (1 << ordinal());
        }
        
        public boolean enabledByDefault() { return _defaultState; }
        public int getMask() { return _mask; }
    }

    private final static int[] NO_INTS = new int[0];

    private final static String[] NO_STRINGS = new String[0];
    
    /*
    /**********************************************************
    /* Configuration
    /**********************************************************
     */
    
    /**
     * Codec used for data binding when (if) requested.
     */
    protected ObjectCodec _objectCodec;

    /**
     * Flag that indicates whether content can legally have raw (unquoted)
     * binary data. Since this information is included both in header and
     * in actual binary data blocks there is redundancy, and we want to
     * ensure settings are compliant. Using application may also want to
     * know this setting in case it does some direct (random) access.
     */
    protected boolean _mayContainRawBinary;

    /**
     * Helper object used for low-level recycling of Smile-generator
     * specific buffers.
     * 
     * @since 1.7
     */
    final protected SmileBufferRecycler _smileBufferRecycler;
    
    /*
    /**********************************************************
    /* Additional parsing state
    /**********************************************************
     */

    /**
     * Flag that indicates that the current token has not yet
     * been fully processed, and needs to be finished for
     * some access (or skipped to obtain the next token)
     */
    protected boolean _tokenIncomplete = false;
    
    /**
     * Type byte of the current token
     */
    protected int _typeByte;

    /**
     * Specific flag that is set when we encountered a 32-bit
     * floating point value; needed since numeric super classes do
     * not track distinction between float and double, but Smile
     * format does, and we want to retain that separation.
     */
    protected boolean _got32BitFloat;

    /*
    /**********************************************************
    /* Symbol handling, decoding
    /**********************************************************
     */

    /**
     * Symbol table that contains field names encountered so far
     */
    final protected BytesToNameCanonicalizer _symbols;
    
    /**
     * Temporary buffer used for name parsing.
     */
    protected int[] _quadBuffer = NO_INTS;

    /**
     * Quads used for hash calculation
     */
    protected int _quad1, _quad2;
     
    /**
     * Array of recently seen field names, which may be back referenced
     * by later fields.
     * Defaults set to enable handling even if no header found.
     */
    protected String[] _seenNames = NO_STRINGS;

    protected int _seenNameCount = 0;

    /**
     * Array of recently seen field names, which may be back referenced
     * by later fields
     * Defaults set to disable handling if no header found.
     */
    protected String[] _seenStringValues = null;

    protected int _seenStringValueCount = -1;
    
    /*
    /**********************************************************
    /* Thread-local recycling
    /**********************************************************
     */
    
    /**
     * ThreadLocal contains a {@link java.lang.ref.SoftRerefence}
     * to a buffer recycler used to provide a low-cost
     * buffer recycling for Smile-specific buffers.
     */
    final protected static ThreadLocal>> _smileRecyclerRef
        = new ThreadLocal>>();
    
    /*
    /**********************************************************
    /* Life-cycle
    /**********************************************************
     */
    
    public SmileParser(IOContext ctxt, int parserFeatures, int smileFeatures,
            ObjectCodec codec,
            BytesToNameCanonicalizer sym,
            InputStream in, byte[] inputBuffer, int start, int end,
            boolean bufferRecyclable)
    {
        super(ctxt, parserFeatures, in, inputBuffer, start, end, bufferRecyclable);
        _objectCodec = codec;
        _symbols = sym;
        _tokenInputRow = -1;
        _tokenInputCol = -1;
        _smileBufferRecycler = _smileBufferRecycler();
    }

    @Override
    public ObjectCodec getCodec() {
        return _objectCodec;
    }

    @Override
    public void setCodec(ObjectCodec c) {
        _objectCodec = c;
    }

    /**
     * Helper method called when it looks like input might contain the signature;
     * and it is necessary to detect and handle signature to get configuration
     * information it might have.
     * 
     * @return True if valid signature was found and handled; false if not
     */
    protected boolean handleSignature(boolean consumeFirstByte, boolean throwException)
        throws IOException, JsonParseException
    {
        if (consumeFirstByte) {
            ++_inputPtr;
        }
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        if (_inputBuffer[_inputPtr] != SmileConstants.HEADER_BYTE_2) {
            if (throwException) {
            	_reportError("Malformed content: signature not valid, starts with 0x3a but followed by 0x"
            			+Integer.toHexString(_inputBuffer[_inputPtr])+", not 0x29");
            }
            return false;
        }
        if (++_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();        	
        }
        if (_inputBuffer[_inputPtr] != SmileConstants.HEADER_BYTE_3) {
            if (throwException) {
            	_reportError("Malformed content: signature not valid, starts with 0x3a, 0x29, but followed by 0x"
            			+Integer.toHexString(_inputBuffer[_inputPtr])+", not 0xA");
            }
            return false;
        }
    	// Good enough; just need version info from 4th byte...
        if (++_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();        	
        }
        int ch = _inputBuffer[_inputPtr++];
        int versionBits = (ch >> 4) & 0x0F;
        // but failure with version number is fatal, can not ignore
        if (versionBits != SmileConstants.HEADER_VERSION_0) {
            _reportError("Header version number bits (0x"+Integer.toHexString(versionBits)+") indicate unrecognized version; only 0x0 handled by parser");
        }

        // can avoid tracking names, if explicitly disabled
        if ((ch & SmileConstants.HEADER_BIT_HAS_SHARED_NAMES) == 0) {
            _seenNames = null;
            _seenNameCount = -1;
        }
        // conversely, shared string values must be explicitly enabled
        if ((ch & SmileConstants.HEADER_BIT_HAS_SHARED_STRING_VALUES) != 0) {
            _seenStringValues = NO_STRINGS;
            _seenStringValueCount = 0;
        }
        _mayContainRawBinary = ((ch & SmileConstants.HEADER_BIT_HAS_RAW_BINARY) != 0);
        return true;
    }

    /**
     * @since 1.7
     */
    protected final static SmileBufferRecycler _smileBufferRecycler()
    {
        SoftReference> ref = _smileRecyclerRef.get();
        SmileBufferRecycler br = (ref == null) ? null : ref.get();

        if (br == null) {
            br = new SmileBufferRecycler();
            _smileRecyclerRef.set(new SoftReference>(br));
        }
        return br;
    }
    
    /*
    /**********************************************************
    /* Overridden methods
    /**********************************************************
     */

    @Override
    protected void _finishString() throws IOException, JsonParseException
    {
        // should never be called; but must be defined for superclass
        _throwInternal();
    }

    @Override
    public void close() throws IOException
    {
        super.close();
        // Merge found symbols, if any:
        _symbols.release();
    }

    @Override
    protected void _releaseBuffers() throws IOException
    {
        super._releaseBuffers();
        {
            String[] nameBuf = _seenNames;
            if (nameBuf != null && nameBuf.length > 0) {
                _seenNames = null;
                // Note: while not mandatory, it's probably good idea to clear up cruft to reduce memory retention
                Arrays.fill(nameBuf, 0, _seenNameCount, null);
                _smileBufferRecycler.releaseSeenNamesBuffer(nameBuf);
            }
        }
        {
            String[] valueBuf = _seenStringValues;
            if (valueBuf != null && valueBuf.length > 0) {
                _seenStringValues = null;
                // Note: while not mandatory, it's probably good idea to clear up cruft to reduce memory retention
                Arrays.fill(valueBuf, 0, _seenStringValueCount, null);
                _smileBufferRecycler.releaseSeenStringValuesBuffer(valueBuf);
            }
        }
    }
    
    /*
    /**********************************************************
    /* Extended API
    /**********************************************************
     */

    public boolean mayContainRawBinary() {
        return _mayContainRawBinary;
    }
    
    /*
    /**********************************************************
    /* JsonParser impl
    /**********************************************************
     */

    @Override
    public JsonToken nextToken() throws IOException, JsonParseException
    {
        // For longer tokens (text, binary), we'll only read when requested
        if (_tokenIncomplete) {
            _skipIncomplete();
        }
        _tokenInputTotal = _currInputProcessed + _inputPtr - 1;
        // also: clear any data retained so far
        _binaryValue = null;
        // Two main modes: values, and field names.
        if (_parsingContext.inObject() && _currToken != JsonToken.FIELD_NAME) {
            return (_currToken = _handleFieldName());
        }
        if (_inputPtr >= _inputEnd) {
            if (!loadMore()) {
                _handleEOF();
                /* NOTE: here we can and should close input, release buffers,
                 * since this is "hard" EOF, not a boundary imposed by
                 * header token.
                 */
                close();
                return (_currToken = null);
            }
        }
        int ch = _inputBuffer[_inputPtr++];
        _typeByte = ch;
        switch ((ch >> 5) & 0x7) {
        case 0: // short shared string value reference
            if (ch == 0) { // important: this is invalid, don't accept
                _reportError("Invalid token byte 0x00");
            }
            return _handleSharedString(ch-1);

        case 1: // simple literals, numbers
            {
                int typeBits = ch & 0x1F;
                if (typeBits < 4) {
                    switch (typeBits) {
                    case 0x00:
                        _textBuffer.resetWithEmpty();
                        return (_currToken = JsonToken.VALUE_STRING);
                    case 0x01:
                        return (_currToken = JsonToken.VALUE_NULL);
                    case 0x02: // false
                        return (_currToken = JsonToken.VALUE_FALSE);
                    default: // 0x03 == true
                        return (_currToken = JsonToken.VALUE_TRUE);
                    }
                }
                // next 3 bytes define subtype
                if (typeBits < 8) { // VInt (zigzag), BigInteger
                    if ((typeBits & 0x3) <= 0x2) { // 0x3 reserved (should never occur)
                        _tokenIncomplete = true;
                        _numTypesValid = 0;
                        return (_currToken = JsonToken.VALUE_NUMBER_INT);
                    }
                    break;
                }
                if (typeBits < 12) { // floating-point
                    int subtype = typeBits & 0x3;
                    if (subtype <= 0x2) { // 0x3 reserved (should never occur)
                        _tokenIncomplete = true;
                        _numTypesValid = 0;
                        _got32BitFloat = (subtype == 0);
                        return (_currToken = JsonToken.VALUE_NUMBER_FLOAT);
                    }
                    break;
                }
                if (typeBits == 0x1A) { // == 0x3A == ':' -> possibly header signature for next chunk?
                    if (handleSignature(false, false)) {
                        /* Ok, now; end-marker and header both imply doc boundary and a
                         * 'null token'; but if both are seen, they are collapsed.
                         * We can check this by looking at current token; if it's null,
                         * need to get non-null token
                         */
                        if (_currToken == null) {
                            return nextToken();
                        }
                        return (_currToken = null);
                    }
            	}
            	_reportError("Unrecognized token byte 0x3A (malformed segment header?");
            }
            // and everything else is reserved, for now
            break;
        case 2: // tiny ASCII
            // fall through            
        case 3: // short ASCII
            // fall through
        case 4: // tiny Unicode
            // fall through
        case 5: // short Unicode
            // No need to decode, unless we have to keep track of back-references (for shared string values)
            _currToken = JsonToken.VALUE_STRING;
            if (_seenStringValueCount >= 0) { // shared text values enabled
                _addSeenStringValue();
            } else {
                _tokenIncomplete = true;
            }
            return _currToken;
        case 6: // small integers; zigzag encoded
            _numberInt = SmileUtil.zigzagDecode(ch & 0x1F);
            _numTypesValid = NR_INT;
            return (_currToken = JsonToken.VALUE_NUMBER_INT);
        case 7: // binary/long-text/long-shared/start-end-markers
            switch (ch & 0x1F) {
            case 0x00: // long variable length ASCII
            case 0x04: // long variable length unicode
                _tokenIncomplete = true;
                return (_currToken = JsonToken.VALUE_STRING);
            case 0x08: // binary, 7-bit
                _tokenIncomplete = true;
                return (_currToken = JsonToken.VALUE_EMBEDDED_OBJECT);
            case 0x0C: // long shared string
            case 0x0D:
            case 0x0E:
            case 0x0F:
                if (_inputPtr >= _inputEnd) {
                    loadMoreGuaranteed();
                }
                return _handleSharedString(((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF));
            case 0x18: // START_ARRAY
                _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol);
                return (_currToken = JsonToken.START_ARRAY);
            case 0x19: // END_ARRAY
                if (!_parsingContext.inArray()) {
                    _reportMismatchedEndMarker(']', '}');
                }
                _parsingContext = _parsingContext.getParent();
                return (_currToken = JsonToken.END_ARRAY);
            case 0x1A: // START_OBJECT
                _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol);
                return (_currToken = JsonToken.START_OBJECT);
            case 0x1B: // not used in this mode; would be END_OBJECT
                _reportError("Invalid type marker byte 0xFB in value mode (would be END_OBJECT in key mode)");
            case 0x1D: // binary, raw
                _tokenIncomplete = true;
                return (_currToken = JsonToken.VALUE_EMBEDDED_OBJECT);
            case 0x1F: // 0xFF, end of content
                return (_currToken = null);
            }
            break;
        }
        // If we get this far, type byte is corrupt
        _reportError("Invalid type marker byte 0x"+Integer.toHexString(ch & 0xFF)+" for expected value token");
        return null;
    }

    private final JsonToken _handleSharedString(int index)
        throws IOException, JsonParseException
    {
        if (index >= _seenStringValueCount) {
            _reportInvalidSharedStringValue(index);
        }
        _textBuffer.resetWithString(_seenStringValues[index]);
        return (_currToken = JsonToken.VALUE_STRING);
    }

    private final void _addSeenStringValue()
        throws IOException, JsonParseException
    {
        _finishToken();
        if (_seenStringValueCount < _seenStringValues.length) {
            // !!! TODO: actually only store char[], first time around?
            _seenStringValues[_seenStringValueCount++] = _textBuffer.contentsAsString();
            return;
        }
        _expandSeenStringValues();
    }
    
    private final void _expandSeenStringValues()
    {
        String[] oldShared = _seenStringValues;
        int len = oldShared.length;
        String[] newShared;
        if (len == 0) {
            newShared = _smileBufferRecycler.allocSeenStringValuesBuffer();
            if (newShared == null) {
                newShared = new String[SmileBufferRecycler.DEFAULT_STRING_VALUE_BUFFER_LENGTH];
            }
        } else if (len == SmileConstants.MAX_SHARED_STRING_VALUES) { // too many? Just flush...
           newShared = oldShared;
           _seenStringValueCount = 0; // could also clear, but let's not yet bother
        } else {
            int newSize = (len == SmileBufferRecycler.DEFAULT_NAME_BUFFER_LENGTH) ? 256 : SmileConstants.MAX_SHARED_STRING_VALUES;
            newShared = new String[newSize];
            System.arraycopy(oldShared, 0, newShared, 0, oldShared.length);
        }
        _seenStringValues = newShared;
        _seenStringValues[_seenStringValueCount++] = _textBuffer.contentsAsString();
    }

    @Override
    public String getCurrentName() throws IOException, JsonParseException
    {
        return _parsingContext.getCurrentName();
    }

    @Override
    public NumberType getNumberType()
        throws IOException, JsonParseException
    {
    	if (_got32BitFloat) {
    	    return NumberType.FLOAT;
    	}
    	return super.getNumberType();
    }
    
    /*
    /**********************************************************
    /* Public API, access to token information, text
    /**********************************************************
     */

    /**
     * Method for accessing textual representation of the current event;
     * if no current event (before first call to {@link #nextToken}, or
     * after encountering end-of-input), returns null.
     * Method can be called for any event.
     */
    @Override    
    public String getText()
        throws IOException, JsonParseException
    {
        if (_tokenIncomplete) {
            _tokenIncomplete = false;
            // Let's inline part of "_finishToken", common case
            int tb = _typeByte;
            int type = (tb >> 5) & 0x7;
            if (type == 2 || type == 3) { // tiny & short ASCII
                _decodeShortAsciiValue(1 + (tb & 0x3F));
                return _textBuffer.contentsAsString();
            }
            if (type == 4 || type == 5) { // tiny & short Unicode
                 // short unicode; note, lengths 2 - 65  (off-by-one compared to ASCII)
                _decodeShortUnicodeValue(2 + (tb & 0x3F));
                return _textBuffer.contentsAsString();
            }
            _finishToken();
        }
        if (_currToken == JsonToken.VALUE_STRING) {
            return _textBuffer.contentsAsString();
        }
        JsonToken t = _currToken;
        if (t == null) { // null only before/after document
            return null;
        }
        if (t == JsonToken.FIELD_NAME) {
            return _parsingContext.getCurrentName();
        }
        if (t.isNumeric()) {
            // TODO: optimize?
            return getNumberValue().toString();
        }
        return _currToken.asString();
    }

    @Override
    public char[] getTextCharacters()
        throws IOException, JsonParseException
    {
        if (_currToken != null) { // null only before/after document
            if (_tokenIncomplete) {
                _finishToken();
            }
            switch (_currToken) {                
            case VALUE_STRING:
                return _textBuffer.getTextBuffer();
            case FIELD_NAME:
                if (!_nameCopied) {
                    String name = _parsingContext.getCurrentName();
                    int nameLen = name.length();
                    if (_nameCopyBuffer == null) {
                        _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen);
                    } else if (_nameCopyBuffer.length < nameLen) {
                        _nameCopyBuffer = new char[nameLen];
                    }
                    name.getChars(0, nameLen, _nameCopyBuffer, 0);
                    _nameCopied = true;
                }
                return _nameCopyBuffer;

                // fall through
            case VALUE_NUMBER_INT:
            case VALUE_NUMBER_FLOAT:
                // TODO: optimize
                return getNumberValue().toString().toCharArray();
                
            default:
                return _currToken.asCharArray();
            }
        }
        return null;
    }

    @Override    
    public int getTextLength()
        throws IOException, JsonParseException
    {
        if (_currToken != null) { // null only before/after document
            if (_tokenIncomplete) {
                _finishToken();
            }
            switch (_currToken) {
            case VALUE_STRING:
                return _textBuffer.size();                
            case FIELD_NAME:
                return _parsingContext.getCurrentName().length();
                // fall through
            case VALUE_NUMBER_INT:
            case VALUE_NUMBER_FLOAT:
                // TODO: optimize
                return getNumberValue().toString().length();
                
            default:
                return _currToken.asCharArray().length;
            }
        }
        return 0;
    }

    @Override
    public int getTextOffset() throws IOException, JsonParseException
    {
        return 0;
    }

    /*
    /**********************************************************
    /* Public API, access to token information, binary
    /**********************************************************
     */

    @Override
    public byte[] getBinaryValue(Base64Variant b64variant)
        throws IOException, JsonParseException
    {
        if (_tokenIncomplete) {
            _finishToken();
        }
        if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
            // Todo, maybe: support base64 for text?
            _reportError("Current token ("+_currToken+") not VALUE_EMBEDDED_OBJECT, can not access as binary");
        }
        return _binaryValue;
    }

    @Override
    protected byte[] _decodeBase64(Base64Variant b64variant)
        throws IOException, JsonParseException
    {
        // Should never get called, but must be defined for base class
        _throwInternal();
        return null;
    }
    
    /*
    /**********************************************************
    /* Internal methods, field name parsing
    /**********************************************************
     */

    /**
     * Method that handles initial token type recognition for token
     * that has to be either FIELD_NAME or END_OBJECT.
     */
    protected final JsonToken _handleFieldName() throws IOException, JsonParseException
    {    	
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        int ch = _inputBuffer[_inputPtr++];
        // is this needed?
        _typeByte = ch;
        switch ((ch >> 6) & 3) {
        case 0: // misc, including end marker
            switch (ch) {
            case 0x20: // empty String as name, legal if unusual
                _parsingContext.setCurrentName("");
                return JsonToken.FIELD_NAME;
            case 0x30: // long shared
            case 0x31:
            case 0x32:
            case 0x33:
                {
                    if (_inputPtr >= _inputEnd) {
                        loadMoreGuaranteed();
	            }
	            int index = ((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF);
                    if (index >= _seenNameCount) {
                        _reportInvalidSharedName(index);
                    }
	            _parsingContext.setCurrentName(_seenNames[index]);
	        }
                return JsonToken.FIELD_NAME;
            case 0x34: // long ASCII/Unicode name
                _handleLongFieldName();
                return JsonToken.FIELD_NAME;            	
            }
            break;
        case 1: // short shared, can fully process
            {
                int index = (ch & 0x3F);
                if (index >= _seenNameCount) {
                    _reportInvalidSharedName(index);
                }
                _parsingContext.setCurrentName(_seenNames[index]);
            }
            return JsonToken.FIELD_NAME;
        case 2: // short ASCII
	    {
	        int len = 1 + (ch & 0x3f);
        	String name;
        	Name n = _findDecodedFromSymbols(len);
        	if (n != null) {
        	    name = n.getName();
        	    _inputPtr += len;
        	} else {
        	    name = _decodeShortAsciiName(len);
        	    name = _addDecodedToSymbols(len, name);
        	}
                if (_seenNames != null) {
                   if (_seenNameCount >= _seenNames.length) {
   	               _seenNames = _expandSeenNames(_seenNames);
                   }
                   _seenNames[_seenNameCount++] = name;
                }
                _parsingContext.setCurrentName(name);
	    }
	    return JsonToken.FIELD_NAME;                
        case 3: // short Unicode
            // all valid, except for 0xFF
            {
                int len = (ch & 0x3F);
                if (len > 0x37) {
                    if (len == 0x3B) {
                        if (!_parsingContext.inObject()) {
                            _reportMismatchedEndMarker('}', ']');
                        }
                        _parsingContext = _parsingContext.getParent();
                        return JsonToken.END_OBJECT;
                    }
                } else {
                    len += 2; // values from 2 to 57...
                    String name;
                    Name n = _findDecodedFromSymbols(len);
                    if (n != null) {
                        name = n.getName();
                        _inputPtr += len;
                    } else {
                        name = _decodeShortUnicodeName(len);
                        name = _addDecodedToSymbols(len, name);
                    }
                    if (_seenNames != null) {
                        if (_seenNameCount >= _seenNames.length) {
    	                    _seenNames = _expandSeenNames(_seenNames);
                        }
                        _seenNames[_seenNameCount++] = name;
                    }
                    _parsingContext.setCurrentName(name);
                    return JsonToken.FIELD_NAME;                
                }
            }
            break;
        }
        // Other byte values are illegal
        _reportError("Invalid type marker byte 0x"+Integer.toHexString(ch)+" for expected field name (or END_OBJECT marker)");
        return null;
    }

    /**
     * Method called to try to expand shared name area to fit one more potentially
     * shared String. If area is already at its biggest size, will just clear
     * the area (by setting next-offset to 0)
     */
    private final String[] _expandSeenNames(String[] oldShared)
    {
        int len = oldShared.length;
        String[] newShared;
        if (len == 0) {
            newShared = _smileBufferRecycler.allocSeenNamesBuffer();
            if (newShared == null) {
                newShared = new String[SmileBufferRecycler.DEFAULT_NAME_BUFFER_LENGTH];                
            }
        } else if (len == SmileConstants.MAX_SHARED_NAMES) { // too many? Just flush...
      	   newShared = oldShared;
      	   _seenNameCount = 0; // could also clear, but let's not yet bother
        } else {
            int newSize = (len == SmileBufferRecycler.DEFAULT_STRING_VALUE_BUFFER_LENGTH) ? 256 : SmileConstants.MAX_SHARED_NAMES;
            newShared = new String[newSize];
            System.arraycopy(oldShared, 0, newShared, 0, oldShared.length);
        }
        return newShared;
    }
    
    private final String _addDecodedToSymbols(int len, String name)
    {
        if (len < 5) {
            return _symbols.addName(name, _quad1, 0).getName();
        }
	if (len < 9) {
    	    return _symbols.addName(name, _quad1, _quad2).getName();
	}
	int qlen = (len + 3) >> 2;
	return _symbols.addName(name, _quadBuffer, qlen).getName();
    }

    private final String _decodeShortAsciiName(int len)
        throws IOException, JsonParseException
    {
        // note: caller ensures we have enough bytes available
        char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
        int outPtr = 0;
        final byte[] inBuf = _inputBuffer;
        int inPtr = _inputPtr;
        
        // loop unrolling seems to help here:
        for (int inEnd = inPtr + len - 3; inPtr < inEnd; ) {
            outBuf[outPtr++] = (char) inBuf[inPtr++];            
            outBuf[outPtr++] = (char) inBuf[inPtr++];            
            outBuf[outPtr++] = (char) inBuf[inPtr++];            
            outBuf[outPtr++] = (char) inBuf[inPtr++];            
        }
        int left = (len & 3);
        if (left > 0) {
            outBuf[outPtr++] = (char) inBuf[inPtr++];
            if (left > 1) {
                outBuf[outPtr++] = (char) inBuf[inPtr++];
                if (left > 2) {
                    outBuf[outPtr++] = (char) inBuf[inPtr++];
                }
            }
        } 
        _inputPtr = inPtr;
        _textBuffer.setCurrentLength(len);
        return _textBuffer.contentsAsString();
    }
    
    /**
     * Helper method used to decode short Unicode string, length for which actual
     * length (in bytes) is known
     * 
     * @param len Length between 1 and 64
     */
    private final String _decodeShortUnicodeName(int len)
        throws IOException, JsonParseException
    {
        // note: caller ensures we have enough bytes available
        int outPtr = 0;
        char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
        int inPtr = _inputPtr;
        _inputPtr += len;
        final int[] codes = SmileConstants.sUtf8UnitLengths;
        final byte[] inBuf = _inputBuffer;
        for (int end = inPtr + len; inPtr < end; ) {
            int i = inBuf[inPtr++] & 0xFF;
            int code = codes[i];
            if (code != 0) {
                // trickiest one, need surrogate handling
                switch (code) {
                case 1:
                    i = ((i & 0x1F) << 6) | (inBuf[inPtr++] & 0x3F);
                    break;
                case 2:
                    i = ((i & 0x0F) << 12)
                        | ((inBuf[inPtr++] & 0x3F) << 6)
                        | (inBuf[inPtr++] & 0x3F);
                    break;
                case 3:
                    i = ((i & 0x07) << 18)
                    | ((inBuf[inPtr++] & 0x3F) << 12)
                    | ((inBuf[inPtr++] & 0x3F) << 6)
                    | (inBuf[inPtr++] & 0x3F);
                    // note: this is the codepoint value; need to split, too
                    i -= 0x10000;
                    outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
                    i = 0xDC00 | (i & 0x3FF);
                    break;
                default: // invalid
                    _reportError("Invalid byte "+Integer.toHexString(i)+" in short Unicode text block");
                }
            }
            outBuf[outPtr++] = (char) i;
        }
        _textBuffer.setCurrentLength(outPtr);
        return _textBuffer.contentsAsString();
    }

    // note: slightly edited copy of UTF8StreamParser.addName()
    private final Name _decodeLongUnicodeName(int[] quads, int byteLen, int quadLen)
        throws IOException, JsonParseException
    {
        int lastQuadBytes = byteLen & 3;
        // Ok: must decode UTF-8 chars. No other validation SHOULD be needed (except bounds checks?)
        /* Note: last quad is not correctly aligned (leading zero bytes instead
         * need to shift a bit, instead of trailing). Only need to shift it
         * for UTF-8 decoding; need revert for storage (since key will not
         * be aligned, to optimize lookup speed)
         */
        int lastQuad;
    
        if (lastQuadBytes < 4) {
            lastQuad = quads[quadLen-1];
            // 8/16/24 bit left shift
            quads[quadLen-1] = (lastQuad << ((4 - lastQuadBytes) << 3));
        } else {
            lastQuad = 0;
        }

        char[] cbuf = _textBuffer.emptyAndGetCurrentSegment();
        int cix = 0;
    
        for (int ix = 0; ix < byteLen; ) {
            int ch = quads[ix >> 2]; // current quad, need to shift+mask
            int byteIx = (ix & 3);
            ch = (ch >> ((3 - byteIx) << 3)) & 0xFF;
            ++ix;
    
            if (ch > 127) { // multi-byte
                int needed;
                if ((ch & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
                    ch &= 0x1F;
                    needed = 1;
                } else if ((ch & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
                    ch &= 0x0F;
                    needed = 2;
                } else if ((ch & 0xF8) == 0xF0) { // 4 bytes; double-char with surrogates and all...
                    ch &= 0x07;
                    needed = 3;
                } else { // 5- and 6-byte chars not valid chars
                    _reportInvalidInitial(ch);
                    needed = ch = 1; // never really gets this far
                }
                if ((ix + needed) > byteLen) {
                    _reportInvalidEOF(" in long field name");
                }
                
                // Ok, always need at least one more:
                int ch2 = quads[ix >> 2]; // current quad, need to shift+mask
                byteIx = (ix & 3);
                ch2 = (ch2 >> ((3 - byteIx) << 3));
                ++ix;
                
                if ((ch2 & 0xC0) != 0x080) {
                    _reportInvalidOther(ch2);
                }
                ch = (ch << 6) | (ch2 & 0x3F);
                if (needed > 1) {
                    ch2 = quads[ix >> 2];
                    byteIx = (ix & 3);
                    ch2 = (ch2 >> ((3 - byteIx) << 3));
                    ++ix;
                    
                    if ((ch2 & 0xC0) != 0x080) {
                        _reportInvalidOther(ch2);
                    }
                    ch = (ch << 6) | (ch2 & 0x3F);
                    if (needed > 2) { // 4 bytes? (need surrogates on output)
                        ch2 = quads[ix >> 2];
                        byteIx = (ix & 3);
                        ch2 = (ch2 >> ((3 - byteIx) << 3));
                        ++ix;
                        if ((ch2 & 0xC0) != 0x080) {
                            _reportInvalidOther(ch2 & 0xFF);
                        }
                        ch = (ch << 6) | (ch2 & 0x3F);
                    }
                }
                if (needed > 2) { // surrogate pair? once again, let's output one here, one later on
                    ch -= 0x10000; // to normalize it starting with 0x0
                    if (cix >= cbuf.length) {
                        cbuf = _textBuffer.expandCurrentSegment();
                    }
                    cbuf[cix++] = (char) (0xD800 + (ch >> 10));
                    ch = 0xDC00 | (ch & 0x03FF);
                }
            }
            if (cix >= cbuf.length) {
                cbuf = _textBuffer.expandCurrentSegment();
            }
            cbuf[cix++] = (char) ch;
        }

        // Ok. Now we have the character array, and can construct the String
        String baseName = new String(cbuf, 0, cix);
        // And finally, un-align if necessary
        if (lastQuadBytes < 4) {
            quads[quadLen-1] = lastQuad;
        }
        return _symbols.addName(baseName, quads, quadLen);
    }
    
    private final void _handleLongFieldName() throws IOException, JsonParseException
    {
        // First: gather quads we need, looking for end marker
        final byte[] inBuf = _inputBuffer;
        int quads = 0;
        int bytes = 0;
        int q = 0;

        while (true) {
            byte b = inBuf[_inputPtr++];
            if (BYTE_MARKER_END_OF_STRING == b) {
                bytes = 0;
                break;
            }
            q = ((int) b) & 0xFF;
            b = inBuf[_inputPtr++];
            if (BYTE_MARKER_END_OF_STRING == b) {
                bytes = 1;
                break;
            }
            q = (q << 8) | (b & 0xFF);
            b = inBuf[_inputPtr++];
            if (BYTE_MARKER_END_OF_STRING == b) {
                bytes = 2;
                break;
            }
            q = (q << 8) | (b & 0xFF);
            b = inBuf[_inputPtr++];
            if (BYTE_MARKER_END_OF_STRING == b) {
                bytes = 3;
                break;
            }
            q = (q << 8) | (b & 0xFF);
            if (quads >= _quadBuffer.length) {
                _quadBuffer = _growArrayTo(_quadBuffer, _quadBuffer.length + 256); // grow by 1k
            }
            _quadBuffer[quads++] = q;
        }
        // and if we have more bytes, append those too
        int byteLen = (quads << 2);
        if (bytes > 0) {
            if (quads >= _quadBuffer.length) {
                _quadBuffer = _growArrayTo(_quadBuffer, _quadBuffer.length + 256);
            }
            _quadBuffer[quads++] = q;
            byteLen += bytes;
        }
        
        // Know this name already?
        String name;
        Name n = _symbols.findName(_quadBuffer, quads);
        if (n != null) {
            name = n.getName();
        } else {
            name = _decodeLongUnicodeName(_quadBuffer, byteLen, quads).getName();
        }
        if (_seenNames != null) {
           if (_seenNameCount >= _seenNames.length) {
               _seenNames = _expandSeenNames(_seenNames);
           }
           _seenNames[_seenNameCount++] = name;
        }
        _parsingContext.setCurrentName(name);
    }
    
    /**
     * Helper method for trying to find specified encoded UTF-8 byte sequence
     * from symbol table; if successful avoids actual decoding to String
     */
    private final Name _findDecodedFromSymbols(int len)
    	throws IOException, JsonParseException
    {
        if ((_inputEnd - _inputPtr) < len) {
            _loadToHaveAtLeast(len);
        }
	// First: maybe we already have this name decoded?
	if (len < 5) {
	    int inPtr = _inputPtr;
	    final byte[] inBuf = _inputBuffer;
	    int q = inBuf[inPtr] & 0xFF;
	    if (--len > 0) {
	        q = (q << 8) + (inBuf[++inPtr] & 0xFF);
	        if (--len > 0) {
	            q = (q << 8) + (inBuf[++inPtr] & 0xFF);
	            if (--len > 0) {
	                q = (q << 8) + (inBuf[++inPtr] & 0xFF);
	            }
	        }
	    }
	    _quad1 = q;
	    return _symbols.findName(q);
	}
        if (len < 9) {
            int inPtr = _inputPtr;
            final byte[] inBuf = _inputBuffer;
            // First quadbyte is easy
            int q1 = (inBuf[inPtr] & 0xFF) << 8;
            q1 += (inBuf[++inPtr] & 0xFF);
            q1 <<= 8;
            q1 += (inBuf[++inPtr] & 0xFF);
            q1 <<= 8;
            q1 += (inBuf[++inPtr] & 0xFF);
            int q2 = (inBuf[++inPtr] & 0xFF);
            len -= 5;
            if (len > 0) {
                q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
                if (--len > 0) {
                    q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
                    if (--len > 0) {
                        q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
                    }
                }
            }
            _quad1 = q1;
            _quad2 = q2;
            return _symbols.findName(q1, q2);
        }
        return _findDecodedMedium(len);
    }

    /**
     * Method for locating names longer than 8 bytes (in UTF-8)
     */
    private final Name _findDecodedMedium(int len)
        throws IOException, JsonParseException
    {
    	// first, need enough buffer to store bytes as ints:
        {
            int bufLen = (len + 3) >> 2;
            if (bufLen > _quadBuffer.length) {
                _quadBuffer = _growArrayTo(_quadBuffer, bufLen);
            }
    	}
    	// then decode, full quads first
    	int offset = 0;
    	int inPtr = _inputPtr;
    	final byte[] inBuf = _inputBuffer;
        do {
            int q = (inBuf[inPtr++] & 0xFF) << 8;
            q |= inBuf[inPtr++] & 0xFF;
            q <<= 8;
            q |= inBuf[inPtr++] & 0xFF;
            q <<= 8;
            q |= inBuf[inPtr++] & 0xFF;
            _quadBuffer[offset++] = q;
        } while ((len -= 4) > 3);
        // and then leftovers
        if (len > 0) {
            int q = inBuf[inPtr] & 0xFF;
            if (--len > 0) {
                q = (q << 8) + (inBuf[++inPtr] & 0xFF);
                if (--len > 0) {
                    q = (q << 8) + (inBuf[++inPtr] & 0xFF);
                }
            }
            _quadBuffer[offset++] = q;
        }
        return _symbols.findName(_quadBuffer, offset);
    }
    
    private static int[] _growArrayTo(int[] arr, int minSize)
    {
    	int[] newArray = new int[minSize + 4];
    	if (arr != null) {
            // !!! TODO: JDK 1.6, Arrays.copyOf
            System.arraycopy(arr, 0, newArray, 0, arr.length);
        }
        return newArray;
    }
    
    /*
    /**********************************************************
    /* Internal methods, secondary parsing
    /**********************************************************
     */

    @Override
    protected void _parseNumericValue(int expType)
    	throws IOException, JsonParseException
    {
    	if (_tokenIncomplete) {
    	    int tb = _typeByte;
    	    // ensure we got a numeric type with value that is lazily parsed
            if (((tb >> 5) & 0x7) != 1) {
                _reportError("Current token ("+_currToken+") not numeric, can not use numeric value accessors");
            }
            _tokenIncomplete = false;
            _finishNumberToken(tb);
    	}
    }
    
    /**
     * Method called to finish parsing of a token so that token contents
     * are retriable
     */
    protected void _finishToken()
    	throws IOException, JsonParseException
    {
        _tokenIncomplete = false;
    	int tb = _typeByte;

    	int type = ((tb >> 5) & 0x7);
        if (type == 1) { // simple literals, numbers
            _finishNumberToken(tb);
            return;
        }
        if (type <= 3) { // tiny & short ASCII
            _decodeShortAsciiValue(1 + (tb & 0x3F));
            return;
    	}
        if (type <= 5) { // tiny & short Unicode
             // short unicode; note, lengths 2 - 65  (off-by-one compared to ASCII)
            _decodeShortUnicodeValue(2 + (tb & 0x3F));
            return;
    	}
        if (type == 7) {
            tb &= 0x1F;
            // next 3 bytes define subtype
            switch (tb >> 2) {
            case 0: // long variable length ASCII
            	_decodeLongAscii();
            	return;
            case 1: // long variable length unicode
            	_decodeLongUnicode();
            	return;
            case 2: // binary, 7-bit
                _binaryValue = _read7BitBinaryWithLength();
                return;
            case 7: // binary, raw
                _finishRawBinary();
                return;
            }
        }
        // sanity check
    	_throwInternal();
    }

    protected final void _finishNumberToken(int tb)
        throws IOException, JsonParseException
    {
        tb &= 0x1F;
        int type = (tb >> 2);
        if (type == 1) { // VInt (zigzag) or BigDecimal
            int subtype = tb & 0x03;
            if (subtype == 0) { // (v)int
                _finishInt();
            } else if (subtype == 1) { // (v)long
                _finishLong();
            } else if (subtype == 2) {
                _finishBigInteger();
            } else {
                _throwInternal();
            }
            return;
        }
        if (type == 2) { // other numbers
            switch (tb & 0x03) {
            case 0: // float
                _finishFloat();
                return;
            case 1: // double
                _finishDouble();
                return;
            case 2: // big-decimal
                _finishBigDecimal();
                return;
            }
        }
        _throwInternal();
    }
    
    /*
    /**********************************************************
    /* Internal methods, secondary Number parsing
    /**********************************************************
     */
    
    private final void _finishInt() throws IOException, JsonParseException
    {
    	if (_inputPtr >= _inputEnd) {
    	    loadMoreGuaranteed();
    	}
    	int value = _inputBuffer[_inputPtr++];
    	int i;
    	if (value < 0) { // 6 bits
    		value &= 0x3F;
    	} else {
    	    if (_inputPtr >= _inputEnd) {
    	        loadMoreGuaranteed();
    	    }
    	    i = _inputBuffer[_inputPtr++];
    	    if (i >= 0) { // 13 bits
    	        value = (value << 7) + i;
    	        if (_inputPtr >= _inputEnd) {
    	            loadMoreGuaranteed();
    	        }
    	        i = _inputBuffer[_inputPtr++];
    	        if (i >= 0) {
    	            value = (value << 7) + i;
    	            if (_inputPtr >= _inputEnd) {
    	                loadMoreGuaranteed();
    	            }
    	            i = _inputBuffer[_inputPtr++];
    	            if (i >= 0) {
    	                value = (value << 7) + i;
    	                // and then we must get negative
    	                if (_inputPtr >= _inputEnd) {
    	                    loadMoreGuaranteed();
    	                }
    	                i = _inputBuffer[_inputPtr++];
    	                if (i >= 0) {
    	                    _reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes");
    	                }
    	            }
    	        }
    	    }
    	    value = (value << 6) + (i & 0x3F);
    	}
        _numberInt = SmileUtil.zigzagDecode(value);
    	_numTypesValid = NR_INT;
    }

    private final void  _finishLong()
        throws IOException, JsonParseException
    {
	// Ok, first, will always get 4 full data bytes first; 1 was already passed
	long l = (long) _fourBytesToInt();
    	// and loop for the rest
    	while (true) {
    	    if (_inputPtr >= _inputEnd) {
    	        loadMoreGuaranteed();
    	    }
    	    int value = _inputBuffer[_inputPtr++];
    	    if (value < 0) {
    	        l = (l << 6) + (value & 0x3F);
    	        _numberLong = SmileUtil.zigzagDecode(l);
    	        _numTypesValid = NR_LONG;
    	        return;
    	    }
    	    l = (l << 7) + value;
    	}
    }

    private final void _finishBigInteger()
	throws IOException, JsonParseException
    {
        byte[] raw = _read7BitBinaryWithLength();
        _numberBigInt = new BigInteger(raw);
        _numTypesValid = NR_BIGINT;
    }

    private final void _finishFloat()
        throws IOException, JsonParseException
    {
        // just need 5 bytes to get int32 first; all are unsigned
	int i = _fourBytesToInt();
    	if (_inputPtr >= _inputEnd) {
    		loadMoreGuaranteed();
    	}
    	i = (i << 7) + _inputBuffer[_inputPtr++];
    	float f = Float.intBitsToFloat(i);
	_numberDouble = (double) f;
	_numTypesValid = NR_DOUBLE;
    }

    private final void _finishDouble()
	throws IOException, JsonParseException
    {
        // ok; let's take two sets of 4 bytes (each is int)
	long hi = _fourBytesToInt();
	long value = (hi << 28) + (long) _fourBytesToInt();
	// and then remaining 2 bytes
	if (_inputPtr >= _inputEnd) {
	    loadMoreGuaranteed();
	}
	value = (value << 7) + _inputBuffer[_inputPtr++];
	if (_inputPtr >= _inputEnd) {
	    loadMoreGuaranteed();
	}
	value = (value << 7) + _inputBuffer[_inputPtr++];
	_numberDouble = Double.longBitsToDouble(value);
	_numTypesValid = NR_DOUBLE;
    }

    private final int _fourBytesToInt() 
        throws IOException, JsonParseException
    {
	if (_inputPtr >= _inputEnd) {
		loadMoreGuaranteed();
	}
	int i = _inputBuffer[_inputPtr++]; // first 7 bits
	if (_inputPtr >= _inputEnd) {
		loadMoreGuaranteed();
	}
	i = (i << 7) + _inputBuffer[_inputPtr++]; // 14 bits
	if (_inputPtr >= _inputEnd) {
		loadMoreGuaranteed();
	}
	i = (i << 7) + _inputBuffer[_inputPtr++]; // 21
	if (_inputPtr >= _inputEnd) {
		loadMoreGuaranteed();
	}
	return (i << 7) + _inputBuffer[_inputPtr++];
    }
	
    private final void _finishBigDecimal()
        throws IOException, JsonParseException
    {
        int scale = SmileUtil.zigzagDecode(_readUnsignedVInt());
        byte[] raw = _read7BitBinaryWithLength();
        _numberBigDecimal = new BigDecimal(new BigInteger(raw), scale);
        _numTypesValid = NR_BIGDECIMAL;
    }

    private final int _readUnsignedVInt()
        throws IOException, JsonParseException
    {
        int value = 0;
        while (true) {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            int i = _inputBuffer[_inputPtr++];
            if (i < 0) { // last byte
                value = (value << 6) + (i & 0x3F);
                return value;
            }
            value = (value << 7) + i;
        }
    }

    private final byte[] _read7BitBinaryWithLength()
        throws IOException, JsonParseException
    {
        int byteLen = _readUnsignedVInt();
        byte[] result = new byte[byteLen];
        int ptr = 0;
        int lastOkPtr = byteLen - 7;
        
        // first, read all 7-by-8 byte chunks
        while (ptr <= lastOkPtr) {
            if ((_inputEnd - _inputPtr) < 8) {
                _loadToHaveAtLeast(8);
            }
            int i1 = (_inputBuffer[_inputPtr++] << 25)
                + (_inputBuffer[_inputPtr++] << 18)
                + (_inputBuffer[_inputPtr++] << 11)
                + (_inputBuffer[_inputPtr++] << 4);
            int x = _inputBuffer[_inputPtr++];
            i1 += x >> 3;
            int i2 = ((x & 0x7) << 21)
                + (_inputBuffer[_inputPtr++] << 14)
                + (_inputBuffer[_inputPtr++] << 7)
                + _inputBuffer[_inputPtr++];
            // Ok: got our 7 bytes, just need to split, copy
            result[ptr++] = (byte)(i1 >> 24);
            result[ptr++] = (byte)(i1 >> 16);
            result[ptr++] = (byte)(i1 >> 8);
            result[ptr++] = (byte)i1;
            result[ptr++] = (byte)(i2 >> 16);
            result[ptr++] = (byte)(i2 >> 8);
            result[ptr++] = (byte)i2;
        }
        // and then leftovers: n+1 bytes to decode n bytes
        int toDecode = (result.length - ptr);
        if (toDecode > 0) {
            if ((_inputEnd - _inputPtr) < (toDecode+1)) {
                _loadToHaveAtLeast(toDecode+1);
            }
            int value = _inputBuffer[_inputPtr++];
            for (int i = 1; i < toDecode; ++i) {
                value = (value << 7) + _inputBuffer[_inputPtr++];
                result[ptr++] = (byte) (value >> (7 - i));
            }
            // last byte is different, has remaining 1 - 6 bits, right-aligned
            value <<= toDecode;
            result[ptr] = (byte) (value + _inputBuffer[_inputPtr++]);
        }
        return result;
    }
    
    /*
    /**********************************************************
    /* Internal methods, secondary String parsing
    /**********************************************************
     */

    protected final void _decodeShortAsciiValue(int len)
        throws IOException, JsonParseException
    {
        if ((_inputEnd - _inputPtr) < len) {
            _loadToHaveAtLeast(len);
        }
        // Note: we count on fact that buffer must have at least 'len' (<= 64) empty char slots
	final char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
        int outPtr = 0;
        final byte[] inBuf = _inputBuffer;
	int inPtr = _inputPtr;
	
        // loop unrolling SHOULD be faster (as with _decodeShortAsciiName), but somehow
	// is NOT; as per testing, benchmarking... very weird.
	/*
        for (int inEnd = inPtr + len - 3; inPtr < inEnd; ) {
            outBuf[outPtr++] = (char) inBuf[inPtr++];            
            outBuf[outPtr++] = (char) inBuf[inPtr++];            
            outBuf[outPtr++] = (char) inBuf[inPtr++];            
            outBuf[outPtr++] = (char) inBuf[inPtr++];            
        }
        int left = (len & 3);
        if (left > 0) {
            outBuf[outPtr++] = (char) inBuf[inPtr++];
            if (left > 1) {
                outBuf[outPtr++] = (char) inBuf[inPtr++];
                if (left > 2) {
                    outBuf[outPtr++] = (char) inBuf[inPtr++];
                }
            }
        }
        */

	// meaning: regular tight loop is no slower, typically faster here:
	for (final int end = inPtr + len; inPtr < end; ++inPtr) {
            outBuf[outPtr++] = (char) inBuf[inPtr];            
        }
	
        _inputPtr = inPtr;
	_textBuffer.setCurrentLength(len);
    }

    protected final void _decodeShortUnicodeValue(int len)
        throws IOException, JsonParseException
    {
        if ((_inputEnd - _inputPtr) < len) {
	    _loadToHaveAtLeast(len);
	}
        int outPtr = 0;
        char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
        int inPtr = _inputPtr;
        _inputPtr += len;
        final int[] codes = SmileConstants.sUtf8UnitLengths;
        final byte[] inputBuf = _inputBuffer;
        for (int end = inPtr + len; inPtr < end; ) {
            int i = inputBuf[inPtr++] & 0xFF;
            int code = codes[i];
            if (code != 0) {
                // trickiest one, need surrogate handling
                switch (code) {
                case 1:
                    i = ((i & 0x1F) << 6) | (inputBuf[inPtr++] & 0x3F);
                    break;
	        case 2:
	            i = ((i & 0x0F) << 12)
	                  | ((inputBuf[inPtr++] & 0x3F) << 6)
	                  | (inputBuf[inPtr++] & 0x3F);
	            break;
	        case 3:
	            i = ((i & 0x07) << 18)
	                | ((inputBuf[inPtr++] & 0x3F) << 12)
	                | ((inputBuf[inPtr++] & 0x3F) << 6)
	                | (inputBuf[inPtr++] & 0x3F);
	            // note: this is the codepoint value; need to split, too
	            i -= 0x10000;
	            outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
	            i = 0xDC00 | (i & 0x3FF);
	            break;
	        default: // invalid
	            _reportError("Invalid byte "+Integer.toHexString(i)+" in short Unicode text block");
                }
	    }
	    outBuf[outPtr++] = (char) i;
        }        
        _textBuffer.setCurrentLength(outPtr);
    }

    private final void _decodeLongAscii()
        throws IOException, JsonParseException
    {
        int outPtr = 0;
        char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
        main_loop:
        while (true) {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            int inPtr = _inputPtr;
            int left = _inputEnd - inPtr;
            if (outPtr >= outBuf.length) {
                outBuf = _textBuffer.finishCurrentSegment();
                outPtr = 0;
            }
            left = Math.min(left, outBuf.length - outPtr);
            do {
                byte b = _inputBuffer[inPtr++];
                if (b == SmileConstants.BYTE_MARKER_END_OF_STRING) {
                    _inputPtr = inPtr;
                    break main_loop;
                }
                outBuf[outPtr++] = (char) b;	    		
            } while (--left > 0);
            _inputPtr = inPtr;
        }
        _textBuffer.setCurrentLength(outPtr);
    }

    private final void _decodeLongUnicode()
        throws IOException, JsonParseException
    {
	int outPtr = 0;
	char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
	final int[] codes = SmileConstants.sUtf8UnitLengths;
        int c;
        final byte[] inputBuffer = _inputBuffer;

        main_loop:
        while (true) {
            // First the tight ASCII loop:
            ascii_loop:
            while (true) {
                int ptr = _inputPtr;
                if (ptr >= _inputEnd) {
                    loadMoreGuaranteed();
                    ptr = _inputPtr;
                }
                if (outPtr >= outBuf.length) {
                    outBuf = _textBuffer.finishCurrentSegment();
                    outPtr = 0;
                }
                int max = _inputEnd;
                {
                    int max2 = ptr + (outBuf.length - outPtr);
                    if (max2 < max) {
                        max = max2;
                    }
                }
                while (ptr < max) {
                    c = (int) inputBuffer[ptr++] & 0xFF;
                    if (codes[c] != 0) {
                        _inputPtr = ptr;
                        break ascii_loop;
                    }
                    outBuf[outPtr++] = (char) c;
                }
                _inputPtr = ptr;
            }
            // Ok: end marker, escape or multi-byte?
            if (c == SmileConstants.INT_MARKER_END_OF_STRING) {
                break main_loop;
            }

            switch (codes[c]) {
            case 1: // 2-byte UTF
                c = _decodeUtf8_2(c);
                break;
            case 2: // 3-byte UTF
                if ((_inputEnd - _inputPtr) >= 2) {
                    c = _decodeUtf8_3fast(c);
                } else {
                    c = _decodeUtf8_3(c);
                }
                break;
            case 4: // 4-byte UTF
                c = _decodeUtf8_4(c);
                // Let's add first part right away:
                outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
                if (outPtr >= outBuf.length) {
                    outBuf = _textBuffer.finishCurrentSegment();
                    outPtr = 0;
                }
                c = 0xDC00 | (c & 0x3FF);
                // And let the other char output down below
                break;
            default:
                // Is this good enough error message?
                _reportInvalidChar(c);
            }
            // Need more room?
            if (outPtr >= outBuf.length) {
                outBuf = _textBuffer.finishCurrentSegment();
                outPtr = 0;
            }
            // Ok, let's add char to output:
            outBuf[outPtr++] = (char) c;
        }
        _textBuffer.setCurrentLength(outPtr);
    }

    private final void _finishRawBinary()
        throws IOException, JsonParseException
    {
        int byteLen = _readUnsignedVInt();
        _binaryValue = new byte[byteLen];
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        int ptr = 0;
        while (true) {
            int toAdd = Math.min(byteLen, _inputEnd - _inputPtr);
            System.arraycopy(_inputBuffer, _inputPtr, _binaryValue, ptr, toAdd);
            _inputPtr += toAdd;
            ptr += toAdd;
            byteLen -= toAdd;
            if (byteLen <= 0) {
                return;
            }
            loadMoreGuaranteed();
        }
    }

    /*
    /**********************************************************
    /* Internal methods, skipping
    /**********************************************************
     */

    /**
     * Method called to skip remainders of an incomplete token, when
     * contents themselves will not be needed any more
     */
    protected void _skipIncomplete() throws IOException, JsonParseException
    {
    	_tokenIncomplete = false;
    	int tb = _typeByte;
        switch ((tb >> 5) & 0x7) {
        case 1: // simple literals, numbers
            tb &= 0x1F;
            // next 3 bytes define subtype
            switch (tb >> 2) {
            case 1: // VInt (zigzag)
                // easy, just skip until we see sign bit... (should we try to limit damage?)
                switch (tb & 0x3) {
                case 1: // vlong
                        _skipBytes(4); // min 5 bytes
                        // fall through
                case 0: // vint
                    while (true) {
                        final int end = _inputEnd;
                        final byte[] buf = _inputBuffer;
                        while (_inputPtr < end) {
                                if (buf[_inputPtr++] < 0) {
                                        return;
                                }
                        }
                        loadMoreGuaranteed();                           
                    }
                case 2: // big-int
                    // just has binary data
                    _skip7BitBinary();
                    return;
                }
                break;
            case 2: // other numbers
                switch (tb & 0x3) {
                case 0: // float
                    _skipBytes(5);
                    return;
                case 1: // double
                    _skipBytes(10);
                    return;
                case 2: // big-decimal
                    // first, skip scale
                    _readUnsignedVInt();
                    // then length-prefixed binary serialization
                    _skip7BitBinary();
                    return;
                }
                break;
            }
            break;
        case 2: // tiny ASCII
            // fall through
        case 3: // short ASCII
            _skipBytes(1 + (tb & 0x3F));
            return;
        case 4: // tiny unicode
            // fall through
        case 5: // short unicode
            _skipBytes(2 + (tb & 0x3F));
            return;
        case 7:
            tb &= 0x1F;
            // next 3 bytes define subtype
            switch (tb >> 2) {
            case 0: // long variable length ASCII
            case 1: // long variable length unicode
            	/* Doesn't matter which one, just need to find the end marker
            	 * (note: can potentially skip invalid UTF-8 too)
            	 */
            	while (true) {
            	    final int end = _inputEnd;
            	    final byte[] buf = _inputBuffer;
            	    while (_inputPtr < end) {
            	        if (buf[_inputPtr++] == BYTE_MARKER_END_OF_STRING) {
            	            return;
            	        }
            	    }
            	    loadMoreGuaranteed();
            	}
            	// never gets here
            case 2: // binary, 7-bit
                _skip7BitBinary();
                return;
            case 7: // binary, raw
                _skipBytes(_readUnsignedVInt());
                return;
            }
        }
    	_throwInternal();
    }

    protected void _skipBytes(int len)
        throws IOException, JsonParseException
    {
        while (true) {
            int toAdd = Math.min(len, _inputEnd - _inputPtr);
            _inputPtr += toAdd;
            len -= toAdd;
            if (len <= 0) {
                return;
            }
            loadMoreGuaranteed();
        }
    }

    /**
     * Helper method for skipping length-prefixed binary data
     * section
     */
    protected void _skip7BitBinary()
        throws IOException, JsonParseException
    {
        int origBytes = _readUnsignedVInt();
        // Ok; 8 encoded bytes for 7 payload bytes first
        int chunks = origBytes / 7;
        int encBytes = chunks * 8;
        // and for last 0 - 6 bytes, last+1 (except none if no leftovers)
        origBytes -= 7 * chunks;
        if (origBytes > 0) {
            encBytes += 1 + origBytes;
        }
        _skipBytes(encBytes);
    }
    
    /*
    /**********************************************************
    /* Internal methods, UTF8 decoding
    /**********************************************************
     */

    private final int _decodeUtf8_2(int c)
        throws IOException, JsonParseException
    {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        int d = (int) _inputBuffer[_inputPtr++];
        if ((d & 0xC0) != 0x080) {
            _reportInvalidOther(d & 0xFF, _inputPtr);
        }
        return ((c & 0x1F) << 6) | (d & 0x3F);
    }

    private final int _decodeUtf8_3(int c1)
        throws IOException, JsonParseException
    {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        c1 &= 0x0F;
        int d = (int) _inputBuffer[_inputPtr++];
        if ((d & 0xC0) != 0x080) {
            _reportInvalidOther(d & 0xFF, _inputPtr);
        }
        int c = (c1 << 6) | (d & 0x3F);
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        d = (int) _inputBuffer[_inputPtr++];
        if ((d & 0xC0) != 0x080) {
            _reportInvalidOther(d & 0xFF, _inputPtr);
        }
        c = (c << 6) | (d & 0x3F);
        return c;
    }

    private final int _decodeUtf8_3fast(int c1)
        throws IOException, JsonParseException
    {
        c1 &= 0x0F;
        int d = (int) _inputBuffer[_inputPtr++];
        if ((d & 0xC0) != 0x080) {
            _reportInvalidOther(d & 0xFF, _inputPtr);
        }
        int c = (c1 << 6) | (d & 0x3F);
        d = (int) _inputBuffer[_inputPtr++];
        if ((d & 0xC0) != 0x080) {
            _reportInvalidOther(d & 0xFF, _inputPtr);
        }
        c = (c << 6) | (d & 0x3F);
        return c;
    }

    /**
     * @return Character value minus 0x10000; this so that caller
     *    can readily expand it to actual surrogates
     */
    private final int _decodeUtf8_4(int c)
        throws IOException, JsonParseException
    {
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        int d = (int) _inputBuffer[_inputPtr++];
        if ((d & 0xC0) != 0x080) {
            _reportInvalidOther(d & 0xFF, _inputPtr);
        }
        c = ((c & 0x07) << 6) | (d & 0x3F);

        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        d = (int) _inputBuffer[_inputPtr++];
        if ((d & 0xC0) != 0x080) {
            _reportInvalidOther(d & 0xFF, _inputPtr);
        }
        c = (c << 6) | (d & 0x3F);
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        d = (int) _inputBuffer[_inputPtr++];
        if ((d & 0xC0) != 0x080) {
            _reportInvalidOther(d & 0xFF, _inputPtr);
        }

        /* note: won't change it to negative here, since caller
         * already knows it'll need a surrogate
         */
        return ((c << 6) | (d & 0x3F)) - 0x10000;
    }
    
    /*
    /**********************************************************
    /* Internal methods, error reporting
    /**********************************************************
     */

    protected void _reportInvalidSharedName(int index) throws IOException
    {
        if (_seenNames == null) {
            _reportError("Encountered shared name reference, even though document header explicitly declared no shared name references are included");
        }
       _reportError("Invalid shared name reference "+index+"; only got "+_seenNameCount+" names in buffer (invalid content)");
    }

    protected void _reportInvalidSharedStringValue(int index) throws IOException
    {
        if (_seenStringValues == null) {
            _reportError("Encountered shared text value reference, even though document header did not declared shared text value references may be included");
        }
       _reportError("Invalid shared text value reference "+index+"; only got "+_seenStringValueCount+" names in buffer (invalid content)");
    }
    
    protected void _reportInvalidChar(int c) throws JsonParseException
    {
        // Either invalid WS or illegal UTF-8 start char
        if (c < ' ') {
            _throwInvalidSpace(c);
        }
        _reportInvalidInitial(c);
    }
	
    protected void _reportInvalidInitial(int mask)
        throws JsonParseException
    {
        _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask));
    }
	
    protected void _reportInvalidOther(int mask)
        throws JsonParseException
    {
        _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask));
    }
	
    protected void _reportInvalidOther(int mask, int ptr)
        throws JsonParseException
    {
        _inputPtr = ptr;
        _reportInvalidOther(mask);
    }
}
    




© 2015 - 2024 Weber Informatics LLC | Privacy Policy