All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fasterxml.jackson.dataformat.smile.SmileParserBase Maven / Gradle / Ivy

Go to download

Support for reading and writing Smile ("binary JSON") encoded data using Jackson abstractions (streaming API, data binding, tree model)

There is a newer version: 2.18.1
Show newest version
package com.fasterxml.jackson.dataformat.smile;

import java.io.IOException;
import java.lang.ref.SoftReference;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Arrays;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.base.ParserMinimalBase;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.io.ContentReference;
import com.fasterxml.jackson.core.io.NumberInput;
import com.fasterxml.jackson.core.json.DupDetector;
import com.fasterxml.jackson.core.json.JsonReadContext;
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
import com.fasterxml.jackson.core.util.JacksonFeatureSet;
import com.fasterxml.jackson.core.util.TextBuffer;

/**
 * @since 2.9
 */
public abstract class SmileParserBase extends ParserMinimalBase
{
    protected final static String[] NO_STRINGS = new String[0];

    // 2.12.3: [dataformats-binary#260] Avoid OOME/DoS for bigger binary;
    //  read only up to 250k
    protected final static int LONGEST_NON_CHUNKED_BINARY = 250_000;

    // @since 2.14
    protected final static JacksonFeatureSet SMILE_READ_CAPABILITIES
        = DEFAULT_READ_CAPABILITIES.with(StreamReadCapability.EXACT_FLOATS);

    /*
    /**********************************************************
    /* Config
    /**********************************************************
     */

    /**
     * Bit flag composed of bits that indicate which
     * {@link SmileParser.Feature}s are enabled.
     *

* NOTE: currently the only feature ({@link SmileParser.Feature#REQUIRE_HEADER} * takes effect during bootstrapping. */ protected int _formatFeatures; /** * Flag that indicates whether content can legally have raw (unquoted) * binary data. Since this information is included both in header and * in actual binary data blocks there is redundancy, and we want to * ensure settings are compliant. Using application may also want to * know this setting in case it does some direct (random) access. */ protected boolean _mayContainRawBinary; /* /********************************************************** /* Generic I/O state /********************************************************** */ /** * I/O context for this reader. It handles buffer allocation * for the reader. */ protected final IOContext _ioContext; /** * Flag that indicates whether parser is closed or not. Gets * set when parser is either closed by explicit call * ({@link #close}) or when end-of-input is reached. */ protected boolean _closed; /* /********************************************************** /* Current input data /********************************************************** */ // Note: type of actual buffer depends on sub-class, can't include /** * Pointer to next available character in buffer */ protected int _inputPtr = 0; /** * Index of character after last available one in the buffer. */ protected int _inputEnd = 0; /* /********************************************************** /* Parsing state, location /********************************************************** */ /** * Number of characters/bytes that were contained in previous blocks * (blocks that were already processed prior to the current buffer). */ protected long _currInputProcessed; /** * Alternative to {@code _tokenInputTotal} that will only contain * offset within input buffer, as int. */ protected int _tokenOffsetForTotal; /** * Information about parser context, context in which * the next token is to be parsed (root, array, object). *

* NOTE: before 2.13 was "_parsingContext" */ protected JsonReadContext _streamReadContext; /* /********************************************************** /* Decoded values, text, binary /********************************************************** */ /** * Buffer that contains contents of String values, including * field names if necessary (name split across boundary, * contains escape sequence, or access needed to char array) */ protected final TextBuffer _textBuffer; /** * Temporary buffer that is needed if field name is accessed * using {@link #getTextCharacters} method (instead of String * returning alternatives) */ protected char[] _nameCopyBuffer; /** * Flag set to indicate whether the field name is available * from the name copy buffer or not (in addition to its String * representation being available via read context) */ protected boolean _nameCopied; /** * We will hold on to decoded binary data, for duration of * current event, so that multiple calls to * {@link #getBinaryValue} will not need to decode data more * than once. */ protected byte[] _binaryValue; /* /********************************************************** /* Decoded values, numbers /********************************************************** */ protected NumberType _numberType; /** * Bitfield that indicates which numeric representations * have been calculated for the current type */ protected int _numTypesValid = NR_UNKNOWN; protected BigInteger _numberBigInt; protected BigDecimal _numberBigDecimal; protected int _numberInt; protected float _numberFloat; protected long _numberLong; protected double _numberDouble; /* /********************************************************** /* Symbol handling, decoding /********************************************************** */ /** * Symbol table that contains field names encountered so far */ protected final ByteQuadsCanonicalizer _symbols; /** * Temporary buffer used for name parsing. */ protected int[] _quadBuffer = NO_INTS; /** * Quads used for hash calculation */ protected int _quad1, _quad2, _quad3; /** * Array of recently seen field names, which may be back referenced * by later fields. * Defaults set to enable handling even if no header found. */ protected String[] _seenNames = NO_STRINGS; protected int _seenNameCount = 0; /** * Array of recently seen field names, which may be back referenced * by later fields * Defaults set to disable handling if no header found. */ protected String[] _seenStringValues = null; protected int _seenStringValueCount = -1; /** * Marker flag to indicate that standard symbol handling is used * (one with symbol table assisted canonicalization. May be disabled * in which case alternate stream-line, non-canonicalizing handling * is used: usually due to set of symbols * (Object property names) is unbounded and will not benefit from * canonicalization attempts. * * @since 2.13 */ protected final boolean _symbolsCanonical; /* /********************************************************** /* Thread-local recycling /********************************************************** */ /** * ThreadLocal contains a {@link java.lang.ref.SoftReference} * to a buffer recycler used to provide a low-cost * buffer recycling for Smile-specific buffers. */ protected final static ThreadLocal>> _smileRecyclerRef = new ThreadLocal>>(); /** * Helper object used for low-level recycling of Smile-generator * specific buffers. */ protected final SmileBufferRecycler _smileBufferRecycler; /* /********************************************************** /* Life-cycle /********************************************************** */ public SmileParserBase(IOContext ctxt, int parserFeatures, int formatFeatures, ByteQuadsCanonicalizer sym) { super(parserFeatures); _formatFeatures = formatFeatures; _ioContext = ctxt; _symbols = sym; _symbolsCanonical = sym.isCanonicalizing(); DupDetector dups = Feature.STRICT_DUPLICATE_DETECTION.enabledIn(parserFeatures) ? DupDetector.rootDetector(this) : null; _streamReadContext = JsonReadContext.createRootContext(dups); _textBuffer = ctxt.constructReadConstrainedTextBuffer(); _smileBufferRecycler = _smileBufferRecycler(); } @Override public StreamReadConstraints streamReadConstraints() { return _ioContext.streamReadConstraints(); } protected final static SmileBufferRecycler _smileBufferRecycler() { SoftReference> ref = _smileRecyclerRef.get(); SmileBufferRecycler br = (ref == null) ? null : ref.get(); if (br == null) { br = new SmileBufferRecycler(); _smileRecyclerRef.set(new SoftReference>(br)); } return br; } /* /********************************************************** /* Versioned /********************************************************** */ @Override public final Version version() { return PackageVersion.VERSION; } /* /********************************************************************** /* Extended API /********************************************************************** */ public final boolean mayContainRawBinary() { return _mayContainRawBinary; } /* /********************************************************** /* FormatFeature support /********************************************************** */ @Override public final int getFormatFeatures() { return _formatFeatures; } @Override public final JsonParser overrideFormatFeatures(int values, int mask) { _formatFeatures = (_formatFeatures & ~mask) | (values & mask); return this; } @Override // since 2.12 public JacksonFeatureSet getReadCapabilities() { return SMILE_READ_CAPABILITIES; } /* /********************************************************** /* Abstract methods for sub-classes to provide /********************************************************** */ protected abstract void _closeInput() throws IOException; protected abstract void _parseNumericValue() throws IOException; // public abstract int releaseBuffered(OutputStream out) throws IOException; // public abstract Object getInputSource(); /* /********************************************************** /* Abstract impls /********************************************************** */ /** * Overridden since we do not really have character-based locations, * but we do have byte offset to specify. */ @Override public final JsonLocation getTokenLocation() { // token location is correctly managed... long total = _currInputProcessed + _tokenOffsetForTotal; // 2.4: used to be: _tokenInputTotal return new JsonLocation(_ioContext.contentReference(), total, // bytes -1, -1, (int) total); // char offset, line, column } /** * Overridden since we do not really have character-based locations, * but we do have byte offset to specify. */ @Override public final JsonLocation getCurrentLocation() { final long offset = _currInputProcessed + _inputPtr; return new JsonLocation(_ioContext.contentReference(), offset, // bytes -1, -1, (int) offset); // char offset, line, column } /** * Method that can be called to get the name associated with * the current event. */ @Override public final String getCurrentName() throws IOException { if (_currToken == JsonToken.START_OBJECT || _currToken == JsonToken.START_ARRAY) { return _streamReadContext.getParent().getCurrentName(); } return _streamReadContext.getCurrentName(); } @Override public final void overrideCurrentName(String name) { // Simple, but need to look for START_OBJECT/ARRAY's "off-by-one" thing: JsonReadContext ctxt = _streamReadContext; if (_currToken == JsonToken.START_OBJECT || _currToken == JsonToken.START_ARRAY) { ctxt = ctxt.getParent(); } // Unfortunate, but since we did not expose exceptions, need to wrap try { ctxt.setCurrentName(name); } catch (IOException e) { throw new IllegalStateException(e); } } @Override public final void close() throws IOException { if (!_closed) { _closed = true; _inputEnd = 0; _symbols.release(); try { _closeInput(); } finally { // Also, internal buffer(s) can now be released as well _releaseBuffers(); } } } protected final void _releaseBuffers() throws IOException { _textBuffer.releaseBuffers(); char[] buf = _nameCopyBuffer; if (buf != null) { _nameCopyBuffer = null; _ioContext.releaseNameCopyBuffer(buf); } String[] nameBuf = _seenNames; if (nameBuf != null && nameBuf.length > 0) { _seenNames = null; // 28-Jun-2011, tatu: With 1.9, caller needs to clear the buffer; // but we only need to clear up to count as it is not a hash area if (_seenNameCount > 0) { Arrays.fill(nameBuf, 0, _seenNameCount, null); } _smileBufferRecycler.releaseSeenNamesBuffer(nameBuf); } String[] valueBuf = _seenStringValues; if (valueBuf != null && valueBuf.length > 0) { _seenStringValues = null; // 28-Jun-2011, tatu: With 1.9, caller needs to clear the buffer; // but we only need to clear up to count as it is not a hash area if (_seenStringValueCount > 0) { Arrays.fill(valueBuf, 0, _seenStringValueCount, null); } _smileBufferRecycler.releaseSeenStringValuesBuffer(valueBuf); } _releaseBuffers2(); } protected abstract void _releaseBuffers2(); @Override public final boolean isClosed() { return _closed; } @Override public final JsonReadContext getParsingContext() { return _streamReadContext; } /* /********************************************************** /* Numeric accessors of public API /********************************************************** */ @Override // since 2.9 public final boolean isNaN() throws IOException { if (_currToken == JsonToken.VALUE_NUMBER_FLOAT) { if (_numTypesValid == NR_UNKNOWN) { _parseNumericValue(); // will also check event type } if (_numberType == NumberType.DOUBLE) { // 10-Mar-2017, tatu: Alas, `Double.isFinite(d)` only added in JDK 8 double d = _numberDouble; return Double.isNaN(d) || Double.isInfinite(d); } if (_numberType == NumberType.FLOAT) { float f = _numberFloat; return Float.isNaN(f) || Float.isInfinite(f); } } return false; } @Override public final Number getNumberValue() throws IOException { if (_numTypesValid == NR_UNKNOWN) { _parseNumericValue(); // will also check event type } switch (_numberType) { case INT: return _numberInt; case LONG: return _numberLong; case BIG_INTEGER: return _numberBigInt; case FLOAT: return _numberFloat; case DOUBLE: return _numberDouble; case BIG_DECIMAL: default: return _numberBigDecimal; } } @Override // @since 2.12 -- for (most?) binary formats exactness guaranteed anyway public final Number getNumberValueExact() throws IOException { return getNumberValue(); } @Override public final NumberType getNumberType() throws IOException { if (_numTypesValid == NR_UNKNOWN) { _parseNumericValue(); // will also check event type } return _numberType; } @Override public final int getIntValue() throws IOException { if ((_numTypesValid & NR_INT) == 0) { if (_numTypesValid == NR_UNKNOWN) { _parseNumericValue(); // will also check event type } if ((_numTypesValid & NR_INT) == 0) { // wasn't an int natively? convertNumberToInt(); // let's make it so, if possible } } return _numberInt; } @Override public final long getLongValue() throws IOException { if ((_numTypesValid & NR_LONG) == 0) { if (_numTypesValid == NR_UNKNOWN) { _parseNumericValue(); // will also check event type } if ((_numTypesValid & NR_LONG) == 0) { convertNumberToLong(); } } return _numberLong; } @Override public final BigInteger getBigIntegerValue() throws IOException { if ((_numTypesValid & NR_BIGINT) == 0) { if (_numTypesValid == NR_UNKNOWN) { _parseNumericValue(); // will also check event type } if ((_numTypesValid & NR_BIGINT) == 0) { convertNumberToBigInteger(); } } return _numberBigInt; } @Override public final float getFloatValue() throws IOException { if ((_numTypesValid & NR_FLOAT) == 0) { if (_numTypesValid == NR_UNKNOWN) { _parseNumericValue(); // will also check event type } if ((_numTypesValid & NR_FLOAT) == 0) { convertNumberToFloat(); } } // Bounds/range checks would be tricky here, so let's not bother even trying... /* if (value < -Float.MAX_VALUE || value > MAX_FLOAT_D) { _reportError("Numeric value (%s) out of range of Java float", getText()); } */ return _numberFloat; } @Override public final double getDoubleValue() throws IOException { if ((_numTypesValid & NR_DOUBLE) == 0) { if (_numTypesValid == NR_UNKNOWN) { _parseNumericValue(); // will also check event type } if ((_numTypesValid & NR_DOUBLE) == 0) { convertNumberToDouble(); } } return _numberDouble; } @Override public final BigDecimal getDecimalValue() throws IOException { if ((_numTypesValid & NR_BIGDECIMAL) == 0) { if (_numTypesValid == NR_UNKNOWN) { _parseNumericValue(); // will also check event type } if ((_numTypesValid & NR_BIGDECIMAL) == 0) { convertNumberToBigDecimal(); } } return _numberBigDecimal; } /* /********************************************************** /* Numeric conversions /********************************************************** */ protected final void convertNumberToInt() throws IOException { // First, converting from long ought to be easy if ((_numTypesValid & NR_LONG) != 0) { // Let's verify it's lossless conversion by simple roundtrip int result = (int) _numberLong; if (((long) result) != _numberLong) { _reportError("Numeric value (%s) out of range of int", getText()); } _numberInt = result; } else if ((_numTypesValid & NR_BIGINT) != 0) { if (BI_MIN_INT.compareTo(_numberBigInt) > 0 || BI_MAX_INT.compareTo(_numberBigInt) < 0) { reportOverflowInt(); } _numberInt = _numberBigInt.intValue(); } else if ((_numTypesValid & NR_DOUBLE) != 0) { // Need to check boundaries if (_numberDouble < MIN_INT_D || _numberDouble > MAX_INT_D) { reportOverflowInt(); } _numberInt = (int) _numberDouble; } else if ((_numTypesValid & NR_FLOAT) != 0) { if (_numberFloat < MIN_INT_D || _numberFloat > MAX_INT_D) { reportOverflowInt(); } _numberInt = (int) _numberFloat; } else if ((_numTypesValid & NR_BIGDECIMAL) != 0) { if (BD_MIN_INT.compareTo(_numberBigDecimal) > 0 || BD_MAX_INT.compareTo(_numberBigDecimal) < 0) { reportOverflowInt(); } _numberInt = _numberBigDecimal.intValue(); } else { _throwInternal(); } _numTypesValid |= NR_INT; } protected final void convertNumberToLong() throws IOException { int v = _numTypesValid; if ((v & NR_INT) != 0) { _numberLong = (long) _numberInt; } else if ((v & NR_BIGINT) != 0) { if (BI_MIN_LONG.compareTo(_numberBigInt) > 0 || BI_MAX_LONG.compareTo(_numberBigInt) < 0) { reportOverflowLong(); } _numberLong = _numberBigInt.longValue(); } else if ((v & NR_DOUBLE) != 0) { if (_numberDouble < MIN_LONG_D || _numberDouble > MAX_LONG_D) { reportOverflowLong(); } _numberLong = (long) _numberDouble; } else if ((v & NR_FLOAT) != 0) { if (_numberFloat < MIN_LONG_D || _numberFloat > MAX_LONG_D) { reportOverflowInt(); } _numberLong = (long) _numberFloat; } else if ((v & NR_BIGDECIMAL) != 0) { if (BD_MIN_LONG.compareTo(_numberBigDecimal) > 0 || BD_MAX_LONG.compareTo(_numberBigDecimal) < 0) { reportOverflowLong(); } _numberLong = _numberBigDecimal.longValue(); } else { _throwInternal(); } _numTypesValid |= NR_LONG; } protected final void convertNumberToBigInteger() throws IOException { if ((_numTypesValid & NR_BIGDECIMAL) != 0) { // here it'll just get truncated, no exceptions thrown streamReadConstraints().validateBigIntegerScale(_numberBigDecimal.scale()); _numberBigInt = _numberBigDecimal.toBigInteger(); } else if ((_numTypesValid & NR_LONG) != 0) { _numberBigInt = BigInteger.valueOf(_numberLong); } else if ((_numTypesValid & NR_INT) != 0) { _numberBigInt = BigInteger.valueOf(_numberInt); } else if ((_numTypesValid & NR_DOUBLE) != 0) { _numberBigInt = BigDecimal.valueOf(_numberDouble).toBigInteger(); } else if ((_numTypesValid & NR_FLOAT) != 0) { _numberBigInt = BigDecimal.valueOf(_numberFloat).toBigInteger(); } else { _throwInternal(); } _numTypesValid |= NR_BIGINT; } protected final void convertNumberToFloat() throws IOException { // Note: this MUST start with more accurate representations, since we don't know which // value is the original one (others get generated when requested) if ((_numTypesValid & NR_BIGDECIMAL) != 0) { _numberFloat = _numberBigDecimal.floatValue(); } else if ((_numTypesValid & NR_BIGINT) != 0) { _numberFloat = _numberBigInt.floatValue(); } else if ((_numTypesValid & NR_DOUBLE) != 0) { _numberFloat = (float) _numberDouble; } else if ((_numTypesValid & NR_LONG) != 0) { _numberFloat = (float) _numberLong; } else if ((_numTypesValid & NR_INT) != 0) { _numberFloat = (float) _numberInt; } else { _throwInternal(); } _numTypesValid |= NR_FLOAT; } protected final void convertNumberToDouble() throws IOException { // Note: this MUST start with more accurate representations, since we don't know which // value is the original one (others get generated when requested) if ((_numTypesValid & NR_BIGDECIMAL) != 0) { _numberDouble = _numberBigDecimal.doubleValue(); } else if ((_numTypesValid & NR_FLOAT) != 0) { _numberDouble = (double) _numberFloat; } else if ((_numTypesValid & NR_BIGINT) != 0) { _numberDouble = _numberBigInt.doubleValue(); } else if ((_numTypesValid & NR_LONG) != 0) { _numberDouble = (double) _numberLong; } else if ((_numTypesValid & NR_INT) != 0) { _numberDouble = (double) _numberInt; } else { _throwInternal(); } _numTypesValid |= NR_DOUBLE; } protected final void convertNumberToBigDecimal() throws IOException { // Note: this MUST start with more accurate representations, since we don't know which // value is the original one (others get generated when requested) if ((_numTypesValid & (NR_DOUBLE | NR_FLOAT)) != 0) { // Let's parse from String representation, to avoid rounding errors that //non-decimal floating operations would incur final String text = getText(); streamReadConstraints().validateFPLength(text.length()); _numberBigDecimal = NumberInput.parseBigDecimal( text, isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER)); } else if ((_numTypesValid & NR_BIGINT) != 0) { _numberBigDecimal = new BigDecimal(_numberBigInt); } else if ((_numTypesValid & NR_LONG) != 0) { _numberBigDecimal = BigDecimal.valueOf(_numberLong); } else if ((_numTypesValid & NR_INT) != 0) { _numberBigDecimal = BigDecimal.valueOf(_numberInt); } else { _throwInternal(); } _numTypesValid |= NR_BIGDECIMAL; } /* /********************************************************** /* Internal/package methods: other /********************************************************** */ /** * Method called when an EOF is encountered between tokens. * If so, it may be a legitimate EOF, but only iff there * is no open non-root context. */ @Override protected void _handleEOF() throws JsonParseException { if (!_streamReadContext.inRoot()) { String marker = _streamReadContext.inArray() ? "Array" : "Object"; _reportInvalidEOF(String.format( ": expected close marker for %s (start marker at %s)", marker, _streamReadContext.startLocation(_sourceReference())), null); } } protected void _reportMismatchedEndMarker(int actCh, char expCh) throws JsonParseException { JsonReadContext ctxt = getParsingContext(); _reportError(String.format( "Unexpected close marker '%s': expected '%c' (for %s starting at %s)", (char) actCh, expCh, ctxt.typeDesc(), ctxt.startLocation(_sourceReference()))); } /** * Helper method used to encapsulate logic of including (or not) of * "source reference" when constructing {@link JsonLocation} instances. * * @since 2.13 */ protected ContentReference _sourceReference() { if (isEnabled(StreamReadFeature.INCLUDE_SOURCE_IN_LOCATION)) { return _ioContext.contentReference(); } return ContentReference.unknown(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy