com.fasterxml.jackson.dataformat.smile.SmileParserBase Maven / Gradle / Ivy
Show all versions of jackson-dataformat-smile Show documentation
package com.fasterxml.jackson.dataformat.smile;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.base.ParserMinimalBase;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.io.ContentReference;
import com.fasterxml.jackson.core.json.DupDetector;
import com.fasterxml.jackson.core.json.JsonReadContext;
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
import com.fasterxml.jackson.core.util.JacksonFeatureSet;
import com.fasterxml.jackson.core.util.TextBuffer;
/**
* @since 2.9
*/
public abstract class SmileParserBase extends ParserMinimalBase
{
protected final static String[] NO_STRINGS = new String[0];
// 2.12.3: [dataformats-binary#260] Avoid OOME/DoS for bigger binary;
// read only up to 250k
protected final static int LONGEST_NON_CHUNKED_BINARY = 250_000;
// @since 2.16
protected final static int DEFAULT_NAME_BUFFER_LENGTH = 64;
// @since 2.16
protected final static int DEFAULT_STRING_VALUE_BUFFER_LENGTH = 64;
// @since 2.14
protected final static JacksonFeatureSet SMILE_READ_CAPABILITIES
= DEFAULT_READ_CAPABILITIES.with(StreamReadCapability.EXACT_FLOATS);
/*
/**********************************************************************
/* Config
/**********************************************************************
*/
/**
* Bit flag composed of bits that indicate which
* {@link SmileParser.Feature}s are enabled.
*
* NOTE: currently the only feature ({@link SmileParser.Feature#REQUIRE_HEADER}
* takes effect during bootstrapping.
*/
protected int _formatFeatures;
/**
* Flag that indicates whether content can legally have raw (unquoted)
* binary data. Since this information is included both in header and
* in actual binary data blocks there is redundancy, and we want to
* ensure settings are compliant. Using application may also want to
* know this setting in case it does some direct (random) access.
*/
protected boolean _mayContainRawBinary;
/*
/**********************************************************************
/* Generic I/O state
/**********************************************************************
*/
/**
* I/O context for this reader. It handles buffer allocation
* for the reader.
*/
protected final IOContext _ioContext;
/**
* @since 2.17
*/
protected final StreamReadConstraints _streamReadConstraints;
/**
* Flag that indicates whether parser is closed or not. Gets
* set when parser is either closed by explicit call
* ({@link #close}) or when end-of-input is reached.
*/
protected boolean _closed;
/*
/**********************************************************************
/* Current input data
/**********************************************************************
*/
// Note: type of actual buffer depends on sub-class, can't include
/**
* Pointer to next available character in buffer
*/
protected int _inputPtr = 0;
/**
* Index of character after last available one in the buffer.
*/
protected int _inputEnd = 0;
/*
/**********************************************************************
/* Parsing state, location
/**********************************************************************
*/
/**
* Number of characters/bytes that were contained in previous blocks
* (blocks that were already processed prior to the current buffer).
*/
protected long _currInputProcessed;
/**
* Alternative to {@code _tokenInputTotal} that will only contain
* offset within input buffer, as int.
*/
protected int _tokenOffsetForTotal;
/**
* Information about parser context, context in which
* the next token is to be parsed (root, array, object).
*
* NOTE: before 2.13 was "_parsingContext"
*/
protected JsonReadContext _streamReadContext;
/*
/**********************************************************************
/* Decoded values, text, binary
/**********************************************************************
*/
/**
* Buffer that contains contents of String values, including
* field names if necessary (name split across boundary,
* contains escape sequence, or access needed to char array)
*/
protected final TextBuffer _textBuffer;
/**
* Temporary buffer that is needed if field name is accessed
* using {@link #getTextCharacters} method (instead of String
* returning alternatives)
*/
protected char[] _nameCopyBuffer;
/**
* Flag set to indicate whether the field name is available
* from the name copy buffer or not (in addition to its String
* representation being available via read context)
*/
protected boolean _nameCopied;
/**
* We will hold on to decoded binary data, for duration of
* current event, so that multiple calls to
* {@link #getBinaryValue} will not need to decode data more
* than once.
*/
protected byte[] _binaryValue;
/*
/**********************************************************************
/* Decoded values, numbers
/**********************************************************************
*/
protected NumberType _numberType;
/**
* Bitfield that indicates which numeric representations
* have been calculated for the current type
*/
protected int _numTypesValid = NR_UNKNOWN;
protected BigInteger _numberBigInt;
protected BigDecimal _numberBigDecimal;
protected int _numberInt;
protected float _numberFloat;
protected long _numberLong;
protected double _numberDouble;
/*
/**********************************************************************
/* Symbol handling, decoding
/**********************************************************************
*/
/**
* Symbol table that contains field names encountered so far
*/
protected final ByteQuadsCanonicalizer _symbols;
/**
* Temporary buffer used for name parsing.
*/
protected int[] _quadBuffer = NO_INTS;
/**
* Quads used for hash calculation
*/
protected int _quad1, _quad2, _quad3;
/**
* Marker flag to indicate that standard symbol handling is used
* (one with symbol table assisted canonicalization. May be disabled
* in which case alternate stream-line, non-canonicalizing handling
* is used: usually due to set of symbols
* (Object property names) is unbounded and will not benefit from
* canonicalization attempts.
*
* @since 2.13
*/
protected final boolean _symbolsCanonical;
/*
/**********************************************************************
/* Back-references
/**********************************************************************
*/
/**
* Array of recently seen field names, which may be back referenced
* by later fields.
* Defaults set to enable handling even if no header found.
*/
protected String[] _seenNames = NO_STRINGS;
protected int _seenNameCount = 0;
/**
* Array of recently seen field names, which may be back referenced
* by later fields
* Defaults set to disable handling if no header found.
*/
protected String[] _seenStringValues = null;
protected int _seenStringValueCount = -1;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
protected SmileParserBase(IOContext ctxt, int parserFeatures, int formatFeatures,
ByteQuadsCanonicalizer sym)
{
super(parserFeatures);
_formatFeatures = formatFeatures;
_ioContext = ctxt;
_streamReadConstraints = ctxt.streamReadConstraints();
_symbols = sym;
_symbolsCanonical = sym.isCanonicalizing();
DupDetector dups = Feature.STRICT_DUPLICATE_DETECTION.enabledIn(parserFeatures)
? DupDetector.rootDetector(this) : null;
_streamReadContext = JsonReadContext.createRootContext(dups);
_textBuffer = ctxt.constructReadConstrainedTextBuffer();
}
@Override
public StreamReadConstraints streamReadConstraints() {
return _streamReadConstraints;
}
/*
/**********************************************************
/* Versioned
/**********************************************************
*/
@Override
public final Version version() {
return PackageVersion.VERSION;
}
/*
/**********************************************************************
/* Extended API
/**********************************************************************
*/
public final boolean mayContainRawBinary() {
return _mayContainRawBinary;
}
/*
/**********************************************************
/* FormatFeature support
/**********************************************************
*/
@Override
public final int getFormatFeatures() {
return _formatFeatures;
}
@Override
public final JsonParser overrideFormatFeatures(int values, int mask) {
_formatFeatures = (_formatFeatures & ~mask) | (values & mask);
return this;
}
@Override // since 2.12
public JacksonFeatureSet getReadCapabilities() {
return SMILE_READ_CAPABILITIES;
}
/*
/**********************************************************
/* Abstract methods for sub-classes to provide
/**********************************************************
*/
protected abstract void _closeInput() throws IOException;
protected abstract void _parseNumericValue() throws IOException;
// public abstract int releaseBuffered(OutputStream out) throws IOException;
// public abstract Object getInputSource();
/*
/**********************************************************
/* Abstract impls
/**********************************************************
*/
/**
* Overridden since we do not really have character-based locations,
* but we do have byte offset to specify.
*/
@Override
public final JsonLocation currentLocation()
{
final long offset = _currInputProcessed + _inputPtr;
return new JsonLocation(_ioContext.contentReference(),
offset, // bytes
-1, -1, (int) offset); // char offset, line, column
}
/**
* Overridden since we do not really have character-based locations,
* but we do have byte offset to specify.
*/
@Override
public final JsonLocation currentTokenLocation()
{
// token location is correctly managed...
long total = _currInputProcessed + _tokenOffsetForTotal;
// 2.4: used to be: _tokenInputTotal
return new JsonLocation(_ioContext.contentReference(),
total, // bytes
-1, -1, (int) total); // char offset, line, column
}
@Deprecated // since 2.17
@Override
public JsonLocation getCurrentLocation() { return currentLocation(); }
@Deprecated // since 2.17
@Override
public JsonLocation getTokenLocation() { return currentTokenLocation(); }
/**
* Method that can be called to get the name associated with
* the current event.
*/
@Override // since 2.17
public String currentName() throws IOException
{
if (_currToken == JsonToken.START_OBJECT || _currToken == JsonToken.START_ARRAY) {
return _streamReadContext.getParent().getCurrentName();
}
return _streamReadContext.getCurrentName();
}
@Deprecated // since 2.17
@Override
public String getCurrentName() throws IOException { return currentName(); }
@Override
public final void overrideCurrentName(String name)
{
// Simple, but need to look for START_OBJECT/ARRAY's "off-by-one" thing:
JsonReadContext ctxt = _streamReadContext;
if (_currToken == JsonToken.START_OBJECT || _currToken == JsonToken.START_ARRAY) {
ctxt = ctxt.getParent();
}
// Unfortunate, but since we did not expose exceptions, need to wrap
try {
ctxt.setCurrentName(name);
} catch (IOException e) {
throw new IllegalStateException(e);
}
}
@Override
public final void close() throws IOException {
if (!_closed) {
_closed = true;
_inputEnd = 0;
_symbols.release();
try {
_closeInput();
} finally {
// Also, internal buffer(s) can now be released as well
_releaseBuffers();
}
_ioContext.close();
}
}
protected final void _releaseBuffers() throws IOException {
_textBuffer.releaseBuffers();
char[] buf = _nameCopyBuffer;
if (buf != null) {
_nameCopyBuffer = null;
_ioContext.releaseNameCopyBuffer(buf);
}
_releaseBuffers2();
}
protected abstract void _releaseBuffers2();
@Override public final boolean isClosed() { return _closed; }
@Override public final JsonReadContext getParsingContext() { return _streamReadContext; }
/*
/**********************************************************
/* Numeric accessors of public API
/**********************************************************
*/
@Override // since 2.9
public final boolean isNaN() throws IOException {
if (_currToken == JsonToken.VALUE_NUMBER_FLOAT) {
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
if ((_numTypesValid & NR_DOUBLE) != 0) {
return !Double.isFinite(_numberDouble);
}
if ((_numTypesValid & NR_FLOAT) != 0) {
return !Float.isFinite(_numberFloat);
}
}
return false;
}
@Override
public final Number getNumberValue() throws IOException
{
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
switch (_numberType) {
case INT:
return _numberInt;
case LONG:
return _numberLong;
case BIG_INTEGER:
return _numberBigInt;
case FLOAT:
return _numberFloat;
case DOUBLE:
return _numberDouble;
case BIG_DECIMAL:
default:
return _numberBigDecimal;
}
}
@Override // @since 2.12 -- for (most?) binary formats exactness guaranteed anyway
public final Number getNumberValueExact() throws IOException {
return getNumberValue();
}
@Override
public final NumberType getNumberType() throws IOException
{
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
return _numberType;
}
@Override // since 2.17
public NumberTypeFP getNumberTypeFP() throws IOException {
if (_currToken == JsonToken.VALUE_NUMBER_FLOAT) {
// Some decoding is done lazily so need to:
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
if (_numberType == NumberType.BIG_DECIMAL) {
return NumberTypeFP.BIG_DECIMAL;
}
if (_numberType == NumberType.DOUBLE) {
return NumberTypeFP.DOUBLE64;
}
if (_numberType == NumberType.FLOAT) {
return NumberTypeFP.FLOAT32;
}
}
return NumberTypeFP.UNKNOWN;
}
@Override
public final int getIntValue() throws IOException
{
if ((_numTypesValid & NR_INT) == 0) {
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
if ((_numTypesValid & NR_INT) == 0) { // wasn't an int natively?
convertNumberToInt(); // let's make it so, if possible
}
}
return _numberInt;
}
@Override
public final long getLongValue() throws IOException
{
if ((_numTypesValid & NR_LONG) == 0) {
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
if ((_numTypesValid & NR_LONG) == 0) {
convertNumberToLong();
}
}
return _numberLong;
}
@Override
public final BigInteger getBigIntegerValue() throws IOException
{
if ((_numTypesValid & NR_BIGINT) == 0) {
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
if ((_numTypesValid & NR_BIGINT) == 0) {
convertNumberToBigInteger();
}
}
return _numberBigInt;
}
@Override
public final float getFloatValue() throws IOException
{
if ((_numTypesValid & NR_FLOAT) == 0) {
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
if ((_numTypesValid & NR_FLOAT) == 0) {
convertNumberToFloat();
}
}
// Bounds/range checks would be tricky here, so let's not bother even trying...
/*
if (value < -Float.MAX_VALUE || value > MAX_FLOAT_D) {
_reportError("Numeric value (%s) out of range of Java float", getText());
}
*/
return _numberFloat;
}
@Override
public final double getDoubleValue() throws IOException
{
if ((_numTypesValid & NR_DOUBLE) == 0) {
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
if ((_numTypesValid & NR_DOUBLE) == 0) {
convertNumberToDouble();
}
}
return _numberDouble;
}
@Override
public final BigDecimal getDecimalValue() throws IOException
{
if ((_numTypesValid & NR_BIGDECIMAL) == 0) {
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(); // will also check event type
}
if ((_numTypesValid & NR_BIGDECIMAL) == 0) {
convertNumberToBigDecimal();
}
}
return _numberBigDecimal;
}
/*
/**********************************************************
/* Numeric conversions
/**********************************************************
*/
protected final void convertNumberToInt() throws IOException
{
// First, converting from long ought to be easy
if ((_numTypesValid & NR_LONG) != 0) {
// Let's verify it's lossless conversion by simple roundtrip
int result = (int) _numberLong;
if (((long) result) != _numberLong) {
reportOverflowInt(String.valueOf(_numberLong));
}
_numberInt = result;
} else if ((_numTypesValid & NR_BIGINT) != 0) {
if (BI_MIN_INT.compareTo(_numberBigInt) > 0
|| BI_MAX_INT.compareTo(_numberBigInt) < 0) {
reportOverflowInt(String.valueOf(_numberBigInt));
}
_numberInt = _numberBigInt.intValue();
} else if ((_numTypesValid & NR_DOUBLE) != 0) {
// Need to check boundaries
if (_numberDouble < MIN_INT_D || _numberDouble > MAX_INT_D) {
reportOverflowInt(String.valueOf(_numberDouble));
}
_numberInt = (int) _numberDouble;
} else if ((_numTypesValid & NR_FLOAT) != 0) {
if (_numberFloat < MIN_INT_D || _numberFloat > MAX_INT_D) {
reportOverflowInt(String.valueOf(_numberFloat));
}
_numberInt = (int) _numberFloat;
} else if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
if (BD_MIN_INT.compareTo(_numberBigDecimal) > 0
|| BD_MAX_INT.compareTo(_numberBigDecimal) < 0) {
reportOverflowInt(String.valueOf(_numberBigDecimal));
}
_numberInt = _numberBigDecimal.intValue();
} else {
_throwInternal();
}
_numTypesValid |= NR_INT;
}
protected final void convertNumberToLong() throws IOException
{
int v = _numTypesValid;
if ((v & NR_INT) != 0) {
_numberLong = (long) _numberInt;
} else if ((v & NR_BIGINT) != 0) {
if (BI_MIN_LONG.compareTo(_numberBigInt) > 0
|| BI_MAX_LONG.compareTo(_numberBigInt) < 0) {
reportOverflowLong(String.valueOf(_numberBigInt));
}
_numberLong = _numberBigInt.longValue();
} else if ((v & NR_DOUBLE) != 0) {
if (_numberDouble < MIN_LONG_D || _numberDouble > MAX_LONG_D) {
reportOverflowLong(String.valueOf(_numberDouble));
}
_numberLong = (long) _numberDouble;
} else if ((v & NR_FLOAT) != 0) {
if (_numberFloat < MIN_LONG_D || _numberFloat > MAX_LONG_D) {
reportOverflowLong(String.valueOf(_numberFloat));
}
_numberLong = (long) _numberFloat;
} else if ((v & NR_BIGDECIMAL) != 0) {
if (BD_MIN_LONG.compareTo(_numberBigDecimal) > 0
|| BD_MAX_LONG.compareTo(_numberBigDecimal) < 0) {
reportOverflowLong(String.valueOf(_numberBigDecimal));
}
_numberLong = _numberBigDecimal.longValue();
} else {
_throwInternal();
}
_numTypesValid |= NR_LONG;
}
protected final void convertNumberToBigInteger() throws IOException
{
if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
// here it'll just get truncated, no exceptions thrown
_streamReadConstraints.validateBigIntegerScale(_numberBigDecimal.scale());
_numberBigInt = _numberBigDecimal.toBigInteger();
} else if ((_numTypesValid & NR_LONG) != 0) {
_numberBigInt = BigInteger.valueOf(_numberLong);
} else if ((_numTypesValid & NR_INT) != 0) {
_numberBigInt = BigInteger.valueOf(_numberInt);
} else if ((_numTypesValid & NR_DOUBLE) != 0) {
_numberBigInt = BigDecimal.valueOf(_numberDouble).toBigInteger();
} else if ((_numTypesValid & NR_FLOAT) != 0) {
_numberBigInt = BigDecimal.valueOf(_numberFloat).toBigInteger();
} else {
_throwInternal();
}
_numTypesValid |= NR_BIGINT;
}
protected final void convertNumberToFloat() throws IOException
{
// Note: this MUST start with more accurate representations, since we don't know which
// value is the original one (others get generated when requested)
if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
_numberFloat = _numberBigDecimal.floatValue();
} else if ((_numTypesValid & NR_BIGINT) != 0) {
_numberFloat = _numberBigInt.floatValue();
} else if ((_numTypesValid & NR_DOUBLE) != 0) {
_numberFloat = (float) _numberDouble;
} else if ((_numTypesValid & NR_LONG) != 0) {
_numberFloat = (float) _numberLong;
} else if ((_numTypesValid & NR_INT) != 0) {
_numberFloat = (float) _numberInt;
} else {
_throwInternal();
}
_numTypesValid |= NR_FLOAT;
}
protected final void convertNumberToDouble() throws IOException
{
// Note: this MUST start with more accurate representations, since we don't know which
// value is the original one (others get generated when requested)
if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
_numberDouble = _numberBigDecimal.doubleValue();
} else if ((_numTypesValid & NR_FLOAT) != 0) {
_numberDouble = (double) _numberFloat;
} else if ((_numTypesValid & NR_BIGINT) != 0) {
_numberDouble = _numberBigInt.doubleValue();
} else if ((_numTypesValid & NR_LONG) != 0) {
_numberDouble = (double) _numberLong;
} else if ((_numTypesValid & NR_INT) != 0) {
_numberDouble = (double) _numberInt;
} else {
_throwInternal();
}
_numTypesValid |= NR_DOUBLE;
}
protected final void convertNumberToBigDecimal() throws IOException
{
// Note: this MUST start with more accurate representations, since we don't know which
// value is the original one (others get generated when requested)
if ((_numTypesValid & NR_DOUBLE) != 0) {
// 15-Dec-2023, tatu: Should NOT try to use String representation
// since we already have decoded into double
_numberBigDecimal = new BigDecimal(_numberDouble);
} else if ((_numTypesValid & NR_FLOAT) != 0) {
// 15-Dec-2023, tatu: Should NOT try to use String representation
// since we already have decoded into float
_numberBigDecimal = new BigDecimal(_numberFloat);
} else if ((_numTypesValid & NR_BIGINT) != 0) {
_numberBigDecimal = new BigDecimal(_numberBigInt);
} else if ((_numTypesValid & NR_LONG) != 0) {
_numberBigDecimal = BigDecimal.valueOf(_numberLong);
} else if ((_numTypesValid & NR_INT) != 0) {
_numberBigDecimal = BigDecimal.valueOf(_numberInt);
} else {
_throwInternal();
}
_numTypesValid |= NR_BIGDECIMAL;
}
/*
/**********************************************************
/* Internal/package methods: other
/**********************************************************
*/
/**
* Method called when an EOF is encountered between tokens.
* If so, it may be a legitimate EOF, but only iff there
* is no open non-root context.
*/
@Override
protected void _handleEOF() throws JsonParseException {
if (!_streamReadContext.inRoot()) {
String marker = _streamReadContext.inArray() ? "Array" : "Object";
_reportInvalidEOF(String.format(
": expected close marker for %s (start marker at %s)",
marker,
_streamReadContext.startLocation(_sourceReference())),
null);
}
}
protected void _reportMismatchedEndMarker(int actCh, char expCh) throws JsonParseException {
JsonReadContext ctxt = getParsingContext();
_reportError(String.format(
"Unexpected close marker '%s': expected '%c' (for %s starting at %s)",
(char) actCh, expCh, ctxt.typeDesc(), ctxt.startLocation(_sourceReference())));
}
/**
* Helper method used to encapsulate logic of including (or not) of
* "source reference" when constructing {@link JsonLocation} instances.
*
* @since 2.13
*/
protected ContentReference _sourceReference() {
if (isEnabled(StreamReadFeature.INCLUDE_SOURCE_IN_LOCATION)) {
return _ioContext.contentReference();
}
return ContentReference.unknown();
}
}