// com.fasterxml.jackson.dataformat.yaml.YAMLParser - Maven / Gradle / Ivy
// Show all versions of jackson-dataformat-yaml - Show documentation
package com.fasterxml.jackson.dataformat.yaml;
import java.io.*;
import java.math.BigInteger;
import com.fasterxml.jackson.core.io.NumberInput;
import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.error.Mark;
import org.yaml.snakeyaml.events.*;
import org.yaml.snakeyaml.nodes.NodeId;
import org.yaml.snakeyaml.nodes.Tag;
import org.yaml.snakeyaml.parser.ParserImpl;
import org.yaml.snakeyaml.reader.StreamReader;
import org.yaml.snakeyaml.resolver.Resolver;
import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.base.ParserBase;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.util.BufferRecycler;
import com.fasterxml.jackson.core.util.JacksonFeatureSet;
/**
* {@link JsonParser} implementation used to expose YAML documents
* in form that allows other Jackson functionality to process YAML content,
* such as binding POJOs to and from it, and building tree representations.
*/
public class YAMLParser extends ParserBase
{
/**
 * Set of on/off features that can be configured for YAML parsers.
 * Each constant owns a single bit (derived from its declaration order)
 * in the format-feature bit set.
 */
public enum Feature implements FormatFeature // in 2.9
{
    /**
     * Controls handling of empty {@link String} scalars: when disabled they
     * are always reported as String values; when enabled they go through the
     * regular scalar type resolution, which may expose them as {@code null}
     * (as per YAML 1.1 Null Language-Independent Type).
     *
     * Feature is enabled by default in Jackson 2.12 for backwards-compatibility
     * reasons.
     */
    EMPTY_STRING_AS_NULL(true),

    /**
     * Controls whether boolean-like words are exposed as Strings instead of
     * booleans. When enabled, only {@code true} and {@code false}
     * (case-insensitive) are recognized as booleans; other words such as
     * yes, no, on, off are parsed as strings.
     *
     * Feature is disabled by default for backwards-compatibility: works the
     * same as YAML specification default and previous Jackson versions.
     *
     * @since 2.15
     */
    PARSE_BOOLEAN_LIKE_WORDS_AS_STRINGS(false),
    ;

    final boolean _defaultState;
    final int _mask;

    Feature(boolean defaultState) {
        _mask = (1 << ordinal());
        _defaultState = defaultState;
    }

    /**
     * Computes the bit set (flags) that has the mask of every feature
     * that is enabled by default turned on.
     *
     * @return combined mask of all default-enabled features
     */
    public static int collectDefaults()
    {
        int combined = 0;
        for (Feature feature : values()) {
            if (!feature.enabledByDefault()) {
                continue;
            }
            combined |= feature.getMask();
        }
        return combined;
    }

    @Override
    public boolean enabledByDefault() {
        return _defaultState;
    }

    @Override
    public boolean enabledIn(int flags) {
        return (_mask & flags) != 0;
    }

    @Override
    public int getMask() {
        return _mask;
    }
}
// note: does NOT include '0', handled separately
// private final static Pattern PATTERN_INT = Pattern.compile("-?[1-9][0-9]*");
// 22-Nov-2020, tatu: Not needed as of 2.12 since SnakeYAML tags
// doubles correctly
// private final static Pattern PATTERN_FLOAT = Pattern.compile(
// "[-+]?([0-9][0-9_]*)?\\.[0-9]*([eE][-+][0-9]+)?");
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
/**
* Codec used for data binding when (if) requested.
*/
protected ObjectCodec _objectCodec;
/**
* Bit set of currently enabled {@link YAMLParser.Feature}s; updated by
* the enable/disable/override methods of this parser.
*/
protected int _formatFeatures;
/**
* Cached state of {@link Feature#EMPTY_STRING_AS_NULL}, recomputed every
* time {@code _formatFeatures} is changed.
*
* @since 2.12
*/
protected boolean _cfgEmptyStringsToNull;
/*
/**********************************************************************
/* Input sources
/**********************************************************************
*/
/**
* Need to keep track of underlying {@link Reader} to be able to
* auto-close it (if required to)
*/
protected final Reader _reader;
/**
* SnakeYAML event parser constructed over {@code _reader}; events are
* pulled from it one at a time by {@code nextToken()}.
*/
protected final ParserImpl _yamlParser;
/**
* SnakeYAML resolver used to determine the implicit tag (type) of
* scalars that have no explicit tag.
*/
protected final Resolver _yamlResolver = new Resolver();
/*
/**********************************************************************
/* State
/**********************************************************************
*/
/**
* Keep track of the last event read, to get access to Location info
* (start and end marks of the event).
*/
protected Event _lastEvent;
/**
* To keep track of tags ("type ids"), need to either get tags for all
* events, or, keep track of the relevant event that might have it: this is
* different from {@code _lastEvent} in some cases (for example, it is not
* updated for the first field name of an Object).
*
* @since 2.12
*/
protected Event _lastTagEvent;
/**
* Textual value of the current scalar, exactly as reported by the
* underlying YAML parser (for aliases, name of the referenced anchor).
*/
protected String _textValue;
/**
* For some tokens (specifically, numbers), we'll have cleaned up version,
* mostly free of underscores; {@code null} when no cleaned version exists.
*/
protected String _cleanedTextValue;
/**
* Let's also have a local copy of the current field name
*/
protected String _currentFieldName;
/**
* Flag that is set when current token was derived from an Alias
* (reference to another value's anchor)
*
* @since 2.1
*/
protected boolean _currentIsAlias;
/**
* Anchor for the value that parser currently points to: in case of
* structured types, value whose first token current token is.
* Exposed via {@code getObjectId()}.
*/
protected String _currentAnchor;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Legacy constructor retained for backwards compatibility: simply delegates
 * to the {@link LoaderOptions}-accepting constructor with {@code null} options.
 * The {@code br} argument is not used.
 *
 * @deprecated since 2.14, use other constructor
 */
@Deprecated
public YAMLParser(IOContext ctxt, BufferRecycler br,
        int parserFeatures, int formatFeatures,
        ObjectCodec codec, Reader reader)
{
    this(ctxt, parserFeatures, formatFeatures, (LoaderOptions) null, codec, reader);
}
public YAMLParser(IOContext ctxt, int parserFeatures, int formatFeatures,
LoaderOptions loaderOptions, ObjectCodec codec, Reader reader)
{
super(ctxt, parserFeatures);
_objectCodec = codec;
_formatFeatures = formatFeatures;
_reader = reader;
if (loaderOptions == null) {
loaderOptions = new LoaderOptions();
}
_yamlParser = new ParserImpl(new StreamReader(reader), loaderOptions);
_cfgEmptyStringsToNull = Feature.EMPTY_STRING_AS_NULL.enabledIn(formatFeatures);
}
/**
 * @return Codec currently configured for data binding (may be {@code null})
 */
@Override
public ObjectCodec getCodec()
{
    return _objectCodec;
}
/**
 * Replaces the codec used for data binding.
 *
 * @param c Codec to use from now on
 */
@Override
public void setCodec(ObjectCodec c)
{
    _objectCodec = c;
}
/*
/**********************************************************
/* Extended YAML-specific API
/**********************************************************
*/
/**
 * Accessor for checking whether the current token was produced from a
 * YAML Alias event (that is, a reference to an anchor).
 *
 * @return {@code true} if current token was derived from an alias
 *
 * @since 2.1
 */
public boolean isCurrentAlias()
{
    return _currentIsAlias;
}
/**
 * Accessor for the anchor (id that Aliases can refer to) associated with
 * the current token, if any.
 *
 * @return Anchor of the current value, or {@code null} if none
 *
 * @deprecated Since 2.3 (was added in 2.1) -- use {@link #getObjectId} instead
 */
@Deprecated
public String getCurrentAnchor()
{
    return _currentAnchor;
}
/*
/**********************************************************
/* Versioned
/**********************************************************
*/
/**
 * @return Version of this package, as recorded in {@code PackageVersion}
 */
@Override
public Version version()
{
    return PackageVersion.VERSION;
}
/*
/**********************************************************
/* Overrides: capability introspection methods
/**********************************************************
*/
/**
 * @return Always {@code false}: no custom codec is required
 */
@Override
public boolean requiresCustomCodec()
{
    return false;
}
/**
 * @return Always {@code true}: anchors are exposed as Object Ids
 *    (see {@link #getObjectId})
 */
@Override
public boolean canReadObjectId()
{
    return true;
}
/**
 * @return Always {@code true}: YAML tags are exposed as Type Ids
 *    (see {@link #getTypeId})
 */
@Override
public boolean canReadTypeId()
{
    return true;
}
/**
 * Accessor for the set of read capabilities of this parser.
 *
 * @return Default read capabilities: they are fine as YAML actually has
 *    typed scalars (to a degree), unlike CSV, Properties and XML
 */
@Override
public JacksonFeatureSet<StreamReadCapability> getReadCapabilities() {
    // Return type is parameterized (not raw) so callers get a type-safe
    // view and no unchecked warnings are produced
    return DEFAULT_READ_CAPABILITIES;
}
/*
/**********************************************************
/* ParserBase method impls
/**********************************************************
*/
/**
 * Closes the underlying {@link Reader}, but only if this parser "owns" it
 * (I/O context reports the resource as managed) or if auto-closing of the
 * source has been enabled.
 *
 * @throws IOException if closing of the reader fails
 */
@Override
protected void _closeInput() throws IOException
{
    // 25-Nov-2008, tatus: As per [JACKSON-16] we are not to call close()
    //   on the underlying Reader, unless we "own" it, or auto-closing
    //   feature is enabled.
    //   One downside is that when using our optimized Reader (granted,
    //   we only do that for UTF-32...) this means that buffer recycling
    //   won't work correctly.
    final boolean mayClose = _ioContext.isResourceManaged()
            || isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE);
    if (!mayClose) {
        return;
    }
    _reader.close();
}
/*
/**********************************************************
/* FormatFeature support
/**********************************************************
*/
/**
 * @return Bit set of currently enabled YAML-specific {@link Feature}s
 */
@Override
public int getFormatFeatures()
{
    return _formatFeatures;
}
/**
 * Bulk-changes format features: bits selected by {@code mask} are replaced
 * with corresponding bits of {@code values}; other bits are left as they are.
 *
 * @param values Bit set with new states for the features to change
 * @param mask Bit set indicating which features to change
 *
 * @return This parser, to allow call chaining
 */
@Override
public JsonParser overrideFormatFeatures(int values, int mask)
{
    final int retained = _formatFeatures & ~mask;
    final int replaced = values & mask;
    _formatFeatures = retained | replaced;
    // cached setting must follow the bit set
    _cfgEmptyStringsToNull = Feature.EMPTY_STRING_AS_NULL.enabledIn(_formatFeatures);
    return this;
}
/*
/***************************************************
/* Public API, configuration
/***************************************************
*/
/**
 * Method for enabling specified YAML feature
 * (check {@link Feature} for list of features)
 *
 * @param f Feature to enable
 *
 * @return This parser, to allow call chaining
 */
public JsonParser enable(YAMLParser.Feature f)
{
    _formatFeatures = _formatFeatures | f.getMask();
    // cached setting must follow the bit set
    _cfgEmptyStringsToNull = Feature.EMPTY_STRING_AS_NULL.enabledIn(_formatFeatures);
    return this;
}
/**
 * Method for disabling specified YAML feature
 * (check {@link Feature} for list of features)
 *
 * @param f Feature to disable
 *
 * @return This parser, to allow call chaining
 */
public JsonParser disable(YAMLParser.Feature f)
{
    _formatFeatures = _formatFeatures & ~f.getMask();
    // cached setting must follow the bit set
    _cfgEmptyStringsToNull = Feature.EMPTY_STRING_AS_NULL.enabledIn(_formatFeatures);
    return this;
}
/**
 * Method for enabling or disabling specified YAML feature
 * (check {@link Feature} for list of features)
 *
 * @param f Feature to change
 * @param state {@code true} to enable the feature, {@code false} to disable it
 *
 * @return This parser, to allow call chaining
 */
public JsonParser configure(YAMLParser.Feature f, boolean state)
{
    if (!state) {
        disable(f);
        return this;
    }
    enable(f);
    return this;
}
/**
 * Method for checking whether specified YAML {@link Feature}
 * is enabled.
 *
 * @param f Feature to check
 *
 * @return {@code true} if the feature is currently enabled
 */
public boolean isEnabled(YAMLParser.Feature f)
{
    return f.enabledIn(_formatFeatures);
}
// @Override public CsvSchema getSchema()
/*
/**********************************************************
/* Location info
/**********************************************************
*/
/**
 * @return Location of the end of the last event read (we can assume we
 *    are at the end of the token); {@link JsonLocation#NA} if no event
 *    has been read yet
 */
@Override // since 2.17
public JsonLocation currentLocation()
{
    final Event last = _lastEvent;
    return (last == null) ? JsonLocation.NA : _locationFor(last.getEndMark());
}
/**
 * @return Location of the start of the last event read;
 *    {@link JsonLocation#NA} if no event has been read yet
 */
@Override // since 2.17
public JsonLocation currentTokenLocation()
{
    final Event last = _lastEvent;
    return (last == null) ? JsonLocation.NA : _locationFor(last.getStartMark());
}
/**
 * @return Same location as {@link #currentLocation()}, to which this
 *    method delegates
 */
@Deprecated // since 2.17
@Override
public JsonLocation getCurrentLocation()
{
    return currentLocation();
}
/**
 * @return Same location as {@link #currentTokenLocation()}, to which this
 *    method delegates
 */
@Deprecated // since 2.17
@Override
public JsonLocation getTokenLocation()
{
    return currentTokenLocation();
}
/**
 * Converts a SnakeYAML {@link Mark} into a Jackson {@link JsonLocation}.
 *
 * @param m Mark to convert; may be {@code null}
 *
 * @return Location with offset of the mark and 1-based line and column;
 *    or, for {@code null} mark, location with all of them set to -1
 */
protected JsonLocation _locationFor(Mark m)
{
    if (m != null) {
        // Mark uses 0-based line and column; Jackson 1-based
        final int line = m.getLine() + 1;
        final int column = m.getColumn() + 1;
        return new JsonLocation(_ioContext.contentReference(),
                m.getIndex(), line, column);
    }
    return new JsonLocation(_ioContext.contentReference(),
            -1, -1, -1);
}
// Note: SHOULD override 'getTokenLineNr', 'getTokenColumnNr', but those are final in 2.0
/*
/**********************************************************
/* Parsing
/**********************************************************
*/
/**
* Main iteration method: pulls events from the underlying SnakeYAML parser
* until one maps to a {@link JsonToken}, updates parser state and returns
* that token.
*<p>
* Mapping of events: within Object context (and when the current token is
* not a field name) a Scalar is exposed as {@code FIELD_NAME} and MappingEnd
* as {@code END_OBJECT}; otherwise Scalars are decoded via
* {@link #_decodeScalar}, MappingStart / SequenceStart / SequenceEnd become
* {@code START_OBJECT} / {@code START_ARRAY} / {@code END_ARRAY}, Alias is
* exposed as {@code VALUE_STRING} (with anchor name as text), DocumentStart
* and DocumentEnd are skipped, and StreamEnd closes the parser.
*
* @return Next token; or {@code null} if parser is closed, underlying parser
*    returns no event, or end of stream is reached
*
* @throws IOException for read problems as well as for YAML-level problems
*    reported by the underlying parser (which are wrapped)
*/
@SuppressWarnings("deprecation")
@Override
public JsonToken nextToken() throws IOException
{
// per-token state needs to be reset first
_currentIsAlias = false;
_binaryValue = null;
if (_closed) {
return null;
}
while (true) {
Event evt;
try {
evt = _yamlParser.getEvent();
} catch (org.yaml.snakeyaml.error.YAMLException e) {
// translate SnakeYAML exceptions into Jackson ones; marked variant
// has its own wrapper type
if (e instanceof org.yaml.snakeyaml.error.MarkedYAMLException) {
throw com.fasterxml.jackson.dataformat.yaml.snakeyaml.error.MarkedYAMLException.from
(this, (org.yaml.snakeyaml.error.MarkedYAMLException) e);
}
throw new JacksonYAMLParseException(this, e.getMessage(), e);
} catch (NumberFormatException e) {
// 12-Jan-2024, tatu: As per https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63274
// we seem to have unhandled case by SnakeYAML
throw _constructError(String.format(
"Malformed Number token: failed to tokenize due to (%s): %s",
e.getClass().getName(), e.getMessage()),
e);
}
// is null ok? Assume it is, for now, consider to be same as end-of-doc
if (evt == null) {
_currentAnchor = null;
_lastTagEvent = null;
return (_currToken = null);
}
_lastEvent = evt;
// One complication: field names are only inferred from the fact that we are
// in Object context; they are just ScalarEvents (but separate and NOT just tagged
// on values)
if (_parsingContext.inObject()) {
// if current token is a field name, this event is its value and is
// handled by the general value handling further below
if (_currToken != JsonToken.FIELD_NAME) {
if (!evt.is(Event.ID.Scalar)) {
_currentAnchor = null;
_lastTagEvent = null;
// end is fine
if (evt.is(Event.ID.MappingEnd)) {
if (!_parsingContext.inObject()) { // sanity check is optional, but let's do it for now
_reportMismatchedEndMarker('}', ']');
}
_parsingContext = _parsingContext.getParent();
return (_currToken = JsonToken.END_OBJECT);
}
_reportError("Expected a field name (Scalar value in YAML), got this instead: "+evt);
}
// 20-Feb-2019, tatu: [dataformats-text#123] Looks like YAML exposes Anchor for Object at point
// where we return START_OBJECT (which makes sense), but, alas, Jackson expects that at point
// after first FIELD_NAME. So we will need to defer clearing of the anchor slightly,
// just for the very first entry; and only if no anchor for name found.
// ... not even 100% sure this is correct, or robust, but does appear to work for specific
// test case given.
final ScalarEvent scalar = (ScalarEvent) evt;
final String newAnchor = scalar.getAnchor();
final boolean firstEntry = (_currToken == JsonToken.START_OBJECT);
if ((newAnchor != null) || !firstEntry) {
_currentAnchor = scalar.getAnchor();
}
// 23-Nov-2020, tatu: [dataformats-text#232] shows case where ref to type id
// needs to be similarly deferred...
if (!firstEntry) {
_lastTagEvent = evt;
}
final String name = scalar.getValue();
_currentFieldName = name;
_parsingContext.setCurrentName(name);
return (_currToken = JsonToken.FIELD_NAME);
}
} else if (_parsingContext.inArray()) {
_parsingContext.expectComma();
}
// Ugh. Why not expose id, to be able to Switch?
_currentAnchor = null;
_lastTagEvent = evt;
// scalar values are probably the commonest:
if (evt.is(Event.ID.Scalar)) {
JsonToken t = _decodeScalar((ScalarEvent) evt);
_currToken = t;
return t;
}
// followed by maps, then arrays
if (evt.is(Event.ID.MappingStart)) {
Mark m = evt.getStartMark();
MappingStartEvent map = (MappingStartEvent) evt;
_currentAnchor = map.getAnchor();
createChildObjectContext(m.getLine(), m.getColumn());
return (_currToken = JsonToken.START_OBJECT);
}
if (evt.is(Event.ID.MappingEnd)) { // actually error; can not have map-end here
_reportError("Not expecting END_OBJECT but a value");
}
if (evt.is(Event.ID.SequenceStart)) {
Mark m = evt.getStartMark();
_currentAnchor = ((NodeEvent)evt).getAnchor();
createChildArrayContext(m.getLine(), m.getColumn());
return (_currToken = JsonToken.START_ARRAY);
}
if (evt.is(Event.ID.SequenceEnd)) {
if (!_parsingContext.inArray()) { // sanity check is optional, but let's do it for now
_reportMismatchedEndMarker(']', '}');
}
_parsingContext = _parsingContext.getParent();
return (_currToken = JsonToken.END_ARRAY);
}
// after this, less common tokens:
if (evt.is(Event.ID.DocumentEnd)) {
// [dataformat-yaml#72]: logical end of doc; fine. Two choices; either skip,
// or return null as marker (but do NOT close). Earlier returned `null`, but
// to allow multi-document reading should actually just skip.
// return (_currToken = null);
continue;
}
if (evt.is(Event.ID.DocumentStart)) {
// DocumentStartEvent dd = (DocumentStartEvent) evt;
// does this matter? Shouldn't, should it?
continue;
}
if (evt.is(Event.ID.Alias)) {
// alias is exposed as a String token whose text is name of the anchor
// referred to; callers can detect this via `isCurrentAlias()`
AliasEvent alias = (AliasEvent) evt;
_currentIsAlias = true;
_textValue = alias.getAnchor();
_cleanedTextValue = null;
// for now, nothing to do: in future, maybe try to expose as ObjectIds?
return (_currToken = JsonToken.VALUE_STRING);
}
if (evt.is(Event.ID.StreamEnd)) { // end-of-input; force closure
close();
return (_currToken = null);
}
// any other event type is ignored: loop around to read the next one
}
}
/**
* Determines the token type to expose for given scalar event, storing its
* textual value in {@code _textValue} (and resetting
* {@code _cleanedTextValue}) as a side effect.
*<p>
* If the scalar has no explicit tag (or just "!"), type is resolved from
* content using {@code _yamlResolver}; otherwise the explicit tag (with
* {@code tag:yaml.org,2002:} prefix removed) decides. Whenever no more
* specific type can be determined, scalar is exposed as
* {@code VALUE_STRING}.
*
* @param scalar Scalar event to decode
*
* @return Token type to expose for the scalar
*
* @throws IOException if content is invalid for the indicated type (for
*    example, invalid Base64 content for a binary value)
*/
protected JsonToken _decodeScalar(ScalarEvent scalar) throws IOException
{
String value = scalar.getValue();
_textValue = value;
_cleanedTextValue = null;
// [dataformats-text#130]: Allow determining whether empty String is
// coerced into null or not
if (!_cfgEmptyStringsToNull && value.isEmpty()) {
return JsonToken.VALUE_STRING;
}
// we may get an explicit tag, if so, use for corroborating...
String typeTag = scalar.getTag();
final int len = value.length();
if (typeTag == null || typeTag.equals("!")) { // no, implicit
Tag nodeTag = _yamlResolver.resolve(NodeId.scalar, value, scalar.getImplicit().canOmitTagInPlainScalar());
if (nodeTag == Tag.STR) {
return JsonToken.VALUE_STRING;
}
if (nodeTag == Tag.INT) {
return _decodeNumberScalar(value, len);
}
if (nodeTag == Tag.FLOAT) {
// actual number value is decoded lazily from cleaned-up text
_numTypesValid = 0;
return _cleanYamlFloat(value);
}
if (nodeTag == Tag.BOOL) {
Boolean B = _matchYAMLBoolean(value, len);
if (B != null) {
return B ? JsonToken.VALUE_TRUE : JsonToken.VALUE_FALSE;
}
// no match (possible if boolean-like words are to be exposed as
// Strings): falls through to the String fallback at the end
} else if (nodeTag == Tag.NULL) {
return JsonToken.VALUE_NULL;
} else {
// what to do with timestamp and binary and merge etc.
return JsonToken.VALUE_STRING;
}
} else { // yes, got type tag
// strip standard prefix, and only use part before first comma (if any)
if (typeTag.startsWith("tag:yaml.org,2002:")) {
typeTag = typeTag.substring("tag:yaml.org,2002:".length());
if (typeTag.contains(",")) {
final String[] tags = typeTag.split(",");
typeTag = (tags.length == 0) ? "" : tags[0];
}
}
// [dataformats-text#39]: support binary type
if ("binary".equals(typeTag)) {
// 15-Dec-2017, tatu: 2.9.4 uses Jackson's codec because SnakeYAML does
// not export its codec via OSGi (breaking 2.9.3). Note that trailing
// whitespace is ok with core 2.9.4, but not earlier, so we'll trim
// on purpose here
value = value.trim();
try {
_binaryValue = Base64Variants.MIME.decode(value);
} catch (IllegalArgumentException e) {
_reportError(e.getMessage());
}
return JsonToken.VALUE_EMBEDDED_OBJECT;
}
// canonical values by YAML are actually 'y' and 'n'; but plenty more unofficial:
if ("bool".equals(typeTag)) { // accepted words are determined by `_matchYAMLBoolean()`
Boolean B = _matchYAMLBoolean(value, len);
if (B != null) {
return B ? JsonToken.VALUE_TRUE : JsonToken.VALUE_FALSE;
}
} else {
// 17-Sep-2022, tatu: empty String value is not valid number;
// so we could indicate exception or... for now, report as
// String value?
if (len > 0) {
if ("int".equals(typeTag)) {
return _decodeNumberScalar(value, len);
}
if ("float".equals(typeTag)) {
_numTypesValid = 0;
return _cleanYamlFloat(value);
}
if ("null".equals(typeTag)) {
return JsonToken.VALUE_NULL;
}
}
}
}
// any way to figure out actual type? No?
return JsonToken.VALUE_STRING;
}
/**
 * Tries to interpret given text as a YAML boolean. All matching is
 * case-insensitive.
 *<p>
 * If {@link Feature#PARSE_BOOLEAN_LIKE_WORDS_AS_STRINGS} is enabled only
 * {@code true} and {@code false} are recognized; otherwise also
 * {@code y}, {@code n}, {@code yes}, {@code no}, {@code on} and {@code off}.
 *
 * @param value Text to check
 * @param len Length of {@code value}; used for selecting candidates
 *
 * @return Matching {@link Boolean}; or {@code null} if text does not
 *    represent a boolean
 */
protected Boolean _matchYAMLBoolean(String value, int len)
{
    if (isEnabled(Feature.PARSE_BOOLEAN_LIKE_WORDS_AS_STRINGS)) {
        // strict mode: only the two "real" boolean words
        if ("true".equalsIgnoreCase(value)) {
            return Boolean.TRUE;
        }
        if ("false".equalsIgnoreCase(value)) {
            return Boolean.FALSE;
        }
        return null;
    }
    // lenient mode: candidates are selected by length
    if (len == 1) {
        final char first = value.charAt(0);
        if (first == 'y' || first == 'Y') {
            return Boolean.TRUE;
        }
        if (first == 'n' || first == 'N') {
            return Boolean.FALSE;
        }
    } else if (len == 2) {
        if ("no".equalsIgnoreCase(value)) {
            return Boolean.FALSE;
        }
        if ("on".equalsIgnoreCase(value)) {
            return Boolean.TRUE;
        }
    } else if (len == 3) {
        if ("yes".equalsIgnoreCase(value)) {
            return Boolean.TRUE;
        }
        if ("off".equalsIgnoreCase(value)) {
            return Boolean.FALSE;
        }
    } else if (len == 4) {
        if ("true".equalsIgnoreCase(value)) {
            return Boolean.TRUE;
        }
    } else if (len == 5) {
        if ("false".equalsIgnoreCase(value)) {
            return Boolean.FALSE;
        }
    }
    return null;
}
/**
 * Decodes a scalar that has been tagged (implicitly or explicitly) as an
 * integer. Handles optional sign, plain "0", binary ({@code 0b}), hex
 * ({@code 0x}) and octal (leading {@code 0}) notations as well as decimal
 * numbers with optional underscores. Base-60 notation is NOT supported.
 *<p>
 * Binary, hex and octal values are decoded eagerly; for decimal values only
 * {@code _cleanedTextValue} is set (without underscores or leading plus
 * sign) and actual decoding is deferred.
 *<p>
 * Content that can not be matched to a valid number -- including empty
 * content and a sign with no digits -- is exposed as
 * {@code VALUE_STRING}; {@code null} is never returned since callers would
 * take that to mean end of input.
 *
 * @param value Textual content of the scalar
 * @param len Length of {@code value}
 *
 * @return {@code VALUE_NUMBER_INT} if content is a supported integer;
 *    {@code VALUE_STRING} otherwise
 *
 * @throws IOException if content looks like a number but can not be decoded
 */
protected JsonToken _decodeNumberScalar(String value, final int len)
    throws IOException
{
    // 05-May-2012, tatu: Turns out this is a hot spot; so let's write it
    //  out and avoid regexp overhead...
    if (len == 0) { // should not occur but play it safe
        return JsonToken.VALUE_STRING;
    }
    int i;
    char ch = value.charAt(0);
    final boolean leadingPlus = (ch == '+');
    if (ch == '-') {
        _numberNegative = true;
        i = 1;
    } else {
        _numberNegative = false;
        i = leadingPlus ? 1 : 0;
    }
    if (len == i) {
        // Sign only, no digits: not a number. Must not return `null` token
        // as that would be taken as end-of-input by callers
        return JsonToken.VALUE_STRING;
    }
    // Next: either "0" ("-0" and "+0" also accepted), or non-decimal. So:
    if (value.charAt(i) == '0') {
        if (++i == len) {
            // can leave "_numberNegative" as is, does not matter
            _numberInt = 0;
            _numTypesValid = NR_INT;
            return JsonToken.VALUE_NUMBER_INT;
        }
        ch = value.charAt(i);
        switch (ch) {
        case 'b': case 'B': // binary
            return _decodeNumberIntBinary(value, i+1, len, _numberNegative);
        case 'x': case 'X': // hex
            return _decodeNumberIntHex(value, i+1, len, _numberNegative);
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case '_':
            return _decodeNumberIntOctal(value, i, len, _numberNegative);
        default:
        }
        // should never occur, but in abundance of caution, let's not
        // throw exception but just return as String
        return JsonToken.VALUE_STRING;
    }

    // 23-Nov-2020, tatu: will now check and support all formats EXCEPT
    //    for 60-base; 60-base is trickier not just because decoding gets
    //    more involved but also because it can accidentally "detect" values
    //    that we most likely expressing either Times or IP numbers.
    boolean underscores = false;
    while (true) {
        int c = value.charAt(i);
        if (c > '9' || c < '0') {
            if (c == '_') {
                underscores = true;
            } else {
                break;
            }
        }
        if (++i == len) {
            _numTypesValid = 0;
            if (underscores) {
                return _cleanYamlInt(value);
            }
            // Leading plus sign is dropped (same as `_cleanYamlInt()` does)
            // so that digit counting done for deferred decoding is accurate
            _cleanedTextValue = leadingPlus ? value.substring(1) : value;
            return JsonToken.VALUE_NUMBER_INT;
        }
    }
    // 25-Aug-2016, tatu: If we can't actually match it to valid number,
    //    consider String; better than claiming there's not token
    return JsonToken.VALUE_STRING;
}
/**
 * Decodes digits of a binary ({@code 0b}) integer, choosing the smallest of
 * {@code int}, {@code long} and {@link BigInteger} that is known to be able
 * to hold the value, based on number of digits.
 *
 * @param value Full textual content of the scalar
 * @param i Offset of the first digit (after sign and base marker)
 * @param origLen Length of {@code value}
 * @param negative Whether value had a minus sign
 *
 * @return {@code VALUE_NUMBER_INT}
 *
 * @throws IOException if digits are not valid for the base
 *
 * @since 2.12
 */
protected JsonToken _decodeNumberIntBinary(final String value, int i, final int origLen,
        boolean negative)
    throws IOException
{
    final String digits = _cleanUnderscores(value, i, origLen);
    final int digitCount = digits.length();

    if (digitCount > 63) {
        return _decodeFromBigInteger(_decodeBigInt(digits, 2), negative);
    }
    if (digitCount > 31) {
        // with exactly 32 digits value may still fit in an int
        return _decodeFromLong(_decodeLong(digits, 2), negative,
                (digitCount == 32));
    }
    final int magnitude = _decodeInt(digits, 2);
    _numberInt = negative ? -magnitude : magnitude;
    _numTypesValid = NR_INT;
    return JsonToken.VALUE_NUMBER_INT;
}
/**
 * Decodes digits of an octal (leading {@code 0}) integer, choosing the
 * smallest of {@code int}, {@code long} and {@link BigInteger} that is
 * known to be able to hold the value, based on number of digits.
 *
 * @param value Full textual content of the scalar
 * @param i Offset of the first digit to decode
 * @param origLen Length of {@code value}
 * @param negative Whether value had a minus sign
 *
 * @return {@code VALUE_NUMBER_INT}
 *
 * @throws IOException if digits are not valid for the base
 *
 * @since 2.12
 */
protected JsonToken _decodeNumberIntOctal(final String value, int i, final int origLen,
        boolean negative)
    throws IOException
{
    final String digits = _cleanUnderscores(value, i, origLen);
    final int digitCount = digits.length();

    if (digitCount > 21) { // more than 63 bits
        return _decodeFromBigInteger(_decodeBigInt(digits, 8), negative);
    }
    if (digitCount > 10) { // more than 30 bits
        return _decodeFromLong(_decodeLong(digits, 8), negative, false);
    }
    final int magnitude = _decodeInt(digits, 8);
    _numberInt = negative ? -magnitude : magnitude;
    _numTypesValid = NR_INT;
    return JsonToken.VALUE_NUMBER_INT;
}
/**
 * Decodes digits of a hexadecimal ({@code 0x}) integer, choosing the
 * smallest of {@code int}, {@code long} and {@link BigInteger} that is
 * known to be able to hold the value, based on number of digits.
 *
 * @param value Full textual content of the scalar
 * @param i Offset of the first digit (after sign and base marker)
 * @param origLen Length of {@code value}
 * @param negative Whether value had a minus sign
 *
 * @return {@code VALUE_NUMBER_INT}
 *
 * @throws IOException if digits are not valid for the base
 *
 * @since 2.12
 */
protected JsonToken _decodeNumberIntHex(final String value, int i, final int origLen,
        boolean negative)
    throws IOException
{
    final String digits = _cleanUnderscores(value, i, origLen);
    final int digitCount = digits.length();

    if (digitCount > 15) { // more than 60 bits
        return _decodeFromBigInteger(_decodeBigInt(digits, 16), negative);
    }
    if (digitCount > 7) { // more than 28 bits
        // with exactly 8 digits value may still fit in an int
        return _decodeFromLong(_decodeLong(digits, 16), negative,
                (digitCount == 8));
    }
    final int magnitude = _decodeInt(digits, 16);
    _numberInt = negative ? -magnitude : magnitude;
    _numTypesValid = NR_INT;
    return JsonToken.VALUE_NUMBER_INT;
}
/**
 * Stores an already decoded (sign-less) integer value, as {@code int} if
 * requested and possible, otherwise as {@code long}.
 *
 * @param unsignedValue Magnitude of the value, without sign
 * @param negative Whether the value is to be negated
 * @param checkIfInt Whether to check if the (signed) value fits in an
 *    {@code int}; callers pass {@code true} when digit count is such that
 *    it may
 *
 * @return {@code VALUE_NUMBER_INT}
 */
private JsonToken _decodeFromLong(long unsignedValue, boolean negative,
        boolean checkIfInt)
{
    long actualValue;

    if (negative) {
        actualValue = -unsignedValue;
        if (checkIfInt && (actualValue >= MIN_INT_L)) {
            _numberInt = (int) actualValue;
            _numTypesValid = NR_INT;
            return JsonToken.VALUE_NUMBER_INT;
        }
    } else {
        // NOTE: comparison must be inclusive; otherwise exactly
        // `Integer.MAX_VALUE` (like "0x7FFFFFFF") is typed as `long`
        if (checkIfInt && (unsignedValue <= MAX_INT_L)) {
            _numberInt = (int) unsignedValue;
            _numTypesValid = NR_INT;
            return JsonToken.VALUE_NUMBER_INT;
        }
        actualValue = unsignedValue;
    }
    _numberLong = actualValue;
    _numTypesValid = NR_LONG;
    return JsonToken.VALUE_NUMBER_INT;
}
/**
 * Stores an already decoded (sign-less) integer value as
 * {@link BigInteger}, negated if so indicated. No check is made to see
 * whether value would fit in a smaller type.
 *
 * @param unsignedValue Magnitude of the value, without sign
 * @param negative Whether the value is to be negated
 *
 * @return {@code VALUE_NUMBER_INT}
 */
private JsonToken _decodeFromBigInteger(BigInteger unsignedValue, boolean negative)
{
    // Should we check for bounds here too? Let's not bother yet
    _numberBigInt = negative ? unsignedValue.negate() : unsignedValue;
    _numTypesValid = NR_BIGINT;
    return JsonToken.VALUE_NUMBER_INT;
}
/**
 * Parses given digits as an {@code int} of given base, reporting
 * malformed content as a parse problem.
 *
 * @param str Digits to parse
 * @param base Base (radix) of the number
 *
 * @return Decoded value
 *
 * @throws IOException if content is not a valid number of given base
 *
 * @since 2.14
 */
private int _decodeInt(String str, int base) throws IOException
{
    final int parsed;
    try {
        parsed = Integer.parseInt(str, base);
    } catch (NumberFormatException nfe) {
        return _reportInvalidNumber(str, base, nfe);
    }
    return parsed;
}
/**
 * Parses given digits as a {@code long} of given base, reporting
 * malformed content as a parse problem.
 *
 * @param str Digits to parse
 * @param base Base (radix) of the number
 *
 * @return Decoded value
 *
 * @throws IOException if content is not a valid number of given base
 *
 * @since 2.14
 */
private long _decodeLong(String str, int base) throws IOException
{
    final long parsed;
    try {
        parsed = Long.parseLong(str, base);
    } catch (NumberFormatException nfe) {
        return _reportInvalidNumber(str, base, nfe);
    }
    return parsed;
}
/**
 * Parses given digits as a {@link BigInteger} of given base, after
 * validating length against configured stream read constraints.
 *
 * @param numStr Digits to parse
 * @param base Base (radix) of the number
 *
 * @return Decoded value
 *
 * @throws IOException if number is too long, or content is not a valid
 *    number of given base
 *
 * @since 2.14
 */
private BigInteger _decodeBigInt(String numStr, int base) throws IOException
{
    streamReadConstraints().validateIntegerLength(numStr.length());
    final boolean useFastParser = isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER);
    try {
        if (base == 10) {
            return NumberInput.parseBigInteger(numStr, useFastParser);
        }
        return NumberInput.parseBigIntegerWithRadix(numStr, base, useFastParser);
    } catch (NumberFormatException nfe) {
        return _reportInvalidNumber(numStr, base, nfe);
    }
}
/**
 * Reports a failure to decode a number of given base as a parse problem.
 * Declared generic, with a nominal return value, only so that callers of
 * any result type can write {@code return _reportInvalidNumber(...)}.
 *
 * @param <T> Nominal result type, as needed by the caller
 * @param str Content that could not be decoded
 * @param base Base (radix) that was used for decoding
 * @param e Problem encountered when decoding
 *
 * @return Nothing: never returns normally
 *
 * @throws IOException always (by {@code _reportError()})
 *
 * @since 2.14
 */
private <T> T _reportInvalidNumber(String str, int base, Exception e) throws IOException {
    _reportError(String.format("Invalid base-%d number ('%s'), problem: %s",
            base, str, e.getMessage()));
    return null; // never gets here
}
/*
/**********************************************************
/* String value handling
/**********************************************************
*/
/**
 * @return Locally stored field name if the current token is a field name;
 *    otherwise whatever the base implementation returns
 */
@Override // since 2.17
public String currentName() throws IOException
{
    return (_currToken == JsonToken.FIELD_NAME)
            ? _currentFieldName
            : super.currentName();
}
/**
 * NOTE: must override just to avoid problems b/w this and
 * {@code currentName()} calls wrt parent definitions.
 *
 * @return Locally stored field name if the current token is a field name;
 *    otherwise whatever the base implementation returns
 */
@Deprecated // since 2.17
@Override
public String getCurrentName() throws IOException
{
    return (_currToken == JsonToken.FIELD_NAME)
            ? _currentFieldName
            : super.getCurrentName();
}
/**
 * @return Always {@code false}: for now we do not store {@code char[]}
 *    representation of text
 */
@Override
public boolean hasTextCharacters()
{
    return false;
}
/**
 * @return For field names, the name; for scalar values, textual content of
 *    the scalar; for other tokens, fixed textual representation of the
 *    token; and {@code null} if there is no current token
 */
@Override
public String getText() throws IOException
{
    final JsonToken t = _currToken;
    if (t == null) {
        return null;
    }
    if (t == JsonToken.FIELD_NAME) {
        return _currentFieldName;
    }
    if (t == JsonToken.VALUE_STRING || t.isScalarValue()) {
        return _textValue;
    }
    return t.asString();
}
/**
 * @return Newly allocated array with characters of {@link #getText()};
 *    {@code null} if there is no text
 */
@Override
public char[] getTextCharacters() throws IOException
{
    final String text = getText();
    if (text == null) {
        return null;
    }
    return text.toCharArray();
}
/**
 * @return Length of {@link #getText()}; 0 if there is no text
 */
@Override
public int getTextLength() throws IOException
{
    final String text = getText();
    if (text == null) {
        return 0;
    }
    return text.length();
}
/**
 * @return Always 0: array returned by {@link #getTextCharacters()} only
 *    contains the text itself
 */
@Override
public int getTextOffset() throws IOException
{
    return 0;
}
/**
 * Writes textual representation of the current token (see
 * {@link #getText()}) using given writer.
 *
 * @param writer Writer to write text to
 *
 * @return Number of characters written; 0 if there is no text
 */
@Override // since 2.8
public int getText(Writer writer) throws IOException
{
    final String text = getText();
    if (text != null) {
        writer.write(text);
        return text.length();
    }
    return 0;
}
/*
/**********************************************************************
/* Binary (base64)
/**********************************************************************
*/
/**
 * @return Decoded binary content if the current token is
 *    {@code VALUE_EMBEDDED_OBJECT}; {@code null} otherwise
 */
@Override
public Object getEmbeddedObject() throws IOException
{
    return (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT) ? _binaryValue : null;
}
// Base impl from `ParserBase` works fine here:
// public byte[] getBinaryValue(Base64Variant variant) throws IOException

/**
 * Writes the whole binary content of the current value, as returned by
 * {@code getBinaryValue()}, to given stream.
 *
 * @param b64variant Base64 variant to use for decoding, if necessary
 * @param out Stream to write decoded content to
 *
 * @return Number of bytes written
 */
@Override
public int readBinaryValue(Base64Variant b64variant, OutputStream out) throws IOException
{
    final byte[] decoded = getBinaryValue(b64variant);
    out.write(decoded);
    return decoded.length;
}
/*
/**********************************************************************
/* Number accessor overrides
/**********************************************************************
*/
/**
 * @return Always {@code NumberTypeFP.UNKNOWN}
 */
@Override // added in 2.17
public NumberTypeFP getNumberTypeFP() throws IOException
{
    return NumberTypeFP.UNKNOWN;
}
/**
 * Accessor for the current numeric value that avoids decoding when
 * possible: returns the number if one has already been decoded; otherwise
 * its cleaned-up textual representation, leaving decoding to the caller.
 *
 * @return Already decoded number, or cleaned-up text of the number
 *
 * @throws IOException if the current token is not numeric, or if there is
 *    neither decoded value nor cleaned-up text for an integer
 */
@Override
public Object getNumberValueDeferred() throws IOException
{
    // 01-Feb-2023, tatu: ParserBase implementation does not quite work
    //   due to refactoring. So let's try to cobble something together
    final int valid = _numTypesValid;

    if (_currToken == JsonToken.VALUE_NUMBER_INT) {
        // We might already have suitable value?
        if ((valid & NR_INT) != 0) {
            return _numberInt;
        }
        if ((valid & NR_LONG) != 0) {
            return _numberLong;
        }
        if ((valid & NR_BIGINT) != 0) {
            return _getBigInteger();
        }
        final String cleaned = _cleanedTextValue;
        if (cleaned == null) {
            _reportError("Internal number decoding error: `_cleanedTextValue` null when nothing decoded for `JsonToken.VALUE_NUMBER_INT`");
        }
        return cleaned;
    }
    if (_currToken != JsonToken.VALUE_NUMBER_FLOAT) {
        _reportError("Current token ("+_currToken+") not numeric, can not use numeric value accessors");
    }
    // For FP, see if we might have decoded values already
    if ((valid & NR_BIGDECIMAL) != 0) {
        return _getBigDecimal();
    }
    if ((valid & NR_DOUBLE) != 0) {
        return _getNumberDouble();
    }
    if ((valid & NR_FLOAT) != 0) {
        return _getNumberFloat();
    }
    // But if not, same as BigInteger, let lazy/deferred handling be done
    return _cleanedTextValue;
}
/**
* Decodes the current numeric token from its cleaned-up textual
* representation ({@code _cleanedTextValue}), storing the result in the
* number field that matches its magnitude and updating
* {@code _numTypesValid} accordingly.
*<p>
* Integers are stored as {@code int}, {@code long} or {@link BigInteger}
* (smallest that fits, chosen based on number of digits); floating-point
* values as {@code BigDecimal} if so requested, otherwise as
* {@code double}.
*
* @param expType Numeric type caller expects; only consulted for
*    floating-point tokens, to see if {@code BigDecimal} is needed
*
* @throws IOException if the current token is not numeric, number exceeds
*    configured length constraints, or content can not be decoded
*/
@Override
protected void _parseNumericValue(int expType) throws IOException
{
// Int or float?
if (_currToken == JsonToken.VALUE_NUMBER_INT) {
// number of digits: text length, not counting minus sign (if any)
int len = _cleanedTextValue.length();
if (_numberNegative) {
len--;
}
if (len <= 9) { // definitely fits in int
_numberInt = _decodeInt(_cleanedTextValue, 10);
_numTypesValid = NR_INT;
return;
}
if (len <= 18) { // definitely fits in long
long l = Long.parseLong(_cleanedTextValue);
// [JACKSON-230] Could still fit in int, need to check
if (len == 10) {
if (_numberNegative) {
if (l >= Integer.MIN_VALUE) {
_numberInt = (int) l;
_numTypesValid = NR_INT;
return;
}
} else {
if (l <= Integer.MAX_VALUE) {
_numberInt = (int) l;
_numTypesValid = NR_INT;
return;
}
}
}
_numberLong = l;
_numTypesValid = NR_LONG;
return;
}
// !!! TODO: implement proper bounds checks; now we'll just use BigInteger for convenience
final String numStr = _cleanedTextValue;
try {
streamReadConstraints().validateIntegerLength(numStr.length());
BigInteger n = NumberInput.parseBigInteger(
numStr, isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
// Could still fit in a long, need to check
if (len == 19 && n.bitLength() <= 63) {
_numberLong = n.longValue();
_numTypesValid = NR_LONG;
return;
}
_numberBigInt = n;
_numTypesValid = NR_BIGINT;
return;
} catch (NumberFormatException nex) {
// NOTE: pass non-cleaned variant for error message
// Can this ever occur? Due to overflow, maybe?
_wrapError("Malformed numeric value '"+_textValue+"'", nex);
}
}
if (_currToken == JsonToken.VALUE_NUMBER_FLOAT) {
// optional underscores, if any, have already been stripped out:
final String numStr = _cleanedTextValue;
try {
if (expType == NR_BIGDECIMAL) {
streamReadConstraints().validateFPLength(numStr.length());
_numberBigDecimal = NumberInput.parseBigDecimal(
numStr, isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
_numTypesValid = NR_BIGDECIMAL;
} else {
// Otherwise double has to do
streamReadConstraints().validateFPLength(numStr.length());
_numberDouble = NumberInput.parseDouble(numStr, isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
_numTypesValid = NR_DOUBLE;
}
} catch (NumberFormatException nex) {
// Can this ever occur? Due to overflow, maybe?
// NOTE: pass non-cleaned variant for error message
_wrapError("Malformed numeric value '"+_textValue+"'", nex);
}
return;
}
_reportError("Current token ("+_currToken+") not numeric, can not use numeric value accessors");
}
/**
 * Decodes the current numeric value as an {@code int}: integers with at
 * most 9 digits are decoded directly; everything else goes through general
 * number decoding, followed by conversion to {@code int} if necessary.
 *
 * @return Current numeric value as {@code int}
 *
 * @throws IOException if the current token is not numeric, or value can not
 *    be decoded or converted
 */
@Override
protected int _parseIntValue() throws IOException
{
    if (_currToken == JsonToken.VALUE_NUMBER_INT) {
        final int textLen = _cleanedTextValue.length();
        // minus sign (if any) does not count as a digit
        final int digitCount = _numberNegative ? (textLen - 1) : textLen;
        if (digitCount <= 9) { // definitely fits in int
            final int decoded = _decodeInt(_cleanedTextValue, 10);
            _numberInt = decoded;
            _numTypesValid = NR_INT;
            return decoded;
        }
    }
    _parseNumericValue(NR_INT);
    final boolean haveInt = (_numTypesValid & NR_INT) != 0;
    if (!haveInt) {
        convertNumberToInt();
    }
    return _numberInt;
}
/*
/**********************************************************************
/* Native id (type id) access
/**********************************************************************
*/
/**
 * @return Anchor of the value parser currently points to, if any;
 *    {@code null} if none
 */
@Override
public String getObjectId() throws IOException
{
    return _currentAnchor;
}
/**
 * Accessor for the Type Id of the current value: the YAML tag of the event
 * tracked for tags (collection start or scalar), without its leading
 * {@code '!'} character(s).
 *
 * @return Tag, if one exists; {@code null} if there is no applicable event
 *    or event has no tag
 */
@Override
public String getTypeId() throws IOException
{
    final Event tagSource = _lastTagEvent;
    final String rawTag;

    if (tagSource instanceof CollectionStartEvent) {
        rawTag = ((CollectionStartEvent) tagSource).getTag();
    } else if (tagSource instanceof ScalarEvent) {
        rawTag = ((ScalarEvent) tagSource).getTag();
    } else {
        return null;
    }
    if (rawTag == null) {
        return null;
    }
    // 04-Aug-2013, tatu: Looks like YAML parser's expose these in... somewhat exotic
    //   ways sometimes. So let's prepare to peel off some wrappings:
    int start = 0;
    while (rawTag.startsWith("!", start)) {
        ++start;
    }
    return rawTag.substring(start);
}
/*
/**********************************************************************
/* Internal methods
/**********************************************************************
*/
/**
 * Helper method that produces the cleaned-up version of a YAML decimal
 * integer -- one that standard JDK classes can parse -- and stores it as
 * {@code _cleanedTextValue}. Clean-up consists of dropping the leading
 * plus sign (if any) and all underscores.
 *
 * @param str Textual content of the integer; must not be empty
 *
 * @return {@code VALUE_NUMBER_INT}
 *
 * @throws IOException if nothing but (possibly) a minus sign remains
 *    after clean-up
 */
private JsonToken _cleanYamlInt(String str) throws IOException
{
    // Here we already know there is either plus sign, or underscore (or both)
    final int start = (str.charAt(0) == '+') ? 1 : 0;
    final String cleaned = str.substring(start).replace("_", "");

    _cleanedTextValue = cleaned;
    final boolean noDigits = cleaned.isEmpty() || "-".equals(cleaned);
    if (noDigits) {
        _reportError(String.format("Invalid number ('%s')", str));
    }
    return JsonToken.VALUE_NUMBER_INT;
}
/**
 * Helper method that extracts characters of given String from given offset
 * up to given end, leaving out all underscores.
 *
 * @param str String to extract characters from
 * @param i Offset of the first character to include
 * @param len Offset to stop at (exclusive)
 *
 * @return Extracted characters; or {@code str} itself if as many
 *    characters as {@code len} were retained
 */
private String _cleanUnderscores(String str, int i, final int len)
{
    final StringBuilder kept = new StringBuilder(len);
    int pos = i;
    while (pos < len) {
        final char c = str.charAt(pos++);
        if (c == '_') {
            continue;
        }
        kept.append(c);
    }
    // tiny optimization: if nothing was trimmed, return String
    return (kept.length() == len) ? str : kept.toString();
}
/**
 * Helper method that produces the cleaned-up version of a YAML
 * floating-point number and stores it as {@code _cleanedTextValue}.
 * Content without underscores is used as is; otherwise the leading plus
 * sign (if any) and all underscores are dropped.
 *
 * @param str Textual content of the number
 *
 * @return {@code VALUE_NUMBER_FLOAT}
 */
private JsonToken _cleanYamlFloat(String str)
{
    // Here we do NOT yet know whether we might have underscores so check
    if (str.isEmpty() || str.indexOf('_') < 0) {
        _cleanedTextValue = str;
    } else {
        // first: do we have a leading plus sign to skip?
        final int start = (str.charAt(0) == '+') ? 1 : 0;
        _cleanedTextValue = str.substring(start).replace("_", "");
    }
    return JsonToken.VALUE_NUMBER_FLOAT;
}
}