All downloads are free. The search and download features use the official Maven repository.

com.fasterxml.jackson.dataformat.toml.Parser Maven / Gradle / Ivy

package com.fasterxml.jackson.dataformat.toml;

import com.fasterxml.jackson.core.StreamReadFeature;
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.io.NumberInput;
import com.fasterxml.jackson.core.util.VersionUtil;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.ValueNode;

import java.io.IOException;
import java.io.Reader;
import java.math.BigDecimal;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.OffsetDateTime;
import java.time.temporal.Temporal;

class Parser {
    // Shared node factory; it creates the TOML-specific object/array node subclasses declared in this class.
    private static final JsonNodeFactory factory = new JsonNodeFactoryImpl();
    // Upper bound on how many characters of an offending number are echoed into an error message.
    private static final int MAX_CHARS_TO_REPORT = 1000;

    // Consulted for stream-read constraints and stream features while parsing numbers.
    private final TomlFactory tomlFactory;

    // Produces the positioned exceptions thrown by this parser.
    private final TomlStreamReadException.ErrorContext errorContext;
    // Bit mask of enabled format features (tested e.g. through TomlReadFeature#enabledIn).
    private final int options;
    // Token source; its lexical state is switched explicitly by poll(int).
    private final Lexer lexer;

    // One-token lookahead: the token that will be consumed next, or null once no tokens remain.
    private TomlToken next;

    private Parser(
            TomlFactory tomlFactory,
            IOContext ioContext,
            TomlStreamReadException.ErrorContext errorContext,
            int options,
            Reader reader
    ) throws IOException {
        this.tomlFactory = tomlFactory;
        this.errorContext = errorContext;
        this.options = options;
        this.lexer = new Lexer(reader, ioContext, errorContext);
        lexer.prohibitInternalBufferAllocate = (options & TomlWriteFeature.INTERNAL_PROHIBIT_INTERNAL_BUFFER_ALLOCATE) != 0;
        this.next = lexer.yylex();
    }

    /**
     * Parses a complete TOML document using a freshly created default {@link TomlFactory}.
     *
     * @param ioContext I/O context; also supplies the content reference used for error reporting
     * @param options bit mask of enabled format features
     * @param reader character stream containing the TOML document
     * @return root object of the parsed document
     * @throws IOException if reading fails; parse errors surface as {@link TomlStreamReadException}
     * @deprecated since 2.15; use {@link #parse(TomlFactory, IOContext, Reader)} instead
     */
    @Deprecated // v2.15
    public static ObjectNode parse(
            final IOContext ioContext,
            final int options,
            final Reader reader
    ) throws IOException {
        final TomlFactory defaultFactory = new TomlFactory();
        final TomlStreamReadException.ErrorContext errors =
                new TomlStreamReadException.ErrorContext(ioContext.contentReference(), null);
        final Parser parser = new Parser(defaultFactory, ioContext, errors, options, reader);
        try {
            final ObjectNode root = parser.parse();
            assert parser.getNestingDepth() == 0;
            return root;
        } finally {
            // Hand the lexer's buffers back whether or not parsing succeeded.
            parser.lexer.releaseBuffers();
        }
    }

    /**
     * Parses a complete TOML document, taking the format features from the given factory.
     *
     * @param tomlFactory factory with configuration; a default {@link TomlFactory} is used when {@code null}
     * @param ioContext I/O context
     * @param reader character stream
     * @return parsed ObjectNode
     * @throws IOException if there are I/O issues
     * @since v2.15
     */
    public static ObjectNode parse(
            final TomlFactory tomlFactory,
            final IOContext ioContext,
            final Reader reader
    ) throws IOException {
        // Named so that it does not shadow the static node factory of this class.
        final TomlFactory effectiveFactory;
        if (tomlFactory == null) {
            effectiveFactory = new TomlFactory();
        } else {
            effectiveFactory = tomlFactory;
        }
        final TomlStreamReadException.ErrorContext errors =
                new TomlStreamReadException.ErrorContext(ioContext.contentReference(), null);
        final int features = effectiveFactory.getFormatParserFeatures();
        final Parser parser = new Parser(effectiveFactory, ioContext, errors, features, reader);
        try {
            final ObjectNode root = parser.parse();
            assert parser.getNestingDepth() == 0;
            return root;
        } finally {
            // Hand the lexer's buffers back whether or not parsing succeeded.
            parser.lexer.releaseBuffers();
        }
    }

    /**
     * @return the nesting depth currently reported by the underlying lexer
     */
    int getNestingDepth() {
        return this.lexer.getNestingDepth();
    }

    /**
     * Returns the lookahead token without consuming it.
     *
     * @return the next token, never {@code null}
     * @throws TomlStreamReadException if no token is left
     */
    private TomlToken peek() throws TomlStreamReadException {
        if (this.next != null) {
            return this.next;
        }
        throw errorContext.atPosition(lexer).generic("Premature end of file");
    }

    /**
     * Consumes the lookahead token, switches the lexer to {@code nextState} and lexes the token after it.
     * <p>
     * Note: Polling also lexes the next token, so methods like {@link Lexer#yytext()} will not work afterwards
     *
     * @param nextState lexer state in which the following token is read
     * @return the token that was consumed
     */
    private TomlToken poll(int nextState) throws IOException {
        final TomlToken consumed = peek();
        lexer.yybegin(nextState);
        this.next = lexer.yylex();
        return consumed;
    }

    /**
     * Consumes the lookahead token and fails unless it is {@code expected}.
     *
     * @param expected token kind that must come next
     * @param nextState lexer state in which the following token is read
     * @throws IOException if lexing fails or a different token was found
     */
    private void pollExpected(TomlToken expected, int nextState) throws IOException {
        final TomlToken consumed = poll(nextState);
        if (consumed == expected) {
            return;
        }
        throw errorContext.atPosition(lexer).unexpectedToken(consumed, expected.toString());
    }

    /**
     * Parses the whole token stream into a tree: top-level key/value pairs, {@code [table]} headers
     * and {@code [[array-of-tables]]} headers.
     *
     * @return root object of the document
     * @throws IOException if lexing fails or the document is malformed
     */
    public ObjectNode parse() throws IOException {
        final TomlObjectNode root = (TomlObjectNode) factory.objectNode();
        // Table that receives subsequent key/value pairs; replaced by every table header.
        TomlObjectNode currentTable = root;
        while (next != null) {
            final TomlToken token = peek();
            switch (token) {
                case UNQUOTED_KEY:
                case STRING:
                    parseKeyVal(currentTable, Lexer.EXPECT_EOL);
                    break;
                case STD_TABLE_OPEN: {
                    pollExpected(TomlToken.STD_TABLE_OPEN, Lexer.EXPECT_INLINE_KEY);
                    final FieldRef header = parseAndEnterKey(root, true);
                    currentTable = getOrCreateObject(header.object, header.key);
                    if (currentTable.defined) {
                        throw errorContext.atPosition(lexer).generic("Table redefined");
                    }
                    currentTable.defined = true;
                    pollExpected(TomlToken.STD_TABLE_CLOSE, Lexer.EXPECT_EOL);
                    break;
                }
                case ARRAY_TABLE_OPEN: {
                    pollExpected(TomlToken.ARRAY_TABLE_OPEN, Lexer.EXPECT_INLINE_KEY);
                    final FieldRef header = parseAndEnterKey(root, true);
                    final TomlArrayNode tables = getOrCreateArray(header.object, header.key);
                    if (tables.closed) {
                        throw errorContext.atPosition(lexer).generic("Array already finished");
                    }
                    // Every [[header]] appends a fresh table to the array.
                    currentTable = (TomlObjectNode) tables.addObject();
                    pollExpected(TomlToken.ARRAY_TABLE_CLOSE, Lexer.EXPECT_EOL);
                    break;
                }
                default:
                    throw errorContext.atPosition(lexer).unexpectedToken(token, "key or table");
            }
        }
        assert lexer.yyatEOF();
        final int eofState = lexer.yystate();
        final boolean endedCleanly = eofState == Lexer.EXPECT_EXPRESSION || eofState == Lexer.EXPECT_EOL;
        if (!endedCleanly) {
            throw errorContext.atPosition(lexer).generic("EOF in wrong state");
        }
        return root;
    }

    /**
     * Reads a (possibly dotted) key and walks into the tree along all parts except the last,
     * creating intermediate objects where none exist yet.
     *
     * @param outer object the key is resolved against
     * @param forTable {@code true} when the key comes from a table header, {@code false} for a key/value pair
     * @return the object that should hold the final key part, together with that part
     * @throws IOException if lexing fails, or the path crosses a closed or non-container value
     */
    private FieldRef parseAndEnterKey(
            TomlObjectNode outer,
            boolean forTable
    ) throws IOException {
        TomlObjectNode current = outer;
        for (;;) {
            if (current.closed) {
                throw errorContext.atPosition(lexer).generic("Object already closed");
            }
            if (!forTable) {
                /* "Dotted keys create and define a table for each key part before the last one, provided that such
                 * tables were not previously created." */
                current.defined = true;
            }

            final TomlToken partToken = peek();
            String part;
            switch (partToken) {
                case STRING:
                    part = lexer.textBuffer.contentsAsString();
                    break;
                case UNQUOTED_KEY:
                    part = lexer.yytext();
                    break;
                default:
                    throw errorContext.atPosition(lexer).unexpectedToken(partToken, "quoted or unquoted key");
            }
            pollExpected(partToken, Lexer.EXPECT_INLINE_KEY);
            if (peek() != TomlToken.DOT_SEP) {
                // Last part reached: the caller decides what to store under it.
                return new FieldRef(current, part);
            }
            pollExpected(TomlToken.DOT_SEP, Lexer.EXPECT_INLINE_KEY);

            final JsonNode existing = current.get(part);
            if (existing == null) {
                current = (TomlObjectNode) current.putObject(part);
                continue;
            }
            if (existing.isObject()) {
                current = (TomlObjectNode) existing;
                continue;
            }
            if (!existing.isArray()) {
                throw errorContext.atPosition(lexer).generic("Path into existing non-object value of type " + existing.getNodeType());
            }
            /* "Any reference to an array of tables points to the most recently defined table element of the array.
             * This allows you to define sub-tables, and even sub-arrays of tables, inside the most recent table."
             *
             * I interpret this somewhat broadly: I accept such references even if there were unrelated tables
             * in between, and I accept them for simple dotted keys as well (not just for tables). These cases don't
             * seem to be covered by the specification.
             */
            final TomlArrayNode tables = (TomlArrayNode) existing;
            if (tables.closed) {
                throw errorContext.atPosition(lexer).generic("Array already closed");
            }
            // Only arrays declared by array tables are not closed, and those are always arrays of objects.
            current = (TomlObjectNode) tables.get(tables.size() - 1);
        }
    }

    /**
     * Parses one value, dispatching on the kind of the lookahead token.
     *
     * @param nextState lexer state to enter once the value has been consumed
     * @return node representing the value
     * @throws IOException if lexing fails or the lookahead token cannot start a value
     */
    private JsonNode parseValue(int nextState) throws IOException {
        final TomlToken firstToken = peek();
        switch (firstToken) {
            case STRING: {
                // Grab the text before polling; polling lexes the next token.
                final String content = lexer.textBuffer.contentsAsString();
                pollExpected(TomlToken.STRING, nextState);
                return factory.textNode(content);
            }
            case TRUE:
            case FALSE:
                pollExpected(firstToken, nextState);
                return factory.booleanNode(firstToken == TomlToken.TRUE);
            case OFFSET_DATE_TIME:
            case LOCAL_DATE_TIME:
            case LOCAL_DATE:
            case LOCAL_TIME:
                return parseDateTime(nextState);
            case INTEGER:
                return parseInt(nextState);
            case FLOAT:
                return parseFloat(nextState);
            case INLINE_TABLE_OPEN:
                return parseInlineTable(nextState);
            case ARRAY_OPEN:
                return parseArray(nextState);
            default:
                throw errorContext.atPosition(lexer).unexpectedToken(firstToken, "value");
        }
    }

    /**
     * Parses a date, time or date-time token.
     * <p>
     * When {@code TomlReadFeature.PARSE_JAVA_TIME} is enabled the result is a POJO node wrapping the
     * matching {@code java.time} value; otherwise the (normalized) text is returned as a text node.
     *
     * @param nextState lexer state to enter once the token has been consumed
     * @return node representing the value
     * @throws IOException if lexing fails, or the text does not denote a valid date/time
     */
    private JsonNode parseDateTime(int nextState) throws IOException {
        // Read the text first: polling lexes the following token.
        String text = lexer.yytext();
        final TomlToken token = poll(nextState);
        // the time-delim index can be [Tt ]. java.time supports only [Tt]
        if ((token == TomlToken.LOCAL_DATE_TIME || token == TomlToken.OFFSET_DATE_TIME) && text.charAt(10) == ' ') {
            text = text.substring(0, 10) + 'T' + text.substring(11);
        }

        if (!TomlReadFeature.PARSE_JAVA_TIME.enabledIn(options)) {
            return factory.textNode(text);
        }

        Temporal value;
        try {
            if (token == TomlToken.LOCAL_DATE) {
                value = LocalDate.parse(text);
            } else if (token == TomlToken.LOCAL_TIME) {
                value = LocalTime.parse(text);
            } else if (token == TomlToken.LOCAL_DATE_TIME) {
                value = LocalDateTime.parse(text);
            } else if (token == TomlToken.OFFSET_DATE_TIME) {
                value = OffsetDateTime.parse(text);
            } else {
                VersionUtil.throwInternal();
                throw new AssertionError();
            }
        } catch (DateTimeException e) {
            // java.time rejects values such as month 13 or Feb 30 with an unchecked exception;
            // report them like every other syntax problem instead of letting it escape.
            throw errorContext.atPosition(lexer).generic("Invalid date/time value: " + e.getMessage());
        }
        return factory.pojoNode(value);
    }

    /**
     * Parses the integer token currently held in the lexer's text buffer.
     *
     * @param nextState lexer state to enter once the token has been consumed
     * @return numeric node for the integer
     * @throws IOException if lexing fails or the number is rejected
     */
    private JsonNode parseInt(int nextState) throws IOException {
        char[] digits = lexer.getTextBuffer();
        int offset = lexer.getTextBufferStart();
        int count = lexer.getTextBufferEnd() - offset;

        boolean hasSeparator = false;
        for (int i = offset, end = offset + count; i < end && !hasSeparator; i++) {
            hasSeparator = digits[i] == '_';
        }
        if (hasSeparator) {
            // slow path to remove underscores
            digits = new String(digits, offset, count).replace("_", "").toCharArray();
            offset = 0;
            count = digits.length;
        }

        // Convert before polling: polling lexes the next token.
        final ValueNode parsed = parseIntFromBuffer(digits, offset, count);
        pollExpected(TomlToken.INTEGER, nextState);
        return parsed;
    }

    /**
     * Converts the text of an integer token (underscores already removed) into a numeric node,
     * choosing int, long or big-integer representation depending on the magnitude.
     * <p>
     * A second character of {@code x}, {@code o} or {@code b} selects hexadecimal, octal or binary;
     * everything else is treated as decimal with an optional leading sign.
     *
     * @param buffer characters holding the number
     * @param start index of the first character of the number
     * @param length number of characters making up the number
     * @return numeric node for the value
     * @throws TomlStreamReadException if the number cannot be parsed or violates the read constraints
     */
    private ValueNode parseIntFromBuffer(char[] buffer, int start, int length) throws TomlStreamReadException {
        final boolean useFastParser = tomlFactory.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER);

        if (length > 2) {
            final char baseChar = buffer[start + 1];

            if (baseChar == 'x' || baseChar == 'o' || baseChar == 'b') {
                // Skip the two-character radix prefix.
                start += 2;
                length -= 2;
                final String text = new String(buffer, start, length);

                try {
                    // note: we parse all these as unsigned. Hence the weird int limits.
                    // hex
                    if (baseChar == 'x') {
                        if (length <= 31 / 4) {
                            return factory.numberNode(Integer.parseInt(text, 16));
                        } else if (length <= 63 / 4) {
                            return factory.numberNode(Long.parseLong(text, 16));
                        } else {
                            return factory.numberNode(NumberInput.parseBigIntegerWithRadix(text, 16, useFastParser));
                        }
                    }
                    // octal
                    if (baseChar == 'o') {
                        // this is a bit conservative, but who uses octal anyway?
                        if (length <= 31 / 3) {
                            return factory.numberNode(Integer.parseInt(text, 8));
                        } else if (length <= 63 / 3) {
                            return factory.numberNode(Long.parseLong(text, 8));
                        } else {
                            return factory.numberNode(NumberInput.parseBigIntegerWithRadix(text, 8, useFastParser));
                        }
                    }
                    // binary
                    assert baseChar == 'b';
                    if (length <= 31) {
                        return factory.numberNode(Integer.parseUnsignedInt(text, 2));
                    } else if (length <= 63) {
                        return factory.numberNode(Long.parseUnsignedLong(text, 2));
                    } else {
                        return factory.numberNode(NumberInput.parseBigIntegerWithRadix(text, 2, useFastParser));
                    }
                } catch (NumberFormatException e) {
                    throw errorContext.atPosition(lexer).invalidNumber(e, text);
                }
            }
        }

        // decimal: strip an optional sign; from here on start/length describe the magnitude only
        boolean negative = false;
        final char first = buffer[start];
        if (first == '-' || first == '+') {
            negative = first == '-';
            start++;
            length--;
        }
        // adapted from JsonParserBase
        if (length <= 9) {
            int v = NumberInput.parseInt(buffer, start, length);
            if (negative) v = -v;
            return factory.numberNode(v);
        }
        // NOTE(review): for 19-digit in-range values this hands 19 digits to NumberInput.parseLong;
        // confirm that helper supports that length.
        if (length <= 18 || NumberInput.inLongRange(buffer, start, length, negative)) {
            long v = NumberInput.parseLong(buffer, start, length);
            if (negative) v = -v;
            // Might still fit in int, need to check
            if ((int) v == v) {
                return factory.numberNode((int) v);
            } else {
                return factory.numberNode(v);
            }
        }

        // Beyond long range: enforce the configured length limit before building a big integer.
        try {
            tomlFactory.streamReadConstraints().validateIntegerLength(length);
        } catch (NumberFormatException | StreamConstraintsException e) {
            final String reportNum = length <= MAX_CHARS_TO_REPORT
                    ? new String(buffer, start, length)
                    : new String(buffer, start, MAX_CHARS_TO_REPORT) + " [truncated]";
            throw errorContext.atPosition(lexer).invalidNumber(e, reportNum);
        }
        final String magnitude = new String(buffer, start, length);
        if (negative) {
            // The sign was stripped above, so it has to be re-applied to the parsed magnitude.
            return factory.numberNode(NumberInput.parseBigInteger(magnitude, useFastParser).negate());
        }
        return factory.numberNode(NumberInput.parseBigInteger(magnitude, useFastParser));
    }

    /**
     * Parses a float token: {@code nan}, {@code inf} (optionally signed) or a decimal number,
     * the latter being returned as a {@link BigDecimal} node.
     *
     * @param nextState lexer state to enter once the token has been consumed
     * @return numeric node for the value
     * @throws IOException if lexing fails or the number is rejected
     */
    private JsonNode parseFloat(int nextState) throws IOException {
        final String text = lexer.yytext().replace("_", "");
        pollExpected(TomlToken.FLOAT, nextState);

        if (text.endsWith("nan")) {
            return factory.numberNode(Double.NaN);
        }
        if (text.endsWith("inf")) {
            final double infinity = text.startsWith("-") ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
            return factory.numberNode(infinity);
        }

        try {
            // Related to [databind#4250], need to pre-validate
            if (!NumberInput.looksLikeValidNumber(text)) {
                throw new NumberFormatException("Not a valid Number representation");
            }
            tomlFactory.streamReadConstraints().validateFPLength(text.length());
            final boolean useFastParser = tomlFactory.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER);
            final BigDecimal parsed = NumberInput.parseBigDecimal(text, useFastParser);
            return factory.numberNode(parsed);
        } catch (NumberFormatException | StreamConstraintsException e) {
            // Keep error messages bounded for very long inputs.
            String reportNum = text;
            if (text.length() > MAX_CHARS_TO_REPORT) {
                reportNum = text.substring(0, MAX_CHARS_TO_REPORT) + " [truncated]";
            }
            throw errorContext.atPosition(lexer).invalidNumber(e, reportNum);
        }
    }

    /**
     * Parses an inline table and marks the resulting object as closed and defined.
     *
     * @param nextState lexer state to enter after the closing token
     * @return the parsed table
     * @throws IOException if lexing fails or the table is malformed
     */
    private ObjectNode parseInlineTable(int nextState) throws IOException {
        // inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
        // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
        pollExpected(TomlToken.INLINE_TABLE_OPEN, Lexer.EXPECT_INLINE_KEY);
        final TomlObjectNode table = (TomlObjectNode) factory.objectNode();
        for (;;) {
            if (peek() == TomlToken.INLINE_TABLE_CLOSE) {
                if (!table.isEmpty()) {
                    // "A terminating comma (also called trailing comma) is not permitted after the last key/value pair
                    // in an inline table."
                    throw errorContext.atPosition(lexer).generic("Trailing comma not permitted for inline tables");
                }
                break;
            }
            parseKeyVal(table, Lexer.EXPECT_TABLE_SEP);
            final TomlToken separator = peek();
            if (separator == TomlToken.INLINE_TABLE_CLOSE) {
                break;
            }
            if (separator != TomlToken.COMMA) {
                throw errorContext.atPosition(lexer).unexpectedToken(separator, "comma or table end");
            }
            pollExpected(TomlToken.COMMA, Lexer.EXPECT_INLINE_KEY);
        }
        pollExpected(TomlToken.INLINE_TABLE_CLOSE, nextState);
        table.closed = true;
        table.defined = true;
        return table;
    }

    /**
     * Parses an array literal (a trailing comma is accepted) and marks the resulting array as closed.
     *
     * @param nextState lexer state to enter after the closing token
     * @return the parsed array
     * @throws IOException if lexing fails or the array is malformed
     */
    private ArrayNode parseArray(int nextState) throws IOException {
        // array = array-open [ array-values ] ws-comment-newline array-close
        // array-values =  ws-comment-newline val ws-comment-newline array-sep array-values
        // array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
        pollExpected(TomlToken.ARRAY_OPEN, Lexer.EXPECT_VALUE);
        final TomlArrayNode elements = (TomlArrayNode) factory.arrayNode();
        for (;;) {
            if (peek() == TomlToken.ARRAY_CLOSE) {
                break;
            }
            elements.add(parseValue(Lexer.EXPECT_ARRAY_SEP));
            final TomlToken separator = peek();
            if (separator == TomlToken.ARRAY_CLOSE) {
                break;
            }
            if (separator != TomlToken.COMMA) {
                throw errorContext.atPosition(lexer).unexpectedToken(separator, "comma or array end");
            }
            pollExpected(TomlToken.COMMA, Lexer.EXPECT_VALUE);
        }
        pollExpected(TomlToken.ARRAY_CLOSE, nextState);
        elements.closed = true;
        return elements;
    }

    /**
     * Parses one {@code key = value} pair and stores the value in the object addressed by the key.
     *
     * @param target object against which the (possibly dotted) key is resolved
     * @param nextState lexer state to enter once the value has been consumed
     * @throws IOException if lexing fails, the pair is malformed or the key is already present
     */
    private void parseKeyVal(TomlObjectNode target, int nextState) throws IOException {
        // keyval = key keyval-sep val
        final FieldRef ref = parseAndEnterKey(target, false);
        pollExpected(TomlToken.KEY_VAL_SEP, Lexer.EXPECT_VALUE);
        final JsonNode value = parseValue(nextState);

        final TomlObjectNode owner = ref.object;
        final String key = ref.key;
        if (owner.has(key)) {
            throw errorContext.atPosition(lexer).generic("Duplicate key");
        }
        owner.set(key, value);
    }

    /**
     * Returns the object stored under {@code field}, creating it when the field is absent.
     *
     * @param node parent object
     * @param field name of the field to look up
     * @return the existing or newly created object
     * @throws TomlStreamReadException if the field holds something other than an object
     */
    private TomlObjectNode getOrCreateObject(ObjectNode node, String field) throws TomlStreamReadException {
        final JsonNode existing = node.get(field);
        if (existing == null) {
            return (TomlObjectNode) node.putObject(field);
        }
        if (!existing.isObject()) {
            throw errorContext.atPosition(lexer).generic("Path into existing non-object value of type " + existing.getNodeType());
        }
        return (TomlObjectNode) existing;
    }

    /**
     * Returns the array stored under {@code field}, creating it when the field is absent.
     *
     * @param node parent object
     * @param field name of the field to look up
     * @return the existing or newly created array
     * @throws TomlStreamReadException if the field holds something other than an array
     */
    private TomlArrayNode getOrCreateArray(ObjectNode node, String field) throws TomlStreamReadException {
        JsonNode existing = node.get(field);
        if (existing == null) {
            return (TomlArrayNode) node.putArray(field);
        } else if (existing.isArray()) {
            return (TomlArrayNode) existing;
        } else {
            // Report the type of the conflicting value, not of the parent (which is always an object).
            throw errorContext.atPosition(lexer).generic("Path into existing non-array value of type " + existing.getNodeType());
        }
    }

    /**
     * Addresses one field: the object that holds (or will hold) it, plus the field name.
     * Produced by {@code parseAndEnterKey} for the last part of a key.
     */
    private static class FieldRef {
        // Object in which the field lives.
        final TomlObjectNode object;
        // Name of the field inside that object.
        final String key;

        FieldRef(TomlObjectNode object, String key) {
            this.object = object;
            this.key = key;
        }
    }

    /**
     * Object node carrying the extra bookkeeping the parser needs while building the tree.
     */
    @SuppressWarnings("serial") // only used internally, no need to be JDK serializable
    private static class TomlObjectNode extends ObjectNode {
        /** Set once an inline table has been fully parsed; key paths may no longer go through it. */
        boolean closed;
        /** Set by a table header, a dotted key passing through, or an inline table; a second header is rejected. */
        boolean defined;

        TomlObjectNode(JsonNodeFactory nc) {
            super(nc);
        }
    }

    /**
     * Array node that remembers whether it came from an array literal.
     */
    @SuppressWarnings("serial") // only used internally, no need to be JDK serializable
    private static class TomlArrayNode extends ArrayNode {
        /** Set once an array literal has been fully parsed; such arrays cannot be extended or entered by key paths. */
        boolean closed;

        TomlArrayNode(JsonNodeFactory nf) {
            super(nf);
        }

        TomlArrayNode(JsonNodeFactory nf, int capacity) {
            super(nf, capacity);
        }
    }

    /**
     * Node factory that hands out {@link TomlArrayNode} and {@link TomlObjectNode} instances, so that
     * containers created through it (including via {@code putObject}/{@code putArray} on those nodes,
     * presumably — the casts in this class rely on it) carry the parser's bookkeeping flags.
     */
    @SuppressWarnings("serial") // only used internally, no need to be JDK serializable
    private static class JsonNodeFactoryImpl extends JsonNodeFactory {
        public JsonNodeFactoryImpl() {
            super(true); // exact bigdecimals
        }

        // Arrays are always created as TomlArrayNode.
        @Override
        public ArrayNode arrayNode() {
            return new TomlArrayNode(this);
        }

        // Same as above, with an initial capacity.
        @Override
        public ArrayNode arrayNode(int capacity) {
            return new TomlArrayNode(this, capacity);
        }

        // Objects are always created as TomlObjectNode.
        @Override
        public ObjectNode objectNode() {
            return new TomlObjectNode(this);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy