All downloads are free. Search and download functionality uses the official Maven repository.

software.amazon.awssdk.codegen.jmespath.parser.JmesPathParser Maven / Gradle / Ivy

/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package software.amazon.awssdk.codegen.jmespath.parser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.function.BiFunction;
import software.amazon.awssdk.codegen.internal.Jackson;
import software.amazon.awssdk.codegen.jmespath.component.AndExpression;
import software.amazon.awssdk.codegen.jmespath.component.BracketSpecifier;
import software.amazon.awssdk.codegen.jmespath.component.BracketSpecifierWithQuestionMark;
import software.amazon.awssdk.codegen.jmespath.component.Comparator;
import software.amazon.awssdk.codegen.jmespath.component.ComparatorExpression;
import software.amazon.awssdk.codegen.jmespath.component.CurrentNode;
import software.amazon.awssdk.codegen.jmespath.component.Expression;
import software.amazon.awssdk.codegen.jmespath.component.ExpressionType;
import software.amazon.awssdk.codegen.jmespath.component.FunctionArg;
import software.amazon.awssdk.codegen.jmespath.component.FunctionExpression;
import software.amazon.awssdk.codegen.jmespath.component.IndexExpression;
import software.amazon.awssdk.codegen.jmespath.component.KeyValueExpression;
import software.amazon.awssdk.codegen.jmespath.component.Literal;
import software.amazon.awssdk.codegen.jmespath.component.MultiSelectHash;
import software.amazon.awssdk.codegen.jmespath.component.MultiSelectList;
import software.amazon.awssdk.codegen.jmespath.component.NotExpression;
import software.amazon.awssdk.codegen.jmespath.component.OrExpression;
import software.amazon.awssdk.codegen.jmespath.component.ParenExpression;
import software.amazon.awssdk.codegen.jmespath.component.PipeExpression;
import software.amazon.awssdk.codegen.jmespath.component.SliceExpression;
import software.amazon.awssdk.codegen.jmespath.component.SubExpression;
import software.amazon.awssdk.codegen.jmespath.component.SubExpressionRight;
import software.amazon.awssdk.codegen.jmespath.component.WildcardExpression;
import software.amazon.awssdk.codegen.jmespath.parser.util.CompositeParser;
import software.amazon.awssdk.utils.Logger;

/**
 * Parses a JMESPath expression string into an {@link Expression}.
 *
 * This implements the grammar described here: https://jmespath.org/specification.html#grammar
 */
public class JmesPathParser {
    // Logger for this class. NOTE(review): presumably consumed by the logError helper, which is outside this view.
    private static final Logger log = Logger.loggerFor(JmesPathParser.class);

    // The complete expression text. The parse methods address it by [startPosition, endPosition) offsets.
    private final String input;

    /**
     * Creates a parser bound to a single expression string. The constructor is private; callers use the static
     * {@link #parse(String)} entry point.
     *
     * @param input the JMESPath expression text to parse
     */
    private JmesPathParser(String input) {
        this.input = input;
    }

    /**
     * Converts JMESPath expression text into its {@link Expression} representation.
     *
     * @param jmesPathString the JMESPath expression to parse
     * @return the parsed expression
     * @throws IllegalArgumentException if the text cannot be parsed as an expression
     */
    public static Expression parse(String jmesPathString) {
        JmesPathParser parser = new JmesPathParser(jmesPathString);
        return parser.parse();
    }

    /**
     * Parses the whole of {@link #input} as a single expression.
     *
     * @return the parsed expression
     * @throws IllegalArgumentException if the input could not be parsed
     */
    private Expression parse() {
        // The type argument is required: with a raw ParseResult, result() is not assignable to Expression.
        ParseResult<Expression> expression = parseExpression(0, input.length());
        if (!expression.hasResult()) {
            throw new IllegalArgumentException("Failed to parse expression.");
        }

        return expression.result();
    }

    /**
     * Parses the range {@code [startPosition, endPosition)} of the input as any kind of expression. The individual
     * rules are registered with {@link CompositeParser} in the order shown in the method body.
     *
     * <pre>
     * expression        = sub-expression / index-expression  / comparator-expression
     * expression        =/ or-expression / identifier
     * expression        =/ and-expression / not-expression / paren-expression
     * expression        =/ "*" / multi-select-list / multi-select-hash / literal
     * expression        =/ function-expression / pipe-expression / raw-string
     * expression        =/ current-node
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the parsed {@link Expression}, or {@code ParseResult.error()} when the bounds are rejected or the
     *         composite parser yields no result
     */
    private ParseResult<Expression> parseExpression(int startPosition, int endPosition) {
        startPosition = trimLeftWhitespace(startPosition, endPosition);
        endPosition = trimRightWhitespace(startPosition, endPosition);

        // NOTE(review): this bound tolerates endPosition == input.length() + 1, one past the usual exclusive end;
        // confirm that is intended before tightening it.
        if (startPosition < 0 || endPosition > input.length() + 1) {
            return ParseResult.error();
        }

        return CompositeParser.firstTry(this::parseSubExpression, Expression::subExpression)
                              .thenTry(this::parseIndexExpression, Expression::indexExpression)
                              .thenTry(this::parseNotExpression, Expression::notExpression)
                              .thenTry(this::parseAndExpression, Expression::andExpression)
                              .thenTry(this::parseOrExpression, Expression::orExpression)
                              .thenTry(this::parseComparatorExpression, Expression::comparatorExpression)
                              .thenTry(this::parsePipeExpression, Expression::pipeExpression)
                              .thenTry(this::parseIdentifier, Expression::identifier)
                              .thenTry(this::parseParenExpression, Expression::parenExpression)
                              .thenTry(this::parseWildcardExpression, Expression::wildcardExpression)
                              .thenTry(this::parseMultiSelectList, Expression::multiSelectList)
                              .thenTry(this::parseMultiSelectHash, Expression::multiSelectHash)
                              .thenTry(this::parseLiteral, Expression::literal)
                              .thenTry(this::parseFunctionExpression, Expression::functionExpression)
                              .thenTry(this::parseRawString, Expression::rawString)
                              .thenTry(this::parseCurrentNode, Expression::currentNode)
                              .parse(startPosition, endPosition);
    }

    /**
     * Parses a sub-expression by trying every candidate "." position returned by {@code findCharacters} and
     * returning the first split for which both sides parse.
     *
     * <pre>
     * sub-expression    = expression "." ( identifier /
     * multi-select-list /
     * multi-select-hash /
     * function-expression /
     * "*" )
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the parsed {@link SubExpression}, or {@code ParseResult.error()} if no split parses on both sides
     */
    private ParseResult<SubExpression> parseSubExpression(int startPosition, int endPosition) {
        startPosition = trimLeftWhitespace(startPosition, endPosition);
        endPosition = trimRightWhitespace(startPosition, endPosition);

        // List<Integer> (not a raw List) is required for the Integer-typed loop variable below to compile.
        List<Integer> dotPositions = findCharacters(startPosition + 1, endPosition - 1, ".");
        for (Integer dotPosition : dotPositions) {
            ParseResult<Expression> leftSide = parseExpression(startPosition, dotPosition);
            if (!leftSide.hasResult()) {
                continue;
            }

            ParseResult<SubExpressionRight> rightSide =
                CompositeParser.firstTry(this::parseIdentifier, SubExpressionRight::identifier)
                               .thenTry(this::parseMultiSelectList, SubExpressionRight::multiSelectList)
                               .thenTry(this::parseMultiSelectHash, SubExpressionRight::multiSelectHash)
                               .thenTry(this::parseFunctionExpression, SubExpressionRight::functionExpression)
                               .thenTry(this::parseWildcardExpression, SubExpressionRight::wildcardExpression)
                               .parse(dotPosition + 1, endPosition);

            if (!rightSide.hasResult()) {
                continue;
            }

            return ParseResult.success(new SubExpression(leftSide.result(), rightSide.result()));
        }

        logError("sub-expression", "Invalid sub-expression", startPosition);
        return ParseResult.error();
    }

    /**
     * Parses a pipe expression by delegating to {@code parseBinaryExpression} with the {@code "|"} delimiter.
     *
     * <pre>
     * pipe-expression   = expression "|" expression
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the {@link PipeExpression} built from both operands, or {@code ParseResult.error()}
     */
    private ParseResult<PipeExpression> parsePipeExpression(int startPosition, int endPosition) {
        return parseBinaryExpression(startPosition, endPosition, "|", PipeExpression::new);
    }

    /**
     * Parses an or-expression by delegating to {@code parseBinaryExpression} with the {@code "||"} delimiter.
     *
     * <pre>
     * or-expression     = expression "||" expression
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the {@link OrExpression} built from both operands, or {@code ParseResult.error()}
     */
    private ParseResult<OrExpression> parseOrExpression(int startPosition, int endPosition) {
        return parseBinaryExpression(startPosition, endPosition, "||", OrExpression::new);
    }

    /**
     * Parses an and-expression by delegating to {@code parseBinaryExpression} with the {@code "&&"} delimiter.
     *
     * <pre>
     * and-expression    = expression "&amp;&amp;" expression
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the {@link AndExpression} built from both operands, or {@code ParseResult.error()}
     */
    private ParseResult<AndExpression> parseAndExpression(int startPosition, int endPosition) {
        return parseBinaryExpression(startPosition, endPosition, "&&", AndExpression::new);
    }

    /**
     * Shared implementation for rules of the form {@code expression <delimiter> expression}. Every delimiter
     * position returned by {@code findCharacters} is tried in order; the first one for which both sides parse as
     * expressions wins.
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @param delimiter the operator text separating the two operands
     * @param constructor builds the result from the left and right operand expressions
     * @param <T> the type of expression node produced by {@code constructor}
     * @return the constructed node, or {@code ParseResult.error()} if no delimiter position parses on both sides
     */
    private <T> ParseResult<T> parseBinaryExpression(int startPosition, int endPosition, String delimiter,
                                                     BiFunction<Expression, Expression, T> constructor) {
        startPosition = trimLeftWhitespace(startPosition, endPosition);
        endPosition = trimRightWhitespace(startPosition, endPosition);

        List<Integer> delimiterPositions = findCharacters(startPosition + 1, endPosition - 1, delimiter);
        for (Integer delimiterPosition : delimiterPositions) {
            ParseResult<Expression> leftSide = parseExpression(startPosition, delimiterPosition);
            if (!leftSide.hasResult()) {
                continue;
            }

            // Skip the whole delimiter: it may be longer than one character (for example "||").
            ParseResult<Expression> rightSide = parseExpression(delimiterPosition + delimiter.length(), endPosition);
            if (!rightSide.hasResult()) {
                continue;
            }

            return ParseResult.success(constructor.apply(leftSide.result(), rightSide.result()));
        }

        logError("binary-expression", "Invalid binary-expression", startPosition);
        return ParseResult.error();
    }

    /**
     * Parses a not-expression: a leading {@code '!'} followed by an expression.
     *
     * <pre>
     * not-expression    = "!" expression
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the {@link NotExpression} wrapping the operand, or {@code ParseResult.error()}
     */
    private ParseResult<NotExpression> parseNotExpression(int startPosition, int endPosition) {
        startPosition = trimLeftWhitespace(startPosition, endPosition);
        endPosition = trimRightWhitespace(startPosition, endPosition);

        if (!startsWith(startPosition, '!')) {
            logError("not-expression", "Expected '!'", startPosition);
            return ParseResult.error();
        }

        return parseExpression(startPosition + 1, endPosition).mapResult(NotExpression::new);
    }

    /**
     * Parses a parenthesized expression; the content between the parentheses is parsed as an expression.
     *
     * <pre>
     * paren-expression  = "(" expression ")"
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the {@link ParenExpression} wrapping the inner expression, or {@code ParseResult.error()}
     */
    private ParseResult<ParenExpression> parseParenExpression(int startPosition, int endPosition) {
        startPosition = trimLeftWhitespace(startPosition, endPosition);
        endPosition = trimRightWhitespace(startPosition, endPosition);

        if (!startsAndEndsWith(startPosition, endPosition, '(', ')')) {
            logError("paren-expression", "Expected '(' and ')'", startPosition);
            return ParseResult.error();
        }

        return parseExpression(startPosition + 1, endPosition - 1).mapResult(ParenExpression::new);
    }

    /**
     * Parses an index-expression, either with a left-hand expression or as a bare bracket-specifier. In the bare
     * form the {@link IndexExpression} is created with a {@code null} left-hand expression.
     *
     * <pre>
     * index-expression  = expression bracket-specifier / bracket-specifier
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the parsed {@link IndexExpression}, or {@code ParseResult.error()}
     */
    private ParseResult<IndexExpression> parseIndexExpression(int startPosition, int endPosition) {
        startPosition = trimLeftWhitespace(startPosition, endPosition);
        endPosition = trimRightWhitespace(startPosition, endPosition);

        return CompositeParser.firstTry(this::parseIndexExpressionWithLhsExpression)
                              .thenTry(this::parseBracketSpecifier, b -> IndexExpression.indexExpression(null, b))
                              .parse(startPosition, endPosition);
    }

    /**
     * Parses the {@code expression bracket-specifier} form of an index-expression. Every candidate "[" position
     * returned by {@code findCharacters} is tried; the text before it must parse as an expression and the text
     * from it onwards as a bracket-specifier.
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the parsed {@link IndexExpression}, or {@code ParseResult.error()} if no split parses on both sides
     */
    private ParseResult<IndexExpression> parseIndexExpressionWithLhsExpression(int startPosition, int endPosition) {
        startPosition = trimLeftWhitespace(startPosition, endPosition);
        endPosition = trimRightWhitespace(startPosition, endPosition);

        List<Integer> bracketPositions = findCharacters(startPosition + 1, endPosition - 1, "[");
        for (Integer bracketPosition : bracketPositions) {
            ParseResult<Expression> leftSide = parseExpression(startPosition, bracketPosition);
            if (!leftSide.hasResult()) {
                continue;
            }

            // The bracket itself belongs to the specifier, so the right side starts at the bracket position.
            ParseResult<BracketSpecifier> rightSide = parseBracketSpecifier(bracketPosition, endPosition);
            if (!rightSide.hasResult()) {
                continue;
            }

            return ParseResult.success(IndexExpression.indexExpression(leftSide.result(), rightSide.result()));
        }

        logError("index-expression with lhs-expression", "Invalid index-expression with lhs-expression", startPosition);
        return ParseResult.error();
    }

    /**
     * Parses a multi-select-list: a {@code [...]}-delimited, comma-separated list of expressions.
     *
     * <pre>
     * multi-select-list = "[" ( expression *( "," expression ) ) "]"
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the parsed {@link MultiSelectList}, or {@code ParseResult.error()}
     */
    private ParseResult<MultiSelectList> parseMultiSelectList(int startPosition, int endPosition) {
        return parseMultiSelect(startPosition, endPosition, '[', ']', this::parseExpression)
            .mapResult(MultiSelectList::new);
    }

    /**
     * Parses a multi-select-hash: a <code>{...}</code>-delimited, comma-separated list of key/value expressions.
     *
     * <pre>
     * multi-select-hash = "{" ( keyval-expr *( "," keyval-expr ) ) "}"
     * </pre>
     *
     * @param startPosition inclusive start offset into the input
     * @param endPosition exclusive end offset into the input
     * @return the parsed {@link MultiSelectHash}, or {@code ParseResult.error()}
     */
    private ParseResult<MultiSelectHash> parseMultiSelectHash(int startPosition, int endPosition) {
        return parseMultiSelect(startPosition, endPosition, '{', '}', this::parseKeyValueExpression)
            .mapResult(MultiSelectHash::new);
    }

    /**
     * Parses "startDelimiter" ( entryParserType *( "," entryParserType ) ) "endDelimiter"
     * 

* Used by {@link #parseMultiSelectHash}, {@link #parseMultiSelectList}. */ private ParseResult> parseMultiSelect(int startPosition, int endPosition, char startDelimiter, char endDelimiter, Parser entryParser) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (!startsAndEndsWith(startPosition, endPosition, startDelimiter, endDelimiter)) { logError("multi-select", "Expected '" + startDelimiter + "' and '" + endDelimiter + "'", startPosition); return ParseResult.error(); } List commaPositions = findCharacters(startPosition + 1, endPosition - 1, ","); if (commaPositions.isEmpty()) { return entryParser.parse(startPosition + 1, endPosition - 1).mapResult(Collections::singletonList); } List results = new ArrayList<>(); // Find first valid entries before a comma int startOfSecondEntry = -1; for (Integer comma : commaPositions) { ParseResult result = entryParser.parse(startPosition + 1, comma); if (!result.hasResult()) { continue; } results.add(result.result()); startOfSecondEntry = comma + 1; } if (results.size() == 0) { logError("multi-select", "Invalid value", startPosition + 1); return ParseResult.error(); } if (results.size() > 1) { logError("multi-select", "Ambiguous separation", startPosition); return ParseResult.error(); } // Find any subsequent entries int startPositionAfterComma = startOfSecondEntry; for (Integer commaPosition : commaPositions) { if (startPositionAfterComma > commaPosition) { continue; } ParseResult entry = entryParser.parse(startPositionAfterComma, commaPosition); if (!entry.hasResult()) { continue; } results.add(entry.result()); startPositionAfterComma = commaPosition + 1; } ParseResult entry = entryParser.parse(startPositionAfterComma, endPosition - 1); if (!entry.hasResult()) { logError("multi-select", "Ambiguous separation", startPosition); return ParseResult.error(); } results.add(entry.result()); return ParseResult.success(results); } /** * keyval-expr = 
identifier ":" expression */ private ParseResult parseKeyValueExpression(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); List delimiterPositions = findCharacters(startPosition + 1, endPosition - 1, ":"); for (Integer delimiterPosition : delimiterPositions) { ParseResult identifier = parseIdentifier(startPosition, delimiterPosition); if (!identifier.hasResult()) { continue; } ParseResult expression = parseExpression(delimiterPosition + 1, endPosition); if (!expression.hasResult()) { continue; } return ParseResult.success(new KeyValueExpression(identifier.result(), expression.result())); } logError("keyval-expr", "Invalid keyval-expr", startPosition); return ParseResult.error(); } /** * bracket-specifier = "[" (number / "*" / slice-expression) "]" / "[]" * bracket-specifier =/ "[?" expression "]" */ private ParseResult parseBracketSpecifier(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (!startsAndEndsWith(startPosition, endPosition, '[', ']')) { logError("bracket-specifier", "Expecting '[' and ']'", startPosition); return ParseResult.error(); } // "[]" if (charsInRange(startPosition, endPosition) == 2) { return ParseResult.success(BracketSpecifier.withoutContents()); } // "[?" 
expression "]" if (input.charAt(startPosition + 1) == '?') { return parseExpression(startPosition + 2, endPosition - 1) .mapResult(e -> BracketSpecifier.withQuestionMark(new BracketSpecifierWithQuestionMark(e))); } // "[" (number / "*" / slice-expression) "]" return CompositeParser.firstTry(this::parseNumber, BracketSpecifier::withNumberContents) .thenTry(this::parseWildcardExpression, BracketSpecifier::withWildcardExpressionContents) .thenTry(this::parseSliceExpression, BracketSpecifier::withSliceExpressionContents) .parse(startPosition + 1, endPosition - 1); } /** * comparator-expression = expression comparator expression */ private ParseResult parseComparatorExpression(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); for (Comparator comparator : Comparator.values()) { List comparatorPositions = findCharacters(startPosition, endPosition, comparator.tokenSymbol()); for (Integer comparatorPosition : comparatorPositions) { ParseResult lhsExpression = parseExpression(startPosition, comparatorPosition); if (!lhsExpression.hasResult()) { continue; } ParseResult rhsExpression = parseExpression(comparatorPosition + comparator.tokenSymbol().length(), endPosition); if (!rhsExpression.hasResult()) { continue; } return ParseResult.success(new ComparatorExpression(lhsExpression.result(), comparator, rhsExpression.result())); } } logError("comparator-expression", "Invalid comparator expression", startPosition); return ParseResult.error(); } /** * slice-expression = [number] ":" [number] [ ":" [number] ] */ private ParseResult parseSliceExpression(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); // Find the first colon int firstColonIndex = input.indexOf(':', startPosition); if (firstColonIndex < 0 || firstColonIndex >= endPosition) { 
logError("slice-expression", "Expected slice expression", startPosition); return ParseResult.error(); } // Find the second colon (if it exists) int maybeSecondColonIndex = input.indexOf(':', firstColonIndex + 1); OptionalInt secondColonIndex = maybeSecondColonIndex < 0 || maybeSecondColonIndex >= endPosition ? OptionalInt.empty() : OptionalInt.of(maybeSecondColonIndex); // Find the first number bounds (if it exists) int firstNumberStart = startPosition; int firstNumberEnd = firstColonIndex; // Find the second number bounds (if it exists) int secondNumberStart = firstColonIndex + 1; int secondNumberEnd = secondColonIndex.orElse(endPosition); // Find the third number bounds (if it exists) int thirdNumberStart = secondColonIndex.orElse(endPosition) + 1; int thirdNumberEnd = endPosition; // Parse the first number (if it exists) Optional firstNumber = Optional.empty(); if (firstNumberStart < firstNumberEnd) { ParseResult firstNumberParse = parseNumber(firstNumberStart, firstNumberEnd); if (!firstNumberParse.hasResult()) { return ParseResult.error(); } firstNumber = Optional.of(firstNumberParse.result()); } // Parse the second number (if it exists) Optional secondNumber = Optional.empty(); if (secondNumberStart < secondNumberEnd) { ParseResult secondNumberParse = parseNumber(secondNumberStart, secondNumberEnd); if (!secondNumberParse.hasResult()) { return ParseResult.error(); } secondNumber = Optional.of(secondNumberParse.result()); } // Parse the third number (if it exists) Optional thirdNumber = Optional.empty(); if (thirdNumberStart < thirdNumberEnd) { ParseResult thirdNumberParse = parseNumber(thirdNumberStart, thirdNumberEnd); if (!thirdNumberParse.hasResult()) { return ParseResult.error(); } thirdNumber = Optional.of(thirdNumberParse.result()); } return ParseResult.success(new SliceExpression(firstNumber.orElse(null), secondNumber.orElse(null), thirdNumber.orElse(null))); } /** * function-expression = unquoted-string ( no-args / one-or-more-args ) */ private 
ParseResult parseFunctionExpression(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); int paramIndex = input.indexOf('(', startPosition); if (paramIndex <= 0) { logError("function-expression", "Expected function", startPosition); return ParseResult.error(); } ParseResult functionNameParse = parseUnquotedString(startPosition, paramIndex); if (!functionNameParse.hasResult()) { logError("function-expression", "Expected valid function name", startPosition); return ParseResult.error(); } return CompositeParser.firstTry(this::parseNoArgs) .thenTry(this::parseOneOrMoreArgs) .parse(paramIndex, endPosition) .mapResult(args -> new FunctionExpression(functionNameParse.result(), args)); } /** * no-args = "(" ")" */ private ParseResult> parseNoArgs(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (!startsWith(startPosition, '(')) { logError("no-args", "Expected '('", startPosition); return ParseResult.error(); } int closePosition = trimLeftWhitespace(startPosition + 1, endPosition); if (input.charAt(closePosition) != ')') { logError("no-args", "Expected ')'", closePosition); return ParseResult.error(); } if (closePosition + 1 != endPosition) { logError("no-args", "Unexpected character", closePosition + 1); return ParseResult.error(); } return ParseResult.success(Collections.emptyList()); } /** * one-or-more-args = "(" ( function-arg *( "," function-arg ) ) ")" */ private ParseResult> parseOneOrMoreArgs(int startPosition, int endPosition) { return parseMultiSelect(startPosition, endPosition, '(', ')', this::parseFunctionArg); } /** * function-arg = expression / expression-type */ private ParseResult parseFunctionArg(int startPosition, int endPosition) { return CompositeParser.firstTry(this::parseExpression, FunctionArg::expression) 
.thenTry(this::parseExpressionType, FunctionArg::expressionType) .parse(startPosition, endPosition); } /** * current-node = "@" */ private ParseResult parseCurrentNode(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); return parseExpectedToken("current-node", startPosition, endPosition, '@').mapResult(x -> new CurrentNode()); } /** * expression-type = "&" expression */ private ParseResult parseExpressionType(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (!startsWith(startPosition, '&')) { logError("expression-type", "Expected '&'", startPosition); return ParseResult.error(); } return parseExpression(startPosition + 1, endPosition).mapResult(ExpressionType::new); } /** * raw-string = "'" *raw-string-char "'" */ private ParseResult parseRawString(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (charsInRange(startPosition, endPosition) < 2) { logError("raw-string", "Invalid length", startPosition); return ParseResult.error(); } if (!startsAndEndsWith(startPosition, endPosition, '\'', '\'')) { logError("raw-string", "Expected opening and closing \"'\"", startPosition); return ParseResult.error(); } if (charsInRange(startPosition, endPosition) == 2) { return ParseResult.success(""); } return parseRawStringChars(startPosition + 1, endPosition - 1); } /** * raw-string-char = (%x20-26 / %x28-5B / %x5D-10FFFF) / preserved-escape / raw-string-escape */ private ParseResult parseRawStringChars(int startPosition, int endPosition) { StringBuilder result = new StringBuilder(); for (int i = startPosition; i < endPosition; i++) { ParseResult rawStringChar = parseLegalRawStringChar(i, i + 1); if (rawStringChar.hasResult()) { 
result.append(rawStringChar.result()); continue; } ParseResult preservedEscape = parsePreservedEscape(i, i + 2); if (preservedEscape.hasResult()) { result.append(preservedEscape.result()); ++i; continue; } ParseResult rawStringEscape = parseRawStringEscape(i, i + 2); if (rawStringEscape.hasResult()) { result.append(rawStringEscape.result()); ++i; continue; } logError("raw-string", "Unexpected character", i); return ParseResult.error(); } return ParseResult.success(result.toString()); } /** * %x20-26 / %x28-5B / %x5D-10FFFF */ private ParseResult parseLegalRawStringChar(int startPosition, int endPosition) { if (charsInRange(startPosition, endPosition) != 1) { logError("raw-string-chars", "Invalid bounds", startPosition); return ParseResult.error(); } if (!isLegalRawStringChar(input.charAt(startPosition))) { logError("raw-string-chars", "Invalid character in sequence", startPosition); return ParseResult.error(); } return ParseResult.success(input.substring(startPosition, endPosition)); } private boolean isLegalRawStringChar(char c) { return (c >= 0x20 && c <= 0x26) || (c >= 0x28 && c <= 0x5B) || (c >= 0x5D); } /** * preserved-escape = escape (%x20-26 / %28-5B / %x5D-10FFFF) */ private ParseResult parsePreservedEscape(int startPosition, int endPosition) { if (endPosition > input.length()) { logError("preserved-escape", "Invalid end position", startPosition); return ParseResult.error(); } if (charsInRange(startPosition, endPosition) != 2) { logError("preserved-escape", "Invalid length", startPosition); return ParseResult.error(); } if (!startsWith(startPosition, '\\')) { logError("preserved-escape", "Expected \\", startPosition); return ParseResult.error(); } return parseLegalRawStringChar(startPosition + 1, endPosition).mapResult(v -> "\\" + v); } /** * raw-string-escape = escape ("'" / escape) */ private ParseResult parseRawStringEscape(int startPosition, int endPosition) { if (endPosition > input.length()) { logError("preserved-escape", "Invalid end position", 
startPosition); return ParseResult.error(); } if (charsInRange(startPosition, endPosition) != 2) { logError("raw-string-escape", "Invalid length", startPosition); return ParseResult.error(); } if (!startsWith(startPosition, '\\')) { logError("raw-string-escape", "Expected '\\'", startPosition); return ParseResult.error(); } if (input.charAt(startPosition + 1) != '\'' && input.charAt(startPosition + 1) != '\\') { logError("raw-string-escape", "Expected \"'\" or \"\\\"", startPosition); return ParseResult.error(); } return ParseResult.success(input.substring(startPosition, endPosition)); } /** * literal = "`" json-value "`" */ private ParseResult parseLiteral(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (charsInRange(startPosition, endPosition) < 2) { logError("literal", "Invalid bounds", startPosition); return ParseResult.error(); } if (!startsAndEndsWith(startPosition, endPosition, '`', '`')) { logError("literal", "Expected opening and closing '`'", startPosition); return ParseResult.error(); } StringBuilder jsonString = new StringBuilder(); for (int i = startPosition + 1; i < endPosition - 1; i++) { char character = input.charAt(i); if (character == '`') { int lastChar = i - 1; if (lastChar <= 0) { logError("literal", "Unexpected '`'", startPosition); return ParseResult.error(); } int escapeCount = 0; for (int j = i - 1; j >= startPosition; j--) { if (input.charAt(j) == '\\') { ++escapeCount; } else { break; } } if (escapeCount % 2 == 0) { logError("literal", "Unescaped '`'", startPosition); return ParseResult.error(); } jsonString.setLength(jsonString.length() - 1); // Remove escape. 
jsonString.append('`'); } else { jsonString.append(character); } } try { return ParseResult.success(new Literal(Jackson.readJrsValue(jsonString.toString()))); } catch (IOException e) { logError("literal", "Invalid JSON: " + e.getMessage(), startPosition); return ParseResult.error(); } } /** * number = ["-"]1*digit * digit = %x30-39 */ private ParseResult parseNumber(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (startsWith(startPosition, '-')) { return parseNonNegativeNumber(startPosition + 1, endPosition).mapResult(i -> -i); } return parseNonNegativeNumber(startPosition, endPosition); } private ParseResult parseNonNegativeNumber(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (charsInRange(startPosition, endPosition) < 1) { logError("number", "Expected number", startPosition); return ParseResult.error(); } try { return ParseResult.success(Integer.parseInt(input.substring(startPosition, endPosition))); } catch (NumberFormatException e) { logError("number", "Expected number", startPosition); return ParseResult.error(); } } /** * identifier = unquoted-string / quoted-string */ private ParseResult parseIdentifier(int startPosition, int endPosition) { return CompositeParser.firstTry(this::parseUnquotedString) .thenTry(this::parseQuotedString) .parse(startPosition, endPosition); } /** * unquoted-string = (%x41-5A / %x61-7A / %x5F) *( ; A-Za-z_ * %x30-39 / ; 0-9 * %x41-5A / ; A-Z * %x5F / ; _ * %x61-7A) ; a-z */ private ParseResult parseUnquotedString(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (charsInRange(startPosition, endPosition) < 1) { logError("unquoted-string", "Invalid unquoted-string", startPosition); 
return ParseResult.error(); } char firstToken = input.charAt(startPosition); if (!Character.isLetter(firstToken) && firstToken != '_') { logError("unquoted-string", "Unescaped strings must start with [A-Za-z_]", startPosition); return ParseResult.error(); } for (int i = startPosition; i < endPosition; i++) { char c = input.charAt(i); if (!Character.isLetterOrDigit(c) && c != '_') { logError("unquoted-string", "Invalid character in unescaped-string", i); return ParseResult.error(); } } return ParseResult.success(input.substring(startPosition, endPosition)); } /** * quoted-string = quote 1*(unescaped-char / escaped-char) quote * quote = '"' */ private ParseResult parseQuotedString(int startPosition, int endPosition) { startPosition = trimLeftWhitespace(startPosition, endPosition); endPosition = trimRightWhitespace(startPosition, endPosition); if (!startsAndEndsWith(startPosition, endPosition, '"', '"')) { logError("quoted-string", "Expected opening and closing '\"'", startPosition); return ParseResult.error(); } int stringStart = startPosition + 1; int stringEnd = endPosition - 1; int stringTokenCount = charsInRange(stringStart, stringEnd); if (stringTokenCount < 1) { logError("quoted-string", "Invalid quoted-string", startPosition); return ParseResult.error(); } StringBuilder result = new StringBuilder(); for (int i = stringStart; i < stringEnd; i++) { ParseResult unescapedChar = parseUnescapedChar(i, i + 1); if (unescapedChar.hasResult()) { result.append(unescapedChar.result()); continue; } ParseResult escapedChar = parseEscapedChar(i, i + 2); if (escapedChar.hasResult()) { result.append(escapedChar.result()); ++i; continue; } ParseResult escapedUnicodeSequence = parseEscapedUnicodeSequence(i, i + 6); if (escapedUnicodeSequence.hasResult()) { result.append(escapedUnicodeSequence.result()); i += 5; continue; } if (input.charAt(i) == '\\') { logError("quoted-string", "Unsupported escape sequence", i); } else { logError("quoted-string", "Unexpected character", i); } 
return ParseResult.error(); } return ParseResult.success(result.toString()); } /** * unescaped-char = %x20-21 / %x23-5B / %x5D-10FFFF */ private ParseResult parseUnescapedChar(int startPosition, int endPosition) { for (int i = startPosition; i < endPosition; i++) { if (!isLegalUnescapedChar(input.charAt(i))) { logError("unescaped-char", "Invalid character in sequence", startPosition); return ParseResult.error(); } } return ParseResult.success(input.substring(startPosition, endPosition)); } private boolean isLegalUnescapedChar(char c) { return (c >= 0x20 && c <= 0x21) || (c >= 0x23 && c <= 0x5B) || (c >= 0x5D); } /** * escaped-char = escape ( * %x22 / ; " quotation mark U+0022 * %x5C / ; \ reverse solidus U+005C * %x2F / ; / solidus U+002F * %x62 / ; b backspace U+0008 * %x66 / ; f form feed U+000C * %x6E / ; n line feed U+000A * %x72 / ; r carriage return U+000D * %x74 / ; t tab U+0009 * %x75 4HEXDIG ) ; uXXXX U+XXXX (this is handled as part of parseEscapedUnicodeSequence) */ private ParseResult parseEscapedChar(int startPosition, int endPosition) { if (endPosition > input.length()) { logError("escaped-char", "Invalid end position", startPosition); return ParseResult.error(); } if (charsInRange(startPosition, endPosition) != 2) { logError("escaped-char", "Invalid length", startPosition); return ParseResult.error(); } if (!startsWith(startPosition, '\\')) { logError("escaped-char", "Expected '\\'", startPosition); return ParseResult.error(); } char escapedChar = input.charAt(startPosition + 1); switch (escapedChar) { case '"': return ParseResult.success("\""); case '\\': return ParseResult.success("\\"); case '/': return ParseResult.success("/"); case 'b': return ParseResult.success("\b"); case 'f': return ParseResult.success("\f"); case 'n': return ParseResult.success("\n"); case 'r': return ParseResult.success("\r"); case 't': return ParseResult.success("\t"); default: logError("escaped-char", "Invalid escape sequence", startPosition); return ParseResult.error(); 
} } private ParseResult parseEscapedUnicodeSequence(int startPosition, int endPosition) { if (endPosition > input.length()) { logError("escaped-unicode-sequence", "Invalid end position", startPosition); return ParseResult.error(); } if (charsInRange(startPosition, endPosition) != 6) { logError("escaped-unicode-sequence", "Invalid length", startPosition); return ParseResult.error(); } if (input.charAt(startPosition) != '\\') { logError("escaped-unicode-sequence", "Expected '\\'", startPosition); return ParseResult.error(); } char escapedChar = input.charAt(startPosition + 1); if (escapedChar != 'u') { logError("escaped-unicode-sequence", "Invalid escape sequence", startPosition); return ParseResult.error(); } String unicodePattern = input.substring(startPosition + 2, startPosition + 2 + 4); char unicodeChar; try { unicodeChar = (char) Integer.parseInt(unicodePattern, 16); } catch (NumberFormatException e) { logError("escaped-unicode-sequence", "Invalid unicode hex sequence", startPosition); return ParseResult.error(); } return ParseResult.success(String.valueOf(unicodeChar)); } /** * "*" */ private ParseResult parseWildcardExpression(int startPosition, int endPosition) { return parseExpectedToken("star-expression", startPosition, endPosition, '*').mapResult(v -> new WildcardExpression()); } private int charsInRange(int startPosition, int endPosition) { return endPosition - startPosition; } private List findCharacters(int startPosition, int endPosition, String symbol) { List results = new ArrayList<>(); int start = startPosition; while (true) { int match = input.indexOf(symbol, start); if (match < 0 || match >= endPosition) { break; } results.add(match); start = match + 1; } return results; } private ParseResult parseExpectedToken(String parser, int startPosition, int endPosition, char expectedToken) { if (input.charAt(startPosition) != expectedToken) { logError(parser, "Expected '" + expectedToken + "'", startPosition); return ParseResult.error(); } if 
(charsInRange(startPosition, endPosition) != 1) { logError(parser, "Unexpected character", startPosition + 1); return ParseResult.error(); } return ParseResult.success(expectedToken); } private int trimLeftWhitespace(int startPosition, int endPosition) { while (input.charAt(startPosition) == ' ' && startPosition < endPosition - 1) { ++startPosition; } return startPosition; } private int trimRightWhitespace(int startPosition, int endPosition) { while (input.charAt(endPosition - 1) == ' ' && startPosition < endPosition - 1) { --endPosition; } return endPosition; } private boolean startsWith(int startPosition, char character) { return input.charAt(startPosition) == character; } private boolean endsWith(int endPosition, char character) { return input.charAt(endPosition - 1) == character; } private boolean startsAndEndsWith(int startPosition, int endPosition, char startChar, char endChar) { return startsWith(startPosition, startChar) && endsWith(endPosition, endChar); } private void logError(String parser, String message, int position) { log.debug(() -> parser + " at " + position + ": " + message); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy