All downloads are free. Search and download functionality uses the official Maven repository.

com.oracle.truffle.js.builtins.json.TruffleJSONParser Maven / Gradle / Ivy

/*
 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * The Universal Permissive License (UPL), Version 1.0
 *
 * Subject to the condition set forth below, permission is hereby granted to any
 * person obtaining a copy of this software, associated documentation and/or
 * data (collectively the "Software"), free of charge and under any and all
 * copyright rights in the Software, and any and all patent rights owned or
 * freely licensable by each licensor hereunder covering either (i) the
 * unmodified Software as contributed to or provided by such licensor, or (ii)
 * the Larger Works (as defined below), to deal in both
 *
 * (a) the Software, and
 *
 * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
 * one is included with the Software each a "Larger Work" to which the Software
 * is contributed by such licensors),
 *
 * without restriction, including without limitation the rights to copy, create
 * derivative works of, display, perform, and distribute the Software and make,
 * use, sell, offer for sale, import, export, have made, and have sold the
 * Software and the Larger Work(s), and to sublicense the foregoing rights on
 * either these or other terms.
 *
 * This license is subject to the following condition:
 *
 * The above copyright notice and either this complete permission notice or at a
 * minimum a reference to the UPL must be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package com.oracle.truffle.js.builtins.json;

import com.oracle.js.parser.ParserException;
import com.oracle.truffle.api.strings.TruffleString;
import com.oracle.truffle.api.strings.TruffleStringBuilderUTF16;
import com.oracle.truffle.js.runtime.Errors;
import com.oracle.truffle.js.runtime.JSContext;
import com.oracle.truffle.js.runtime.JSException;
import com.oracle.truffle.js.runtime.JSRealm;
import com.oracle.truffle.js.runtime.JSRuntime;
import com.oracle.truffle.js.runtime.Strings;
import com.oracle.truffle.js.runtime.array.ScriptArray;
import com.oracle.truffle.js.runtime.builtins.JSAbstractArray;
import com.oracle.truffle.js.runtime.builtins.JSArray;
import com.oracle.truffle.js.runtime.builtins.JSArrayObject;
import com.oracle.truffle.js.runtime.builtins.JSOrdinary;
import com.oracle.truffle.js.runtime.objects.JSObject;
import com.oracle.truffle.js.runtime.objects.Null;

/**
 * Recursive-descent JSON parser that reads its input directly from a {@link TruffleString}.
 *
 * <p>
 * The cursor ({@link #pos}), the input ({@link #parseStr}) and the nesting depth
 * ({@link #parseDepth}) live in mutable instance fields that are reset at the start of every
 * {@link #parse} call. Concurrent or re-entrant {@code parse} calls on the same instance would
 * therefore interfere with each other.
 *
 * <p>
 * Most methods read the current character without a prior bounds check. Running off the end of
 * the input is expected to surface as an {@link IndexOutOfBoundsException}, which {@link #parse}
 * translates into an "unexpected end of input" syntax error.
 */
public final class TruffleJSONParser {

    /** Parsing mode selected at construction time. */
    protected final Mode mode;
    /** {@code true} iff {@link #mode} is {@link Mode#WithReviverAndSource}. */
    protected final boolean withSource;
    protected final JSContext context;

    /** Index of the next code unit to read from {@link #parseStr}. */
    protected int pos;
    /** Length of {@link #parseStr}, cached when {@link #parse} starts. */
    protected int len;
    /** Input of the parse in progress; reset to {@code null} when {@link #parse} finishes. */
    protected TruffleString parseStr;
    /** Number of objects/arrays that have been entered but not yet closed. */
    protected int parseDepth;

    /** Nesting limit enforced by {@link #incDepth()}. */
    protected static final int MAX_PARSE_DEPTH = 100000;

    private static final String MALFORMED_NUMBER = "malformed number";

    /** Selects how {@link #parse} treats the input and what kind of result it produces. */
    public enum Mode {
        /**
         * No leading or trailing whitespace is skipped, and input starting with <code>{</code> or
         * {@code [} is rejected.
         */
        RawJSON,
        /** Within this class handled exactly like {@link #WithReviver}. */
        WithoutReviver,
        /** Within this class handled exactly like {@link #WithoutReviver}. */
        WithReviver,
        /**
         * Every parsed value is wrapped in a {@link JSONParseRecord}; literal records additionally
         * carry the source text they were parsed from.
         */
        WithReviverAndSource;
    }

    /** Creates a parser in {@link Mode#WithoutReviver} mode. */
    public TruffleJSONParser(JSContext context) {
        this(context, Mode.WithoutReviver);
    }

    /**
     * Creates a parser with an explicit mode.
     *
     * @param context the JS context used for allocations and option lookups
     * @param mode the parsing mode
     */
    public TruffleJSONParser(JSContext context, Mode mode) {
        this.context = context;
        this.mode = mode;
        this.withSource = mode == Mode.WithReviverAndSource;
    }

    /**
     * Parses {@code value} as a single JSON text.
     *
     * @param value the JSON text
     * @param realm the realm in which objects and arrays are created
     * @return the parsed value, or a {@link JSONParseRecord} describing it in
     *         {@link Mode#WithReviverAndSource} mode
     * @throws JSException a syntax error for malformed or trailing input, or a range error when
     *             the input is nested too deeply
     */
    public Object parse(TruffleString value, JSRealm realm) {
        this.pos = 0;
        this.parseDepth = 0;
        this.parseStr = value;
        this.len = Strings.length(parseStr);
        try {
            if (mode != Mode.RawJSON) {
                skipWhitespace();
            } else if (isObjectOrArrayStart(get())) {
                throw unexpectedToken();
            }
            Object result = parseJSONValue(realm);
            if (mode != Mode.RawJSON) {
                skipWhitespace();
            }
            if (posValid()) {
                // Something other than (permitted) whitespace follows the value.
                throw error("JSON cannot be fully parsed");
            }
            return result;
        } catch (StackOverflowError ex) {
            throw stackOverflowError();
        } catch (JSException ex) {
            // Already a proper JS error; pass it through unchanged.
            throw ex;
        } catch (IndexOutOfBoundsException ex) {
            // A read past the end of the input: the JSON text stopped too early.
            throw syntaxError(unexpectedEndOfInputMessage());
        } catch (Exception ex) {
            throw syntaxError(null);
        } finally {
            // Do not keep the input alive after the parse.
            parseStr = null;
        }
    }

    /** Returns the end-of-input message, which differs in Nashorn compatibility mode. */
    private String unexpectedEndOfInputMessage() {
        return context.isOptionNashornCompatibilityMode() ? "Unexpected end of input" : "Unexpected end of JSON input";
    }

    /**
     * Parses the JSON value starting at the current position, dispatching on its first character.
     *
     * @param realm the realm in which objects and arrays are created
     * @return the parsed value (or its {@link JSONParseRecord} when {@link #withSource} is set)
     */
    protected Object parseJSONValue(JSRealm realm) {
        char c = get();

        if (isStringQuote(c)) {
            return parseJSONString();
        } else if (isObjectStart(c)) {
            return parseJSONObject(realm);
        } else if (isArrayStart(c)) {
            return parseJSONArray(realm);
        } else if (isNullLiteral(c)) {
            return parseNullLiteral();
        } else if (isBooleanLiteral(c)) {
            return parseBooleanLiteral();
        } else if (isNumber(c)) {
            return parseJSONNumber();
        }
        throw unexpectedToken();
    }

    /** Returns whether {@code cur} can start a JSON number (a minus sign or an ASCII digit). */
    protected static boolean isNumber(char cur) {
        return cur == '-' || JSRuntime.isAsciiDigit(cur);
    }

    /** Returns whether {@code c} opens a JSON object. */
    protected static boolean isObjectStart(char c) {
        return c == '{';
    }

    /** Returns whether {@code c} opens a JSON array. */
    protected static boolean isArrayStart(char c) {
        return c == '[';
    }

    /** Returns whether {@code c} opens a JSON object or a JSON array. */
    protected static boolean isObjectOrArrayStart(char c) {
        return isObjectStart(c) || isArrayStart(c);
    }

    /**
     * Parses a JSON object; the current character must be the opening brace.
     *
     * @return the new object, or the {@link JSONParseRecord} wrapping it when {@link #withSource}
     *         is set
     */
    private Object parseJSONObject(JSRealm realm) {
        assert isObjectStart(get());
        incDepth();
        skipChar('{');
        skipWhitespace();
        JSObject object = JSOrdinary.create(context, realm);
        JSONParseRecord parseRecord = null;
        if (withSource) {
            parseRecord = JSONParseRecord.forObject(object);
        }
        if (get() != '}') {
            parseJSONMemberList(object, realm, parseRecord);
            if (get() != '}') {
                if (get() == '"') {
                    throw unexpectedString();
                } else {
                    throw unexpectedToken();
                }
            }
        }
        // The current character is known to be '}' here, so there is no whitespace left to skip.
        skipChar('}');
        decDepth();
        if (withSource) {
            return parseRecord;
        }
        return object;
    }

    /** Parses one or more comma-separated members and stops at the first non-comma character. */
    private void parseJSONMemberList(JSObject object, JSRealm realm, JSONParseRecord parseRecord) {
        while (true) {
            parseJSONMember(object, realm, parseRecord);

            skipWhitespace();
            if (get() == ',') {
                skipChar(',');
                skipWhitespace();
            } else {
                break;
            }
        }
    }

    /**
     * Parses a single {@code "key": value} member and defines it as a data property on
     * {@code object}. With {@link #withSource}, the value's record is also stored in
     * {@code parseRecord} under the key.
     */
    private void parseJSONMember(JSObject object, JSRealm realm, JSONParseRecord parseRecord) {
        TruffleString jsonKey = getJSONString();
        skipWhitespace();
        if (get() != ':') {
            throw error("Expected ':' after property name in JSON");
        }
        skipChar(':');
        skipWhitespace();
        Object jsonValue = parseJSONValue(realm);
        if (withSource) {
            // In source mode parseJSONValue yields records; unwrap the value for the property.
            JSONParseRecord entryRecord = (JSONParseRecord) jsonValue;
            jsonValue = entryRecord.value();
            parseRecord.entries().put(jsonKey, entryRecord);
        }
        JSRuntime.createDataProperty(object, jsonKey, jsonValue);
    }

    /**
     * Parses a JSON array; the current character must be the opening bracket.
     *
     * @return the new array, or the {@link JSONParseRecord} wrapping it when {@link #withSource}
     *         is set
     */
    private Object parseJSONArray(JSRealm realm) {
        assert isArrayStart(get());
        incDepth();
        skipChar('[');
        skipWhitespace();
        JSArrayObject array = JSArray.createEmptyZeroLength(context, realm);
        JSONParseRecord parseRecord = null;
        if (withSource) {
            parseRecord = JSONParseRecord.forArray(array);
        }
        if (get() != ']') {
            parseJSONElementList(array, realm, parseRecord);
            if (get() != ']') {
                throw error("closing quote ] expected");
            }
        }
        // The current character is known to be ']' here, so there is no whitespace left to skip.
        skipChar(']');
        decDepth();
        if (withSource) {
            return parseRecord;
        }
        return array;
    }

    /** Enters one nesting level and fails once {@link #MAX_PARSE_DEPTH} is exceeded. */
    private void incDepth() {
        this.parseDepth++;
        if (this.parseDepth > MAX_PARSE_DEPTH) {
            throw stackOverflowError();
        }
    }

    /**
     * Throws the range error used for overly deep nesting. The return type only exists so that
     * callers can write {@code throw stackOverflowError()}; the method never returns normally.
     */
    protected static RuntimeException stackOverflowError() {
        throw Errors.createRangeError("Cannot parse JSON constructs nested that deep");
    }

    /**
     * Throws a syntax error with the given message, or a generic one if {@code msg} is
     * {@code null}. Never returns normally.
     */
    protected static RuntimeException syntaxError(String msg) {
        throw Errors.createSyntaxError(msg == null ? "Cannot parse JSON" : msg);
    }

    /** Leaves one nesting level. */
    protected void decDepth() {
        this.parseDepth--;
    }

    /**
     * Parses one or more comma-separated array elements into {@code arrayObject} and stops at the
     * first non-comma character.
     */
    protected void parseJSONElementList(JSArrayObject arrayObject, JSRealm realm, JSONParseRecord parseRecord) {
        int index = 0;
        // setElement may hand back a different ScriptArray; track it and install the final one.
        ScriptArray scriptArray = JSAbstractArray.arrayGetArrayType(arrayObject);
        while (true) {
            Object jsonValue = parseJSONValue(realm);
            if (withSource) {
                JSONParseRecord elementRecord = (JSONParseRecord) jsonValue;
                jsonValue = elementRecord.value();
                assert parseRecord.elements().size() == index;
                parseRecord.elements().add(elementRecord);
            }
            scriptArray = scriptArray.setElement(arrayObject, index, jsonValue, false);

            skipWhitespace();
            if (get() == ',') {
                skipChar(',');
                skipWhitespace();
                index++;
            } else {
                break;
            }
        }
        JSAbstractArray.arraySetArrayType(arrayObject, scriptArray);
    }

    /**
     * Parses a string value.
     *
     * @return the unescaped string, or a literal {@link JSONParseRecord} when {@link #withSource}
     *         is set
     */
    protected Object parseJSONString() {
        int startPos = pos;
        TruffleString parsedString = getJSONString();
        if (withSource) {
            return parseRecordForLiteral(parsedString, startPos);
        } else {
            return parsedString;
        }
    }

    /**
     * Reads a quoted JSON string, including both quotes, and returns its unescaped content.
     * Used for string values as well as for property names.
     */
    protected TruffleString getJSONString() {
        if (!isStringQuote(get())) {
            if (isDigit(get())) {
                throw unexpectedNumber();
            } else {
                throw unexpectedToken();
            }
        }
        skipChar('"');
        TruffleString str = parseJSONStringCharacters();
        if (!isStringQuote(get())) {
            throw error("String quote expected");
        }
        skipChar('"');
        return str;
    }

    /** Returns whether {@code c} is the JSON string delimiter. */
    protected static boolean isStringQuote(char c) {
        return c == '"';
    }

    /** Returns whether {@code c} is an ASCII decimal digit. */
    protected static boolean isDigit(char c) {
        return '0' <= c && c <= '9';
    }

    /**
     * Scans the characters of a string up to (not including) the closing quote and returns the
     * unescaped content. Strings without escapes are returned as a substring of the input.
     */
    protected TruffleString parseJSONStringCharacters() {
        int startPos = pos;
        boolean hasEscapes = false;
        int firstEscape = -1;
        char c = get();
        while (!isStringQuote(c)) {
            if (c < ' ') {
                // Unescaped control characters are not allowed inside JSON strings.
                throw error("invalid string");
            } else if (c == '\\') {
                if (!hasEscapes) {
                    firstEscape = pos;
                }
                hasEscapes = true;
                skipChar('\\');
                // Step over the escaped character so that \" does not end the string;
                // the escape itself is validated later in unquoteJSON.
                pos++;
            } else {
                skipChar();
            }
            c = get();
        }
        int sLength = pos - startPos;
        TruffleString s = Strings.lazySubstring(parseStr, startPos, sLength);
        if (hasEscapes) {
            return unquoteJSON(s, sLength, firstEscape - startPos);
        } else {
            return s;
        }
    }

    /**
     * Resolves all backslash escapes in {@code string}.
     *
     * @param string raw string content between the quotes
     * @param sLength length of {@code string}
     * @param posFirstBackslash index of the first backslash in {@code string}; must be
     *            non-negative
     * @return the string with every escape replaced by the character it denotes
     */
    protected TruffleString unquoteJSON(TruffleString string, int sLength, int posFirstBackslash) {
        assert sLength == Strings.length(string);
        assert posFirstBackslash >= 0; // guaranteed by caller
        int posBackslash = posFirstBackslash;

        int curPos = 0;
        var builder = Strings.builderCreate(sLength);
        while (posBackslash >= 0) {
            // Copy the unescaped run in front of this backslash.
            Strings.builderAppend(builder, string, curPos, posBackslash);
            curPos = posBackslash;
            char c = Strings.charAt(string, posBackslash + 1);
            switch (c) {
                case '"':
                    Strings.builderAppend(builder, '"');
                    break;
                case '\\':
                    Strings.builderAppend(builder, '\\');
                    break;
                case 'b':
                    Strings.builderAppend(builder, '\b');
                    break;
                case 'f':
                    Strings.builderAppend(builder, '\f');
                    break;
                case 'n':
                    Strings.builderAppend(builder, '\n');
                    break;
                case 'r':
                    Strings.builderAppend(builder, '\r');
                    break;
                case 't':
                    Strings.builderAppend(builder, '\t');
                    break;
                case '/':
                    Strings.builderAppend(builder, '/');
                    break;
                case 'u':
                    unquoteJSONUnicode(string, posBackslash, builder);
                    curPos += 4; // 6 chars for this escape, 2 are added below
                    break;
                default:
                    throw error("wrong escape sequence");
            }
            curPos += 2; // valid for all escapes
            posBackslash = Strings.indexOf(string, '\\', curPos);
        }
        if (curPos < sLength) {
            // Copy whatever follows the last escape.
            Strings.builderAppend(builder, string, curPos, sLength);
        }
        return Strings.builderToString(builder);
    }

    /**
     * Returns the numeric value of the hexadecimal digit {@code c}, raising an "invalid string"
     * error if {@link JSRuntime#valueInHex} reports a negative value for it.
     */
    protected int hexDigitValue(char c) {
        int value = JSRuntime.valueInHex(c);
        if (value < 0) {
            throw error("invalid string");
        }
        return value;
    }

    /**
     * Decodes the <code>&#92;uXXXX</code> escape that starts at {@code posBackslash} and appends
     * the resulting code unit to {@code builder}.
     *
     * @param string raw string content between the quotes
     * @param posBackslash index of the escape's backslash within {@code string}
     * @param builder receives the decoded code unit
     */
    protected void unquoteJSONUnicode(TruffleString string, int posBackslash, TruffleStringBuilderUTF16 builder) {
        // The escape needs six code units: backslash, 'u' and four hex digits. A shorter escape
        // inside a properly terminated string is a malformed string, not an end of input, so
        // report it explicitly instead of reading past the end of the substring.
        if (posBackslash + 6 > Strings.length(string)) {
            throw error("invalid string");
        }
        char c1 = Strings.charAt(string, posBackslash + 2);
        char c2 = Strings.charAt(string, posBackslash + 3);
        char c3 = Strings.charAt(string, posBackslash + 4);
        char c4 = Strings.charAt(string, posBackslash + 5);
        char unencodedC = (char) ((hexDigitValue(c1) << 12) | (hexDigitValue(c2) << 8) | (hexDigitValue(c3) << 4) | hexDigitValue(c4));
        Strings.builderAppend(builder, unencodedC);
    }

    /**
     * Parses a number value.
     *
     * @return the number, or a literal {@link JSONParseRecord} when {@link #withSource} is set
     */
    protected Object parseJSONNumber() {
        int startPos = pos;
        Number parsedNumber = getJSONNumber();
        if (withSource) {
            return parseRecordForLiteral(parsedNumber, startPos);
        } else {
            return parsedNumber;
        }
    }

    /**
     * Reads a JSON number at the current position.
     *
     * @return an {@code Integer} when the value is an integer literal that fits into an
     *         {@code int}, otherwise a {@code Double}; {@code "0"} yields integer zero and
     *         {@code "-0"} yields {@code -0.0}
     */
    protected Number getJSONNumber() {
        int sign = 1;
        if (get() == '-') {
            skipChar();
            sign = -1;
        }
        if (!posValid()) {
            throw error(MALFORMED_NUMBER);
        }
        // startPos marks the first character after the optional sign.
        final int startPos = pos;
        int fractionPos = -1;
        boolean firstPosIsZero = false;
        char c = get();
        while (JSRuntime.isAsciiDigit(c) || c == '.') {
            if (c == '.') {
                if (fractionPos >= 0) {
                    // second decimal point
                    throw error(MALFORMED_NUMBER);
                }
                fractionPos = pos;
            } else if (pos == startPos && c == '0') {
                firstPosIsZero = true;
            }
            skipChar();
            if (!posValid()) {
                break;
            }
            c = get(pos); // don't use cache here
        }
        if (pos == startPos) {
            throw unexpectedToken();
        } else if (firstPosIsZero) {
            // "0" should be parsable, but "08" not
            if ((startPos + 1) < len) {
                c = get(startPos + 1);
                if (c == 'x' || c == 'X' || JSRuntime.isAsciiDigit(c)) {
                    throw error("octal and hexadecimal not allowed");
                }
            }
        }
        // A decimal point needs at least one digit on both sides.
        if (fractionPos == startPos || fractionPos == (pos - 1)) {
            throw error(MALFORMED_NUMBER);
        }

        boolean hasExponent = false;
        if (posValid() && isExponentPart()) {
            hasExponent = true;
            skipExponent();
        }

        final int endPos = pos;
        if (firstPosIsZero && (endPos - startPos == 1)) {
            // Exactly "0" or "-0"; negative zero cannot be represented as an int.
            if (sign == 1) {
                return 0;
            } else {
                return -0.0;
            }
        } else if (fractionPos == -1 && !hasExponent && (endPos - startPos <= JSRuntime.MAX_SAFE_INTEGER_DIGITS)) {
            // safe integer but not zero
            final int radix = 10;
            long safeInt = JSRuntime.parseSafeInteger(parseStr, startPos, endPos, radix);
            assert safeInt != 0;
            if (safeInt != JSRuntime.INVALID_SAFE_INTEGER) {
                safeInt *= sign;
                if (JSRuntime.longIsRepresentableAsInt(safeInt)) {
                    return (int) safeInt;
                } else {
                    return (double) safeInt;
                }
            }
        }
        // General case: parse the unsigned text as a double and apply the sign afterwards.
        TruffleString valueStr = Strings.lazySubstring(parseStr, startPos, endPos - startPos);
        return parseAsDouble(sign, valueStr);
    }

    /**
     * Parses {@code valueStr} as a double and multiplies it by {@code sign}, reporting a
     * "malformed number" error if the text cannot be parsed.
     */
    protected Number parseAsDouble(int sign, TruffleString valueStr) {
        try {
            return Strings.parseDouble(valueStr) * sign;
        } catch (TruffleString.NumberFormatException e) {
            throw error(MALFORMED_NUMBER);
        }
    }

    /**
     * Skips an exponent part: the {@code e}/{@code E} marker, an optional sign and at least one
     * digit. The current character must be the marker.
     */
    protected void skipExponent() {
        skipChar(); // e or E
        char cur = get();
        if (cur == '-') {
            skipChar('-');
        } else if (cur == '+') {
            skipChar('+');
        }
        if (!posValid()) {
            throw error(MALFORMED_NUMBER);
        }
        cur = get();
        int startPos = pos;
        while (JSRuntime.isAsciiDigit(cur)) {
            skipChar();
            if (!posValid()) {
                break;
            }
            cur = get(pos); // don't use cache here
        }
        if (pos == startPos) {
            // no digit after the exponent marker/sign
            throw error("Expected number but found ident");
        }
    }

    /** Returns whether the current character is an exponent marker. */
    protected boolean isExponentPart() {
        char cur = get();
        return cur == 'e' || cur == 'E';
    }

    /**
     * Returns whether the input at the current position is the literal {@code null}, given that
     * its first character is {@code c}.
     */
    protected boolean isNullLiteral(char c) {
        return c == 'n' && isLiteral(Strings.NULL, 1);
    }

    /**
     * Parses the {@code null} literal.
     *
     * @return {@link Null#instance}, or a literal {@link JSONParseRecord} when
     *         {@link #withSource} is set
     */
    protected Object parseNullLiteral() {
        int startPos = pos;
        Object parsedNull = getNullLiteral();
        if (withSource) {
            return parseRecordForLiteral(parsedNull, startPos);
        } else {
            return parsedNull;
        }
    }

    /** Consumes the {@code null} literal; the caller must have verified that it is present. */
    protected Object getNullLiteral() {
        assert isNullLiteral(get());
        skipString(Strings.NULL);
        return Null.instance;
    }

    /**
     * Returns whether the input at the current position is {@code true} or {@code false}, given
     * that its first character is {@code c}.
     */
    protected boolean isBooleanLiteral(char c) {
        return (c == 't' && isLiteral(Strings.TRUE, 1)) || (c == 'f' && isLiteral(Strings.FALSE, 1));
    }

    /**
     * Parses a boolean literal.
     *
     * @return the boolean, or a literal {@link JSONParseRecord} when {@link #withSource} is set
     */
    protected Object parseBooleanLiteral() {
        int startPos = pos;
        boolean parsedBoolean = getBooleanLiteral();
        if (withSource) {
            return parseRecordForLiteral(parsedBoolean, startPos);
        } else {
            return parsedBoolean;
        }
    }

    /** Consumes a boolean literal; the caller must have verified that one is present. */
    protected boolean getBooleanLiteral() {
        assert isBooleanLiteral(get());
        if (get() == 't') {
            skipString(Strings.TRUE);
            return true;
        } else if (get() == 'f') {
            skipString(Strings.FALSE);
            return false;
        } else {
            throw error("cannot parse JSONBooleanLiteral");
        }
    }

    /** Returns whether {@code c} is a space, line feed, carriage return or tab. */
    protected static boolean isWhitespace(char c) {
        return c == ' ' || c == '\n' || c == '\r' || c == '\t';
    }

    /**
     * Throws a syntax error for the current position. Never returns normally; the return type
     * only exists so that callers can write {@code throw error(...)}.
     *
     * <p>
     * In Nashorn compatibility mode the message is produced by re-parsing the input with
     * {@link NashornJSONParser}; otherwise {@code message} is suffixed with the current position.
     *
     * @param message description of the problem; must not already contain "at position"
     */
    protected RuntimeException error(String message) {
        if (context.isOptionNashornCompatibilityMode()) {
            // use the Nashorn parser to get the proper error
            NashornJSONParser parser = new NashornJSONParser(parseStr, context);
            try {
                parser.parse(); // should throw
            } catch (ParserException ex) {
                String msg = ex.getMessage().replace("\r\n", "\n");
                throw Errors.createSyntaxError("Invalid JSON: " + msg);
            }
            throw Errors.shouldNotReachHere("JSON parser did not throw error as expected");
        } else {
            assert !message.contains("at position");
            throw Errors.createSyntaxError(message + " at position " + pos);
        }
    }

    /** Throws an "Unexpected token" error naming the current character. */
    private RuntimeException unexpectedToken() {
        throw error("Unexpected token " + get() + " in JSON");
    }

    /** Throws an "Unexpected string" error. */
    private RuntimeException unexpectedString() {
        throw error("Unexpected string in JSON");
    }

    /** Throws an "Unexpected number" error. */
    private RuntimeException unexpectedNumber() {
        throw error("Unexpected number in JSON");
    }

    // ************************* Helper Functions ****************************************//

    /** Returns the character at the current position without advancing. */
    protected char get() {
        return get(pos);
    }

    /** Returns the character of the input at index {@code posParam}. */
    protected char get(int posParam) {
        return Strings.charAt(parseStr, posParam);
    }

    // needs to be checked by the caller already that the content matches!
    /** Advances past {@code expected}, which is only verified by assertions. */
    protected void skipString(TruffleString expected) {
        int length = Strings.length(expected);
        assert len >= pos + length;
        assert Strings.equals(Strings.lazySubstring(parseStr, pos, length), expected);
        pos += length;
    }

    /** Advances past {@code expected}, or raises an error if the current character differs. */
    protected void expectChar(char expected) {
        if (get(pos) != expected) {
            throw error(expected + " expected");
        }
        skipChar(expected);
    }

    /** Advances by one character. */
    protected void skipChar() {
        assert posValid();
        pos++;
    }

    // needs to be checked by the caller already that the content matches!
    /** Advances by one character, which is only verified by assertion to be {@code expected}. */
    protected void skipChar(char expected) {
        assert get(pos) == expected;
        skipChar();
    }

    /** Advances past any run of JSON whitespace; stops at the end of the input. */
    protected void skipWhitespace() {
        while (posValid() && isWhitespace(get())) {
            pos++;
        }
    }

    /** Returns whether the current position is still inside the input. */
    protected boolean posValid() {
        return pos < len;
    }

    /**
     * Checks whether the input at the current position matches {@code literalStr}, comparing
     * only from offset {@code literalPos} onwards (earlier characters are assumed to have been
     * checked by the caller).
     *
     * @param literalStr the literal to look for
     * @param literalPos number of leading characters of the literal to skip in the comparison
     * @return {@code false} if the remaining input is shorter than the literal or differs from it
     */
    protected boolean isLiteral(TruffleString literalStr, int literalPos) {
        if (len < pos + Strings.length(literalStr)) {
            return false;
        }
        int startPos = pos + literalPos;
        // The node works on byte indices; UTF-16 uses two bytes per code unit, hence the shifts.
        return TruffleString.RegionEqualByteIndexNode.getUncached().execute(
                        parseStr, startPos << 1,
                        literalStr, literalPos << 1,
                        literalStr.byteLength(TruffleString.Encoding.UTF_16) - (literalPos << 1),
                        TruffleString.Encoding.UTF_16);
    }

    /**
     * Wraps a literal value in a {@link JSONParseRecord} together with the source text between
     * {@code startPos} and the current position.
     */
    private Object parseRecordForLiteral(Object parsedValue, int startPos) {
        TruffleString source = Strings.lazySubstring(parseStr, startPos, pos - startPos);
        return JSONParseRecord.forLiteral(parsedValue, source);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy