All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.ma.json.JsonParser Maven / Gradle / Ivy

There is a newer version: 12.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2015 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.ma.json;

import net.sf.saxon.expr.StaticProperty;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.expr.parser.ExplicitLocation;
import net.sf.saxon.expr.parser.RoleDiagnostic;
import net.sf.saxon.ma.map.HashTrieMap;
import net.sf.saxon.ma.map.MapItem;
import net.sf.saxon.om.Sequence;
import net.sf.saxon.om.SequenceTool;
import net.sf.saxon.trans.Err;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.iter.AtomicIterator;
import net.sf.saxon.tree.util.FastStringBuffer;
import net.sf.saxon.type.SpecificFunctionType;
import net.sf.saxon.type.StringToDouble;
import net.sf.saxon.type.TypeHierarchy;
import net.sf.saxon.value.*;
import net.sf.saxon.z.IntPredicate;

import java.util.HashMap;
import java.util.Map;

/**
 * Parser for JSON, which notifies parsing events to a JsonHandler
 */
public class JsonParser {

    // Character predicate supplied to the constructor. It is stored here; no code in this class reads it.
    private IntPredicate charChecker;

    // ---- Parsing options, combined into a single int as bit flags ----

    // Set by getFlags() when the "escape" option is true.
    public static final int ESCAPE = 1;
    // Set by getFlags() together with LIBERAL when the "liberal" option is true.
    public static final int ALLOW_ANY_TOP_LEVEL = 2;
    // Relaxed parsing: trailing commas in objects/arrays are tolerated, the extra checks on
    // numeric literals are skipped, and unescape() tolerates unknown or malformed escapes.
    public static final int LIBERAL = 4;
    // Set by getFlags() when the "validate" option is true (and validation is permitted by the caller).
    public static final int VALIDATE = 8;
    // Causes parseConstruct() to print each token to System.err.
    public static final int DEBUG = 16;
    // Handling of duplicate keys, selected by the "duplicates" option:
    // "retain", "use-last", "use-first" and "reject" respectively.
    public static final int DUPLICATES_RETAINED = 32;
    public static final int DUPLICATES_LAST = 64;
    public static final int DUPLICATES_FIRST = 128;
    public static final int DUPLICATES_REJECTED = 256;

    // Mask covering all four duplicate-handling flags.
    public static final int DUPLICATES_SPECIFIED = DUPLICATES_FIRST | DUPLICATES_LAST | DUPLICATES_RETAINED | DUPLICATES_REJECTED;

    // ---- Error codes used by this class ----

    // Syntax errors in the JSON input.
    private static final String ERR_GRAMMAR = "FOJS0001";
    // Duplicate key found while duplicates are being rejected.
    private static final String ERR_DUPLICATE = "FOJS0003";
    // Validation requested on a processor that is not schema-aware.
    private static final String ERR_SCHEMA = "FOJS0004";
    // Invalid option value or invalid combination of options.
    private static final String ERR_OPTIONS = "FOJS0005";

    /**
     * Create a JSON parser.
     *
     * @param charChecker a predicate that determines which characters are accepted;
     *                    the constructor simply stores it on the new parser
     */

    public JsonParser(IntPredicate charChecker) {
        this.charChecker = charChecker;
    }

    /**
     * Parse a complete JSON text, notifying the supplied handler of each construct found.
     * <p>The input must contain exactly one JSON value; anything other than whitespace
     * after that value is reported as an error.</p>
     *
     * @param input   the JSON text to be parsed
     * @param flags   parsing options, as a combination of the bit-flag constants of this class
     *                (for example {@link #LIBERAL} or {@link #DUPLICATES_REJECTED})
     * @param handler event handler to which parsing events are notified
     * @param context XPath evaluation context
     * @throws XPathException if the input is empty or is not syntactically valid JSON
     */
    public void parse(String input, int flags, JsonHandler handler, XPathContext context) throws XPathException {
        if (input.length() == 0) {
            invalidJSON("An empty string is not valid JSON", ERR_GRAMMAR);
        }

        // Position the tokenizer on the first token, then read the single top-level value
        final JsonTokenizer tokenizer = new JsonTokenizer(input);
        tokenizer.next();
        parseConstruct(handler, tokenizer, flags, true, context);

        // Nothing but end-of-input may follow the top-level value
        final int trailingToken = tokenizer.next();
        if (trailingToken != JsonTokenizer.EOF) {
            invalidJSON("Unexpected token beyond end of JSON input", ERR_GRAMMAR);
        }
    }


    /**
     * Convert a map of options into the corresponding set of bit flags.
     * <p>The options examined are "debug", "escape", "liberal", "validate" (only when
     * permitted by {@code allowValidate}) and "duplicates".</p>
     *
     * @param options       the options supplied by the caller
     * @param context       XPath evaluation context, used to find out whether the processor is
     *                      schema-aware and to obtain the type hierarchy
     * @param allowValidate true if the "validate" option is to be examined; null is treated as false
     * @return the options as a combination of the bit-flag constants of this class
     * @throws XPathException if an option has an invalid value, if incompatible options are
     *                        combined (escape with fallback, validate with duplicates=retain),
     *                        or if validation is requested on a non-schema-aware processor
     */
    public static int getFlags(MapItem options, XPathContext context, Boolean allowValidate) throws XPathException {
        int flags = 0;
        if (getBooleanOption(options, "debug", false)) {
            flags |= DEBUG;
        }

        if (getBooleanOption(options, "escape", false)) {
            flags |= ESCAPE;
            if (options.get(new StringValue("fallback")) != null) {
                throw new XPathException("Cannot specify a fallback function when escape=true", ERR_OPTIONS);
            }
        }

        if (getBooleanOption(options, "liberal", false)) {
            flags |= LIBERAL;
            flags |= ALLOW_ANY_TOP_LEVEL;
        }

        // allowValidate is a boxed Boolean: guard against null rather than failing on auto-unboxing.
        // The "validate" option is only read (and type-checked) when validation is permitted.
        boolean validationPermitted = allowValidate != null && allowValidate.booleanValue();
        boolean validate = validationPermitted && getBooleanOption(options, "validate", false);
        if (validate) {
            if (!context.getController().getExecutable().isSchemaAware()) {
                error("Requiring validation on non-schema-aware processor", ERR_SCHEMA);
            }
            flags |= VALIDATE;
        }

        String duplicates = getStringOption(options, "duplicates", null, context.getConfiguration().getTypeHierarchy());
        if (duplicates != null) {
            if ("reject".equals(duplicates)) {
                flags |= DUPLICATES_REJECTED;
            } else if ("use-last".equals(duplicates)) {
                flags |= DUPLICATES_LAST;
            } else if ("use-first".equals(duplicates)) {
                flags |= DUPLICATES_FIRST;
            } else if ("retain".equals(duplicates)) {
                flags |= DUPLICATES_RETAINED;
            } else {
                error("Invalid value for 'duplicates' option", ERR_OPTIONS);
            }
        }

        if (validate && "retain".equals(duplicates)) {
            error("The options validate:true and duplicates:retain cannot be used together", ERR_OPTIONS);
        }
        return flags;
    }

    /**
     * Read an option whose value is expected to be a single string.
     * <p>If the option is present, its value is converted to xs:string using the function
     * conversion rules; a failure of that conversion is reported with error code FOJS0005.</p>
     *
     * @param options      the set of options provided
     * @param option       the name of the option required
     * @param defaultValue the value to return if the option has not been specified; may be null
     * @param th           the type hierarchy used to apply the function conversion rules
     * @return the string value of the option, or {@code defaultValue} if the option is absent
     * @throws net.sf.saxon.trans.XPathException if the option value cannot be converted to a string
     */

    private static String getStringOption(MapItem options, String option, String defaultValue, TypeHierarchy th)
            throws XPathException {
        Sequence supplied = options.get(new StringValue(option));
        if (supplied == null) {
            return defaultValue;
        }
        RoleDiagnostic role = new RoleDiagnostic(RoleDiagnostic.OPTION, option, 0);
        role.setErrorCode(ERR_OPTIONS);
        Sequence asString = SequenceTool.toGroundedValue(
                th.applyFunctionConversionRules(
                        supplied, SequenceType.SINGLE_STRING, role, ExplicitLocation.UNKNOWN_LOCATION));
        return asString.head().getStringValue();
    }

    /**
     * Read an option whose value is expected to be a single boolean.
     *
     * @param options      the set of options provided
     * @param option       the name of the option required
     * @param defaultValue the value to return if the option has not been specified
     * @return the boolean value of the option, or {@code defaultValue} if the option is absent
     * @throws net.sf.saxon.trans.XPathException with code FOJS0005 if the option is present
     *                                           but its value is not an xs:boolean
     */
    private static boolean getBooleanOption(MapItem options, String option, boolean defaultValue)
            throws XPathException {
        Sequence supplied = options.get(new StringValue(option));
        if (supplied == null) {
            return defaultValue;
        }
        if (supplied instanceof BooleanValue) {
            return ((BooleanValue) supplied).getBooleanValue();
        }
        throw new XPathException("Value of option '" + option + "' is not xs:boolean", ERR_OPTIONS);
    }

    /**
     * Parse a JSON construct (top-level or nested), dispatching on the tokenizer's current token.
     * <p>Objects and arrays are delegated to {@code parseObject} and {@code parseArray};
     * numbers, booleans, null and strings are notified directly to the handler.</p>
     *
     * @param handler   the handler to generate the result
     * @param tokenizer the tokenizer, positioned at the first token of the construct to be read
     * @param flags     parsing options
     * @param toplevel  true if this is the outermost construct of the JSON text to be parsed.
     *                  Currently not used: any JSON value is accepted at the top level.
     * @param context   XPath evaluation context
     * @throws net.sf.saxon.trans.XPathException if a dynamic error occurs (for example, invalid JSON input)
     */

    private void parseConstruct(JsonHandler handler, JsonTokenizer tokenizer, int flags, boolean toplevel, XPathContext context) throws XPathException {
        if ((flags & DEBUG) != 0) {
            System.err.println("token:" + tokenizer.currentToken + " :" + tokenizer.currentTokenValue);
        }
        switch (tokenizer.currentToken) {
            case JsonTokenizer.LCURLY:
                parseObject(handler, tokenizer, flags, context);
                break;

            case JsonTokenizer.LSQB:
                parseArray(handler, tokenizer, flags, context);
                break;

            case JsonTokenizer.NUMERIC_LITERAL: {
                // The lexical form is validated and converted before the handler is notified
                String lexical = tokenizer.currentTokenValue.toString();
                double value = parseNumericLiteral(lexical, flags);
                handler.writeNumeric(lexical, value);
                break;
            }

            case JsonTokenizer.TRUE:
                handler.writeBoolean(true);
                break;

            case JsonTokenizer.FALSE:
                handler.writeBoolean(false);
                break;

            case JsonTokenizer.NULL:
                handler.writeNull();
                break;

            case JsonTokenizer.STRING_LITERAL: {
                // The tokenizer delivers the literal with its escapes intact; expand them here
                String literal = tokenizer.currentTokenValue.toString();
                handler.writeString(unescape(literal, flags, ERR_GRAMMAR));
                break;
            }

            default:
                invalidJSON("Unexpected symbol: " + tokenizer.currentTokenValue, ERR_GRAMMAR);
        }
    }

    /**
     * Report the error that the top-level JSON value must be an object or array.
     *
     * @throws XPathException always, with error code FOJS0001
     */
    private void disallowedAtTopLevel() throws XPathException {
        invalidJSON("Top-level JSON value must be an object or array", ERR_GRAMMAR);
    }

    /**
     * Parse a JSON object (map): a sequence of comma-separated "key": value entries
     * enclosed in curly braces.
     * <p>On entry the tokenizer's current token is the opening brace; on exit it is the
     * closing brace. How a repeated key is treated depends on the DUPLICATES_* flags.</p>
     *
     * @param handler   the handler to generate the result
     * @param tokenizer the tokenizer, positioned at the object to be read
     * @param flags     parsing options as a set of flags
     * @param context   XPath evaluation context
     * @throws net.sf.saxon.trans.XPathException if a dynamic error occurs (such as invalid JSON input)
     */

    private void parseObject(JsonHandler handler, JsonTokenizer tokenizer, int flags, XPathContext context) throws XPathException {
        final boolean tolerateTrailingComma = (flags & LIBERAL) != 0;
        final boolean rejectDuplicates = (flags & DUPLICATES_REJECTED) != 0;
        final boolean keepDuplicates = (flags & (DUPLICATES_LAST | DUPLICATES_RETAINED)) != 0;

        handler.startMap();
        int token = tokenizer.next();
        while (token != JsonTokenizer.RCURLY) {
            // --- the key ---
            if (token != JsonTokenizer.STRING_LITERAL) {
                invalidJSON("Property name must be a string literal", ERR_GRAMMAR);
            }
            String key = unescape(tokenizer.currentTokenValue.toString(), flags, ERR_GRAMMAR);
            String reEscaped = handler.reEscape(key, true);

            // --- the colon, then move on to the first token of the value ---
            if (tokenizer.next() != JsonTokenizer.COLON) {
                invalidJSON("Missing colon after \"" + Err.wrap(key) + "\"", ERR_GRAMMAR);
            }
            tokenizer.next();

            // --- the value ---
            boolean duplicate = handler.setKey(key, reEscaped);
            if (duplicate && rejectDuplicates) {
                invalidJSON("Duplicate key value \"" + Err.wrap(key) + "\"", ERR_DUPLICATE);
            }
            JsonHandler target = handler;
            if (duplicate && !keepDuplicates) {
                // The first value wins: the duplicate must still be parsed, but into a throwaway handler
                target = new JsonHandler();
                target.setContext(context);
            }
            parseConstruct(target, tokenizer, flags, false, context);

            // --- what follows the value ---
            token = tokenizer.next();
            if (token == JsonTokenizer.RCURLY) {
                break;
            }
            if (token != JsonTokenizer.COMMA) {
                invalidJSON("Unexpected token after value of \"" + Err.wrap(key) + "\" property", ERR_GRAMMAR);
            }
            token = tokenizer.next();
            if (token == JsonTokenizer.RCURLY && !tolerateTrailingComma) {
                invalidJSON("Trailing comma after entry in object", ERR_GRAMMAR);
            }
            // With a tolerated trailing comma the loop condition now ends the object
        }
        handler.endMap();
    }

    /**
     * Parse a JSON array: a sequence of comma-separated values enclosed in square brackets.
     * <p>On entry the tokenizer's current token is the opening bracket; on exit it is the
     * closing bracket. A trailing comma before the closing bracket is accepted only when
     * the LIBERAL flag is set.</p>
     *
     * @param handler   the handler to generate the result
     * @param tokenizer the tokenizer, positioned at the array to be read
     * @param flags     parsing options
     * @param context   XPath evaluation context
     * @throws net.sf.saxon.trans.XPathException if a dynamic error occurs (such as invalid JSON input)
     */

    private void parseArray(JsonHandler handler, JsonTokenizer tokenizer, int flags, XPathContext context) throws XPathException {
        final boolean tolerateTrailingComma = (flags & LIBERAL) != 0;
        handler.startArray();
        int token = tokenizer.next();
        // Each iteration starts with the tokenizer on the first token of a member;
        // the loop ends as soon as the closing bracket becomes the current token.
        while (token != JsonTokenizer.RSQB) {
            parseConstruct(handler, tokenizer, flags, false, context);
            token = tokenizer.next();
            if (token == JsonTokenizer.COMMA) {
                token = tokenizer.next();
                if (token == JsonTokenizer.RSQB && !tolerateTrailingComma) {
                    invalidJSON("Trailing comma after entry in array", ERR_GRAMMAR);
                }
            } else if (token != JsonTokenizer.RSQB) {
                invalidJSON("Unexpected token after entry in array", ERR_GRAMMAR);
            }
        }
        handler.endArray();
    }

    /**
     * Validate a JSON numeric literal and convert it to a double.
     * <p>Unless the LIBERAL flag is set, the literal is first checked for a leading plus
     * sign, redundant leading zeroes, an empty fractional part and an empty integer part.</p>
     *
     * @param token the numeric literal to be parsed and converted
     * @param flags parsing options
     * @return the value of the literal as a double
     * @throws net.sf.saxon.trans.XPathException if the literal is not an acceptable JSON number
     */

    private double parseNumericLiteral(String token, int flags) throws XPathException {
        final boolean strict = (flags & LIBERAL) == 0;
        if (strict) {
            if (token.startsWith("+")) {
                invalidJSON("Leading + sign not allowed: " + token, ERR_GRAMMAR);
            }
            // The remaining checks apply to the literal with any minus sign removed
            final String unsigned = token.startsWith("-") ? token.substring(1) : token;

            // A leading zero is only allowed when it is the whole integer part
            if (unsigned.length() > 1 && unsigned.charAt(0) == '0') {
                char second = unsigned.charAt(1);
                if (second != '.' && second != 'e' && second != 'E') {
                    invalidJSON("Redundant leading zeroes not allowed: " + token, ERR_GRAMMAR);
                }
            }
            if (unsigned.endsWith(".") || unsigned.indexOf(".e") >= 0 || unsigned.indexOf(".E") >= 0) {
                invalidJSON("Empty fractional part not allowed", ERR_GRAMMAR);
            }
            if (unsigned.startsWith(".")) {
                invalidJSON("Empty integer part not allowed", ERR_GRAMMAR);
            }
        }
        try {
            return StringToDouble.getInstance().stringToNumber(token);
        } catch (NumberFormatException e) {
            invalidJSON("Invalid numeric literal: " + e.getMessage(), ERR_GRAMMAR);
            return Double.NaN;  // not reached: invalidJSON always throws
        }
    }

//    private CharSequence reEscape(String literal, int flags, XPathContext context) throws XPathException {
//        try {
//            literal = unescape(literal, flags, ERR_GRAMMAR);
//            return JsonReceiver.escape(literal, true, new IntPredicate() {
//                public boolean matches(int value) {
//                    return value < 32 ||
//                        (value >= 127 && value < 160);
//                }
//            });
//        } catch (XPathException e) {
//            e.setErrorCode(ERR_GRAMMAR);
//            throw e;
//        }
//    }

    /**
     * Expand the backslash escapes in the content of a JSON string literal.
     * <p>The recognized escapes are {@code \"}, {@code \\}, {@code \/}, {@code \b}, {@code \f},
     * {@code \n}, {@code \r}, {@code \t} and {@code \}{@code uXXXX} (exactly four ASCII hex digits).
     * When the LIBERAL flag is set, an unknown escape {@code \x} yields the character {@code x},
     * and a malformed unicode escape is copied to the output unchanged; otherwise both are errors.</p>
     *
     * @param literal   the content of the string literal (without the enclosing quotes)
     * @param flags     parsing options; only the LIBERAL flag is examined
     * @param errorCode the error code to use for any error reported
     * @return the string with all escapes expanded; the original string if it contains no backslash
     * @throws net.sf.saxon.trans.XPathException if the literal ends in a backslash, or (unless
     *                                           LIBERAL is set) contains an unknown or malformed escape
     */

    public static String unescape(String literal, int flags, String errorCode) throws XPathException {
        if (literal.indexOf('\\') < 0) {
            // fast path: nothing to expand
            return literal;
        }
        final boolean liberal = (flags & LIBERAL) != 0;
        final int length = literal.length();
        FastStringBuffer buffer = new FastStringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = literal.charAt(i);
            if (c != '\\') {
                buffer.append(c);
                continue;
            }
            // c is a backslash: step onto the character that follows it
            if (++i >= length) {
                throw new XPathException("Invalid JSON escape: String " + Err.wrap(literal) + " ends in backslash", errorCode);
            }
            char escaped = literal.charAt(i);
            switch (escaped) {
                case '"':
                case '\\':
                case '/':
                    buffer.append(escaped);
                    break;
                case 'b':
                    buffer.append('\b');
                    break;
                case 'f':
                    buffer.append('\f');
                    break;
                case 'n':
                    buffer.append('\n');
                    break;
                case 'r':
                    buffer.append('\r');
                    break;
                case 't':
                    buffer.append('\t');
                    break;
                case 'u': {
                    int code = hexEscapeValue(literal, i + 1);
                    if (code >= 0) {
                        buffer.append((char) code);
                        i += 4;
                    } else if (liberal) {
                        // keep the malformed escape as written; the following characters are copied literally
                        buffer.append("\\u");
                    } else {
                        throw new XPathException("Invalid JSON escape: \\u must be followed by four hex characters", errorCode);
                    }
                    break;
                }
                default:
                    if (liberal) {
                        // unknown escape: keep the escaped character itself (not the backslash)
                        buffer.append(escaped);
                    } else {
                        throw new XPathException("Unknown escape sequence \\" + escaped, errorCode);
                    }
            }
        }
        return buffer.toString();
    }

    /**
     * Read exactly four ASCII hexadecimal digits from a string. Signs and non-ASCII digits,
     * which {@code Integer.parseInt} would accept, are rejected.
     *
     * @param s     the string to read from
     * @param start the offset of the first of the four digits
     * @return the value of the four digits (0 to 0xFFFF), or -1 if fewer than four characters
     *         are available or any of them is not a hex digit
     */
    private static int hexEscapeValue(String s, int start) {
        if (start + 4 > s.length()) {
            return -1;
        }
        int value = 0;
        for (int k = start; k < start + 4; k++) {
            char h = s.charAt(k);
            int digit;
            if (h >= '0' && h <= '9') {
                digit = h - '0';
            } else if (h >= 'a' && h <= 'f') {
                digit = h - 'a' + 10;
            } else if (h >= 'A' && h <= 'F') {
                digit = h - 'A' + 10;
            } else {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }





    /**
     * Throw an XPathException with the given message and error code.
     * This method never returns normally.
     *
     * @param message the error message
     * @param code    the error code to be used
     * @throws net.sf.saxon.trans.XPathException always
     */

    private static void error(String message, String code)
            throws XPathException {
        throw new XPathException(message, code);
    }

    /**
     * Report invalid JSON input: throws an error whose message is the supplied
     * message prefixed with "Invalid JSON input: ".
     * This method never returns normally.
     *
     * @param message the error message (without the prefix)
     * @param code    the error code to be used
     * @throws net.sf.saxon.trans.XPathException always
     */

    private void invalidJSON(String message, String code)
            throws XPathException {
        error("Invalid JSON input: " + message, code);
    }


    /**
     * Inner class to do the tokenization.
     * <p>Each call on {@link #next()} reads one token from the input, records its type in
     * {@code currentToken} and, for string literals, numeric literals and keywords, its
     * text in {@code currentTokenValue}. Punctuation tokens leave {@code currentTokenValue}
     * unchanged. String literals are delivered without their enclosing quotes and with
     * escape sequences still in place.</p>
     */

    private class JsonTokenizer {

        private String input;
        // offset in input of the next character to be read
        private int position;
        public int currentToken;
        public FastStringBuffer currentTokenValue = new FastStringBuffer(FastStringBuffer.C64);

        private final static int LSQB = 1;
        private final static int RSQB = 2;
        public static final int LCURLY = 3;
        public static final int RCURLY = 4;
        public static final int STRING_LITERAL = 5;
        public static final int NUMERIC_LITERAL = 6;
        public static final int TRUE = 7;
        public static final int FALSE = 8;
        public static final int NULL = 9;
        public static final int COLON = 10;
        public static final int COMMA = 11;
        public static final int EOF = 999;

        /**
         * Create a tokenizer over the given input, skipping a leading byte order mark if present.
         *
         * @param input the JSON text
         */
        public JsonTokenizer(String input) {
            this.input = input;
            this.position = 0;
            // Ignore a leading BOM
            if (input.length() > 0 && input.charAt(0) == 65279) {
                position++;
            }
        }

        /**
         * Read the next token and make it the current token.
         *
         * @return the type of the token read ({@code EOF} at the end of the input)
         * @throws XPathException if the input at this point does not form a valid token
         */
        public int next() throws XPathException {
            currentToken = readToken();
            return currentToken;
        }

        /**
         * Skip whitespace and read one token starting at the current position.
         *
         * @return the type of the token read
         * @throws XPathException if the input at this point does not form a valid token
         */
        private int readToken() throws XPathException {
            while (position < input.length() && Whitespace.isWhitespace(input.charAt(position))) {
                position++;
            }
            if (position >= input.length()) {
                return EOF;
            }
            char ch = input.charAt(position++);
            switch (ch) {
                case '[':
                    return LSQB;
                case '{':
                    return LCURLY;
                case ']':
                    return RSQB;
                case '}':
                    return RCURLY;
                case '"':
                    readStringLiteral();
                    return STRING_LITERAL;
                case ':':
                    return COLON;
                case ',':
                    return COMMA;
                case '-':
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    readNumericLiteral(ch);
                    return NUMERIC_LITERAL;
                case 't':
                case 'f':
                case 'n':
                    return readKeyword(ch);
                default:
                    // step back so that the reported position is that of the offending character
                    position--;
                    invalidJSON("Unexpected character '" + ch + "' (\\u" +
                            Integer.toHexString(ch) + ") at position " + position, ERR_GRAMMAR);
                    return -1;
            }
        }

        /**
         * Read the remainder of a string literal whose opening quote has just been consumed,
         * leaving its content (escapes intact) in {@code currentTokenValue}.
         *
         * @throws XPathException if the closing quote is missing, or if a backslash-u escape
         *                        is not followed by four hex digits
         */
        private void readStringLiteral() throws XPathException {
            currentTokenValue.setLength(0);
            boolean afterBackslash = false;
            while (true) {
                // Checked before every read, so a literal cut off straight after its
                // opening quote is reported as a JSON error rather than an index failure
                if (position >= input.length()) {
                    invalidJSON("Unclosed quotes in string literal", ERR_GRAMMAR);
                }
                char c = input.charAt(position++);
                if (afterBackslash && c == 'u' && !isFourHexDigits(position)) {
                    invalidJSON("\\u must be followed by four hex characters", ERR_GRAMMAR);
                }
                if (c == '"' && !afterBackslash) {
                    return;
                }
                currentTokenValue.append(c);
                // a backslash that is itself escaped does not escape what follows
                afterBackslash = c == '\\' && !afterBackslash;
            }
        }

        /**
         * Test whether the four characters at the given offset are all ASCII hex digits.
         *
         * @param start offset in the input of the first character to test
         * @return true if four characters are available and all are hex digits
         */
        private boolean isFourHexDigits(int start) {
            if (start + 4 > input.length()) {
                return false;
            }
            for (int k = start; k < start + 4; k++) {
                char h = input.charAt(k);
                boolean hex = (h >= '0' && h <= '9') || (h >= 'a' && h <= 'f') || (h >= 'A' && h <= 'F');
                if (!hex) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Read the remainder of a numeric literal, leaving its text in {@code currentTokenValue}.
         * Any run of digits, signs, decimal points and exponent markers is accepted here;
         * the lexical form is not validated at this stage.
         *
         * @param first the first character of the literal, already consumed
         */
        private void readNumericLiteral(char first) {
            currentTokenValue.setLength(0);
            currentTokenValue.append(first);
            while (position < input.length()) {
                char c = input.charAt(position);
                boolean numeric = (c >= '0' && c <= '9') || c == '-' || c == '+' || c == '.' || c == 'e' || c == 'E';
                if (!numeric) {
                    break;
                }
                currentTokenValue.append(c);
                position++;
            }
        }

        /**
         * Read the remainder of a keyword (a run of lower-case ASCII letters) and classify it.
         *
         * @param first the first character of the keyword, already consumed
         * @return {@code TRUE}, {@code FALSE} or {@code NULL}
         * @throws XPathException if the word is not one of true, false, null
         */
        private int readKeyword(char first) throws XPathException {
            currentTokenValue.setLength(0);
            currentTokenValue.append(first);
            // bounded by the input length, so a keyword at the very end of the input is handled
            while (position < input.length()) {
                char c = input.charAt(position);
                if (c < 'a' || c > 'z') {
                    break;
                }
                currentTokenValue.append(c);
                position++;
            }
            String val = currentTokenValue.toString();
            if (val.equals("true")) {
                return TRUE;
            } else if (val.equals("false")) {
                return FALSE;
            } else if (val.equals("null")) {
                return NULL;
            }
            error("Unknown constant " + currentTokenValue, ERR_GRAMMAR);
            return -1;  // not reached: error always throws
        }
    }


    // The required type of each recognized option, keyed by option name. Declared with
    // explicit type arguments so that lookups yield a SequenceType without a cast.
    private static final Map<String, SequenceType> requiredTypes = new HashMap<String, SequenceType>(20);

    static {
        requiredTypes.put("liberal", SequenceType.SINGLE_BOOLEAN);
        requiredTypes.put("duplicates", SequenceType.SINGLE_STRING);
        requiredTypes.put("validate", SequenceType.SINGLE_BOOLEAN);
        requiredTypes.put("escape", SequenceType.SINGLE_BOOLEAN);
        // "fallback" must be a single function taking one xs:string and returning one xs:string
        requiredTypes.put("fallback", SequenceType.makeSequenceType(new SpecificFunctionType(new SequenceType[]{SequenceType.SINGLE_STRING}, SequenceType.SINGLE_STRING), StaticProperty.EXACTLY_ONE));
    }

    // The option names recognized by checkOptions()
    private static String[] optionNames = new String[]{
            "liberal", "duplicates", "validate", "escape", "fallback"
    };

    /**
     * Test whether a string is the name of a recognized option.
     *
     * @param string the candidate option name
     * @return true if the string is one of the names listed in {@code optionNames}
     */
    private static boolean isOptionName(String string) {
        boolean recognized = false;
        for (int i = 0; i < optionNames.length && !recognized; i++) {
            recognized = optionNames[i].equals(string);
        }
        return recognized;
    }

    /**
     * Check the options supplied by the caller and return a map containing only the
     * recognized ones, each converted to its required type.
     * <p>Entries whose key is not a string, or is not a recognized option name, are ignored;
     * they do not affect the processing of the other entries.</p>
     *
     * @param map     the options as supplied
     * @param context XPath evaluation context
     * @return a new map holding the recognized options with their converted values
     * @throws XPathException (with code XPTY0004 set on the conversion role) if the value of a
     *                        recognized option cannot be converted to the required type
     */
    public static MapItem checkOptions(MapItem map, XPathContext context) throws XPathException {
        HashTrieMap result = new HashTrieMap(context);
        TypeHierarchy th = context.getConfiguration().getTypeHierarchy();

        AtomicIterator keysIterator = map.keys();
        AtomicValue key;
        while ((key = keysIterator.next()) != null) {
            if (!(key instanceof StringValue)) {
                // Skip just this entry: stopping here would drop whichever options
                // happen to follow it in the map's iteration order
                continue;
            }
            String keyName = key.getStringValue();
            if (!isOptionName(keyName)) {
                continue;
            }
            RoleDiagnostic role = new RoleDiagnostic(RoleDiagnostic.OPTION, keyName, 0);
            role.setErrorCode("XPTY0004");
            SequenceType required = (SequenceType) requiredTypes.get(keyName);
            Sequence converted = th.applyFunctionConversionRules(
                    map.get(key), required, role, ExplicitLocation.UNKNOWN_LOCATION);
            converted = SequenceTool.toGroundedValue(converted);
            result = result.addEntry(key, converted);
        }
        return result;
    }

}

// Copyright (c) 2015 Saxonica Limited. All rights reserved.




© 2015 - 2025 Weber Informatics LLC | Privacy Policy