All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.expr.parser.Token Maven / Gradle / Ivy

There is a newer version: 12.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.expr.parser;

import java.util.HashMap;

/**
 * This class holds static constants and methods defining the lexical tokens used in
 * XPath and XQuery, and associated keywords.
 */

public abstract class Token {

    /*
     * Token numbers. Those in the range 0 to LAST_OPERATOR are tokens that can be followed
     * by a name or expression; those above this range are tokens that can be
     * followed by an binary operator.
     */

    /**
     * Pseudo-token representing implicit end of expression (the parser doesn't care what follows
     * the expression)
     */
    public static final int IMPLICIT_EOF = -1;
    /**
     * Pseudo-token representing the end of the expression
     */
    public static final int EOF = 0;
    /**
     * "union" or "|" token
     */
    public static final int UNION = 1;
    /**
     * Forwards "/"
     */
    public static final int SLASH = 2;
    /**
     * At token, "@"
     */
    public static final int AT = 3;
    /**
     * Left square bracket
     */
    public static final int LSQB = 4;
    /**
     * Left parenthesis
     */
    public static final int LPAR = 5;
    /**
     * Equals token ("=")
     */
    public static final int EQUALS = 6;
    /**
     * Comma token
     */
    public static final int COMMA = 7;
    /**
     * Double forwards slash, "//"
     */
    public static final int SLASH_SLASH = 8;
    /**
     * Operator "or"
     */
    public static final int OR = 9;
    /**
     * Operator "and"
     */
    public static final int AND = 10;
    /**
     * Operator ">"
     */
    public static final int GT = 11;
    /**
     * Operator "<"
     */
    public static final int LT = 12;
    /**
     * Operator ">="
     */
    public static final int GE = 13;
    /**
     * Operator "<="
     */
    public static final int LE = 14;
    /**
     * Operator "+"
     */
    public static final int PLUS = 15;
    /**
     * Binary minus operator
     */
    public static final int MINUS = 16;
    /**
     * Multiply operator, "*" when used in an operator context
     */
    public static final int MULT = 17;
    /**
     * Operator "div"
     */
    public static final int DIV = 18;
    /**
     * Operator "mod"
     */
    public static final int MOD = 19;
    /**
     * Operator "is"
     */
    public static final int IS = 20;
    /**
     * "$" symbol
     */
    public static final int DOLLAR = 21;
    /**
     * Operator not-equals. That is, "!="
     */
    public static final int NE = 22;
    /**
     * Operator "intersect"
     */
    public static final int INTERSECT = 23;
    /**
     * Operator "except"
     */
    public static final int EXCEPT = 24;
    /**
     * Keyword "return"
     */
    public static final int RETURN = 25;
    /**
     * Ketword "then"
     */
    public static final int THEN = 26;
    /**
     * Keyword "else"
     */
    public static final int ELSE = 27;
    /**
     * Keyword "where"
     */
    public static final int WHERE = 28;
    /**
     * Operator "to"
     */
    public static final int TO = 29;
    /**
     * Operator "||"
     */
    public static final int CONCAT = 30;
    /**
     * Keyword "in"
     */
    public static final int IN = 31;
    /**
     * Keyword "some"
     */
    public static final int SOME = 32;
    /**
     * Keyword "every"
     */
    public static final int EVERY = 33;
    /**
     * Keyword "satisfies"
     */
    public static final int SATISFIES = 34;
    /**
     * Token representing the name of a function and the following "(" symbol
     */
    public static final int FUNCTION = 35;

    /**
     * Token representing the name of an axis and the following "::" symbol
     */
    public static final int AXIS = 36;
    /**
     * Keyword "if"
     */
    public static final int IF = 37;
    /**
     * Operator "<<"
     */
    public static final int PRECEDES = 38;
    /**
     * Operator ">>"
     */
    public static final int FOLLOWS = 39;
    /**
     * Operator "!"
     */
    public static final int BANG = 40;
    /**
     * "::" symbol
     */
    public static final int COLONCOLON = 41;
    /**
     * ":*" symbol
     */
    public static final int COLONSTAR = 42;
    /**
     * Token representing a function name and the following "#" symbol
     */
    public static final int NAMED_FUNCTION_REF = 43;
    /**
     * # symbol
     */
    public static final int HASH = 44;
    /**
     * operator "instance of"
     */
    public static final int INSTANCE_OF = 45;
    /**
     * operator "cast as"
     */
    public static final int CAST_AS = 46;
    /**
     * operator "treat as"
     */
    public static final int TREAT_AS = 47;
    /**
     * operator "??"
     */
    public static final int QMARK_QMARK = 48;
    /*
     * operator "!!"
     */
    public static final int BANG_BANG = 49;
    /**
     * operator "eq"
     */
    public static final int FEQ = 50;       // "Fortran" style comparison operators eq, ne, etc
    /**
     * operator "ne"
     */
    public static final int FNE = 51;
    /**
     * operator "gt"
     */
    public static final int FGT = 52;
    /**
     * operator "lt"
     */
    public static final int FLT = 53;
    /**
     * operator "ge"
     */
    public static final int FGE = 54;
    /**
     * opeartor "le"
     */
    public static final int FLE = 55;
    /**
     * operator "idiv"
     */
    public static final int IDIV = 56;
    /**
     * operator "castable as"
     */
    public static final int CASTABLE_AS = 57;
    /**
     * ":=" symbol (XQuery only)
     */
    public static final int ASSIGN = 58;
    /**
     * "{" symbol (XQuery only)
     */
    public static final int LCURLY = 59;
    /**
     * composite token: <keyword "{"> (XQuery only)
     */
    public static final int KEYWORD_CURLY = 60;
    /**
     * composite token <'element' QNAME> (XQuery only)
     */
    public static final int ELEMENT_QNAME = 61;
    /**
     * composite token <'attribute' QNAME> (XQuery only)
     */
    public static final int ATTRIBUTE_QNAME = 62;
    /**
     * composite token <'pi' QNAME> (XQuery only)
     */
    public static final int PI_QNAME = 63;
    /**
     * composite token <'namespace' QNAME> (XQuery only)
     */
    public static final int NAMESPACE_QNAME = 64;
    /**
     * Keyword "typeswitch"
     */
    public static final int TYPESWITCH = 65;
    /**
     * Keyword "switch" (XQuery 1.1)
     */
    public static final int SWITCH = 66;
    /**
     * Keyword "case"
     */
    public static final int CASE = 67;
    /**
     * Keyword "modify"
     */
    public static final int MODIFY = 68;

    /**
     * Node kind, e.g. "node()" or "comment()"
     */
    public static final int NODEKIND = 69;
    /**
     * "*:" token
     */
    public static final int SUFFIX = 70;    // e.g. *:suffix - the suffix is actually a separate token
    /**
     * "as" (in XQuery Update rename expression)
     */
    public static final int AS = 71;
    /*
     * "group by" (XQuery 3.0)
     */
    public static final int GROUP_BY = 72;
    /**
     * "for tumbling" (XQuery 3.0)
     */
    public static final int FOR_TUMBLING = 73;
    /**
     * "for sliding" (XQuery 3.0)
     */
    public static final int FOR_SLIDING = 74;
    /**
     * "for member" (XQuery 4.0)
     */
    public static final int FOR_MEMBER = 75;

    // map colon key-entry separator
    /**
     * ":" (XPath 3.0 maps)
     */
    public static final int COLON = 76;
    /**
     * Arrow operator "=>" (XQuery 3.1)
     */
    public static final int FAT_ARROW = 77;
    /**
     * First part of a string template. Token value includes all the text from ``[ up to the first `{
     */
    public static final int STRING_CONSTRUCTOR_INITIAL = 78;
    /**
     * "otherwise" (Saxon extension)
     */
    public static final int OTHERWISE = 79;
    /**
     * "andAlso" (Saxon extension)
     */
    public static final int AND_ALSO = 80;
    /**
     * "orElse" (Saxon extension)
     */
    public static final int OR_ELSE = 81;
    /**
     * "by"
     */
    public static final int BY = 82;
    /**
     * Thin arrow operator "->" (XQuery 4.0)
     */
    public static final int THIN_ARROW = 83;


    // The following tokens are used only in the query prolog. They are categorized
    // as operators on the basis that a following name is treated as a name rather than
    // an operator.


    /**
     * "xquery version"
     */
    public static final int XQUERY_VERSION = 88;
    /**
     * "xquery encoding"
     */
    public static final int XQUERY_ENCODING = 89;
    /**
     * "declare namespace"
     */
    public static final int DECLARE_NAMESPACE = 90;
    /**
     * "declare default"
     */
    public static final int DECLARE_DEFAULT = 91;
    /**
     * "declare construction"
     */
    public static final int DECLARE_CONSTRUCTION = 92;
    /**
     * "declare base-uri"
     */
    public static final int DECLARE_BASEURI = 93;
    /**
     * "declare boundary-space"
     */
    public static final int DECLARE_BOUNDARY_SPACE = 94;
    /**
     * "declare decimal-format"
     */
    public static final int DECLARE_DECIMAL_FORMAT = 95;
    /**
     * "import schema"
     */
    public static final int IMPORT_SCHEMA = 96;
    /**
     * "import module"
     */
    public static final int IMPORT_MODULE = 97;
    /**
     * "declare variable"
     */
    public static final int DECLARE_VARIABLE = 98;
    /**
     * "declare context"
     */
    public static final int DECLARE_CONTEXT = 99;
    /**
     * "declare function"
     */
    public static final int DECLARE_FUNCTION = 100;
    /**
     * "module namespace"
     */
    public static final int MODULE_NAMESPACE = 101;
    /**
     * Various compound symbols supporting XQuery validation expression
     */
    public static final int VALIDATE = 102;
    public static final int VALIDATE_STRICT = 103;
    public static final int VALIDATE_LAX = 104;
    public static final int VALIDATE_TYPE = 105;
    /**
     * percent sign '%'
     */
    public static final int PERCENT = 106;

    /**
     * "declare xmlspace"
     */
    public static final int DECLARE_ORDERING = 107;

    /**
     * "declare copy-namespaces"
     */
    public static final int DECLARE_COPY_NAMESPACES = 108;
    /**
     * "declare option"
     */
    public static final int DECLARE_OPTION = 109;
    /**
     * "declare revalidation"
     */
    public static final int DECLARE_REVALIDATION = 110;
    /**
     * "insert node/nodes"
     */
    public static final int INSERT_NODE = 111;
    /**
     * "delete node/nodes"
     */
    public static final int DELETE_NODE = 112;
    /**
     * "replace node/nodes"
     */
    public static final int REPLACE_NODE = 113;
    /**
     * "replace value"
     */
    public static final int REPLACE_VALUE = 114;
    /**
     * "rename node"
     */
    public static final int RENAME_NODE = 115;
    /**
     * "first into"
     */
    public static final int FIRST_INTO = 116;
    /**
     * "last into"
     */
    public static final int LAST_INTO = 117;
    /**
     * "after"
     */
    public static final int AFTER = 118;
    /**
     * "before"
     */
    public static final int BEFORE = 119;
    /**
     * "into"
     */
    public static final int INTO = 120;
    /**
     * "with"
     */
    public static final int WITH = 121;
    /**
     * "declare updating [function]"
     */
    public static final int DECLARE_UPDATING = 122;
    /**
     * declare %
     */
    public static final int DECLARE_ANNOTATED = 123;
    /**
     * Saxon extension: declare type
     */
    public static final int DECLARE_TYPE = 124;
    /**
     * semicolon separator
     */
    public static final int SEMICOLON = 149;


    /**
     * Constant identifying the token number of the last token to be classified as an operator
     */
    public static final int LAST_OPERATOR = 150;

    // Tokens that set "operator" context, so an immediately following "div" is recognized
    // as an operator, not as an element name

    /**
     * Name token (a QName, in general)
     */
    public static final int NAME = 201;
    /**
     * String literal
     */
    public static final int STRING_LITERAL = 202;
    /**
     * Right square bracket
     */
    public static final int RSQB = 203;
    /**
     * Right parenthesis
     */
    public static final int RPAR = 204;
    /**
     * "." symbol
     */
    public static final int DOT = 205;
    /**
     * ".." symbol
     */
    public static final int DOTDOT = 206;
    /**
     * "*" symbol when used as a wildcard
     */
    public static final int STAR = 207;
    /**
     * "prefix:*" token
     */
    public static final int PREFIX = 208;    // e.g. prefix:*
    /**
     * Numeric literal
     */
    public static final int NUMBER = 209;

    /**
     * "for" keyword
     */
    public static final int FOR = 211;

    /**
     * Keyword "default"
     */
    public static final int DEFAULT = 212;
    /**
     * Question mark symbol. That is, "?"
     */
    public static final int QMARK = 213;
    /**
     * "}" symbol (XQuery only)
     */
    public static final int RCURLY = 215;
    /**
     * "let" keyword (XQuery only)
     */
    public static final int LET = 216;
    /**
     * "<" at the start of a tag (XQuery only). The pseudo-XML syntax that
     * follows is read character-by-character by the XQuery parser
     */
    public static final int TAG = 217;
    /**
     * A token representing an XQuery pragma.
     * This construct "(# .... #)" is regarded as a single token, for the QueryParser to sort out.
     */
    public static final int PRAGMA = 218;
    /**
     * "copy" keyword
     */
    public static final int COPY = 219;
    /**
     * "count" keyword
     */
    public static final int COUNT = 220;
    /**
     * Complete string template with no embedded expressions
     */
    public static final int STRING_LITERAL_BACKTICKED = 222;

    /**
     * Unary minus sign
     */
    public static final int NEGATE = 299;    // unary minus: not actually a token, but we
    // use token numbers to identify operators.


    /**
     * The following strings are used to represent tokens in error messages
     */

    public final static String[] tokens = new String[300];

    static {
        tokens[EOF] = "";
        tokens[UNION] = "|";
        tokens[SLASH] = "/";
        tokens[AT] = "@";
        tokens[LSQB] = "[";
        tokens[LPAR] = "(";
        tokens[EQUALS] = "=";
        tokens[COMMA] = ",";
        tokens[SLASH_SLASH] = "//";
        tokens[OR] = "or";
        tokens[AND] = "and";
        tokens[GT] = ">";
        tokens[LT] = "<";
        tokens[GE] = ">=";
        tokens[LE] = "<=";
        tokens[PLUS] = "+";
        tokens[MINUS] = "-";
        tokens[MULT] = "*";
        tokens[DIV] = "div";
        tokens[MOD] = "mod";
        tokens[IS] = "is";
        tokens[DOLLAR] = "$";
        tokens[NE] = "!=";
        tokens[BANG] = "!";
        tokens[CONCAT] = "||";
        tokens[INTERSECT] = "intersect";
        tokens[EXCEPT] = "except";
        tokens[RETURN] = "return";
        tokens[THEN] = "then";
        tokens[ELSE] = "else";
        tokens[TO] = "to";
        tokens[IN] = "in";
        tokens[SOME] = "some";
        tokens[EVERY] = "every";
        tokens[SATISFIES] = "satisfies";
        tokens[FUNCTION] = "(";
        tokens[AXIS] = "";
        tokens[IF] = "if(";
        tokens[PRECEDES] = "<<";
        tokens[FOLLOWS] = ">>";
        tokens[COLONCOLON] = "::";
        tokens[COLONSTAR] = ":*";
        tokens[HASH] = "#";
        tokens[INSTANCE_OF] = "instance of";
        tokens[CAST_AS] = "cast as";
        tokens[TREAT_AS] = "treat as";
        tokens[QMARK_QMARK] = "??";
        tokens[BANG_BANG] = "!!";
        tokens[FEQ] = "eq";
        tokens[FNE] = "ne";
        tokens[FGT] = "gt";
        tokens[FGE] = "ge";
        tokens[FLT] = "lt";
        tokens[FLE] = "le";
        tokens[IDIV] = "idiv";
        tokens[CASTABLE_AS] = "castable as";
        tokens[ASSIGN] = ":=";
        tokens[SWITCH] = "switch";
        tokens[TYPESWITCH] = "typeswitch";
        tokens[CASE] = "case";
        tokens[DEFAULT] = "default";
        //tokens [ AS_FIRST ] = "as first";
        //tokens [ AS_LAST ] = "as last";
        tokens[AFTER] = "after";
        tokens[BEFORE] = "before";
        tokens[INTO] = "into";
        tokens[WITH] = "with";
        tokens[MODIFY] = "modify";
        tokens[AS] = "as";

        tokens[COLON] = ":";
        tokens[FAT_ARROW] = "=>";
        tokens[THIN_ARROW] = "->";
        tokens[AND_ALSO] = "andAlso";
        tokens[OR_ELSE] = "orElse";
        tokens[BY] = "by";
        tokens[STRING_CONSTRUCTOR_INITIAL] = "``[`{";
        tokens[STRING_LITERAL_BACKTICKED] = "``[]``";
        tokens[OTHERWISE] = "otherwise";

        tokens[NAME] = "";
        tokens[STRING_LITERAL] = "";
        tokens[RSQB] = "]";
        tokens[RPAR] = ")";
        tokens[DOT] = ".";
        tokens[DOTDOT] = "..";
        tokens[STAR] = "*";
        tokens[PREFIX] = "";
        tokens[NUMBER] = "";
        tokens[NODEKIND] = "()";
        tokens[FOR] = "for";
        tokens[SUFFIX] = "<*:local-name>";
        tokens[QMARK] = "?";
        tokens[LCURLY] = "{";
        tokens[KEYWORD_CURLY] = " {";
        tokens[RCURLY] = "}";
        tokens[LET] = "let";
        tokens[VALIDATE] = "validate {";
        tokens[TAG] = "";
        tokens[PRAGMA] = "(# ... #)";
        tokens[SEMICOLON] = ";";
        tokens[COPY] = "copy";
        tokens[NEGATE] = "-";
        tokens[PERCENT] = "%";
    }

    /**
     * Lookup table for composite (two-keyword) tokens
     */
    public static HashMap doubleKeywords = new HashMap<>(30);
    /**
     * Pseudo-token representing the start of the expression
     */
    public static final int UNKNOWN = -1;

    private Token() {
    }

    static {
        mapDouble("instance of", INSTANCE_OF);
        mapDouble("cast as", CAST_AS);
        mapDouble("treat as", TREAT_AS);
        mapDouble("castable as", CASTABLE_AS);
        mapDouble("group by", GROUP_BY);
        mapDouble("for tumbling", FOR_TUMBLING);
        mapDouble("for sliding", FOR_SLIDING);
        mapDouble("for member", FOR_MEMBER);
        mapDouble("xquery version", XQUERY_VERSION);
        mapDouble("xquery encoding", XQUERY_ENCODING);
        mapDouble("declare namespace", DECLARE_NAMESPACE);
        mapDouble("declare default", DECLARE_DEFAULT);
        mapDouble("declare construction", DECLARE_CONSTRUCTION);
        mapDouble("declare base-uri", DECLARE_BASEURI);
        mapDouble("declare boundary-space", DECLARE_BOUNDARY_SPACE);
        mapDouble("declare decimal-format", DECLARE_DECIMAL_FORMAT);
        mapDouble("declare ordering", DECLARE_ORDERING);
        mapDouble("declare copy-namespaces", DECLARE_COPY_NAMESPACES);
        mapDouble("declare option", DECLARE_OPTION);
        mapDouble("declare revalidation", DECLARE_REVALIDATION);
        mapDouble("declare type", DECLARE_TYPE); // Saxon extension
        mapDouble("import schema", IMPORT_SCHEMA);
        mapDouble("import module", IMPORT_MODULE);
        mapDouble("declare variable", DECLARE_VARIABLE);
        mapDouble("declare context", DECLARE_CONTEXT);
        mapDouble("declare function", DECLARE_FUNCTION);
        mapDouble("declare updating", DECLARE_UPDATING);
        mapDouble("module namespace", MODULE_NAMESPACE);
        mapDouble("validate strict", VALIDATE_STRICT);
        mapDouble("validate lax", VALIDATE_LAX);
        mapDouble("validate type", VALIDATE_TYPE);
        mapDouble("insert node", INSERT_NODE);
        mapDouble("insert nodes", INSERT_NODE);
        mapDouble("delete node", DELETE_NODE);
        mapDouble("delete nodes", DELETE_NODE);
        mapDouble("replace node", REPLACE_NODE);
        mapDouble("replace value", REPLACE_VALUE);
        mapDouble("rename node", RENAME_NODE);
        mapDouble("rename nodes", RENAME_NODE);
        mapDouble("first into", FIRST_INTO);
        mapDouble("last into", LAST_INTO);
    }

    private static void mapDouble(String doubleKeyword, int token) {
        doubleKeywords.put(doubleKeyword, token);
        tokens[token] = doubleKeyword;
    }

    /**
     * Return the inverse of a relational operator, so that "a op b" can be
     * rewritten as "b inverse(op) a"
     *
     * @param operator the operator whose inverse is required
     * @return the inverse operator
     */

    public static int inverse(int operator) {
        switch (operator) {
            case LT:
                return GT;
            case LE:
                return GE;
            case GT:
                return LT;
            case GE:
                return LE;
            case FLT:
                return FGT;
            case FLE:
                return FGE;
            case FGT:
                return FLT;
            case FGE:
                return FLE;
            default:
                return operator;
        }
    }

    /**
     * Return the negation of a relational operator, so that "a op b" can be
     * rewritten as not(b op' a)
     *
     * @param operator the operator to be negated
     * @return the negated operator
     */

    public static int negate(int operator) {
        switch (operator) {
            case FEQ:
                return FNE;
            case FNE:
                return FEQ;
            case FLT:
                return FGE;
            case FLE:
                return FGT;
            case FGT:
                return FLE;
            case FGE:
                return FLT;
            default:
                throw new IllegalArgumentException("Invalid operator for negate()");
        }
    }

    public static boolean isOrderedOperator(int operator) {
        return operator != FEQ && operator != FNE;
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy