org.partiql.lang.syntax.LexerConstants.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of partiql-lang-kotlin Show documentation
Show all versions of partiql-lang-kotlin Show documentation
An implementation of PartiQL for the JVM written in Kotlin.
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at:
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
* language governing permissions and limitations under the License.
*/
@file:Suppress("DEPRECATION")
package org.partiql.lang.syntax
import org.partiql.lang.syntax.TokenType.KEYWORD
import org.partiql.lang.syntax.TokenType.OPERATOR
@JvmField internal val TRIM_SPECIFICATION_KEYWORDS = setOf("both", "leading", "trailing")
internal enum class DateTimePart {
YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, TIMEZONE_HOUR, TIMEZONE_MINUTE
}
internal val DATE_TIME_PART_KEYWORDS: Set = DateTimePart.values()
.map { it.toString().toLowerCase() }.toSet()
/** All SQL-92 keywords. */
@JvmField internal val SQL92_KEYWORDS = setOf(
"absolute",
"action",
"add",
"all",
"allocate",
"alter",
"and",
"any",
"are",
"as",
"asc",
"assertion",
"at",
"authorization",
"avg",
"begin",
"between",
"bit",
"bit_length",
"by",
"cascade",
"cascaded",
"case",
"cast",
"catalog",
"char",
"character",
"character_length",
"char_length",
"check",
"close",
"coalesce",
"collate",
"collation",
"column",
"commit",
"connect",
"connection",
"constraint",
"constraints",
"continue",
"convert",
"corresponding",
"count",
"create",
"cross",
"current",
"current_date",
"current_time",
"current_timestamp",
"current_user",
"cursor",
"date",
"deallocate",
"dec",
"decimal",
"declare",
"default",
"deferrable",
"deferred",
"delete",
"desc",
"describe",
"descriptor",
"diagnostics",
"disconnect",
"distinct",
"domain",
"double",
"drop",
"else",
"end",
"end-exec",
"escape",
"except",
"exception",
"exec",
"execute",
"exists",
"external",
"extract",
"date_add",
"date_diff",
"false",
"fetch",
"first",
"float",
"for",
"foreign",
"found",
"from",
"full",
"get",
"global",
"go",
"goto",
"grant",
"group",
"having",
"identity",
"immediate",
"in",
"indicator",
"initially",
"inner",
"input",
"insensitive",
"insert",
"int",
"integer",
"intersect",
"interval",
"into",
"is",
"isolation",
"join",
"key",
"language",
"last",
"left",
"level",
"like",
"local",
"lower",
"match",
"max",
"min",
"module",
"names",
"national",
"natural",
"nchar",
"next",
"no",
"not",
"null",
"nullif",
"coalesce",
"numeric",
"octet_length",
"of",
"on",
"only",
"open",
"option",
"or",
"order",
"outer",
"output",
"overlaps",
"pad",
"partial",
"position",
"precision",
"prepare",
"preserve",
"primary",
"prior",
"privileges",
"procedure",
"public",
"read",
"real",
"references",
"relative",
"restrict",
"revoke",
"right",
"rollback",
"rows",
"schema",
"scroll",
"section",
"select",
"session",
"session_user",
"set",
"size",
"smallint",
"some",
"space",
"sql",
"sqlcode",
"sqlerror",
"sqlstate",
"substring",
"sum",
"system_user",
"table",
"temporary",
"then",
"time",
"timestamp",
"to",
"transaction",
"translate",
"translation",
"trim",
"true",
"union",
"unique",
"unknown",
"update",
"upper",
"usage",
"user",
"using",
"value",
"values",
"varchar",
"varying",
"view",
"when",
"whenever",
"where",
"with",
"work",
"write",
"zone"
)
// Note: DATE_TIME_PART_KEYWORDs are not keywords in the traditional sense--they are only keywords within
// the context of the DATE_ADD, DATE_DIFF and EXTRACT functions, for which [SqlParser] has special support.
// Similarly, TRIM_SPECIFICATION_KEYWORDS are only keywords within the context of the TRIM function.
/** PartiQL additional keywords. */
@JvmField internal val SQLPP_KEYWORDS = setOf(
"can_cast",
"can_lossless_cast",
"missing",
"pivot",
"unpivot",
"limit",
"offset",
"tuple",
"remove",
"index",
"let",
// Type names
"smallint",
"integer2",
"int2",
"integer4",
"int4",
"integer8",
"int8",
"bigint",
"conflict",
"do",
"nothing",
"returning",
"modified",
"all",
"new",
"old",
"let",
// Ion type names
// null
"bool",
"boolean",
// int
// float
// decimal
// timestamp
"string",
"symbol",
"clob",
"blob",
"struct",
"list",
"sexp",
"bag"
)
/** All Keywords. */
@JvmField internal val KEYWORDS = SQL92_KEYWORDS union SQLPP_KEYWORDS
/** Keywords that are aliases for type keywords. */
@JvmField internal val TYPE_ALIASES = mapOf(
"varchar" to "character_varying",
"char" to "character",
"dec" to "decimal",
"int" to "integer",
"int2" to "smallint",
"integer2" to "smallint",
"int4" to "integer4",
"int8" to "integer8",
"bigint" to "integer8",
"bool" to "boolean"
)
/**
* Indicates the keywords (and pseudo keywords) the indicate types that map to core types.
* Some of these types (e.g. VARCHAR) requires a parameters, but many implementations
* don't require that.
*/
@JvmField internal val CORE_TYPE_NAME_ARITY_MAP = mapOf(
"missing" to 0..0, // PartiQL
"null" to 0..0, // Ion
"boolean" to 0..0, // Ion & SQL-99
"smallint" to 0..0, // SQL-92
"integer4" to 0..0, // PartiQL
"integer8" to 0..0, // PartiQL
"integer" to 0..0, // Ion & SQL-92
"float" to 0..1, // Ion & SQL-92
"real" to 0..0, // SQL-92
"double_precision" to 0..0, // SQL-92
"decimal" to 0..2, // Ion & SQL-92
"numeric" to 0..2, // SQL-92
"timestamp" to 0..0, // Ion & SQL-92
"date" to 0..0, // PartiQL & SQL-92
"time" to 0..1, // PartiQL & SQL-92
"character" to 0..1, // SQL-92
"character_varying" to 0..1, // SQL-92
"string" to 0..0, // Ion
"symbol" to 0..0, // Ion
"clob" to 0..0, // Ion
"blob" to 0..0, // Ion
"struct" to 0..0, // Ion
"tuple" to 0..0, // PartiQL
"list" to 0..0, // Ion
"sexp" to 0..0, // Ion
"bag" to 0..0 // PartiQL
// TODO SQL-92 types BIT, BIT VARYING, DATE, TIME, INTERVAL and TIMEZONE qualifier
)
/** Indicates the keywords that indicate special union types. */
@JvmField internal val UNION_TYPE_NAME_ARITY_MAP = mapOf(
"any" to 0..0,
/* ElasticSearch Data Types */
"es_any" to 0..0
)
/** All type names and their arity. */
@JvmField internal val ALL_TYPE_NAME_ARITY_MAP = CORE_TYPE_NAME_ARITY_MAP + UNION_TYPE_NAME_ARITY_MAP
/** Keywords that are normal function names. */
@JvmField internal val FUNCTION_NAME_KEYWORDS = setOf(
"exists",
// aggregate functions
"count",
"avg",
"max",
"min",
"sum",
// string functions
// POSITION, SUBSTRING, TRIM, EXTRACT, TRANSLATE, CONVERT have special syntax
"substring",
"char_length",
"character_length",
"octet_length",
"bit_length",
"upper",
"lower",
// functions
"size",
// sexp/list/bag constructors as functions
"sexp",
"list",
"bag"
)
/** Aggregates functions. */
@JvmField val STANDARD_AGGREGATE_FUNCTIONS = setOf(
"count",
"avg",
"max",
"min",
"sum"
)
@JvmField internal val BASE_DML_KEYWORDS = setOf("insert_into", "set", "remove")
/**
* These reserved keywords cannot be used as identifiers for items in `select list`.
* Note that this list is not exhaustive.
*/
@JvmField internal val RESERVED_KEYWORDS = BASE_DML_KEYWORDS + setOf("update", "delete", "select", "from", "where")
@JvmField internal val BOOLEAN_KEYWORDS = setOf("true", "false")
/** Operator renames for the AST. */
@JvmField internal val OPERATOR_ALIASES = mapOf(
"!=" to "<>"
)
/** Operators that parse as infix, but have special parsing rules. */
@JvmField internal val SPECIAL_INFIX_OPERATORS = setOf(
"between", "not_between",
"like", "not_like" // optionally a ternary operator when `ESCAPE` is present
)
/** Binary operators with verbatim lexical token equivalents. */
@JvmField internal val SINGLE_LEXEME_BINARY_OPERATORS = setOf(
"+", "-", "/", "%", "*",
"<", "<=", ">", ">=", "=", "<>",
"||",
"and", "or",
"is", "in",
"union", "except", "intersect"
)
/** Tokens comprising multiple lexemes (**happens before** keyword aliasing). */
@JvmField internal val MULTI_LEXEME_TOKEN_MAP = mapOf(
listOf("not", "in") to ("not_in" to OPERATOR),
listOf("is", "not") to ("is_not" to OPERATOR),
listOf("not", "between") to ("not_between" to OPERATOR),
listOf("union", "all") to ("union_all" to OPERATOR),
listOf("intersect", "all") to ("intersect_all" to OPERATOR),
listOf("except", "all") to ("except_all" to OPERATOR),
listOf("union", "distinct") to ("union_distinct" to OPERATOR),
listOf("intersect", "distinct") to ("intersect_distinct" to OPERATOR),
listOf("except", "distinct") to ("except_distinct" to OPERATOR),
listOf("outer", "union") to ("outer_union" to OPERATOR),
listOf("outer", "intersect") to ("outer_intersect" to OPERATOR),
listOf("outer", "except") to ("outer_except" to OPERATOR),
listOf("outer_union", "all") to ("outer_union_all" to OPERATOR),
listOf("outer_intersect", "all") to ("outer_intersect_all" to OPERATOR),
listOf("outer_except", "all") to ("outer_except_all" to OPERATOR),
listOf("outer_union", "distinct") to ("outer_union_distinct" to OPERATOR),
listOf("outer_intersect", "distinct") to ("outer_intersect_distinct" to OPERATOR),
listOf("outer_except", "distinct") to ("outer_except_distinct" to OPERATOR),
listOf("character", "varying") to ("character_varying" to KEYWORD),
listOf("double", "precision") to ("double_precision" to KEYWORD),
listOf("not", "like") to ("not_like" to OPERATOR),
listOf("cross", "join") to ("cross_join" to KEYWORD),
listOf("inner", "join") to ("inner_join" to KEYWORD),
listOf("inner", "cross", "join") to ("cross_join" to KEYWORD),
listOf("left", "join") to ("left_join" to KEYWORD),
listOf("left", "outer", "join") to ("left_join" to KEYWORD),
listOf("left", "cross", "join") to ("left_cross_join" to KEYWORD),
listOf(
"left", "outer",
"cross", "join"
) to ("left_cross_join" to KEYWORD),
listOf("right", "join") to ("right_join" to KEYWORD),
listOf("right", "outer", "join") to ("right_join" to KEYWORD),
listOf("right", "cross", "join") to ("right_cross_join" to KEYWORD),
listOf(
"right", "outer",
"cross", "join"
) to ("right_cross_join" to KEYWORD),
listOf("full", "join") to ("outer_join" to KEYWORD),
listOf("outer", "join") to ("outer_join" to KEYWORD),
listOf("full", "outer", "join") to ("outer_join" to KEYWORD),
listOf("full", "cross", "join") to ("outer_cross_join" to KEYWORD),
listOf("outer", "cross", "join") to ("outer_cross_join" to KEYWORD),
listOf(
"full", "outer",
"cross", "join"
) to ("outer_cross_join" to KEYWORD),
listOf("insert", "into") to ("insert_into" to KEYWORD),
listOf("on", "conflict") to ("on_conflict" to KEYWORD),
listOf("do", "nothing") to ("do_nothing" to KEYWORD),
listOf("modified", "old") to ("modified_old" to KEYWORD),
listOf("modified", "new") to ("modified_new" to KEYWORD),
listOf("all", "old") to ("all_old" to KEYWORD),
listOf("all", "new") to ("all_new" to KEYWORD)
)
@JvmField internal val MULTI_LEXEME_MIN_LENGTH = MULTI_LEXEME_TOKEN_MAP.keys.minOf { it.size }
@JvmField internal val MULTI_LEXEME_MAX_LENGTH = MULTI_LEXEME_TOKEN_MAP.keys.maxOf { it.size }
@JvmField internal val MULTI_LEXEME_BINARY_OPERATORS =
MULTI_LEXEME_TOKEN_MAP.values.filter {
it.second == TokenType.OPERATOR && it.first !in SPECIAL_INFIX_OPERATORS
}.map { it.first }
/** Binary operators. */
@JvmField internal val BINARY_OPERATORS =
SINGLE_LEXEME_BINARY_OPERATORS + MULTI_LEXEME_BINARY_OPERATORS
/** Unary operators. */
@JvmField internal val UNARY_OPERATORS = setOf(
"+", "-", "not"
)
/** Operators specific to the `MATCH` clause. */
@JvmField internal val MATCH_OPERATORS = setOf(
"~"
)
/** All operators with special parsing rules. */
@JvmField internal val SPECIAL_OPERATORS = SPECIAL_INFIX_OPERATORS + setOf(
"@"
)
@JvmField internal val ALL_SINGLE_LEXEME_OPERATORS =
SINGLE_LEXEME_BINARY_OPERATORS + UNARY_OPERATORS + SPECIAL_OPERATORS + MATCH_OPERATORS
@JvmField internal val ALL_OPERATORS =
BINARY_OPERATORS + UNARY_OPERATORS + SPECIAL_OPERATORS + MATCH_OPERATORS
/**
* Operator precedence groups
*/
enum class OperatorPrecedenceGroups(val precedence: Int) {
SET(5),
SELECT(6),
LOGICAL_OR(10),
LOGICAL_AND(20),
LOGICAL_NOT(30),
EQUITY(40),
COMPARISON(50),
ADDITION(60),
MULTIPLY(70)
}
/**
* Precedence rank integer is ascending with higher precedence and is in terms of the
* un-aliased names of the operators.
*/
@JvmField internal val OPERATOR_PRECEDENCE = mapOf(
// set operator group
"union" to OperatorPrecedenceGroups.SET.precedence,
"union_distinct" to OperatorPrecedenceGroups.SET.precedence,
"union_all" to OperatorPrecedenceGroups.SET.precedence,
"intersect" to OperatorPrecedenceGroups.SET.precedence,
"intersect_distinct" to OperatorPrecedenceGroups.SET.precedence,
"intersect_all" to OperatorPrecedenceGroups.SET.precedence,
"except" to OperatorPrecedenceGroups.SET.precedence,
"except_distinct" to OperatorPrecedenceGroups.SET.precedence,
"except_all" to OperatorPrecedenceGroups.SET.precedence,
"outer_union" to OperatorPrecedenceGroups.SET.precedence,
"outer_union_distinct" to OperatorPrecedenceGroups.SET.precedence,
"outer_union_all" to OperatorPrecedenceGroups.SET.precedence,
"outer_intersect" to OperatorPrecedenceGroups.SET.precedence,
"outer_intersect_distinct" to OperatorPrecedenceGroups.SET.precedence,
"outer_intersect_all" to OperatorPrecedenceGroups.SET.precedence,
"outer_except" to OperatorPrecedenceGroups.SET.precedence,
"outer_except_distinct" to OperatorPrecedenceGroups.SET.precedence,
"outer_except_all" to OperatorPrecedenceGroups.SET.precedence,
// logical group
"or" to OperatorPrecedenceGroups.LOGICAL_OR.precedence,
"and" to OperatorPrecedenceGroups.LOGICAL_AND.precedence,
"not" to OperatorPrecedenceGroups.LOGICAL_NOT.precedence,
// equality group (TODO add other morphemes of equality/non-equality)
"=" to OperatorPrecedenceGroups.EQUITY.precedence,
"<>" to OperatorPrecedenceGroups.EQUITY.precedence,
"is" to OperatorPrecedenceGroups.EQUITY.precedence,
"is_not" to OperatorPrecedenceGroups.EQUITY.precedence,
"in" to OperatorPrecedenceGroups.EQUITY.precedence,
"not_in" to OperatorPrecedenceGroups.EQUITY.precedence,
// comparison group
"<" to OperatorPrecedenceGroups.COMPARISON.precedence,
"<=" to OperatorPrecedenceGroups.COMPARISON.precedence,
">" to OperatorPrecedenceGroups.COMPARISON.precedence,
">=" to OperatorPrecedenceGroups.COMPARISON.precedence,
"between" to OperatorPrecedenceGroups.COMPARISON.precedence, // note that this **must** be above 'AND'
"not_between" to OperatorPrecedenceGroups.COMPARISON.precedence, // note that this **must** be above 'AND'
"like" to OperatorPrecedenceGroups.COMPARISON.precedence,
"not_like" to OperatorPrecedenceGroups.COMPARISON.precedence,
// the addition group
"+" to OperatorPrecedenceGroups.ADDITION.precedence,
"-" to OperatorPrecedenceGroups.ADDITION.precedence,
"||" to OperatorPrecedenceGroups.ADDITION.precedence,
// multiply group (TODO add exponentiation)
"*" to OperatorPrecedenceGroups.MULTIPLY.precedence,
"/" to OperatorPrecedenceGroups.MULTIPLY.precedence,
"%" to OperatorPrecedenceGroups.MULTIPLY.precedence
)
//
// Character Classes
// Strings as place holders for immutable character arrays
//
private fun allCase(chars: String) = chars.toLowerCase() + chars.toUpperCase()
internal const val SIGN_CHARS = "+-"
internal const val NON_ZERO_DIGIT_CHARS = "123456789"
internal const val DIGIT_CHARS = "0" + NON_ZERO_DIGIT_CHARS
@JvmField internal val E_NOTATION_CHARS = allCase("E")
internal const val NON_OVERLOADED_OPERATOR_CHARS = "^%=@+~"
internal const val OPERATOR_CHARS = NON_OVERLOADED_OPERATOR_CHARS + "-*/<>|!"
@JvmField internal val ALPHA_CHARS = allCase("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
@JvmField internal val IDENT_START_CHARS = "_\$" + ALPHA_CHARS
@JvmField internal val IDENT_CONTINUE_CHARS = IDENT_START_CHARS + DIGIT_CHARS
internal const val NL_WHITESPACE_CHARS = "\u000D\u000A" // CR, LF
internal const val NON_NL_WHITESPACE_CHARS = "\u0009\u000B\u000C\u0020" // TAB, VT, FF, SPACE
internal const val ALL_WHITESPACE_CHARS = NL_WHITESPACE_CHARS + NON_NL_WHITESPACE_CHARS
internal const val DOUBLE_QUOTE_CHARS = "\""
internal const val SINGLE_QUOTE_CHARS = "'"
internal const val BACKTICK_CHARS = "`"