com.github.harbby.dsxparser.antlr4.SqlBase.g4 Maven / Gradle / Ivy
The newest version!
grammar SqlBase;
@header {
package com.github.harbby.dsxparser.antlr4;
}
tokens {
DELIMITER
}
singleExpression
: expression EOF
;
sampleType
: BERNOULLI
| SYSTEM
;
expression
: booleanExpression
;
whenClause
: WHEN | (ELSE IF) condition=booleanExpression THEN result=expression
;
booleanExpression
: valueExpression predicate[$valueExpression.ctx]? #predicated
| NOT booleanExpression #logicalNot
| left=booleanExpression operator=AND right=booleanExpression #logicalBinary
| left=booleanExpression operator=OR right=booleanExpression #logicalBinary
;
// workaround for https://github.com/antlr/antlr4/issues/780
predicate[ParserRuleContext value]
: comparisonOperator right=valueExpression #comparison
| NOT? BETWEEN lower=valueExpression AND upper=valueExpression #between
| NOT? IN '(' expression (',' expression)* ')' #inList
| IS NOT? NULL #nullPredicate
;
valueExpression
: primaryExpression #valueExpressionDefault
| operator=(MINUS | PLUS) valueExpression #arithmeticUnary
| left=valueExpression operator=(ASTERISK | SLASH | PERCENT) right=valueExpression #arithmeticBinary
| left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary
| left=valueExpression CONCAT right=valueExpression #concatenation
;
setQuantifier
: DISTINCT
| ALL
;
primaryExpression
: NULL #nullLiteral
| identifier string #typeConstructor
| DOUBLE_PRECISION string #typeConstructor
| number #numericLiteral
| booleanValue #booleanLiteral
| string #stringLiteral
| BINARY_LITERAL #binaryLiteral
| '?' #parameter
| POSITION '(' valueExpression IN valueExpression ')' #position
| IF condition=booleanExpression THEN result=expression whenClause* ELSE elseExpression=expression #simpleIfThen
| qualifiedName '(' ASTERISK ')' #functionCall
| qualifiedName '(' (setQuantifier? expression (',' expression)*)? ')' #functionCall
| ARRAY '[' (expression (',' expression)*)? ']' #arrayConstructor
| value=primaryExpression '[' index=valueExpression ']' #subscript
| value=primaryExpression '[' pos=valueExpression ',' len=valueExpression ']' #substring2
| identifier #columnReference
| base=primaryExpression '.' fieldName=identifier #dereference
| name=LOCALTIME ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
| name=LOCALTIMESTAMP ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
| name=CURRENT_USER #currentUser
| name=CURRENT_PATH #currentPath
// | SUBSTRING '(' valueExpression FROM valueExpression (FOR valueExpression)? ')' #substring
| NORMALIZE '(' valueExpression (',' normalForm)? ')' #normalize
// | EXTRACT '(' identifier FROM valueExpression ')' #extract
| '(' expression ')' #parenthesizedExpression
;
string
: STRING #basicStringLiteral
| UNICODE_STRING (UESCAPE STRING)? #unicodeStringLiteral
;
comparisonOperator
: EQ | NEQ | LT | LTE | GT | GTE
;
comparisonQuantifier
: ALL | SOME | ANY
;
booleanValue
: TRUE | FALSE
;
normalForm
: NFD | NFC | NFKD | NFKC
;
type
: type ARRAY
| ARRAY '<' type '>'
| MAP '<' type ',' type '>'
| ROW '(' identifier type (',' identifier type)* ')'
| baseType ('(' typeParameter (',' typeParameter)* ')')?
;
typeParameter
: INTEGER_VALUE | type
;
baseType
: TIME_WITH_TIME_ZONE
| TIMESTAMP_WITH_TIME_ZONE
| DOUBLE_PRECISION
| identifier
;
pathElement
: identifier '.' identifier #qualifiedArgument
| identifier #unqualifiedArgument
;
pathSpecification
: pathElement (',' pathElement)*
;
privilege
: SELECT | DELETE | INSERT | identifier
;
qualifiedName
: identifier ('.' identifier)*
;
identifier
: IDENTIFIER #unquotedIdentifier
| QUOTED_IDENTIFIER #quotedIdentifier
| nonReserved #unquotedIdentifier
| BACKQUOTED_IDENTIFIER #backQuotedIdentifier
| DIGIT_IDENTIFIER #digitIdentifier
;
number
: DECIMAL_VALUE #decimalLiteral
| DOUBLE_VALUE #doubleLiteral
| INTEGER_VALUE #integerLiteral
;
nonReserved
// IMPORTANT: this rule must only contain tokens. Nested rules are not supported. See SqlParser.exitNonReserved
: ADD | ALL | ANALYZE | ANY | ARRAY | ASC | AT
| BERNOULLI
| CALL | CASCADE | CATALOGS | COLUMN | COLUMNS | COMMENT | COMMIT | COMMITTED | CURRENT
| DATA | DATE | DAY | DESC | DISTRIBUTED
| EXCLUDING | EXPLAIN
| FILTER | FIRST | FOLLOWING | FORMAT | FUNCTIONS
| GRANT | GRANTS | GRAPHVIZ
| HOUR
| IF | INCLUDING | INPUT | INTERVAL | ISOLATION
| LAST | LATERAL | LEVEL | LIMIT | LOGICAL
| MAP | MINUTE | MONTH
| NFC | NFD | NFKC | NFKD | NO | NULLIF | NULLS
| ONLY | OPTION | ORDINALITY | OUTPUT | OVER
| PARTITION | PARTITIONS | PATH | POSITION | PRECEDING | PRIVILEGES | PROPERTIES | PUBLIC
| RANGE | READ | RENAME | REPEATABLE | REPLACE | RESET | RESTRICT | REVOKE | ROLLBACK | ROW | ROWS
| SCHEMA | SCHEMAS | SECOND | SERIALIZABLE | SESSION | SET | SETS
| SHOW | SOME | START | STATS | SUBSTRING | SYSTEM
| TABLES | TABLESAMPLE | TEXT | TIME | TIMESTAMP | TO | TRANSACTION | TRY_CAST | TYPE
| UNBOUNDED | UNCOMMITTED | USE
| VALIDATE | VERBOSE | VIEW
| WORK | WRITE
| YEAR
| ZONE
// dsx column name need:
| IN | DELETE | INSERT | TABLE
;
IN: 'IN';
ADD: 'ADD';
ALL: 'ALL';
ALTER: 'ALTER';
ANALYZE: 'ANALYZE';
AND: 'AND';
ANY: 'ANY';
ARRAY: 'ARRAY';
AS: 'AS';
ASC: 'ASC';
AT: 'AT';
BERNOULLI: 'BERNOULLI';
BETWEEN: 'BETWEEN';
BY: 'BY';
CALL: 'CALL';
CASCADE: 'CASCADE';
CASE: 'CASE';
CAST: 'CAST';
CATALOGS: 'CATALOGS';
COLUMN: 'COLUMN';
COLUMNS: 'COLUMNS';
COMMENT: 'COMMENT';
COMMIT: 'COMMIT';
COMMITTED: 'COMMITTED';
CONSTRAINT: 'CONSTRAINT';
CREATE: 'CREATE';
CROSS: 'CROSS';
CUBE: 'CUBE';
CURRENT: 'CURRENT';
// CURRENT_DATE: 'CURRENT_DATE';
// CURRENT_TIME: 'CURRENT_TIME';
// CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
CURRENT_PATH: 'CURRENT_PATH';
CURRENT_USER: 'CURRENT_USER';
DATA: 'DATA';
DATE: 'DATE';
DAY: 'DAY';
DEALLOCATE: 'DEALLOCATE';
DELETE: 'DELETE';
DESC: 'DESC';
DESCRIBE: 'DESCRIBE';
DISTINCT: 'DISTINCT';
DISTRIBUTED: 'DISTRIBUTED';
DROP: 'DROP';
ELSE: 'ELSE';
END: 'END';
ESCAPE: 'ESCAPE';
EXCEPT: 'EXCEPT';
EXCLUDING: 'EXCLUDING';
EXECUTE: 'EXECUTE';
EXISTS: 'EXISTS';
EXPLAIN: 'EXPLAIN';
// EXTRACT: 'EXTRACT';
FALSE: 'FALSE';
FILTER: 'FILTER';
FIRST: 'FIRST';
FOLLOWING: 'FOLLOWING';
FOR: 'FOR';
FORMAT: 'FORMAT';
FROM: 'FROM';
WINDOW: 'WINDOW';
SECONDS: 'SECONDS';
LATE: 'LATE';
DELAY: 'DELAY';
EVERY: 'EVERY';
TRIGGER: 'TRIGGER';
BATCH: 'BATCH';
FUNCTION: 'FUNCTION';
EXTEND: 'EXTEND';
WATERMARK: 'WATERMARK';
FULL: 'FULL';
FUNCTIONS: 'FUNCTIONS';
GRANT: 'GRANT';
GRANTS: 'GRANTS';
GRAPHVIZ: 'GRAPHVIZ';
HOUR: 'HOUR';
IF: 'IF';
INCLUDING: 'INCLUDING';
INNER: 'INNER';
INPUT: 'INPUT';
INSERT: 'INSERT';
INTERSECT: 'INTERSECT';
INTERVAL: 'INTERVAL';
INTO: 'INTO';
IS: 'IS';
ISOLATION: 'ISOLATION';
JOIN: 'JOIN';
LAST: 'LAST';
LATERAL: 'LATERAL';
LEVEL: 'LEVEL';
LIKE: 'LIKE';
LIMIT: 'LIMIT';
LOCALTIME: 'LOCALTIME';
LOCALTIMESTAMP: 'LOCALTIMESTAMP';
LOGICAL: 'LOGICAL';
MAP: 'MAP';
MINUTE: 'MINUTE';
MONTH: 'MONTH';
NATURAL: 'NATURAL';
NFC : 'NFC';
NFD : 'NFD';
NFKC : 'NFKC';
NFKD : 'NFKD';
NO: 'NO';
NORMALIZE: 'NORMALIZE';
NOT: 'NOT';
NULL: 'NULL';
NULLIF: 'NULLIF';
NULLS: 'NULLS';
ON: 'ON';
ONLY: 'ONLY';
OPTION: 'OPTION';
OR: 'OR';
ORDER: 'ORDER';
ORDINALITY: 'ORDINALITY';
OUTER: 'OUTER';
OUTPUT: 'OUTPUT';
OVER: 'OVER';
PARTITION: 'PARTITION';
PARTITIONS: 'PARTITIONS';
PATH: 'PATH';
POSITION: 'POSITION';
PRECEDING: 'PRECEDING';
PREPARE: 'PREPARE';
PRIVILEGES: 'PRIVILEGES';
PROPERTIES: 'PROPERTIES';
PUBLIC: 'PUBLIC';
RANGE: 'RANGE';
READ: 'READ';
RECURSIVE: 'RECURSIVE';
RENAME: 'RENAME';
REPEATABLE: 'REPEATABLE';
REPLACE: 'REPLACE';
RESET: 'RESET';
RESTRICT: 'RESTRICT';
REVOKE: 'REVOKE';
ROLLBACK: 'ROLLBACK';
ROLLUP: 'ROLLUP';
ROW: 'ROW';
ROWS: 'ROWS';
SCHEMA: 'SCHEMA';
SCHEMAS: 'SCHEMAS';
SECOND: 'SECOND';
SELECT: 'SELECT';
SERIALIZABLE: 'SERIALIZABLE';
SESSION: 'SESSION';
SET: 'SET';
SETS: 'SETS';
SHOW: 'SHOW';
SOME: 'SOME';
START: 'START';
STATS: 'STATS';
SUBSTRING: 'SUBSTRING';
SYSTEM: 'SYSTEM';
TABLE: 'TABLE';
TABLES: 'TABLES';
TABLESAMPLE: 'TABLESAMPLE';
TEXT: 'TEXT';
THEN: 'THEN';
TIME: 'TIME';
TIMESTAMP: 'TIMESTAMP';
TO: 'TO';
TRANSACTION: 'TRANSACTION';
TRUE: 'TRUE';
TRY_CAST: 'TRY_CAST';
TYPE: 'TYPE';
UESCAPE: 'UESCAPE';
UNBOUNDED: 'UNBOUNDED';
UNCOMMITTED: 'UNCOMMITTED';
UNION: 'UNION';
UNNEST: 'UNNEST';
USE: 'USE';
USING: 'USING';
VALIDATE: 'VALIDATE';
VALUES: 'VALUES';
VERBOSE: 'VERBOSE';
VIEW: 'VIEW';
WHEN: 'WHEN';
WHERE: 'WHERE';
WITH: 'WITH';
WORK: 'WORK';
WRITE: 'WRITE';
YEAR: 'YEAR';
ZONE: 'ZONE';
EQ : '=';
NEQ : '<>' | '!=';
LT : '<';
LTE : '<=';
GT : '>';
GTE : '>=';
PLUS: '+';
MINUS: '-';
ASTERISK: '*';
SLASH: '/';
PERCENT: '%';
CONCAT: '||' | ':';
STRING
: '\'' ( ~'\'' | '\'\'' )* '\''
;
UNICODE_STRING
: 'U&\'' ( ~'\'' | '\'\'' )* '\''
;
// Note: we allow any character inside the binary literal and validate
// its a correct literal when the AST is being constructed. This
// allows us to provide more meaningful error messages to the user
BINARY_LITERAL
: 'X\'' (~'\'')* '\''
;
INTEGER_VALUE
: DIGIT+
;
DECIMAL_VALUE
: DIGIT+ '.' DIGIT*
| '.' DIGIT+
;
DOUBLE_VALUE
: DIGIT+ ('.' DIGIT*)? EXPONENT
| '.' DIGIT+ EXPONENT
;
// remove ':' -> (LETTER | '_') (LETTER | DIGIT | '_' | '@' | ':')*
IDENTIFIER
: ('@' | '$' | LETTER | '_') (LETTER | DIGIT | '_')*
;
// remove ':' -> DIGIT (LETTER | DIGIT | '_' | '@' | ':')+
DIGIT_IDENTIFIER
: DIGIT (LETTER | DIGIT | '_')+
;
QUOTED_IDENTIFIER
: '"' ( ~'"' | '""' )* '"'
;
BACKQUOTED_IDENTIFIER
: '`' ( ~'`' | '``' )* '`'
;
TIME_WITH_TIME_ZONE
: 'TIME' WS 'WITH' WS 'TIME' WS 'ZONE'
;
TIMESTAMP_WITH_TIME_ZONE
: 'TIMESTAMP' WS 'WITH' WS 'TIME' WS 'ZONE'
;
DOUBLE_PRECISION
: 'DOUBLE' WS 'PRECISION'
;
fragment EXPONENT
: 'E' [+-]? DIGIT+
;
fragment DIGIT
: [0-9]
;
fragment LETTER
: [A-Z]
;
SIMPLE_COMMENT
: '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
;
BRACKETED_COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
;
WS
: [ \r\n\t]+ -> channel(HIDDEN)
;
// Catch-all for anything we can't recognize.
// We use this to be able to ignore and recover all the text
// when splitting statements with DelimiterLexer
UNRECOGNIZED
: .
;