org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensions.g4 Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of iceberg-spark-extensions-3.5_2.13 Show documentation
Show all versions of iceberg-spark-extensions-3.5_2.13 Show documentation
A table format for huge analytic datasets
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* This file is an adaptation of Presto's and Spark's grammar files.
*/
grammar IcebergSqlExtensions;
@lexer::members {
  /**
   * Decides whether the token just matched (one that contains a dot) may stand as a
   * decimal literal. The token is accepted only when the next input character is none of:
   * a letter in 'A'..'Z', a digit in '0'..'9', or an underscore.
   *
   * Examples:
   *   "2.3"  - "2." is rejected because the digit '3' follows it.
   *   "2.3_" - "2.3" is rejected because '_' follows it.
   *   "2.3W" - "2.3" is rejected because 'W' follows it.
   *   "12.0D 34.E2+0.12 " - "12.0D" is accepted because a space follows it, and
   *   "34.E2" is accepted because the symbol '+' follows it.
   *
   * NOTE(review): only upper-case letters are tested here; presumably the character
   * stream is upper-cased before it reaches this lexer - confirm against the caller.
   *
   * @return true when the following character does not continue an identifier-like run
   */
  public boolean isValidDecimal() {
    int lookahead = _input.LA(1);
    boolean upperLetter = lookahead >= 'A' && lookahead <= 'Z';
    boolean digit = lookahead >= '0' && lookahead <= '9';
    return !(upperLetter || digit || lookahead == '_');
  }

  /**
   * Consulted right after the two-character opener of a bracketed comment has been
   * matched. When the very next character is '+', the text is meant to be treated as a
   * hint later on, so it must not be consumed as a bracketed comment.
   *
   * @return true when the next input character is '+'
   */
  public boolean isHint() {
    return _input.LA(1) == '+';
  }
}
// Entry rule: exactly one statement followed by end of input.
singleStatement
: statement EOF
;
// All statement forms this grammar accepts. Every alternative carries a '#label', so
// ANTLR generates a separate parse-tree context per form. Apart from CALL, every form
// starts with ALTER TABLE followed by a (possibly multi-part) table name.
statement
// Procedure call with an optional, comma-separated argument list.
: CALL multipartIdentifier '(' (callArgument (',' callArgument)*)? ')' #call
// Partition field maintenance; the optional 'AS name' is captured in the 'name' label.
| ALTER TABLE multipartIdentifier ADD PARTITION FIELD transform (AS name=identifier)? #addPartitionField
| ALTER TABLE multipartIdentifier DROP PARTITION FIELD transform #dropPartitionField
| ALTER TABLE multipartIdentifier REPLACE PARTITION FIELD transform WITH transform (AS name=identifier)? #replacePartitionField
// Write distribution / ordering (see writeSpec).
| ALTER TABLE multipartIdentifier WRITE writeSpec #setWriteDistributionAndOrdering
// Identifier fields; the keyword is spelled IDENTIFIER in SQL (token IDENTIFIER_KW).
| ALTER TABLE multipartIdentifier SET IDENTIFIER_KW FIELDS fieldList #setIdentifierFields
| ALTER TABLE multipartIdentifier DROP IDENTIFIER_KW FIELDS fieldList #dropIdentifierFields
// Branch and tag creation / replacement / removal.
| ALTER TABLE multipartIdentifier createReplaceBranchClause #createOrReplaceBranch
| ALTER TABLE multipartIdentifier createReplaceTagClause #createOrReplaceTag
| ALTER TABLE multipartIdentifier DROP BRANCH (IF EXISTS)? identifier #dropBranch
| ALTER TABLE multipartIdentifier DROP TAG (IF EXISTS)? identifier #dropTag
;
// Tag creation clause. Accepted spellings:
//   REPLACE TAG name ...            CREATE OR REPLACE TAG name ...
//   CREATE TAG [IF NOT EXISTS] name ...
// IF NOT EXISTS is only accepted by the plain CREATE form, not by the REPLACE forms.
createReplaceTagClause
: (CREATE OR)? REPLACE TAG identifier tagOptions
| CREATE TAG (IF NOT EXISTS)? identifier tagOptions
;
// Branch creation clause; same shape as the tag clause, with BRANCH and branchOptions.
createReplaceBranchClause
: (CREATE OR)? REPLACE BRANCH identifier branchOptions
| CREATE BRANCH (IF NOT EXISTS)? identifier branchOptions
;
// Trailing options of a tag clause, in fixed order; each part may be omitted:
//   AS OF VERSION <snapshotId>, then RETAIN <number> <timeUnit>.
tagOptions
    : (AS OF VERSION snapshotId)? refRetain?
    ;

// Trailing options of a branch clause, in fixed order; each part may be omitted:
//   AS OF VERSION <snapshotId>, then RETAIN ..., then WITH SNAPSHOT RETENTION ...
branchOptions
    : (AS OF VERSION snapshotId)? refRetain? snapshotRetention?
    ;
// WITH SNAPSHOT RETENTION followed by a snapshot count, a maximum age, or a count
// and then an age (in that order). At least one of the two must be present.
snapshotRetention
    : WITH SNAPSHOT RETENTION
      ( minSnapshotsToKeep maxSnapshotAge?
      | maxSnapshotAge
      )
    ;
// RETAIN <number> <timeUnit>, e.g. RETAIN 7 DAYS.
refRetain
: RETAIN number timeUnit
;
// <number> <timeUnit>; used as the age part of snapshotRetention.
maxSnapshotAge
: number timeUnit
;
// <number> SNAPSHOTS; used as the count part of snapshotRetention.
minSnapshotsToKeep
: number SNAPSHOTS
;
// Zero or more distribution / ordering specs, in any order. The grammar itself accepts
// an empty spec as well as repeated specs.
// NOTE(review): any rejection of empty or duplicated specs has to happen outside this
// grammar - confirm where that is done.
writeSpec
: (writeDistributionSpec | writeOrderingSpec)*
;
// The only distribution spelling accepted: DISTRIBUTED BY PARTITION.
writeDistributionSpec
: DISTRIBUTED BY PARTITION
;
// Either [LOCALLY] ORDERED BY <order>, or the single keyword UNORDERED.
writeOrderingSpec
: LOCALLY? ORDERED BY order
| UNORDERED
;
// One argument of a CALL statement: either a bare expression (positional) or
// 'name => expression' (named).
callArgument
: expression #positionalArgument
| identifier '=>' expression #namedArgument
;
// Entry rule for parsing a sort order on its own: one order followed by end of input.
singleOrder
: order EOF
;
// Comma-separated list of order fields, optionally wrapped in one pair of parentheses.
// Both alternatives collect their items into the same 'fields' list label.
order
: fields+=orderField (',' fields+=orderField)*
| '(' fields+=orderField (',' fields+=orderField)* ')'
;
// A transform with an optional direction (ASC or DESC, label 'direction') and an
// optional NULLS FIRST / NULLS LAST (label 'nullOrder').
orderField
: transform direction=(ASC | DESC)? (NULLS nullOrder=(FIRST | LAST))?
;
// A partition/sort transform: either a plain (multi-part) column reference, or a
// function-style application 'name(arg, ...)' that requires at least one argument.
transform
: multipartIdentifier #identityTransform
| transformName=identifier
'(' arguments+=transformArgument (',' arguments+=transformArgument)* ')' #applyTransform
;
// An argument of an applied transform: a column reference or a literal constant.
transformArgument
: multipartIdentifier
| constant
;
// Expressions allowed as CALL arguments: a literal constant, MAP(...), or ARRAY(...).
expression
: constant
| stringMap
| stringArray
;
// Literal constants. '#stringLiteral' accepts one or more adjacent STRING tokens;
// '#typeConstructor' is an identifier immediately followed by a single STRING.
constant
: number #numericLiteral
| booleanValue #booleanLiteral
| STRING+ #stringLiteral
| identifier STRING #typeConstructor
;
// MAP( c1, c2, ... ) with at least one constant. The grammar does not pair entries
// up or require an even count.
stringMap
: MAP '(' constant (',' constant)* ')'
;
// ARRAY( c1, c2, ... ) with at least one constant.
stringArray
: ARRAY '(' constant (',' constant)* ')'
;
booleanValue
: TRUE | FALSE
;
// Numeric literal: one numeric token with an optional leading MINUS. A leading PLUS
// is not accepted. The label on each alternative names the kind of literal token.
number
: MINUS? EXPONENT_VALUE #exponentLiteral
| MINUS? DECIMAL_VALUE #decimalLiteral
| MINUS? INTEGER_VALUE #integerLiteral
| MINUS? BIGINT_LITERAL #bigIntLiteral
| MINUS? SMALLINT_LITERAL #smallIntLiteral
| MINUS? TINYINT_LITERAL #tinyIntLiteral
| MINUS? DOUBLE_LITERAL #doubleLiteral
| MINUS? FLOAT_LITERAL #floatLiteral
| MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
;
// Dot-separated name such as catalog.db.table; each segment is collected in 'parts'.
multipartIdentifier
: parts+=identifier ('.' parts+=identifier)*
;
// A single name segment: a plain IDENTIFIER token, a back-quoted identifier, or a
// keyword listed in nonReserved. The first and third alternatives intentionally share
// the '#unquotedIdentifier' label, so they produce the same context type.
identifier
: IDENTIFIER #unquotedIdentifier
| quotedIdentifier #quotedIdentifierAlternative
| nonReserved #unquotedIdentifier
;
quotedIdentifier
: BACKQUOTED_IDENTIFIER
;
// Comma-separated list of (multi-part) field names, collected in 'fields'.
fieldList
: fields+=multipartIdentifier (',' fields+=multipartIdentifier)*
;
// Keywords that may also be used as plain (unquoted) identifiers.
// Keep this list in sync with the keyword token rules: the lexer always emits the
// keyword token for these spellings, so a keyword that is left out of this list can
// only be used as a name when it is back-quoted. The list covers every keyword token
// of this grammar, including RETENTION and ARRAY.
nonReserved
    : ADD | ALTER | AS | ASC | BRANCH | BY | CALL | CREATE | DAYS | DESC | DROP | EXISTS | FIELD | FIRST | HOURS | IF | LAST | NOT | NULLS | OF | OR | ORDERED | PARTITION | TABLE | WRITE
    | DISTRIBUTED | LOCALLY | MINUTES | MONTHS | UNORDERED | REPLACE | RETAIN | RETENTION | VERSION | WITH | IDENTIFIER_KW | FIELDS | SET | SNAPSHOT | SNAPSHOTS
    | TAG | TRUE | FALSE
    | MAP | ARRAY
    ;
// Snapshot id used after AS OF VERSION; syntactically any 'number'.
snapshotId
: number
;
// NOTE(review): no rule in the visible grammar references numSnapshots
// (minSnapshotsToKeep uses 'number SNAPSHOTS' directly) - confirm whether external
// code relies on its generated context before removing it.
numSnapshots
: number
;
// Time units accepted after a number. MONTHS exists as a keyword token but is not
// accepted here.
timeUnit
: DAYS
| HOURS
| MINUTES
;
// Keyword tokens. They are declared before IDENTIFIER on purpose: when two lexer rules
// match the same text, ANTLR picks the rule that is defined first, so these spellings
// are emitted as keywords rather than identifiers.
// The literals are upper-case only.
// NOTE(review): presumably the input is upper-cased before lexing - confirm.
ADD: 'ADD';
ALTER: 'ALTER';
AS: 'AS';
ASC: 'ASC';
BRANCH: 'BRANCH';
BY: 'BY';
CALL: 'CALL';
DAYS: 'DAYS';
DESC: 'DESC';
DISTRIBUTED: 'DISTRIBUTED';
DROP: 'DROP';
EXISTS: 'EXISTS';
FIELD: 'FIELD';
FIELDS: 'FIELDS';
FIRST: 'FIRST';
HOURS: 'HOURS';
IF : 'IF';
LAST: 'LAST';
LOCALLY: 'LOCALLY';
MINUTES: 'MINUTES';
MONTHS: 'MONTHS';
CREATE: 'CREATE';
NOT: 'NOT';
NULLS: 'NULLS';
OF: 'OF';
OR: 'OR';
ORDERED: 'ORDERED';
PARTITION: 'PARTITION';
REPLACE: 'REPLACE';
RETAIN: 'RETAIN';
RETENTION: 'RETENTION';
// Named IDENTIFIER_KW because the name IDENTIFIER is taken by the identifier token.
IDENTIFIER_KW: 'IDENTIFIER';
SET: 'SET';
SNAPSHOT: 'SNAPSHOT';
SNAPSHOTS: 'SNAPSHOTS';
TABLE: 'TABLE';
TAG: 'TAG';
UNORDERED: 'UNORDERED';
VERSION: 'VERSION';
WITH: 'WITH';
WRITE: 'WRITE';
TRUE: 'TRUE';
FALSE: 'FALSE';
MAP: 'MAP';
ARRAY: 'ARRAY';
// Sign tokens. Only MINUS is referenced by the parser rules visible in this file.
PLUS: '+';
MINUS: '-';
// String literal delimited by single or double quotes. Inside the literal a backslash
// consumes the next character whatever it is (so an escaped quote does not end the
// literal); the delimiting quote and a backslash may not appear unescaped.
STRING
: '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
| '"' ( ~('"'|'\\') | ('\\' .) )* '"'
;
// Numeric literal tokens. A trailing suffix selects the token: L (BIGINT), S (SMALLINT),
// Y (TINYINT), F (FLOAT), D (DOUBLE), BD (BIGDECIMAL); no suffix gives INTEGER_VALUE,
// EXPONENT_VALUE or DECIMAL_VALUE.
// Alternatives built on DECIMAL_DIGITS (those containing a dot) are guarded by the
// semantic predicate isValidDecimal() from the lexer members, which rejects the match
// when the character after the token is 'A'..'Z', '0'..'9' or '_'.
BIGINT_LITERAL
: DIGIT+ 'L'
;
SMALLINT_LITERAL
: DIGIT+ 'S'
;
TINYINT_LITERAL
: DIGIT+ 'Y'
;
INTEGER_VALUE
: DIGIT+
;
// Digits with a mandatory exponent, e.g. 1E10 or 1.5E-3.
EXPONENT_VALUE
: DIGIT+ EXPONENT
| DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
;
// Digits containing a dot and no exponent, e.g. 1.5, 1. or .5.
DECIMAL_VALUE
: DECIMAL_DIGITS {isValidDecimal()}?
;
FLOAT_LITERAL
: DIGIT+ EXPONENT? 'F'
| DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}?
;
DOUBLE_LITERAL
: DIGIT+ EXPONENT? 'D'
| DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
;
BIGDECIMAL_LITERAL
: DIGIT+ EXPONENT? 'BD'
| DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
;
// Unquoted identifier: one or more of 'A'..'Z', '0'..'9' and '_'. A leading digit is
// allowed by this rule. Lower-case letters are not part of LETTER.
// NOTE(review): presumably the input is upper-cased before lexing - confirm.
IDENTIFIER
: (LETTER | DIGIT | '_')+
;
// Back-quoted identifier. A doubled back-quote inside the quotes is consumed as part
// of the token instead of ending it; the token text is not unescaped by the lexer.
BACKQUOTED_IDENTIFIER
: '`' ( ~'`' | '``' )* '`'
;
// Fragments are building blocks for other lexer rules and never become tokens.
// Digits with a dot: '12.', '12.34' or '.34' (a lone '.' does not match).
fragment DECIMAL_DIGITS
: DIGIT+ '.' DIGIT*
| '.' DIGIT+
;
// Exponent part: 'E', an optional sign, and at least one digit.
fragment EXPONENT
: 'E' [+-]? DIGIT+
;
fragment DIGIT
: [0-9]
;
fragment LETTER
: [A-Z]
;
// Line comment: '--' up to the end of the line. A backslash directly followed by a
// newline is consumed as part of the comment, so the comment continues on the next
// line. The line terminator is optional, which lets a comment end at end of input.
// Sent to the hidden channel, so the parser never sees it.
SIMPLE_COMMENT
: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
;
// Block comment, which may nest (the rule refers to itself). The predicate
// {!isHint()}? runs right after the opener and rejects the match when the next
// character is '+', so that text is not swallowed as a comment. The body is matched
// non-greedily, and the token is sent to the hidden channel.
BRACKETED_COMMENT
: '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
;
// Runs of spaces, carriage returns, newlines and tabs; sent to the hidden channel.
WS
: [ \r\n\t]+ -> channel(HIDDEN)
;
// Catch-all for anything we can't recognize.
// We use this to be able to ignore and recover all the text
// when splitting statements with DelimiterLexer
// It matches exactly one character and must stay the last lexer rule, so that every
// other rule matching the same single character takes precedence over it.
UNRECOGNIZED
: .
;