com.mysql.cj.xdevapi.ExprParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mysql-connector-j Show documentation
Show all versions of mysql-connector-j Show documentation
JDBC Type 4 driver for MySQL.
/*
* Copyright (c) 2015, 2024, Oracle and/or its affiliates.
*
* This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, as published by
* the Free Software Foundation.
*
* This program is designed to work with certain software that is licensed under separate terms, as designated in a particular file or component or in
* included license documentation. The authors of MySQL hereby grant you an additional permission to link the program and your derivative works with the
* separately licensed software that they have either included with the program or referenced in the documentation.
*
* Without limiting anything contained in the foregoing, this file, which is part of MySQL Connector/J, is also subject to the Universal FOSS Exception,
* version 1.0, a copy of which can be found at http://oss.oracle.com/licenses/universal-foss-exception.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, for more details.
*
* You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package com.mysql.cj.xdevapi;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Supplier;
import com.mysql.cj.exceptions.WrongArgumentException;
import com.mysql.cj.x.protobuf.MysqlxCrud.Column;
import com.mysql.cj.x.protobuf.MysqlxCrud.Order;
import com.mysql.cj.x.protobuf.MysqlxCrud.Projection;
import com.mysql.cj.x.protobuf.MysqlxExpr.Array;
import com.mysql.cj.x.protobuf.MysqlxExpr.ColumnIdentifier;
import com.mysql.cj.x.protobuf.MysqlxExpr.DocumentPathItem;
import com.mysql.cj.x.protobuf.MysqlxExpr.Expr;
import com.mysql.cj.x.protobuf.MysqlxExpr.FunctionCall;
import com.mysql.cj.x.protobuf.MysqlxExpr.Identifier;
import com.mysql.cj.x.protobuf.MysqlxExpr.Object;
import com.mysql.cj.x.protobuf.MysqlxExpr.Object.ObjectField;
import com.mysql.cj.x.protobuf.MysqlxExpr.Operator;
// Grammar includes precedence & associativity of binary operators:
// (^ refers to the preceding production)
// (c.f. https://dev.mysql.com/doc/refman/8.0/en/operator-precedence.html)
//
// AtomicExpr: [Unary]OpExpr | Identifier | FunctionCall | '(' Expr ')'
//
// AddSubIntervalExpr: ^ (ADD/SUB 'INTERVAL' BitExpr UNIT)*
//
// MulDivExpr: ^ (STAR/SLASH/MOD ^)*
//
// AddSubExpr: ^ (ADD/SUB ^)*
//
// ShiftExpr: ^ (LSHIFT/RSHIFT ^)*
//
// BitExpr: ^ (BITAND/BITOR/BITXOR ^)*
//
// CompExpr: ^ (GE/GT/LE/LT/EQ/NE ^)*
//
// IlriExpr(ilri=IS/LIKE/REGEXP/IN/BETWEEN): ^ (ilri ^)
//
// AndExpr: ^ (AND ^)*
//
// OrExpr: ^ (OR ^)*
//
// Expr: ^
//
/**
* Expression parser for X protocol.
*/
public class ExprParser {
/**
 * Escape table used by the lexer for quoted literals: the key is the character that follows the escape introducer, the value is the character appended to
 * the literal in its place.
 */
private static final Map<Character, Character> escapeChars = new HashMap<>();
static { // Replicated from JsonParser.EscapeChar
    escapeChars.put('"', '"');
    escapeChars.put('\'', '\'');
    escapeChars.put('`', '`');
    escapeChars.put('\\', '\\');
    escapeChars.put('/', '/');
    escapeChars.put('b', '\b');
    escapeChars.put('f', '\f');
    escapeChars.put('n', '\n');
    escapeChars.put('r', '\r');
    escapeChars.put('t', '\t');
}
/** String being parsed. */
String string;
/** Token stream produced by lexer. */
List<Token> tokens = new ArrayList<>();
/** Parser's position in token stream (index of the next token to be consumed). */
int tokenPos = 0;
/**
 * Mapping of names to positions for named placeholders. Used for both string values ":arg" and numeric values ":2".
 */
Map<String, Integer> placeholderNameToPosition = new HashMap<>();
/** Number of distinct placeholders seen so far; also the position assigned to the next new placeholder. */
int positionalPlaceholderCount = 0;
/** Are relational columns identifiers allowed? */
private boolean allowRelationalColumns;
/**
 * Creates a parser for the given expression with relational column identifiers allowed.
 *
 * @param s
 *            expression string to parse
 */
public ExprParser(String s) {
    this(s, true);
}
/**
 * Creates a parser for the given expression and immediately tokenizes it.
 *
 * @param s
 *            expression string to parse
 * @param allowRelationalColumns
 *            are relational columns identifiers allowed?
 */
public ExprParser(String s, boolean allowRelationalColumns) {
    this.allowRelationalColumns = allowRelationalColumns;
    this.string = s;
    // the whole input is tokenized up front; parsing methods then walk this.tokens
    lex();
}
/**
 * Kinds of token the lexer can emit: operators and punctuation, literals, identifiers and reserved words.
 */
private enum TokenType {
    NOT, AND, ANDAND, OR, OROR, XOR, IS, LPAREN, RPAREN, LSQBRACKET, RSQBRACKET, BETWEEN, TRUE, NULL, FALSE, IN, LIKE, INTERVAL,
    REGEXP, ESCAPE, IDENT, LSTRING, LNUM_INT, LNUM_DOUBLE, DOT, DOLLAR, COMMA, EQ, NE, GT, GE, LT, LE, BITAND, BITOR, BITXOR,
    LSHIFT, RSHIFT, PLUS, MINUS, STAR, SLASH, HEX, BIN, NEG, BANG, EROTEME, MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH,
    QUARTER, YEAR, SECOND_MICROSECOND, MINUTE_MICROSECOND, MINUTE_SECOND, HOUR_MICROSECOND, HOUR_SECOND, HOUR_MINUTE,
    DAY_MICROSECOND, DAY_SECOND, DAY_MINUTE, DAY_HOUR, YEAR_MONTH, DOUBLESTAR, MOD, COLON, ORDERBY_ASC, ORDERBY_DESC, AS, LCURLY,
    RCURLY, DOTSTAR, CAST, DECIMAL, UNSIGNED, SIGNED, INTEGER, DATE, TIME, DATETIME, CHAR, BINARY, JSON, COLDOCPATH, OVERLAPS
}
/**
 * A lexical token: its {@link TokenType} together with the text it carries.
 */
static class Token {
    TokenType type;
    String value;

    /**
     * Creates a token whose value is a single character.
     *
     * @param x
     *            token type
     * @param c
     *            the character forming the token value
     */
    public Token(TokenType x, char c) {
        this.type = x;
        this.value = String.valueOf(c);
    }

    /**
     * Creates a token with the given type and value.
     *
     * @param t
     *            token type
     * @param v
     *            token value
     */
    public Token(TokenType t, String v) {
        this.type = t;
        this.value = v;
    }

    /**
     * Renders identifiers and literals as {@code TYPE(value)}; every other token as just its type name.
     */
    @Override
    public String toString() {
        switch (this.type) {
            case IDENT:
            case LNUM_INT:
            case LNUM_DOUBLE:
            case LSTRING:
                return this.type.toString() + "(" + this.value + ")";
            default:
                return this.type.toString();
        }
    }
}
/** Mapping of reserved words (lower case) to token types. */
static Map<String, TokenType> reservedWords = new HashMap<>();
static {
    // logical and comparison keywords
    reservedWords.put("and", TokenType.AND);
    reservedWords.put("or", TokenType.OR);
    reservedWords.put("xor", TokenType.XOR);
    reservedWords.put("is", TokenType.IS);
    reservedWords.put("not", TokenType.NOT);
    reservedWords.put("like", TokenType.LIKE);
    reservedWords.put("in", TokenType.IN);
    reservedWords.put("regexp", TokenType.REGEXP);
    reservedWords.put("between", TokenType.BETWEEN);
    reservedWords.put("interval", TokenType.INTERVAL);
    reservedWords.put("escape", TokenType.ESCAPE);
    reservedWords.put("div", TokenType.SLASH);
    reservedWords.put("hex", TokenType.HEX);
    reservedWords.put("bin", TokenType.BIN);
    // literals
    reservedWords.put("true", TokenType.TRUE);
    reservedWords.put("false", TokenType.FALSE);
    reservedWords.put("null", TokenType.NULL);
    // interval units
    reservedWords.put("microsecond", TokenType.MICROSECOND);
    reservedWords.put("second", TokenType.SECOND);
    reservedWords.put("minute", TokenType.MINUTE);
    reservedWords.put("hour", TokenType.HOUR);
    reservedWords.put("day", TokenType.DAY);
    reservedWords.put("week", TokenType.WEEK);
    reservedWords.put("month", TokenType.MONTH);
    reservedWords.put("quarter", TokenType.QUARTER);
    reservedWords.put("year", TokenType.YEAR);
    reservedWords.put("second_microsecond", TokenType.SECOND_MICROSECOND);
    reservedWords.put("minute_microsecond", TokenType.MINUTE_MICROSECOND);
    reservedWords.put("minute_second", TokenType.MINUTE_SECOND);
    reservedWords.put("hour_microsecond", TokenType.HOUR_MICROSECOND);
    reservedWords.put("hour_second", TokenType.HOUR_SECOND);
    reservedWords.put("hour_minute", TokenType.HOUR_MINUTE);
    reservedWords.put("day_microsecond", TokenType.DAY_MICROSECOND);
    reservedWords.put("day_second", TokenType.DAY_SECOND);
    reservedWords.put("day_minute", TokenType.DAY_MINUTE);
    reservedWords.put("day_hour", TokenType.DAY_HOUR);
    reservedWords.put("year_month", TokenType.YEAR_MONTH);
    // ordering and aliasing
    reservedWords.put("asc", TokenType.ORDERBY_ASC);
    reservedWords.put("desc", TokenType.ORDERBY_DESC);
    reservedWords.put("as", TokenType.AS);
    // CAST and its type arguments
    reservedWords.put("cast", TokenType.CAST);
    reservedWords.put("decimal", TokenType.DECIMAL);
    reservedWords.put("unsigned", TokenType.UNSIGNED);
    reservedWords.put("signed", TokenType.SIGNED);
    reservedWords.put("integer", TokenType.INTEGER);
    reservedWords.put("date", TokenType.DATE);
    reservedWords.put("time", TokenType.TIME);
    reservedWords.put("datetime", TokenType.DATETIME);
    reservedWords.put("char", TokenType.CHAR);
    reservedWords.put("binary", TokenType.BINARY);
    // JSON has its own token type; mapping it to BINARY made the JSON branch of CAST unreachable and let "JSON(n)" through
    reservedWords.put("json", TokenType.JSON);
    reservedWords.put("overlaps", TokenType.OVERLAPS);
}
/**
 * Tests whether the character right after position {@code i} exists and equals {@code c}.
 *
 * @param i
 *            The current position in the string
 * @param c
 *            character to compare with
 * @return true if there is a following character and it equals {@code c}
 */
boolean nextCharEquals(int i, char c) {
    int next = i + 1;
    if (next >= this.string.length()) {
        return false;
    }
    return this.string.charAt(next) == c;
}
/**
 * Lexes an integer or floating point number and appends it to the token stream. Must be called with the position on the first character of the number (a
 * digit or '.'). The token is {@code LNUM_INT} unless a '.' or an exponent marker was seen, in which case it is {@code LNUM_DOUBLE}. The overall shape of
 * the number is not validated here (for instance, repeated '.' characters are accepted into the token).
 *
 * @param i
 *            The current position in the string
 * @return the position of the last character of the number, so that the caller's loop increment moves on to the character following it
 */
private int lexNumber(int i) {
    final int begin = i;
    final int length = this.string.length();
    boolean integral = true;
    int pos = i;
    while (pos < length) {
        char ch = this.string.charAt(pos);
        if (ch == '.') {
            integral = false;
        } else if (ch == 'e' || ch == 'E') {
            integral = false;
            // an exponent may carry a sign, which belongs to the number
            if (nextCharEquals(pos, '-') || nextCharEquals(pos, '+')) {
                pos++;
            }
        } else if (!Character.isDigit(ch)) {
            break;
        }
        pos++;
    }
    TokenType kind = integral ? TokenType.LNUM_INT : TokenType.LNUM_DOUBLE;
    this.tokens.add(new Token(kind, this.string.substring(begin, pos)));
    return pos - 1;
}
/**
 * Lexer for X DevAPI expression language. Scans {@link #string} from left to right and appends the recognized tokens to {@link #tokens}.
 * <p>
 * Whitespace is skipped and numbers are delegated to {@link #lexNumber(int)}. Text quoted with single or double quotes becomes an {@code LSTRING} token,
 * text quoted with backticks becomes an {@code IDENT} token. Words found in {@link #reservedWords} are matched case-insensitively (independently of the
 * default locale) and emitted with a normalized value; any other word becomes an {@code IDENT} token with its original spelling.
 *
 * @throws WrongArgumentException
 *             if a character cannot start any token, or if a quoted literal or an escape sequence is unterminated or malformed
 */
void lex() {
    for (int i = 0; i < this.string.length(); ++i) {
        int start = i; // for routines that consume more than one char
        char c = this.string.charAt(i);
        if (Character.isWhitespace(c)) {
            // ignore
        } else if (Character.isDigit(c)) {
            i = lexNumber(i);
        } else if (!(c == '_' || Character.isUnicodeIdentifierStart(c))) {
            // non-identifier, e.g. operator or quoted literal
            switch (c) {
                case ':':
                    this.tokens.add(new Token(TokenType.COLON, c));
                    break;
                case '+':
                    this.tokens.add(new Token(TokenType.PLUS, c));
                    break;
                case '-':
                    if (nextCharEquals(i, '>')) {
                        i++;
                        this.tokens.add(new Token(TokenType.COLDOCPATH, "->"));
                    } else {
                        this.tokens.add(new Token(TokenType.MINUS, c));
                    }
                    break;
                case '*':
                    if (nextCharEquals(i, '*')) {
                        i++;
                        this.tokens.add(new Token(TokenType.DOUBLESTAR, "**"));
                    } else {
                        this.tokens.add(new Token(TokenType.STAR, c));
                    }
                    break;
                case '/':
                    this.tokens.add(new Token(TokenType.SLASH, c));
                    break;
                case '$':
                    this.tokens.add(new Token(TokenType.DOLLAR, c));
                    break;
                case '%':
                    this.tokens.add(new Token(TokenType.MOD, c));
                    break;
                case '=':
                    // "=" and "==" are the same operator; both are emitted as "=="
                    if (nextCharEquals(i, '=')) {
                        i++;
                    }
                    this.tokens.add(new Token(TokenType.EQ, "=="));
                    break;
                case '&':
                    if (nextCharEquals(i, '&')) {
                        i++;
                        this.tokens.add(new Token(TokenType.ANDAND, "&&"));
                    } else {
                        this.tokens.add(new Token(TokenType.BITAND, c));
                    }
                    break;
                case '|':
                    if (nextCharEquals(i, '|')) {
                        i++;
                        this.tokens.add(new Token(TokenType.OROR, "||"));
                    } else {
                        this.tokens.add(new Token(TokenType.BITOR, c));
                    }
                    break;
                case '^':
                    this.tokens.add(new Token(TokenType.BITXOR, c));
                    break;
                case '(':
                    this.tokens.add(new Token(TokenType.LPAREN, c));
                    break;
                case ')':
                    this.tokens.add(new Token(TokenType.RPAREN, c));
                    break;
                case '[':
                    this.tokens.add(new Token(TokenType.LSQBRACKET, c));
                    break;
                case ']':
                    this.tokens.add(new Token(TokenType.RSQBRACKET, c));
                    break;
                case '{':
                    this.tokens.add(new Token(TokenType.LCURLY, c));
                    break;
                case '}':
                    this.tokens.add(new Token(TokenType.RCURLY, c));
                    break;
                case '~':
                    this.tokens.add(new Token(TokenType.NEG, c));
                    break;
                case ',':
                    this.tokens.add(new Token(TokenType.COMMA, c));
                    break;
                case '!':
                    if (nextCharEquals(i, '=')) {
                        i++;
                        this.tokens.add(new Token(TokenType.NE, "!="));
                    } else {
                        this.tokens.add(new Token(TokenType.BANG, c));
                    }
                    break;
                case '?':
                    this.tokens.add(new Token(TokenType.EROTEME, c));
                    break;
                case '<':
                    if (nextCharEquals(i, '<')) {
                        i++;
                        this.tokens.add(new Token(TokenType.LSHIFT, "<<"));
                    } else if (nextCharEquals(i, '=')) {
                        i++;
                        this.tokens.add(new Token(TokenType.LE, "<="));
                    } else {
                        this.tokens.add(new Token(TokenType.LT, c));
                    }
                    break;
                case '>':
                    if (nextCharEquals(i, '>')) {
                        i++;
                        this.tokens.add(new Token(TokenType.RSHIFT, ">>"));
                    } else if (nextCharEquals(i, '=')) {
                        i++;
                        this.tokens.add(new Token(TokenType.GE, ">="));
                    } else {
                        this.tokens.add(new Token(TokenType.GT, c));
                    }
                    break;
                case '.':
                    if (nextCharEquals(i, '*')) {
                        i++;
                        this.tokens.add(new Token(TokenType.DOTSTAR, ".*"));
                    } else if (i + 1 < this.string.length() && Character.isDigit(this.string.charAt(i + 1))) {
                        // a number written without its leading zero, e.g. ".5"
                        i = lexNumber(i);
                    } else {
                        this.tokens.add(new Token(TokenType.DOT, c));
                    }
                    break;
                case '"':
                case '\'':
                case '`':
                    char quoteChar = c;
                    StringBuilder val = new StringBuilder();
                    try {
                        boolean escapeNextChar = false;
                        // Keep reading until an unescaped closing quote that is not immediately followed by another quote (a doubled quote stands for
                        // one literal quote). Running off the end of the input raises StringIndexOutOfBoundsException, handled below.
                        for (c = this.string.charAt(++i); c != quoteChar || escapeNextChar
                                || i + 1 < this.string.length() && this.string.charAt(i + 1) == quoteChar; c = this.string.charAt(++i)) {
                            if (escapeNextChar) {
                                if (escapeChars.containsKey(c)) {
                                    val.append(escapeChars.get(c));
                                } else if (c == 'u') {
                                    // \\u[4 hex digits] represents a unicode code point (ISO/IEC 10646)
                                    char[] buf = new char[4];
                                    this.string.getChars(++i, i + 4, buf, 0);
                                    String hexCodePoint = String.valueOf(buf);
                                    try {
                                        val.append((char) Integer.parseInt(hexCodePoint, 16));
                                    } catch (NumberFormatException e) {
                                        throw new WrongArgumentException("Invalid Unicode code point '" + hexCodePoint + "'");
                                    }
                                    // i already points at the first hex digit; skip the remaining three
                                    i += 3;
                                } else {
                                    // unknown escape: keep it verbatim, backslash included
                                    val.append('\\').append(c);
                                }
                                escapeNextChar = false;
                            } else if (c == '\\' || c == quoteChar) { // Escape sequence or two consecutive quotes
                                escapeNextChar = true;
                            } else {
                                val.append(c);
                            }
                        }
                        if (escapeNextChar) {
                            throw new WrongArgumentException("Unterminated escape sequence at " + i);
                        }
                    } catch (StringIndexOutOfBoundsException ex) {
                        throw new WrongArgumentException("Unterminated string starting at " + start);
                    }
                    this.tokens.add(new Token(quoteChar == '`' ? TokenType.IDENT : TokenType.LSTRING, val.toString()));
                    break;
                default:
                    throw new WrongArgumentException("Can't parse at position " + i);
            }
        } else {
            // otherwise, it's an identifier
            for (; i < this.string.length() && Character.isUnicodeIdentifierPart(this.string.charAt(i)); ++i) {
            }
            String val = this.string.substring(start, i);
            // Locale.ROOT keeps keyword recognition independent of the default locale (e.g. Turkish lower-cases 'I' to a dotless i)
            String valLower = val.toLowerCase(Locale.ROOT);
            if (i < this.string.length()) {
                // the scan stopped on the first char after the word; step back so the outer loop's increment revisits it
                --i;
            }
            if (reservedWords.containsKey(valLower)) {
                // Map operator names to values the server understands
                if ("and".equals(valLower)) {
                    this.tokens.add(new Token(reservedWords.get(valLower), "&&"));
                } else if ("or".equals(valLower)) {
                    this.tokens.add(new Token(reservedWords.get(valLower), "||"));
                } else {
                    // we case-normalize reserved words
                    this.tokens.add(new Token(reservedWords.get(valLower), valLower));
                }
            } else {
                this.tokens.add(new Token(TokenType.IDENT, val));
            }
        }
    }
}
/**
 * Verifies that the token stream holds a token of the given type at the given position.
 *
 * @param pos
 *            position in the token stream
 * @param type
 *            {@link TokenType}
 * @throws WrongArgumentException
 *             if there is no token at {@code pos} or it has a different type
 */
void assertTokenAt(int pos, TokenType type) {
    if (pos >= this.tokens.size()) {
        throw new WrongArgumentException("No more tokens when expecting " + type + " at token position " + pos);
    }
    boolean matches = this.tokens.get(pos).type == type;
    if (!matches) {
        throw new WrongArgumentException("Expected token type " + type + " at token position " + pos);
    }
}
/**
 * Tests the type of the token at the parser's current position.
 *
 * @param t
 *            {@link TokenType}
 * @return true if a current token exists and has type {@code t}
 */
boolean currentTokenTypeEquals(TokenType t) {
    final int current = this.tokenPos;
    return posTokenTypeEquals(current, t);
}
/**
 * Tests the type of the token following the parser's current position.
 *
 * @param t
 *            {@link TokenType}
 * @return true if a next token exists and has type {@code t}
 */
boolean nextTokenTypeEquals(TokenType t) {
    final int lookahead = this.tokenPos + 1;
    return posTokenTypeEquals(lookahead, t);
}
/**
 * Tests the type of the token at an arbitrary position of the token stream.
 *
 * @param pos
 *            position in the token stream
 * @param t
 *            {@link TokenType}
 * @return true if {@code pos} is before the end of the token stream and the token there has type {@code t}
 */
boolean posTokenTypeEquals(int pos, TokenType t) {
    if (pos >= this.tokens.size()) {
        return false;
    }
    return this.tokens.get(pos).type == t;
}
/**
 * Consumes the current token, which must be of the given type, and advances the parser by one token.
 *
 * @param t
 *            {@link TokenType}
 * @return the string value of the consumed token
 * @throws WrongArgumentException
 *             if there is no current token or it is not of type {@code t}
 */
String consumeToken(TokenType t) {
    assertTokenAt(this.tokenPos, t);
    Token consumed = this.tokens.get(this.tokenPos);
    this.tokenPos += 1;
    return consumed.value;
}
/**
 * Parse a paren-enclosed, comma-separated expression list; an empty pair of parentheses yields an empty list. This is used for function params or IN
 * params.
 *
 * @return a List of expressions
 */
List<Expr> parenExprList() {
    List<Expr> exprs = new ArrayList<>();
    consumeToken(TokenType.LPAREN);
    if (!currentTokenTypeEquals(TokenType.RPAREN)) {
        exprs.add(expr());
        while (currentTokenTypeEquals(TokenType.COMMA)) {
            consumeToken(TokenType.COMMA);
            exprs.add(expr());
        }
    }
    consumeToken(TokenType.RPAREN);
    return exprs;
}
/**
 * Parses {@code IDENTIFIER PAREN_EXPR_LIST}: a possibly schema-qualified function name followed by its parenthesized arguments.
 *
 * @return an Expr representing the function call.
 */
Expr functionCall() {
    FunctionCall.Builder call = FunctionCall.newBuilder();
    call.setName(identifier());
    call.addAllParam(parenExprList());
    Expr.Builder wrapper = Expr.newBuilder().setType(Expr.Type.FUNC_CALL);
    return wrapper.setFunctionCall(call.build()).build();
}
/**
 * Builds the parameterless "*" operator expression.
 *
 * @return {@link Expr}
 */
Expr starOperator() {
    Operator.Builder star = Operator.newBuilder().setName("*");
    return Expr.newBuilder().setType(Expr.Type.OPERATOR).setOperator(star.build()).build();
}
/**
 * Parses the name of a function: {@code [schema.]name}.
 *
 * @return {@link Identifier}
 */
Identifier identifier() {
    assertTokenAt(this.tokenPos, TokenType.IDENT);
    Identifier.Builder result = Identifier.newBuilder();
    if (nextTokenTypeEquals(TokenType.DOT)) {
        // qualified form: the first identifier is the schema
        result.setSchemaName(consumeToken(TokenType.IDENT));
        consumeToken(TokenType.DOT);
    }
    result.setName(consumeToken(TokenType.IDENT));
    return result.build();
}
/**
 * Parse a document path member: a '.' followed by either an identifier or a quoted string.
 *
 * @return {@link DocumentPathItem} of type MEMBER
 * @throws WrongArgumentException
 *             if the '.' is not followed by an identifier or string (including when it is the last token), or if the identifier would need quoting
 */
DocumentPathItem docPathMember() {
    consumeToken(TokenType.DOT);
    String memberName;
    // the token is only read after its type has been checked, so a trailing '.' ends in the else branch instead of an IndexOutOfBoundsException
    if (currentTokenTypeEquals(TokenType.IDENT)) {
        String name = this.tokens.get(this.tokenPos).value;
        // this shouldn't be allowed to be quoted with backticks, but the lexer allows it
        if (!name.equals(ExprUnparser.quoteIdentifier(name))) {
            throw new WrongArgumentException("'" + name + "' is not a valid JSON/ECMAScript identifier");
        }
        consumeToken(TokenType.IDENT);
        memberName = name;
    } else if (currentTokenTypeEquals(TokenType.LSTRING)) {
        memberName = consumeToken(TokenType.LSTRING);
    } else {
        throw new WrongArgumentException("Expected token type IDENT or LSTRING in JSON path at token position " + this.tokenPos);
    }
    DocumentPathItem.Builder item = DocumentPathItem.newBuilder();
    item.setType(DocumentPathItem.Type.MEMBER);
    item.setValue(memberName);
    return item.build();
}
/**
 * Parse a document path array index: {@code [*]} or {@code [n]} with a non-negative integer n.
 *
 * @return {@link DocumentPathItem} of type ARRAY_INDEX_ASTERISK or ARRAY_INDEX
 * @throws WrongArgumentException
 *             if the brackets do not enclose '*' or an integer literal, or if the integer is negative or does not fit in an int
 */
DocumentPathItem docPathArrayLoc() {
    DocumentPathItem.Builder builder = DocumentPathItem.newBuilder();
    consumeToken(TokenType.LSQBRACKET);
    if (currentTokenTypeEquals(TokenType.STAR)) {
        consumeToken(TokenType.STAR);
        consumeToken(TokenType.RSQBRACKET);
        return builder.setType(DocumentPathItem.Type.ARRAY_INDEX_ASTERISK).build();
    } else if (currentTokenTypeEquals(TokenType.LNUM_INT)) {
        String literal = this.tokens.get(this.tokenPos).value;
        int v;
        try {
            v = Integer.parseInt(literal);
        } catch (NumberFormatException e) {
            // report oversized literals as a parse error like every other malformed input
            throw new WrongArgumentException("Invalid array index '" + literal + "' at " + this.tokenPos);
        }
        if (v < 0) {
            throw new WrongArgumentException("Array index cannot be negative at " + this.tokenPos);
        }
        consumeToken(TokenType.LNUM_INT);
        consumeToken(TokenType.RSQBRACKET);
        return builder.setType(DocumentPathItem.Type.ARRAY_INDEX).setIndex(v).build();
    } else {
        throw new WrongArgumentException("Expected token type STAR or LNUM_INT in JSON path array index at token position " + this.tokenPos);
    }
}
/**
 * Parse the items of a JSON-style document path starting at the current token: any sequence of {@code .member}, {@code .*}, {@code [index]},
 * {@code [*]} and {@code **}. Parsing stops at the first token that cannot continue the path; an empty list is returned if no item is found.
 *
 * @return list of {@link DocumentPathItem} objects
 * @throws WrongArgumentException
 *             if the path ends in '**' or one of its items is malformed
 */
public List<DocumentPathItem> documentPath() {
    List<DocumentPathItem> items = new ArrayList<>();
    while (true) {
        if (currentTokenTypeEquals(TokenType.DOT)) {
            items.add(docPathMember());
        } else if (currentTokenTypeEquals(TokenType.DOTSTAR)) {
            consumeToken(TokenType.DOTSTAR);
            items.add(DocumentPathItem.newBuilder().setType(DocumentPathItem.Type.MEMBER_ASTERISK).build());
        } else if (currentTokenTypeEquals(TokenType.LSQBRACKET)) {
            items.add(docPathArrayLoc());
        } else if (currentTokenTypeEquals(TokenType.DOUBLESTAR)) {
            consumeToken(TokenType.DOUBLESTAR);
            items.add(DocumentPathItem.newBuilder().setType(DocumentPathItem.Type.DOUBLE_ASTERISK).build());
        } else {
            break;
        }
    }
    if (!items.isEmpty() && items.get(items.size() - 1).getType() == DocumentPathItem.Type.DOUBLE_ASTERISK) {
        throw new WrongArgumentException("JSON path may not end in '**' at " + this.tokenPos);
    }
    return items;
}
/**
 * Parses a document field: an optional leading identifier, taken as the first member of the path, followed by the rest of the document path.
 *
 * @return {@link Expr} of type IDENT holding the document path
 */
public Expr documentField() {
    ColumnIdentifier.Builder field = ColumnIdentifier.newBuilder();
    if (currentTokenTypeEquals(TokenType.IDENT)) {
        DocumentPathItem.Builder head = DocumentPathItem.newBuilder().setType(DocumentPathItem.Type.MEMBER);
        head.setValue(consumeToken(TokenType.IDENT));
        field.addDocumentPath(head.build());
    }
    field.addAllDocumentPath(documentPath());
    Expr.Builder result = Expr.newBuilder().setType(Expr.Type.IDENT);
    return result.setIdentifier(field.build()).build();
}
/**
 * Parse a column identifier of the form {@code [[schema.]table.]column}, which may optionally be followed by {@code ->} and a JSON document path given
 * either inline ({@code ->$.a.b}) or as a quoted string ({@code ->'$.a.b'}).
 *
 * @return {@link Expr} of type IDENT
 * @throws WrongArgumentException
 *             if the identifier is malformed, or if {@code ->} is not followed by a non-empty document path starting with '$'
 */
Expr columnIdentifier() {
    List<String> parts = new ArrayList<>(3);
    parts.add(consumeToken(TokenType.IDENT));
    while (currentTokenTypeEquals(TokenType.DOT)) {
        consumeToken(TokenType.DOT);
        parts.add(consumeToken(TokenType.IDENT));
        // identifier can be at most three parts
        if (parts.size() == 3) {
            break;
        }
    }
    // parts were read as schema, table, column; reverse so that the column name always comes first
    Collections.reverse(parts);
    ColumnIdentifier.Builder id = ColumnIdentifier.newBuilder();
    id.setName(parts.get(0));
    if (parts.size() > 1) {
        id.setTableName(parts.get(1));
    }
    if (parts.size() > 2) {
        id.setSchemaName(parts.get(2));
    }
    if (currentTokenTypeEquals(TokenType.COLDOCPATH)) {
        consumeToken(TokenType.COLDOCPATH);
        if (currentTokenTypeEquals(TokenType.DOLLAR)) {
            consumeToken(TokenType.DOLLAR);
            id.addAllDocumentPath(documentPath());
        } else if (currentTokenTypeEquals(TokenType.LSTRING)) {
            String path = consumeToken(TokenType.LSTRING);
            // an empty quoted path must be rejected here; charAt(0) on it would throw StringIndexOutOfBoundsException
            if (path.isEmpty() || path.charAt(0) != '$') {
                throw new WrongArgumentException("Invalid document path at " + this.tokenPos);
            }
            // the quoted path is lexed and parsed on its own, without the leading '$'
            id.addAllDocumentPath(new ExprParser(path.substring(1)).documentPath());
        }
        if (id.getDocumentPathCount() == 0) {
            throw new WrongArgumentException("Invalid document path at " + this.tokenPos);
        }
    }
    return Expr.newBuilder().setType(Expr.Type.IDENT).setIdentifier(id.build()).build();
}
/**
 * Wraps an operand in a single-parameter operator expression. The names "-" and "+" are sent as "sign_minus" and "sign_plus"; any other name is used as
 * given.
 *
 * @param name
 *            operator name
 * @param param
 *            operator parameter
 * @return {@link Expr}
 */
Expr buildUnaryOp(String name, Expr param) {
    String opName;
    if ("-".equals(name)) {
        opName = "sign_minus";
    } else if ("+".equals(name)) {
        opName = "sign_plus";
    } else {
        opName = name;
    }
    Operator.Builder unary = Operator.newBuilder().setName(opName).addParam(param);
    return Expr.newBuilder().setType(Expr.Type.OPERATOR).setOperator(unary.build()).build();
}
/**
 * Parse an atomic expression (c.f. grammar at top): a placeholder, a parenthesized expression, a JSON object or array, a CAST, a unary operator applied to
 * an atomic expression, a literal, a document field, the bare "*" operator, a function call or a column/document identifier.
 *
 * @return {@link Expr}
 * @throws WrongArgumentException
 *             if there are no more tokens or the current token cannot start an atomic expression
 */
Expr atomicExpr() { // constant, identifier, variable, function call, etc
    if (this.tokenPos >= this.tokens.size()) {
        throw new WrongArgumentException("No more tokens when expecting one at token position " + this.tokenPos);
    }
    Token t = this.tokens.get(this.tokenPos);
    this.tokenPos++; // consume
    switch (t.type) {
        case EROTEME:
        case COLON: {
            String placeholderName;
            if (currentTokenTypeEquals(TokenType.LNUM_INT)) {
                placeholderName = consumeToken(TokenType.LNUM_INT);
            } else if (currentTokenTypeEquals(TokenType.IDENT)) {
                placeholderName = consumeToken(TokenType.IDENT);
            } else if (t.type == TokenType.EROTEME) {
                // anonymous '?': named after the position it is about to receive
                placeholderName = String.valueOf(this.positionalPlaceholderCount);
            } else {
                throw new WrongArgumentException("Invalid placeholder name at token position " + this.tokenPos);
            }
            Expr.Builder placeholder = Expr.newBuilder().setType(Expr.Type.PLACEHOLDER);
            if (this.placeholderNameToPosition.containsKey(placeholderName)) {
                // repeated name: reuse the position assigned on first sight
                placeholder.setPosition(this.placeholderNameToPosition.get(placeholderName));
            } else {
                placeholder.setPosition(this.positionalPlaceholderCount);
                this.placeholderNameToPosition.put(placeholderName, this.positionalPlaceholderCount);
                this.positionalPlaceholderCount++;
            }
            return placeholder.build();
        }
        case LPAREN: {
            Expr e = expr();
            consumeToken(TokenType.RPAREN);
            return e;
        }
        case LCURLY: { // JSON object
            Object.Builder builder = Object.newBuilder();
            if (currentTokenTypeEquals(TokenType.LSTRING)) {
                parseCommaSeparatedList(() -> {
                    String key = consumeToken(TokenType.LSTRING);
                    consumeToken(TokenType.COLON);
                    Expr value = expr();
                    return Collections.singletonMap(key, value);
                }).stream().map(pair -> pair.entrySet().iterator().next()).map(e -> ObjectField.newBuilder().setKey(e.getKey()).setValue(e.getValue()))
                        .forEach(builder::addFld);
            }
            consumeToken(TokenType.RCURLY);
            return Expr.newBuilder().setType(Expr.Type.OBJECT).setObject(builder.build()).build();
        }
        case LSQBRACKET: { // Array
            Array.Builder builder = Expr.newBuilder().setType(Expr.Type.ARRAY).getArrayBuilder();
            if (!currentTokenTypeEquals(TokenType.RSQBRACKET)) {
                parseCommaSeparatedList(this::expr).stream().forEach(builder::addValue);
            }
            consumeToken(TokenType.RSQBRACKET);
            return Expr.newBuilder().setType(Expr.Type.ARRAY).setArray(builder).build();
        }
        case CAST: {
            consumeToken(TokenType.LPAREN);
            Operator.Builder builder = Operator.newBuilder().setName(TokenType.CAST.toString().toLowerCase(Locale.ROOT));
            builder.addParam(expr());
            consumeToken(TokenType.AS);
            // input may end right after AS; report it as a parse error instead of failing on tokens.get()
            if (this.tokenPos >= this.tokens.size()) {
                throw new WrongArgumentException("Expected valid CAST type argument at " + this.tokenPos);
            }
            // Locale.ROOT keeps the type name plain ASCII regardless of the default locale
            StringBuilder typeStr = new StringBuilder(this.tokens.get(this.tokenPos).value.toUpperCase(Locale.ROOT));
            // ensure next token is a valid type argument to CAST
            if (currentTokenTypeEquals(TokenType.DECIMAL)) {
                this.tokenPos++;
                if (currentTokenTypeEquals(TokenType.LPAREN)) {
                    typeStr.append(consumeToken(TokenType.LPAREN));
                    typeStr.append(consumeToken(TokenType.LNUM_INT));
                    if (currentTokenTypeEquals(TokenType.COMMA)) {
                        typeStr.append(consumeToken(TokenType.COMMA));
                        typeStr.append(consumeToken(TokenType.LNUM_INT));
                    }
                    typeStr.append(consumeToken(TokenType.RPAREN));
                }
            } else if (currentTokenTypeEquals(TokenType.CHAR) || currentTokenTypeEquals(TokenType.BINARY)) {
                this.tokenPos++;
                if (currentTokenTypeEquals(TokenType.LPAREN)) {
                    typeStr.append(consumeToken(TokenType.LPAREN));
                    typeStr.append(consumeToken(TokenType.LNUM_INT));
                    typeStr.append(consumeToken(TokenType.RPAREN));
                }
            } else if (currentTokenTypeEquals(TokenType.UNSIGNED) || currentTokenTypeEquals(TokenType.SIGNED)) {
                this.tokenPos++;
                if (currentTokenTypeEquals(TokenType.INTEGER)) {
                    // don't add optional INTEGER to type string argument
                    consumeToken(TokenType.INTEGER);
                }
            } else if (currentTokenTypeEquals(TokenType.JSON) || currentTokenTypeEquals(TokenType.DATE) || currentTokenTypeEquals(TokenType.DATETIME)
                    || currentTokenTypeEquals(TokenType.TIME)) {
                this.tokenPos++;
            } else {
                throw new WrongArgumentException("Expected valid CAST type argument at " + this.tokenPos);
            }
            consumeToken(TokenType.RPAREN);
            // TODO charset?
            builder.addParam(ExprUtil.buildLiteralScalar(typeStr.toString().getBytes()));
            return Expr.newBuilder().setType(Expr.Type.OPERATOR).setOperator(builder.build()).build();
        }
        case PLUS:
        case MINUS:
            if (currentTokenTypeEquals(TokenType.LNUM_INT) || currentTokenTypeEquals(TokenType.LNUM_DOUBLE)) {
                // unary operators are handled inline making positive or negative numeric literals:
                // the sign is folded into the following number token, which is then parsed as a literal
                this.tokens.get(this.tokenPos).value = t.value + this.tokens.get(this.tokenPos).value;
                return atomicExpr();
            }
            return buildUnaryOp(t.value, atomicExpr());
        case NOT:
        case NEG:
        case BANG:
            return buildUnaryOp(t.value, atomicExpr());
        case LSTRING:
            return ExprUtil.buildLiteralScalar(t.value);
        case NULL:
            return ExprUtil.buildLiteralNullScalar();
        case LNUM_INT:
            return ExprUtil.buildLiteralScalar(Long.parseLong(t.value));
        case LNUM_DOUBLE:
            return ExprUtil.buildLiteralScalar(Double.parseDouble(t.value));
        case TRUE:
        case FALSE:
            return ExprUtil.buildLiteralScalar(t.type == TokenType.TRUE);
        case DOLLAR:
            return documentField();
        case STAR:
            // special "0-ary" consideration of "*" as an operator (for COUNT(*), etc)
            return starOperator();
        case IDENT:
            this.tokenPos--; // stay on the identifier
            // check for function call which may be: func(...) or schema.func(...)
            if (nextTokenTypeEquals(TokenType.LPAREN) || posTokenTypeEquals(this.tokenPos + 1, TokenType.DOT)
                    && posTokenTypeEquals(this.tokenPos + 2, TokenType.IDENT) && posTokenTypeEquals(this.tokenPos + 3, TokenType.LPAREN)) {
                return functionCall();
            }
            if (this.allowRelationalColumns) {
                return columnIdentifier();
            }
            return documentField();
        default:
            break;
    }
    // tokenPos was already advanced past the offending token, hence the "- 1"
    throw new WrongArgumentException("Cannot find atomic expression at token position " + (this.tokenPos - 1));
}
/**
* An expression parser. (used in {@link #parseLeftAssocBinaryOpExpr(TokenType[], ParseExpr)})
*/
@FunctionalInterface
static interface ParseExpr {
Expr parseExpr();
}
/**
 * Parses a chain of left-associative binary operators sharing one precedence level. Operands are read with {@code innerParser}; each operator found
 * between two operands folds the expression built so far (left) and the next operand (right) into a new operator expression named after the operator
 * token's value.
 *
 * @param types
 *            The token types that denote this operator.
 * @param innerParser
 *            The inner parser that should be called to parse operands.
 * @return an expression tree of the binary operator or a single operand
 */
Expr parseLeftAssocBinaryOpExpr(TokenType[] types, ParseExpr innerParser) {
    Expr result = innerParser.parseExpr();
    while (this.tokenPos < this.tokens.size()) {
        if (!Arrays.asList(types).contains(this.tokens.get(this.tokenPos).type)) {
            break;
        }
        Token operatorToken = this.tokens.get(this.tokenPos);
        Operator.Builder operation = Operator.newBuilder().setName(operatorToken.value).addParam(result);
        this.tokenPos++;
        operation.addParam(innerParser.parseExpr());
        result = Expr.newBuilder().setType(Expr.Type.OPERATOR).setOperator(operation.build()).build();
    }
    return result;
}
/**
 * Parses an atomic expression optionally followed by any number of {@code +/- INTERVAL amount UNIT} clauses. Each clause becomes a "date_add" (for '+') or
 * "date_sub" (for '-') operator whose parameters are the expression so far, the amount and the upper-cased unit name.
 *
 * @return {@link Expr}
 * @throws WrongArgumentException
 *             if the amount is not followed by an interval unit
 */
Expr addSubIntervalExpr() {
    Expr lhs = atomicExpr();
    while ((currentTokenTypeEquals(TokenType.PLUS) || currentTokenTypeEquals(TokenType.MINUS)) && nextTokenTypeEquals(TokenType.INTERVAL)) {
        Token op = this.tokens.get(this.tokenPos);
        this.tokenPos++;
        Operator.Builder builder = Operator.newBuilder().addParam(lhs);
        // INTERVAL expression
        consumeToken(TokenType.INTERVAL);
        builder.setName(op.type == TokenType.PLUS ? "date_add" : "date_sub");
        builder.addParam(bitExpr()); // amount
        // ensure next token is an interval unit
        if (this.tokenPos >= this.tokens.size()) {
            throw new WrongArgumentException("Expected interval units at " + this.tokenPos);
        }
        Token unit = this.tokens.get(this.tokenPos);
        switch (unit.type) {
            case MICROSECOND:
            case SECOND:
            case MINUTE:
            case HOUR:
            case DAY:
            case WEEK:
            case MONTH:
            case QUARTER:
            case YEAR:
            case SECOND_MICROSECOND:
            case MINUTE_MICROSECOND:
            case MINUTE_SECOND:
            case HOUR_MICROSECOND:
            case HOUR_SECOND:
            case HOUR_MINUTE:
            case DAY_MICROSECOND:
            case DAY_SECOND:
            case DAY_MINUTE:
            case DAY_HOUR:
            case YEAR_MONTH:
                break;
            default:
                throw new WrongArgumentException("Expected interval units at " + this.tokenPos);
        }
        // xplugin demands that intervals be sent uppercase; Locale.ROOT keeps the unit name plain ASCII whatever the default locale is
        // TODO: we need to propagate the appropriate encoding here? it's ascii but it might not *always* be a superset encoding??
        builder.addParam(ExprUtil.buildLiteralScalar(unit.value.toUpperCase(Locale.ROOT).getBytes()));
        this.tokenPos++;
        lhs = Expr.newBuilder().setType(Expr.Type.OPERATOR).setOperator(builder.build()).build();
    }
    return lhs;
}
/**
 * Parse a chain of STAR, SLASH and MOD operators via {@code parseLeftAssocBinaryOpExpr}; each operand is parsed by {@link #addSubIntervalExpr()}.
 *
 * @return the parsed expression
 */
Expr mulDivExpr() {
    final TokenType[] operators = { TokenType.STAR, TokenType.SLASH, TokenType.MOD };
    return parseLeftAssocBinaryOpExpr(operators, this::addSubIntervalExpr);
}
/**
 * Parse a chain of PLUS and MINUS operators via {@code parseLeftAssocBinaryOpExpr}; each operand is parsed by {@link #mulDivExpr()}.
 *
 * @return the parsed expression
 */
Expr addSubExpr() {
    final TokenType[] operators = { TokenType.PLUS, TokenType.MINUS };
    return parseLeftAssocBinaryOpExpr(operators, this::mulDivExpr);
}
/**
 * Parse a chain of LSHIFT and RSHIFT operators via {@code parseLeftAssocBinaryOpExpr}; each operand is parsed by {@link #addSubExpr()}.
 *
 * @return the parsed expression
 */
Expr shiftExpr() {
    final TokenType[] operators = { TokenType.LSHIFT, TokenType.RSHIFT };
    return parseLeftAssocBinaryOpExpr(operators, this::addSubExpr);
}
/**
 * Parse a chain of BITAND, BITOR and BITXOR operators via {@code parseLeftAssocBinaryOpExpr}; each operand is parsed by {@link #shiftExpr()}.
 *
 * @return the parsed expression
 */
Expr bitExpr() {
    final TokenType[] operators = { TokenType.BITAND, TokenType.BITOR, TokenType.BITXOR };
    return parseLeftAssocBinaryOpExpr(operators, this::shiftExpr);
}
/**
 * Parse a chain of comparison operators (GE, GT, LE, LT, EQ, NE) via {@code parseLeftAssocBinaryOpExpr}; each operand is parsed by {@link #bitExpr()}.
 *
 * @return the parsed expression
 */
Expr compExpr() {
    final TokenType[] operators = { TokenType.GE, TokenType.GT, TokenType.LE, TokenType.LT, TokenType.EQ, TokenType.NE };
    return parseLeftAssocBinaryOpExpr(operators, this::bitExpr);
}
/**
 * Parse a comparison expression optionally followed by any number of IS, IN, LIKE, BETWEEN, REGEXP or OVERLAPS predicates.
 *
 * The operator name defaults to the lower-cased text of the operator token and is adjusted as follows:
 * <ul>
 * <li>IS followed by NOT becomes {@code is_not};</li>
 * <li>IN not followed by a left parenthesis becomes {@code cont_in} and takes a single right-hand operand, otherwise every element of the
 * parenthesized list is appended as a parameter;</li>
 * <li>LIKE takes an optional third parameter introduced by ESCAPE;</li>
 * <li>BETWEEN takes two operands separated by AND;</li>
 * <li>a NOT preceding the operator prefixes the name with {@code not_}.</li>
 * </ul>
 * The expression parsed so far is always the first parameter of the resulting operator.
 *
 * @return the parsed expression
 * @throws WrongArgumentException
 *             if NOT is the last token or is followed by a token that is not one of the operators above
 */
Expr ilriExpr() {
    Expr lhs = compExpr();
    List<TokenType> expected = Arrays.asList(TokenType.IS, TokenType.IN, TokenType.LIKE, TokenType.BETWEEN, TokenType.REGEXP, TokenType.NOT,
            TokenType.OVERLAPS);
    while (this.tokenPos < this.tokens.size() && expected.contains(this.tokens.get(this.tokenPos).type)) {
        boolean isNot = false;
        if (currentTokenTypeEquals(TokenType.NOT)) {
            consumeToken(TokenType.NOT);
            isNot = true;
            // A dangling NOT used to be consumed and silently dropped, so "a NOT" parsed as "a"; reject it instead.
            if (this.tokenPos >= this.tokens.size()) {
                throw new WrongArgumentException("Missing operator after NOT at position " + this.tokenPos);
            }
        }

        List<Expr> params = new ArrayList<>();
        params.add(lhs);
        Token opToken = this.tokens.get(this.tokenPos);
        // Locale.ROOT keeps the operator name independent of the JVM default locale (e.g. under a Turkish locale "IS" would become "ıs").
        String opName = opToken.value.toLowerCase(java.util.Locale.ROOT);
        switch (opToken.type) {
            case IS: // for IS, NOT comes AFTER
                consumeToken(TokenType.IS);
                if (currentTokenTypeEquals(TokenType.NOT)) {
                    consumeToken(TokenType.NOT);
                    opName = "is_not";
                }
                params.add(compExpr());
                break;
            case IN:
                consumeToken(TokenType.IN);
                if (currentTokenTypeEquals(TokenType.LPAREN)) {
                    params.addAll(parenExprList());
                } else {
                    opName = "cont_in";
                    params.add(compExpr());
                }
                break;
            case LIKE:
                consumeToken(TokenType.LIKE);
                params.add(compExpr());
                if (currentTokenTypeEquals(TokenType.ESCAPE)) {
                    consumeToken(TokenType.ESCAPE);
                    // add as a third (optional) param
                    params.add(compExpr());
                }
                break;
            case BETWEEN:
                consumeToken(TokenType.BETWEEN);
                params.add(compExpr());
                assertTokenAt(this.tokenPos, TokenType.AND);
                consumeToken(TokenType.AND);
                params.add(compExpr());
                break;
            case REGEXP:
                consumeToken(TokenType.REGEXP);
                params.add(compExpr());
                break;
            case OVERLAPS:
                consumeToken(TokenType.OVERLAPS);
                params.add(compExpr());
                break;
            default:
                throw new WrongArgumentException("Unknown token after NOT at position " + this.tokenPos);
        }
        if (isNot) {
            opName = "not_" + opName;
        }
        Operator.Builder builder = Operator.newBuilder().setName(opName).addAllParam(params);
        lhs = Expr.newBuilder().setType(Expr.Type.OPERATOR).setOperator(builder.build()).build();
    }
    return lhs;
}
/**
 * Parse a chain of AND and ANDAND operators via {@code parseLeftAssocBinaryOpExpr}; each operand is parsed by {@link #ilriExpr()}.
 *
 * @return the parsed expression
 */
Expr andExpr() {
    final TokenType[] operators = { TokenType.AND, TokenType.ANDAND };
    return parseLeftAssocBinaryOpExpr(operators, this::ilriExpr);
}
/**
 * Parse a chain of OR and OROR operators via {@code parseLeftAssocBinaryOpExpr}; each operand is parsed by {@link #andExpr()}.
 *
 * @return the parsed expression
 */
Expr orExpr() {
    final TokenType[] operators = { TokenType.OR, TokenType.OROR };
    return parseLeftAssocBinaryOpExpr(operators, this::andExpr);
}
/**
 * Parse a single expression starting at the current token; this simply delegates to {@link #orExpr()}.
 *
 * @return the parsed expression
 */
Expr expr() {
    return orExpr();
}
/**
 * Parse the whole token stream as one expression.
 *
 * @return an X protocol expression tree
 * @throws WrongArgumentException
 *             if tokens are left over once the expression has been parsed, or if an {@link IllegalArgumentException} is raised while parsing
 */
public Expr parse() {
    try {
        final Expr root = expr();
        final int available = this.tokens.size();
        if (this.tokenPos == available) {
            return root;
        }
        // trailing tokens mean the input was not a single well-formed expression
        throw new WrongArgumentException("Only " + this.tokenPos + " tokens consumed, out of " + available);
    } catch (IllegalArgumentException cause) {
        throw new WrongArgumentException("Unable to parse query '" + this.string + "'", cause);
    }
}
/**
 * Utility method to wrap a parser of a list of elements separated by comma.
 *
 * The element parser is always invoked at least once; it is invoked again after every COMMA token that follows an element.
 *
 * @param <T>
 *            the type of element to be parsed
 * @param elementParser
 *            the single element parser
 * @return a list of elements parsed, in the order they were encountered
 */
private <T> List<T> parseCommaSeparatedList(Supplier<T> elementParser) {
    List<T> elements = new ArrayList<>();
    elements.add(elementParser.get());
    while (currentTokenTypeEquals(TokenType.COMMA)) {
        consumeToken(TokenType.COMMA);
        elements.add(elementParser.get());
    }
    return elements;
}
/**
 * Parse an ORDER BY specification which is a comma-separated list of expressions, each of which may be optionally suffixed by ASC/DESC.
 *
 * When an expression is followed by neither direction token, no direction is set on the resulting {@link Order}.
 *
 * @return list of {@link Order} objects
 */
public List<Order> parseOrderSpec() {
    return parseCommaSeparatedList(() -> {
        Order.Builder builder = Order.newBuilder().setExpr(expr());
        if (currentTokenTypeEquals(TokenType.ORDERBY_ASC)) {
            consumeToken(TokenType.ORDERBY_ASC);
            builder.setDirection(Order.Direction.ASC);
        } else if (currentTokenTypeEquals(TokenType.ORDERBY_DESC)) {
            consumeToken(TokenType.ORDERBY_DESC);
            builder.setDirection(Order.Direction.DESC);
        }
        return builder.build();
    });
}
/**
 * Parse a SELECT projection which is a comma-separated list of expressions, each optionally suffixed with a target alias.
 *
 * An alias is only read when the expression is followed by an AS token; the token after AS must be an IDENT.
 *
 * @return list of {@link Projection} objects
 */
public List<Projection> parseTableSelectProjection() {
    return parseCommaSeparatedList(() -> {
        Projection.Builder builder = Projection.newBuilder().setSource(expr());
        if (currentTokenTypeEquals(TokenType.AS)) {
            consumeToken(TokenType.AS);
            builder.setAlias(consumeToken(TokenType.IDENT));
        }
        return builder.build();
    });
}
/**
 * Parse an INSERT field name by consuming a single IDENT token and using its value as the column name.
 *
 * @return {@link Column}
 */
// TODO unit test
public Column parseTableInsertField() {
    final String fieldName = consumeToken(TokenType.IDENT);
    return Column.newBuilder().setName(fieldName).build();
}
/**
 * Parse an UPDATE field, which can include document paths.
 *
 * The field is parsed with {@code columnIdentifier()} and the identifier of the result is returned.
 *
 * @return {@link ColumnIdentifier}
 */
public ColumnIdentifier parseTableUpdateField() {
return columnIdentifier().getIdentifier();
}
/**
 * Parse a document projection which is similar to SELECT but with document paths as the target alias.
 *
 * Every projected expression must be followed by AS and an IDENT alias. Calling this method clears the {@code allowRelationalColumns} flag of this
 * parser, and the flag is not restored afterwards.
 *
 * @return list of {@link Projection} objects
 */
public List<Projection> parseDocumentProjection() {
    this.allowRelationalColumns = false;
    return parseCommaSeparatedList(() -> {
        Projection.Builder builder = Projection.newBuilder().setSource(expr());
        // alias is not optional for document projection
        consumeToken(TokenType.AS);
        builder.setAlias(consumeToken(TokenType.IDENT));
        return builder.build();
    });
}
/**
 * Parse a comma-separated list of expressions, as used for GROUP BY.
 *
 * @return list of {@link Expr} objects
 */
public List<Expr> parseExprList() {
    return parseCommaSeparatedList(this::expr);
}
/**
 * Return the number of positional placeholders in the expression, as currently recorded in this parser's {@code positionalPlaceholderCount} field.
 *
 * @return the number of positional placeholders in the expression
 */
public int getPositionalPlaceholderCount() {
return this.positionalPlaceholderCount;
}
/**
 * Get a mapping of parameter names to positions.
 *
 * The returned map is an unmodifiable view of the parser's internal map, so callers cannot alter it through this reference.
 *
 * @return an unmodifiable mapping of parameter names to positions
 */
public Map getPlaceholderNameToPositionMap() {
    final Map readOnlyView = Collections.unmodifiableMap(this.placeholderNameToPosition);
    return readOnlyView;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy