// com.hazelcast.org.apache.calcite.sql.advise.SqlSimpleParser (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.org.apache.calcite.sql.advise;
import com.hazelcast.org.apache.calcite.avatica.util.Quoting;
import com.hazelcast.org.apache.calcite.sql.parser.SqlParser;
import com.hazelcast.org.checkerframework.checker.nullness.qual.Nullable;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import static java.util.Objects.requireNonNull;
/**
* A simple parser that takes an incomplete statement and turns it into a
* syntactically correct statement. It is used in the SQL editor user-interface.
*/
public class SqlSimpleParser {
//~ Enums ------------------------------------------------------------------
/** Kinds of token recognized by the simple parser. */
enum TokenType {
  // keywords
  SELECT, FROM, JOIN, ON, USING, WHERE, GROUP, HAVING, ORDER, BY,
  UNION, INTERSECT, EXCEPT, MINUS,

  /** Left parenthesis. */
  LPAREN("("),

  /** Right parenthesis. */
  RPAREN(")"),

  /** Identifier, or indeed any miscellaneous sequence of characters. */
  ID,

  /** Double-quoted identifier, e.g. "FOO""BAR". */
  DQID,

  /** Single-quoted string literal, e.g. 'foobar'. */
  SQID,

  /** Comment. */
  COMMENT,

  /** Comma. */
  COMMA(","),

  /** A token created by reducing an entire sub-query. */
  QUERY;

  /** Text returned by {@link #sql()} for this token type. */
  private final String sqlText;

  /** Creates a token type whose SQL text is simply its own name. */
  TokenType() {
    this.sqlText = name();
  }

  /** Creates a token type with explicit SQL text (used for punctuation). */
  TokenType(String sqlText) {
    this.sqlText = sqlText;
  }

  /**
   * Returns the SQL text of this token type: the punctuation character for
   * {@code LPAREN}, {@code RPAREN} and {@code COMMA}, otherwise the constant
   * name.
   */
  public String sql() {
    return sqlText;
  }
}
//~ Instance fields --------------------------------------------------------
/** Token inserted into the SQL text to mark where hints are wanted. */
private final String hintToken;

/** Parser configuration; its quoting setting is passed to the tokenizer. */
private final SqlParser.Config parserConfig;

//~ Constructors -----------------------------------------------------------

/**
 * Creates a SqlSimpleParser that uses {@link SqlParser.Config#DEFAULT}.
 *
 * @param hintToken Hint token
 * @deprecated Use {@link #SqlSimpleParser(String, SqlParser.Config)}
 */
@Deprecated // to be removed before 2.0
public SqlSimpleParser(String hintToken) {
  this(hintToken, SqlParser.Config.DEFAULT);
}

/**
 * Creates a SqlSimpleParser with an explicit parser configuration.
 *
 * @param hintToken Hint token
 * @param parserConfig parser configuration
 */
public SqlSimpleParser(String hintToken, SqlParser.Config parserConfig) {
  this.parserConfig = parserConfig;
  this.hintToken = hintToken;
}
//~ Methods ----------------------------------------------------------------
/**
 * Inserts the hint token at the cursor position and then simplifies the
 * resulting statement into a valid one that can be passed into
 * getCompletionHints().
 *
 * @param sql A partial or syntactically incorrect sql statement
 * @param cursor column position in the query at which completion hints
 *     need to be retrieved; a position at or beyond the end of the text
 *     appends the hint token
 * @return a completed, valid (and possibly simplified) SQL statement
 */
public String simplifySql(String sql, int cursor) {
  // Clamp to the end of the text; splitting at the very end leaves an empty
  // right-hand part, which is the same as appending.
  final int insertAt = Math.min(cursor, sql.length());
  final String head = sql.substring(0, insertAt);
  final String tail = sql.substring(insertAt);
  return simplifySql(head + " " + hintToken + " " + tail);
}
/**
 * Turns a partially completed or syntactically incorrect SQL statement into a
 * simplified, valid one that can be validated.
 *
 * <p>The statement is tokenized (comments are dropped), the tokens are
 * gathered into sub-queries, the query containing the hint token is
 * simplified, and the remaining tokens are written out separated by single
 * spaces.
 *
 * @param sql A partial or syntactically incorrect sql statement
 * @return a completed, valid (and possibly simplified) SQL statement
 */
public String simplifySql(String sql) {
  Tokenizer tokenizer = new Tokenizer(sql, hintToken, parserConfig.quoting());

  // The element type must be spelled out: iterating a raw List yields Object,
  // which cannot be assigned to the Token loop variable below.
  List<Token> list = new ArrayList<>();
  while (true) {
    Token token = tokenizer.nextToken();
    if (token == null) {
      // end of input
      break;
    }
    if (token.type == TokenType.COMMENT) {
      // ignore comments
      continue;
    }
    list.add(token);
  }

  // Gather consecutive sub-sequences of tokens into sub-queries.
  List<Token> outList = new ArrayList<>();
  consumeQuery(list.listIterator(), outList);

  // Simplify.
  Query.simplifyList(outList, hintToken);

  // Convert to string.
  StringBuilder buf = new StringBuilder();
  int k = -1;
  for (Token token : outList) {
    if (++k > 0) {
      buf.append(' ');
    }
    token.unparse(buf);
  }
  return buf.toString();
}
/**
 * Consumes a sequence of SELECT blocks combined with set operators
 * (UNION, INTERSECT, EXCEPT, MINUS, each optionally followed by ALL) and
 * appends the result to {@code outList}.
 *
 * <p>Returns when the input is exhausted, or when the token following a
 * SELECT block is not a set operator; that token is pushed back so that the
 * caller sees it.
 *
 * @param iter iterator over the remaining tokens
 * @param outList list that receives the consumed tokens
 */
private static void consumeQuery(ListIterator<Token> iter, List<Token> outList) {
  while (iter.hasNext()) {
    consumeSelect(iter, outList);
    if (iter.hasNext()) {
      Token token = iter.next();
      switch (token.type) {
      case UNION:
      case INTERSECT:
      case EXCEPT:
      case MINUS:
        outList.add(token);
        if (iter.hasNext()) {
          token = iter.next();
          if ((token.type == TokenType.ID)
              && "ALL".equalsIgnoreCase(token.s)) {
            outList.add(token);
          } else {
            // Not the ALL modifier; leave it for the next consumeSelect.
            iter.previous();
          }
        }
        // Combine SELECT ... UNION SELECT..., so keep trying consumeSelect
        break;
      default:
        // Unknown token detected => end of query detected
        iter.previous();
        return;
      }
    }
  }
}
/**
 * Consumes tokens up to a right parenthesis, a set operator or the end of
 * input, and appends them to {@code outList}.
 *
 * <p>If a SELECT keyword was seen, the consumed tokens are wrapped in a single
 * {@link Query} token; otherwise they are appended unchanged, as a plain
 * (possibly parenthesized) expression. A left parenthesis starts a nested
 * {@link #consumeQuery} whose output joins the tokens being collected.
 *
 * @param iter iterator over the remaining tokens
 * @param outList list that receives the consumed tokens
 */
private static void consumeSelect(ListIterator<Token> iter, List<Token> outList) {
  boolean isQuery = false;
  int start = outList.size();
  List<Token> subQueryList = new ArrayList<>();
loop:
  while (iter.hasNext()) {
    Token token = iter.next();
    subQueryList.add(token);
    switch (token.type) {
    case LPAREN:
      consumeQuery(iter, subQueryList);
      break;
    case RPAREN:
      if (isQuery) {
        // The paren closes the query; it is re-added below after the
        // Query token rather than kept inside it.
        subQueryList.remove(subQueryList.size() - 1);
      }
      break loop;
    case SELECT:
      isQuery = true;
      break;
    case UNION:
    case INTERSECT:
    case EXCEPT:
    case MINUS:
      // The set operator belongs to the caller: un-consume it.
      subQueryList.remove(subQueryList.size() - 1);
      iter.previous();
      break loop;
    default:
    }
  }

  // Fell off end of list. Pretend we saw the required right-paren.
  if (isQuery) {
    outList.subList(start, outList.size()).clear();
    outList.add(new Query(subQueryList));
    if ((outList.size() >= 2)
        && (outList.get(outList.size() - 2).type == TokenType.LPAREN)) {
      outList.add(new Token(TokenType.RPAREN));
    }
  } else {
    // not a query - just a parenthesized expr
    outList.addAll(subQueryList);
  }
}
//~ Inner Classes ----------------------------------------------------------
/**
 * Tokenizer. Splits a SQL string into {@link Token}s: punctuation, keywords,
 * quoted identifiers, string literals, comments and free-form identifiers.
 */
public static class Tokenizer {
  /** Upper-case keyword spelling mapped to its token type. */
  private static final Map<String, TokenType> TOKEN_TYPES = new HashMap<>();

  static {
    // Register only the genuine SQL keywords. Registering every TokenType
    // name would turn identifiers that happen to be spelled "query",
    // "comment", "comma", "lparen", "id", ... into structural tokens.
    for (TokenType type
        : EnumSet.of(TokenType.SELECT, TokenType.FROM, TokenType.JOIN,
            TokenType.ON, TokenType.USING, TokenType.WHERE, TokenType.GROUP,
            TokenType.HAVING, TokenType.ORDER, TokenType.BY, TokenType.UNION,
            TokenType.INTERSECT, TokenType.EXCEPT, TokenType.MINUS)) {
      TOKEN_TYPES.put(type.name(), type);
    }
  }

  final String sql;
  private final String hintToken;
  /** First character of the configured identifier quoting string. */
  private final char openQuote;
  /** Index of the next character to read. */
  private int pos;
  /** Start index of the most recent quoted identifier or string literal. */
  int start = 0;

  @Deprecated // to be removed before 2.0
  public Tokenizer(String sql, String hintToken) {
    this(sql, hintToken, Quoting.DOUBLE_QUOTE);
  }

  /**
   * Creates a Tokenizer.
   *
   * @param sql text to tokenize
   * @param hintToken hint token that may occur in the text
   * @param quoting identifier quoting style; its first character opens a
   *     quoted identifier
   */
  public Tokenizer(String sql, String hintToken, Quoting quoting) {
    this.sql = sql;
    this.hintToken = hintToken;
    this.openQuote = quoting.string.charAt(0);
    this.pos = 0;
  }

  /**
   * Parses an identifier quoted with the configured quote character,
   * starting at the current position. A doubled closing quote is treated as
   * an escaped quote inside the identifier. An unterminated identifier
   * extends to the end of the text.
   */
  private Token parseQuotedIdentifier() {
    start = pos;
    ++pos;
    // Brackets are the only quoting style whose closing character differs.
    char closeQuote = openQuote == '[' ? ']' : openQuote;
    while (pos < sql.length()) {
      char c = sql.charAt(pos);
      ++pos;
      if (c == closeQuote) {
        if (pos < sql.length() && sql.charAt(pos) == closeQuote) {
          // Double close means escaped closing quote is a part of identifier
          ++pos;
          continue;
        }
        break;
      }
    }
    String match = sql.substring(start, pos);
    // The hint token placed right after the opening quote: report the hint
    // itself rather than a quoted identifier.
    if (match.startsWith(openQuote + " " + hintToken + " ")) {
      return new Token(TokenType.ID, hintToken);
    }
    return new Token(TokenType.DQID, match);
  }

  /**
   * Returns the next token, or null when the end of the text is reached.
   * Whitespace is skipped; comments are returned as {@code COMMENT} tokens.
   */
  public @Nullable Token nextToken() {
    while (pos < sql.length()) {
      char c = sql.charAt(pos);
      final String match;
      switch (c) {
      case ',':
        ++pos;
        return new Token(TokenType.COMMA);

      case '(':
        ++pos;
        return new Token(TokenType.LPAREN);

      case ')':
        ++pos;
        return new Token(TokenType.RPAREN);

      case '\'':
        // Parse single-quoted string literal.
        start = pos;
        ++pos;
        while (pos < sql.length()) {
          c = sql.charAt(pos);
          ++pos;
          if (c == '\'') {
            if (pos < sql.length()) {
              char c1 = sql.charAt(pos);
              if (c1 == '\'') {
                // encountered consecutive
                // single-quotes; still in literal
                ++pos;
              } else {
                break;
              }
            } else {
              break;
            }
          }
        }
        match = sql.substring(start, pos);
        return new Token(TokenType.SQID, match);

      case '/':
        // possible start of '/*' or '//' comment
        if (pos + 1 < sql.length()) {
          char c1 = sql.charAt(pos + 1);
          if (c1 == '*') {
            int end = sql.indexOf("*/", pos + 2);
            if (end < 0) {
              // unterminated block comment runs to the end of the text
              end = sql.length();
            } else {
              end += "*/".length();
            }
            pos = end;
            return new Token(TokenType.COMMENT);
          }
          if (c1 == '/') {
            pos = indexOfLineEnd(sql, pos + 2);
            return new Token(TokenType.COMMENT);
          }
        }
        // fall through

      case '-':
        // possible start of '--' comment
        if (c == '-' && pos + 1 < sql.length() && sql.charAt(pos + 1) == '-') {
          pos = indexOfLineEnd(sql, pos + 2);
          return new Token(TokenType.COMMENT);
        }
        // fall through

      default:
        if (c == openQuote) {
          return parseQuotedIdentifier();
        }
        if (Character.isWhitespace(c)) {
          ++pos;
          break;
        } else {
          // Probably a letter or digit. Start an identifier.
          // Other characters, e.g. *, ! are also included
          // in identifiers.
          // (Named so that it does not shadow the 'start' field.)
          int idStart = pos;
          ++pos;
        loop:
          while (pos < sql.length()) {
            c = sql.charAt(pos);
            switch (c) {
            case '(':
            case ')':
            case '/':
            case ',':
            case '\'':
              break loop;
            case '-':
              // possible start of '--' comment
              if (c == '-' && pos + 1 < sql.length() && sql.charAt(pos + 1) == '-') {
                break loop;
              }
              // fall through
            default:
              if (Character.isWhitespace(c) || c == openQuote) {
                break loop;
              } else {
                ++pos;
              }
            }
          }
          String name = sql.substring(idStart, pos);
          TokenType tokenType = TOKEN_TYPES.get(name.toUpperCase(Locale.ROOT));
          if (tokenType == null) {
            return new IdToken(TokenType.ID, name);
          } else {
            // keyword, e.g. SELECT, FROM, WHERE
            return new Token(tokenType);
          }
        }
      }
    }
    return null;
  }

  /**
   * Returns the index of the first carriage return or line feed at or after
   * position {@code i}, or the length of {@code sql} if there is none.
   */
  private static int indexOfLineEnd(String sql, int i) {
    int length = sql.length();
    while (i < length) {
      char c = sql.charAt(i);
      switch (c) {
      case '\r':
      case '\n':
        return i;
      default:
        ++i;
      }
    }
    return i;
  }
}
/** Token: a {@link TokenType} together with optional text. */
public static class Token {
  private final TokenType type;
  /** Text of the token, or null if the token is fully described by its type. */
  private final @Nullable String s;

  /** Creates a token that carries no text. */
  Token(TokenType tokenType) {
    this(tokenType, null);
  }

  /** Creates a token with the given type and (possibly null) text. */
  Token(TokenType type, @Nullable String s) {
    this.type = type;
    this.s = s;
  }

  /** Returns {@code TYPE} for a token without text, else {@code TYPE(text)}. */
  @Override public String toString() {
    if (s != null) {
      return type + "(" + s + ")";
    }
    return type.toString();
  }

  /**
   * Appends this token to {@code buf}: its text if it has any, otherwise the
   * SQL text of its type.
   */
  public void unparse(StringBuilder buf) {
    buf.append(s != null ? s : type.sql());
  }
}
/** Token representing an identifier, either plain or double-quoted. */
public static class IdToken extends Token {
  /**
   * Creates an IdToken.
   *
   * @param type token type; asserted to be {@code ID} or {@code DQID}
   * @param s text of the identifier
   */
  public IdToken(TokenType type, String s) {
    super(type, s);
    assert type == TokenType.ID || type == TokenType.DQID;
  }
}
/**
 * Token representing a query: the tokens of one SELECT block reduced to a
 * single token. Nested queries appear in the token list as further
 * {@code Query} tokens.
 */
static class Query extends Token {
  /** Tokens of this query; edited in place by the purge methods. */
  private final List<Token> tokenList;

  /**
   * Creates a Query from a copy of the given tokens.
   *
   * @param tokenList tokens that make up the query
   */
  Query(List<Token> tokenList) {
    super(TokenType.QUERY);
    this.tokenList = new ArrayList<>(tokenList);
  }

  /** Appends the tokens of this query to {@code buf}, separated by spaces. */
  @Override public void unparse(StringBuilder buf) {
    int k = -1;
    for (Token token : tokenList) {
      if (++k > 0) {
        buf.append(' ');
      }
      token.unparse(buf);
    }
  }

  /**
   * Reduces {@code list} to the simplified form of the first query in it
   * that contains the hint token. The list is left unchanged if no query
   * contains the hint token.
   *
   * @param list tokens to simplify, modified in place
   * @param hintToken hint token to look for
   */
  public static void simplifyList(List<Token> list, String hintToken) {
    // Simplify
    //   SELECT * FROM t UNION ALL SELECT * FROM u WHERE ^
    // to
    //   SELECT * FROM u WHERE ^
    for (Token token : list) {
      if (token instanceof Query) {
        Query query = (Query) token;
        if (query.contains(hintToken)) {
          list.clear();
          list.add(query.simplify(hintToken));
          // The list was modified; leave the loop without touching the
          // iterator again.
          break;
        }
      }
    }
  }

  /**
   * Simplifies this query in place, removing clauses that are not needed to
   * resolve the hint token, and then simplifies its sub-queries.
   *
   * @param hintToken hint token, or null if the hint lies outside this
   *     query
   * @return this query
   */
  public Query simplify(@Nullable String hintToken) {
    TokenType clause = TokenType.SELECT;
    TokenType foundInClause = null;
    Query foundInSubQuery = null;
    TokenType majorClause = null;
    if (hintToken != null) {
      // Walk the tokens, tracking the current clause, to find out which
      // clause the hint token occurs in.
      for (Token token : tokenList) {
        switch (token.type) {
        case ID:
          if (hintToken.equals(token.s)) {
            foundInClause = clause;
          }
          break;
        case SELECT:
        case FROM:
        case WHERE:
        case GROUP:
        case HAVING:
        case ORDER:
          majorClause = token.type;
          // fall through
        case JOIN:
        case USING:
        case ON:
          clause = token.type;
          break;
        case COMMA:
          if (majorClause == TokenType.FROM) {
            // comma inside from clause
            clause = TokenType.FROM;
          }
          break;
        case QUERY:
          if (((Query) token).contains(hintToken)) {
            foundInClause = clause;
            foundInSubQuery = (Query) token;
          }
          break;
        default:
          break;
        }
      }
    } else {
      foundInClause = TokenType.QUERY;
    }
    if (foundInClause != null) {
      switch (foundInClause) {
      case SELECT:
        purgeSelectListExcept(hintToken);
        purgeWhere();
        purgeOrderBy();
        break;
      case FROM:
      case JOIN:
        // See comments against ON/USING.
        purgeSelect();
        purgeFromExcept(hintToken);
        purgeWhere();
        purgeGroupByHaving();
        purgeOrderBy();
        break;
      case ON:
      case USING:
        // We need to treat expressions in FROM and JOIN
        // differently than ON and USING. Consider
        //     FROM t1 JOIN t2 ON b1 JOIN t3 USING (c2)
        // t1, t2, t3 occur in the FROM clause, and do not depend
        // on anything; b1 and c2 occur in ON scope, and depend
        // on the FROM clause
        purgeSelect();
        purgeWhere();
        purgeOrderBy();
        break;
      case WHERE:
        purgeSelect();
        purgeGroupByHaving();
        purgeOrderBy();
        break;
      case GROUP:
      case HAVING:
        purgeSelect();
        purgeWhere();
        purgeOrderBy();
        break;
      case ORDER:
        purgeWhere();
        break;
      case QUERY:
        // Indicates that the expression to be simplified is
        // outside this sub-query. Preserve a simplified SELECT
        // clause.
        // It might be a good idea to purge select expressions, however
        // purgeSelectExprsKeepAliases might end up with <<0 as "*">> which is not valid.
        // purgeSelectExprsKeepAliases();
        purgeWhere();
        purgeGroupByHaving();
        break;
      default:
        break;
      }
    }

    // Simplify sub-queries.
    for (Token token : tokenList) {
      switch (token.type) {
      case QUERY: {
        Query query = (Query) token;
        // Only the sub-query that holds the hint is simplified around it.
        query.simplify(
            (query == foundInSubQuery) ? hintToken : null);
        break;
      }
      default:
        break;
      }
    }
    return this;
  }

  /**
   * Reduces the SELECT list to the single item that contains the hint
   * token. Does nothing if no identifier in the SELECT clause equals the
   * hint token.
   */
  private void purgeSelectListExcept(@Nullable String hintToken) {
    List<Token> sublist = findClause(TokenType.SELECT);
    int parenCount = 0;
    int itemStart = 1;
    int itemEnd = -1;
    boolean found = false;
    for (int i = 0; i < sublist.size(); i++) {
      Token token = sublist.get(i);
      switch (token.type) {
      case LPAREN:
        ++parenCount;
        break;
      case RPAREN:
        --parenCount;
        break;
      case COMMA:
        // Only commas outside parentheses separate select items.
        if (parenCount == 0) {
          if (!found) {
            itemStart = i + 1;
          } else if (itemEnd < 0) {
            // The first comma after the hint ends the item. Later commas
            // must not move the end, otherwise the items that follow the
            // hinted one would be kept as well.
            itemEnd = i;
          }
        }
        break;
      case ID:
        if (requireNonNull(hintToken, "hintToken").equals(token.s)) {
          found = true;
        }
        break;
      default:
        break;
      }
    }
    if (found) {
      if (itemEnd < 0) {
        itemEnd = sublist.size();
      }

      // Copy the item before clearing: subList is a view of sublist.
      List<Token> selectItem =
          new ArrayList<>(
              sublist.subList(itemStart, itemEnd));
      Token select = sublist.get(0);
      sublist.clear();
      sublist.add(select);
      sublist.addAll(selectItem);
    }
  }

  /** Replaces the SELECT list with {@code *}. */
  private void purgeSelect() {
    List<Token> sublist = findClause(TokenType.SELECT);
    Token select = sublist.get(0);
    sublist.clear();
    sublist.add(select);
    sublist.add(new Token(TokenType.ID, "*"));
  }

  /**
   * Replaces each select item that ends in an identifier with
   * {@code 0 AS <identifier>}, keeping other items unchanged. Currently
   * unused; see the comment in {@link #simplify}.
   */
  @SuppressWarnings("unused")
  private void purgeSelectExprsKeepAliases() {
    List<Token> sublist = findClause(TokenType.SELECT);
    List<Token> newSelectClause = new ArrayList<>();
    newSelectClause.add(sublist.get(0));
    int itemStart = 1;
    for (int i = 1; i < sublist.size(); i++) {
      Token token = sublist.get(i);
      // Act on the last token of each item: the one before a comma or at
      // the end of the clause.
      if (((i + 1) == sublist.size())
          || (sublist.get(i + 1).type == TokenType.COMMA)) {
        if (token.type == TokenType.ID) {
          // This might produce <<0 as "a.x+b.y">>, or <<0 as "*">>, or even <<0 as "a.*">>
          newSelectClause.add(new Token(TokenType.ID, "0"));
          newSelectClause.add(new Token(TokenType.ID, "AS"));
          newSelectClause.add(token);
        } else {
          newSelectClause.addAll(
              sublist.subList(itemStart, i + 1));
        }
        itemStart = i + 2;
        if ((i + 1) < sublist.size()) {
          newSelectClause.add(new Token(TokenType.COMMA));
        }
      }
    }
    sublist.clear();
    sublist.addAll(newSelectClause);
  }

  /**
   * Reduces the FROM clause to the single item that contains the hint
   * token, unless the clause contains a JOIN. Appends {@code TRUE} if the
   * clause ends with a dangling ON.
   */
  private void purgeFromExcept(@Nullable String hintToken) {
    List<Token> sublist = findClause(TokenType.FROM);
    int itemStart = -1;
    int itemEnd = -1;
    int joinCount = 0;
    boolean found = false;
    for (int i = 0; i < sublist.size(); i++) {
      Token token = sublist.get(i);
      switch (token.type) {
      case QUERY:
        if (((Query) token).contains(requireNonNull(hintToken, "hintToken"))) {
          found = true;
        }
        break;
      case JOIN:
        ++joinCount;
        // fall through
      case FROM:
      case ON:
      case COMMA:
        if (!found) {
          itemStart = i + 1;
        } else if (itemEnd < 0) {
          // The first separator after the hint ends the item. Scanning
          // continues so that every JOIN is still counted, but later
          // separators must not move the end.
          itemEnd = i;
        }
        break;
      case ID:
        if (requireNonNull(hintToken, "hintToken").equals(token.s)) {
          found = true;
        }
        break;
      default:
        break;
      }
    }

    // Don't simplify a FROM clause containing a JOIN: we lose help
    // with syntax.
    if (found && (joinCount == 0)) {
      if (itemEnd == -1) {
        itemEnd = sublist.size();
      }
      // Copy the item before clearing: subList is a view of sublist.
      List<Token> fromItem =
          new ArrayList<>(
              sublist.subList(itemStart, itemEnd));
      Token from = sublist.get(0);
      sublist.clear();
      sublist.add(from);
      sublist.addAll(fromItem);
    }
    if (sublist.get(sublist.size() - 1).type == TokenType.ON) {
      sublist.add(new Token(TokenType.ID, "TRUE"));
    }
  }

  /** Removes the WHERE clause, if there is one. */
  private void purgeWhere() {
    List<Token> sublist = findClauseOrNull(TokenType.WHERE);
    if (sublist != null) {
      sublist.clear();
    }
  }

  /** Removes the GROUP BY and HAVING clauses, if present. */
  private void purgeGroupByHaving() {
    List<Token> sublist = findClauseOrNull(TokenType.GROUP);
    if (sublist != null) {
      sublist.clear();
    }
    sublist = findClauseOrNull(TokenType.HAVING);
    if (sublist != null) {
      sublist.clear();
    }
  }

  /** Removes the ORDER BY clause, if there is one. */
  private void purgeOrderBy() {
    List<Token> sublist = findClauseOrNull(TokenType.ORDER);
    if (sublist != null) {
      sublist.clear();
    }
  }

  /**
   * Returns the tokens of the given clause as a view of the token list.
   *
   * @throws NullPointerException if the clause does not exist
   */
  private List<Token> findClause(TokenType keyword) {
    return requireNonNull(
        findClauseOrNull(keyword),
        () -> "clause does not exist: " + keyword);
  }

  /**
   * Returns the tokens of the given clause, from its keyword up to (but not
   * including) the next major clause keyword or the end of the query, or
   * null if the keyword does not occur. The result is a view: changes to it
   * change the token list of this query.
   */
  private @Nullable List<Token> findClauseOrNull(TokenType keyword) {
    int start = -1;
    int k = -1;
    EnumSet<TokenType> clauses =
        EnumSet.of(
            TokenType.SELECT,
            TokenType.FROM,
            TokenType.WHERE,
            TokenType.GROUP,
            TokenType.HAVING,
            TokenType.ORDER);
    for (Token token : tokenList) {
      ++k;
      if (token.type == keyword) {
        start = k;
      } else if ((start >= 0)
          && clauses.contains(token.type)) {
        return tokenList.subList(start, k);
      }
    }
    if (start >= 0) {
      // The clause runs to the end of the query.
      return tokenList.subList(start, k + 1);
    }
    return null;
  }

  /**
   * Returns whether this query, or any query nested inside it, has an
   * identifier token whose text equals the hint token.
   */
  private boolean contains(String hintToken) {
    for (Token token : tokenList) {
      switch (token.type) {
      case ID:
        if (hintToken.equals(token.s)) {
          return true;
        }
        break;
      case QUERY:
        if (((Query) token).contains(hintToken)) {
          return true;
        }
        break;
      default:
        break;
      }
    }
    return false;
  }
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy