com.amazon.redshift.core.Parser Maven / Gradle / Ivy
Show all versions of redshift-jdbc42 Show documentation
/*
* Copyright (c) 2006, PostgreSQL Global Development Group
* See the LICENSE file in the project root for more information.
*/
package com.amazon.redshift.core;
import com.amazon.redshift.jdbc.EscapeSyntaxCallMode;
import com.amazon.redshift.jdbc.EscapedFunctions2;
import com.amazon.redshift.util.GT;
import com.amazon.redshift.util.RedshiftException;
import com.amazon.redshift.util.RedshiftState;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
* Basic query parser infrastructure.
* Note: This class should not be considered as pgjdbc public API.
*
* @author Michael Paesold ([email protected])
* @author Christopher Deckers ([email protected])
*/
public class Parser {
private static final int[] NO_BINDS = new int[0];
/**
 * Parses JDBC query into Redshift's native format. Several queries might be given if separated
 * by semicolon.
 *
 * <p>The query text is scanned once, character by character. String literals, quoted
 * identifiers, comments and dollar-quoted strings are skipped over; {@code ?} markers are
 * replaced by native bind names (and {@code ??} by a single {@code ?}); {@code $n} markers are
 * recorded as Redshift parameter markers; the first recognized keyword of a statement decides
 * its command type; and the parenthesis pair following a top-level {@code VALUES} keyword is
 * tracked for the batched insert rewrite.</p>
 *
 * <p>When neither parameter replacement nor statement splitting is requested and
 * {@code returningColumnNames} is an empty array, the query text is returned untouched as a
 * single {@link NativeQuery} of type {@link SqlCommandType#BLANK}.</p>
 *
 * @param query jdbc query to parse
 * @param standardConformingStrings whether to allow backslashes to be used as escape characters
 *        in single quote literals
 * @param withParameters whether to replace ?, ? with $1, $2, etc
 * @param splitStatements whether to split statements by semicolon
 * @param isBatchedReWriteConfigured whether re-write optimization is enabled
 * @param isMultiSqlSupport whether multiple SQL commands support is enabled
 * @param returningColumnNames for simple insert, update, delete add returning with given column names
 * @return list of native queries
 * @throws SQLException if unable to add returning clause (invalid column names), if both
 *         {@code ?} and {@code $n} parameter markers are used while parameters are being
 *         replaced, or if a top-level {@code ;} is reached after some statement text while
 *         {@code isMultiSqlSupport} is {@code false}
 */
public static List<NativeQuery> parseJdbcSql(String query, boolean standardConformingStrings,
    boolean withParameters, boolean splitStatements,
    boolean isBatchedReWriteConfigured,
    boolean isMultiSqlSupport,
    String... returningColumnNames) throws SQLException {
  // Fast path: nothing to replace, split or append, so hand the text back as-is.
  if (!withParameters && !splitStatements
      && returningColumnNames != null && returningColumnNames.length == 0) {
    return Collections.singletonList(new NativeQuery(query,
        SqlCommand.createStatementTypeInfo(SqlCommandType.BLANK)));
  }

  // Start (in aChars) of the text that has not been copied into nativeSql yet
  int fragmentStart = 0;
  // Current parenthesis nesting depth
  int inParen = 0;

  char[] aChars = query.toCharArray();

  StringBuilder nativeSql = new StringBuilder(query.length() + 10);
  List<Integer> bindPositions = null; // initialized on demand
  Set<String> redshiftBindNames = null; // initialized on demand
  List<Integer> redshiftParamMarkers = null; // initialized on demand
  List<NativeQuery> nativeQueries = null;
  boolean isCurrentReWriteCompatible = false;
  boolean isValuesFound = false;
  int valuesBraceOpenPosition = -1;
  int valuesBraceClosePosition = -1;
  boolean valuesBraceCloseFound = false;
  boolean isInsertPresent = false;
  boolean isReturningPresent = false;
  boolean isReturningPresentPrev = false;
  SqlCommandType currentCommandType = SqlCommandType.BLANK;
  SqlCommandType prevCommandType = SqlCommandType.BLANK;
  int numberOfStatements = 0;

  boolean whitespaceOnly = true;
  int keyWordCount = 0;
  int keywordStart = -1;
  int keywordEnd = -1;
  boolean jdbcParameterMarker = false;
  boolean redshiftParameterMarker = false;

  for (int i = 0; i < aChars.length; ++i) {
    char aChar = aChars[i];
    boolean isKeyWordChar = false;
    // ';' is ignored as it splits the queries
    whitespaceOnly &= aChar == ';' || Character.isWhitespace(aChar);
    keywordEnd = i; // parseSingleQuotes, parseDoubleQuotes, etc move index so we keep old value
    switch (aChar) {
      case '\'': // single-quotes
        i = Parser.parseSingleQuotes(aChars, i, standardConformingStrings);
        break;

      case '"': // double-quotes
        i = Parser.parseDoubleQuotes(aChars, i);
        break;

      case '-': // possibly -- style comment
        i = Parser.parseLineComment(aChars, i);
        break;

      case '/': // possibly /* */ style comment
        i = Parser.parseBlockComment(aChars, i);
        break;

      case '$': // possibly dollar quote start
        int savPos = i;
        i = Parser.parseDollarQuotes(aChars, i);
        // PREPARE SQL command has own way of replacing $ marker.
        // Those are not JDBC Bind values but values pass during EXECUTE SQL call.
        // Also check for whether it is part of an identifier or not.
        if (savPos == i
            && withParameters
            && currentCommandType != SqlCommandType.PREPARE
            && keywordStart == -1) {
          i = Parser.parseDollarParam(aChars, i);
          if (i != savPos) {
            if(jdbcParameterMarker) {
              // Throw an exception, if application uses $ and ? both as parameter marker.
              throw new RedshiftException(GT.tr("Redshift parameter marker and JDBC parameter marker in same SQL command is not allowed."),
                  RedshiftState.UNEXPECTED_ERROR);
            }

            redshiftParameterMarker = true;

            // Get $ and all digits
            String paramName = new String(aChars, savPos, i - savPos + 1);

            nativeSql.append(aChars, fragmentStart, (i + 1) - fragmentStart);
            fragmentStart = i + 1; // Point at after the last digit

            // We found $n parameter marker
            if (redshiftBindNames == null) {
              redshiftBindNames = new HashSet<String>();
            }

            if (bindPositions == null) {
              bindPositions = new ArrayList<Integer>();
            }

            if (redshiftParamMarkers == null) {
              redshiftParamMarkers = new ArrayList<Integer>();
            }

            // is it unique?
            if (!redshiftBindNames.contains(paramName)) {
              redshiftBindNames.add(paramName);
              int dollarSignPos = nativeSql.length() - (i - savPos) - 1;
              bindPositions.add(dollarSignPos); // Point at $
              redshiftParamMarkers.add(Integer.parseInt(paramName.substring(1)));
            }
          }
        }
        break;

      // case '(' moved below to parse "values(" properly

      case ')':
        inParen--;
        if (inParen == 0 && isValuesFound && !valuesBraceCloseFound) {
          // If original statement is multi-values like VALUES (...), (...), ... then
          // search for the latest closing paren
          valuesBraceClosePosition = nativeSql.length() + i - fragmentStart;
        }
        break;

      case '?':
        nativeSql.append(aChars, fragmentStart, i - fragmentStart);
        if (i + 1 < aChars.length && aChars[i + 1] == '?') /* replace ?? with ? */ {
          nativeSql.append('?');
          i++; // make sure the coming ? is not treated as a bind
        } else {
          if (!withParameters) {
            nativeSql.append('?');
          } else {
            if(redshiftParameterMarker) {
              // Throw an exception, if application uses $ and ? both as parameter marker.
              throw new RedshiftException(GT.tr("Redshift parameter marker and JDBC parameter marker in same SQL command is not allowed."),
                  RedshiftState.UNEXPECTED_ERROR);
            }

            jdbcParameterMarker = true;

            if (bindPositions == null) {
              bindPositions = new ArrayList<Integer>();
            }
            bindPositions.add(nativeSql.length());
            int bindIndex = bindPositions.size();
            nativeSql.append(NativeQuery.bindName(bindIndex));
          }
        }
        fragmentStart = i + 1;
        break;

      case ';':
        // A semicolon only terminates a statement outside of parentheses
        if (inParen == 0) {
          if (!whitespaceOnly) {
            numberOfStatements++;
            nativeSql.append(aChars, fragmentStart, i - fragmentStart);
            nativeSql.append(';');
            whitespaceOnly = true;
          }
          fragmentStart = i + 1;
          if (nativeSql.length() > 0) {
            if (addReturning(nativeSql, currentCommandType, returningColumnNames, isReturningPresent)) {
              isReturningPresent = true;
            }

            if(!isMultiSqlSupport) {
              // Throw an exception, if application doesn't need multiple SQL commands support.
              throw new RedshiftException(GT.tr("Multiple SQL commands support is disabled."),
                  RedshiftState.UNEXPECTED_ERROR);
            }

            if (splitStatements) {
              if (nativeQueries == null) {
                nativeQueries = new ArrayList<NativeQuery>();
              }

              // Forget the VALUES parenthesis positions unless the statement is a
              // rewrite-compatible insert whose binds all precede the closing parenthesis
              if (!isValuesFound || !isCurrentReWriteCompatible || valuesBraceClosePosition == -1
                  || (bindPositions != null
                  && valuesBraceClosePosition < bindPositions.get(bindPositions.size() - 1))) {
                valuesBraceOpenPosition = -1;
                valuesBraceClosePosition = -1;
              }

              nativeQueries.add(new NativeQuery(nativeSql.toString(),
                  toIntArray(bindPositions), false,
                  SqlCommand.createStatementTypeInfo(
                      currentCommandType, isBatchedReWriteConfigured, valuesBraceOpenPosition,
                      valuesBraceClosePosition,
                      isReturningPresent, nativeQueries.size()),
                  (redshiftParamMarkers != null) ? toIntArray(redshiftParamMarkers) : null));
            }
          }
          // Remember the finished statement's type in case only whitespace follows
          prevCommandType = currentCommandType;
          isReturningPresentPrev = isReturningPresent;
          currentCommandType = SqlCommandType.BLANK;
          isReturningPresent = false;
          if (splitStatements) {
            // Prepare for next query
            if (bindPositions != null) {
              bindPositions.clear();
            }
            nativeSql.setLength(0);
            isValuesFound = false;
            isCurrentReWriteCompatible = false;
            valuesBraceOpenPosition = -1;
            valuesBraceClosePosition = -1;
            valuesBraceCloseFound = false;
          }
        }
        break;

      default:
        if (keywordStart >= 0) {
          // When we are inside a keyword, we need to detect keyword end boundary
          // Note that isKeyWordChar is initialized to false before the switch, so
          // all other characters would result in isKeyWordChar=false
          isKeyWordChar = isIdentifierContChar(aChar);
          break;
        }
        // Not in keyword, so just detect next keyword start
        isKeyWordChar = isIdentifierStartChar(aChar);
        if (isKeyWordChar) {
          keywordStart = i;
          if (valuesBraceOpenPosition != -1 && inParen == 0) {
            // When the statement already has multi-values, stop looking for more of them
            // Since values(?,?),(?,?),... should not contain keywords in the middle
            valuesBraceCloseFound = true;
          }
        }
        break;
    }
    // A keyword ends either at the last character of the query or at the first
    // character that cannot continue an identifier
    if (keywordStart >= 0 && (i == aChars.length - 1 || !isKeyWordChar)) {
      int wordLength = (isKeyWordChar ? i + 1 : keywordEnd) - keywordStart;
      if (currentCommandType == SqlCommandType.BLANK) {
        if (wordLength == 6 && parseUpdateKeyword(aChars, keywordStart)) {
          currentCommandType = SqlCommandType.UPDATE;
        } else if (wordLength == 6 && parseDeleteKeyword(aChars, keywordStart)) {
          currentCommandType = SqlCommandType.DELETE;
        } else if (wordLength == 4 && parseMoveKeyword(aChars, keywordStart)) {
          currentCommandType = SqlCommandType.MOVE;
        } else if (wordLength == 6 && parseSelectKeyword(aChars, keywordStart)) {
          currentCommandType = SqlCommandType.SELECT;
        } else if (wordLength == 4 && parseWithKeyword(aChars, keywordStart)) {
          currentCommandType = SqlCommandType.WITH;
        } else if (wordLength == 6 && parseInsertKeyword(aChars, keywordStart)) {
          if (!isInsertPresent && (nativeQueries == null || nativeQueries.isEmpty())) {
            // Only allow rewrite for insert command starting with the insert keyword.
            // Else, too many risks of wrong interpretation.
            isCurrentReWriteCompatible = keyWordCount == 0;
            isInsertPresent = true;
            currentCommandType = SqlCommandType.INSERT;
          } else {
            isCurrentReWriteCompatible = false;
          }
        }
        else if (wordLength == 7 && parsePrepareKeyword(aChars, keywordStart)) {
          currentCommandType = SqlCommandType.PREPARE;
        }
      } else if (currentCommandType == SqlCommandType.WITH
          && inParen == 0) {
        SqlCommandType command = parseWithCommandType(aChars, i, keywordStart, wordLength);
        if (command != null) {
          currentCommandType = command;
        }
      }
      if (inParen != 0 || aChar == ')') {
        // RETURNING and VALUES cannot be present in braces
      } else if (wordLength == 9 && parseReturningKeyword(aChars, keywordStart)) {
        isReturningPresent = true;
      } else if (wordLength == 6 && parseValuesKeyword(aChars, keywordStart)) {
        isValuesFound = true;
      }
      keywordStart = -1;
      keyWordCount++;
    }
    if (aChar == '(') {
      inParen++;
      if (inParen == 1 && isValuesFound && valuesBraceOpenPosition == -1) {
        valuesBraceOpenPosition = nativeSql.length() + i - fragmentStart;
      }
    }
  } // Loop for each char

  // Same eligibility check as in the ';' branch, applied to the last statement
  if (!isValuesFound || !isCurrentReWriteCompatible || valuesBraceClosePosition == -1
      || (bindPositions != null
      && valuesBraceClosePosition < bindPositions.get(bindPositions.size() - 1))) {
    valuesBraceOpenPosition = -1;
    valuesBraceClosePosition = -1;
  }

  if (fragmentStart < aChars.length && !whitespaceOnly) {
    // Text after the last ';' belongs to the final statement
    nativeSql.append(aChars, fragmentStart, aChars.length - fragmentStart);
  } else {
    // Nothing but whitespace follows the last ';'
    if (numberOfStatements > 1) {
      isReturningPresent = false;
      currentCommandType = SqlCommandType.BLANK;
    } else if (numberOfStatements == 1) {
      isReturningPresent = isReturningPresentPrev;
      currentCommandType = prevCommandType;
    }
  }

  if (nativeSql.length() == 0) {
    return nativeQueries != null ? nativeQueries : Collections.<NativeQuery>emptyList();
  }

  if (addReturning(nativeSql, currentCommandType, returningColumnNames, isReturningPresent)) {
    isReturningPresent = true;
  }

  NativeQuery lastQuery = new NativeQuery(nativeSql.toString(),
      toIntArray(bindPositions), !splitStatements,
      SqlCommand.createStatementTypeInfo(currentCommandType,
          isBatchedReWriteConfigured, valuesBraceOpenPosition, valuesBraceClosePosition,
          isReturningPresent, (nativeQueries == null ? 0 : nativeQueries.size())),
      (redshiftParamMarkers != null) ? toIntArray(redshiftParamMarkers) : null);

  if (nativeQueries == null) {
    return Collections.singletonList(lastQuery);
  }

  if (!whitespaceOnly) {
    nativeQueries.add(lastQuery);
  }
  return nativeQueries;
}
/**
 * Decides whether the keyword that was just read inside a {@code WITH ...} statement is the
 * command keyword of the main statement.
 *
 * <p>The keyword must be one of UPDATE, DELETE, INSERT or SELECT. It is rejected when it is
 * followed (after optional whitespace and comments) by a standalone {@code AS}, because in
 * that case it is the name of a common table expression, e.g.
 * {@code with select as (insert ..) select * from select}.</p>
 *
 * @param aChars query text
 * @param i position from which the text following the keyword is examined
 * @param keywordStart position of the first character of the keyword
 * @param wordLength length of the keyword
 * @return the detected command type, or {@code null} if the keyword is not a command keyword
 *         or is used as a CTE name
 */
private static SqlCommandType parseWithCommandType(char[] aChars, int i, int keywordStart,
    int wordLength) {
  // All four candidate keywords are six characters long
  if (wordLength != 6) {
    return null;
  }

  final SqlCommandType detected;
  if (parseUpdateKeyword(aChars, keywordStart)) {
    detected = SqlCommandType.UPDATE;
  } else if (parseDeleteKeyword(aChars, keywordStart)) {
    detected = SqlCommandType.DELETE;
  } else if (parseInsertKeyword(aChars, keywordStart)) {
    detected = SqlCommandType.INSERT;
  } else if (parseSelectKeyword(aChars, keywordStart)) {
    detected = SqlCommandType.SELECT;
  } else {
    return null;
  }

  // Move past whitespace and comments to reach the token following the keyword
  int pos = i;
  while (pos < aChars.length) {
    final char c = aChars[pos];
    if (c == '-') {
      pos = Parser.parseLineComment(aChars, pos);
    } else if (c == '/') {
      pos = Parser.parseBlockComment(aChars, pos);
    } else if (!Character.isWhitespace(c)) {
      break;
    }
    pos++;
  }

  // "<keyword> AS" means the keyword names a CTE rather than starting the main statement
  final boolean followedByAs = pos + 2 < aChars.length
      && parseAsKeyword(aChars, pos)
      && !isIdentifierContChar(aChars[pos + 2]);
  return followedByAs ? null : detected;
}
/**
 * Appends a {@code RETURNING} clause to the statement being built when one is requested and
 * the statement can carry it.
 *
 * <p>Nothing is appended when the statement already has a RETURNING clause, when no column
 * names were requested ({@code null} or empty array), or when the command is not INSERT,
 * UPDATE, DELETE or WITH. A single requested name starting with {@code *} yields
 * {@code RETURNING *}; otherwise each name is written through
 * {@code Utils.escapeIdentifier}, separated by commas.</p>
 *
 * @param nativeSql statement text to append to
 * @param currentCommandType type of the statement held in {@code nativeSql}
 * @param returningColumnNames requested column names, may be {@code null} or empty
 * @param isReturningPresent whether the statement already contains a RETURNING clause
 * @return {@code true} if a RETURNING clause was appended
 * @throws SQLException propagated from {@code Utils.escapeIdentifier}
 */
private static boolean addReturning(StringBuilder nativeSql, SqlCommandType currentCommandType,
    String[] returningColumnNames, boolean isReturningPresent) throws SQLException {
  // The caller's fast path explicitly tolerates a null array, so treat null the same as
  // "no columns requested" instead of failing with a NullPointerException.
  if (isReturningPresent || returningColumnNames == null || returningColumnNames.length == 0) {
    return false;
  }
  if (currentCommandType != SqlCommandType.INSERT
      && currentCommandType != SqlCommandType.UPDATE
      && currentCommandType != SqlCommandType.DELETE
      && currentCommandType != SqlCommandType.WITH) {
    return false;
  }

  nativeSql.append("\nRETURNING ");
  // startsWith matches the same names as charAt(0) == '*' but does not throw on an empty name;
  // an empty name is handed to escapeIdentifier like any other (its handling is defined there).
  if (returningColumnNames.length == 1 && returningColumnNames[0].startsWith("*")) {
    nativeSql.append('*');
    return true;
  }
  for (int col = 0; col < returningColumnNames.length; col++) {
    String columnName = returningColumnNames[col];
    if (col > 0) {
      nativeSql.append(", ");
    }
    Utils.escapeIdentifier(nativeSql, columnName);
  }
  return true;
}
/**
 * Converts {@code List<Integer>} to {@code int[]}. Empty and {@code null} lists are converted to
 * the shared empty array.
 *
 * @param list input list; its elements must not be {@code null}
 * @return output array
 */
private static int[] toIntArray(List<Integer> list) {
  if (list == null || list.isEmpty()) {
    return NO_BINDS;
  }
  int[] res = new int[list.size()];
  for (int i = 0; i < res.length; i++) {
    res[i] = list.get(i); // auto-unboxing: must not be null
  }
  return res;
}
/**
 * Find the end of the single-quoted string starting at the given offset.
 *
 * <p>Backslashes act as escape characters when {@code standardConformingStrings} is
 * {@code false}, or when the quote is preceded by an {@code e}/{@code E} that itself follows
 * an identifier-terminating character (escape string syntax {@code E''}).</p>
 *
 * <p>Note: for {@code 'single '' quote in string'}, this method currently returns the offset of
 * first {@code '} character after the initial one. The caller must call the method a second time
 * for the second part of the quoted string.</p>
 *
 * @param query query
 * @param offset start offset (position of the opening quote)
 * @param standardConformingStrings standard conforming strings
 * @return position of the end of the single-quoted string, or the query length if the string
 *         is not terminated
 */
public static int parseSingleQuotes(final char[] query, int offset,
    boolean standardConformingStrings) {
  boolean backslashEscapes = !standardConformingStrings;

  // check for escape string syntax (E'')
  if (!backslashEscapes && offset >= 2) {
    final char marker = query[offset - 1];
    if ((marker == 'e' || marker == 'E') && charTerminatesIdentifier(query[offset - 2])) {
      backslashEscapes = true;
    }
  }

  for (int pos = offset + 1; pos < query.length; pos++) {
    final char c = query[pos];
    if (c == '\'') {
      return pos;
    }
    if (backslashEscapes && c == '\\') {
      pos++; // the character after a backslash is never a terminator
    }
  }
  return query.length;
}
/**
 * Find the end of the double-quoted string starting at the given offset.
 *
 * <p>Note: for {@code "double "" quote in string"}, this method currently
 * returns the offset of first {@code "} character after the initial one. The caller must
 * call the method a second time for the second part of the quoted string.</p>
 *
 * @param query query
 * @param offset start offset (position of the opening quote)
 * @return position of the next {@code "} character after {@code offset}; for an offset inside
 *         the array, the array length when there is none
 */
public static int parseDoubleQuotes(final char[] query, int offset) {
  int pos = offset + 1;
  for (; pos < query.length; pos++) {
    if (query[pos] == '"') {
      break;
    }
  }
  return pos;
}
/**
 * Test if the dollar character ({@code $}) at the given offset starts a dollar-quoted string and
 * return the offset of the ending dollar character.
 *
 * <p>A dollar quote is only recognized when the {@code $} is not preceded by an identifier
 * character and is followed either by another {@code $} or by a tag terminated with
 * {@code $}. When no dollar quote starts here, {@code offset} is returned unchanged; when the
 * closing tag is missing, the query length is returned.</p>
 *
 * @param query query
 * @param offset start offset
 * @return offset of the ending dollar character
 */
public static int parseDollarQuotes(final char[] query, int offset) {
  if (offset + 1 >= query.length) {
    return offset;
  }
  if (offset != 0 && isIdentifierContChar(query[offset - 1])) {
    // the '$' is part of an identifier
    return offset;
  }

  // Locate the '$' that closes the opening tag
  int tagEnd = -1;
  final char afterDollar = query[offset + 1];
  if (afterDollar == '$') {
    tagEnd = offset + 1;
  } else if (isDollarQuoteStartChar(afterDollar)) {
    for (int d = offset + 2; d < query.length; d++) {
      final char c = query[d];
      if (c == '$') {
        tagEnd = d;
        break;
      }
      if (!isDollarQuoteContChar(c)) {
        break;
      }
    }
  }
  if (tagEnd <= 0) {
    return offset;
  }

  // found; note: tag includes start and end $ character
  final int tagLength = tagEnd - offset + 1;
  int pos = tagEnd + 1;
  while (pos < query.length) {
    if (query[pos] == '$' && subArraysEqual(query, offset, pos, tagLength)) {
      return pos + tagLength - 1;
    }
    pos++;
  }
  return pos;
}
/**
 * Skip all digits for backend parameter marker e.g. $1, $10 etc.
 *
 * @param query User query
 * @param offset start offset (position of the {@code $})
 * @return offset of the last digit directly following the {@code $}, or {@code offset} itself
 *         when no digit follows
 */
private static int parseDollarParam(final char[] query, int offset) {
  int lastDigit = offset;
  // Advance while the next character is still a digit
  while (lastDigit + 1 < query.length && Character.isDigit(query[lastDigit + 1])) {
    lastDigit++;
  }
  return lastDigit;
}
/**
 * Test if the {@code -} character at {@code offset} starts a {@code --} style line comment,
 * and return the position of the first {@code \r} or {@code \n} character.
 *
 * @param query query
 * @param offset start offset
 * @return position of the first {@code \r} or {@code \n} character after the comment start,
 *         the last index of the query if the comment runs to the end, or {@code offset}
 *         unchanged if no line comment starts there
 */
public static int parseLineComment(final char[] query, int offset) {
  final boolean startsComment = offset + 1 < query.length && query[offset + 1] == '-';
  if (!startsComment) {
    return offset;
  }
  int pos = offset;
  do {
    pos++;
  } while (query[pos] != '\r' && query[pos] != '\n' && pos + 1 < query.length);
  return pos;
}
/**
 * Test if the {@code /} character at {@code offset} starts a block comment, and return the
 * position of the last {@code /} character.
 *
 * <p>Block comments nest: every inner {@code /*} must be matched by its own closing marker.
 * When no block comment starts at {@code offset}, {@code offset} is returned unchanged; when
 * the comment is not closed, a position at or past the end of the query is returned.</p>
 *
 * @param query query
 * @param offset start offset
 * @return position of the last {@code /} character
 */
public static int parseBlockComment(final char[] query, int offset) {
  if (offset + 1 >= query.length || query[offset + 1] != '*') {
    return offset;
  }

  // /* /* */ */ nest, according to SQL spec
  int depth = 1;
  int pos = offset + 2;
  while (pos < query.length) {
    final char prev = query[pos - 1];
    final char cur = query[pos];
    if (prev == '*' && cur == '/') {
      depth--;
      pos++; // don't parse / in */* twice
    } else if (prev == '/' && cur == '*') {
      depth++;
      pos++; // don't parse * in /*/ twice
    }
    if (depth == 0) {
      return pos - 1; // position of the closing '/' char
    }
    pos++;
  }
  return pos;
}
/**
 * Parse string to check presence of DELETE keyword regardless of case.
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parseDeleteKeyword(final char[] query, int offset) {
  final String keyword = "delete";
  if (offset + keyword.length() > query.length) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Parse string to check presence of INSERT keyword regardless of case.
 *
 * <p>Only the six characters of the keyword itself are required to be present, which matches
 * the other six-letter keyword checks (DELETE, SELECT, UPDATE, VALUES).</p>
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parseInsertKeyword(final char[] query, int offset) {
  final String keyword = "insert";
  // The keyword is 6 characters long; requiring a 7th one rejected text ending in "insert"
  if (query.length < (offset + keyword.length())) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Parse string to check presence of MOVE keyword regardless of case.
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parseMoveKeyword(final char[] query, int offset) {
  final String keyword = "move";
  if (offset + keyword.length() > query.length) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Parse string to check presence of PREPARE keyword regardless of case.
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parsePrepareKeyword(final char[] query, int offset) {
  final String keyword = "prepare";
  if (offset + keyword.length() > query.length) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Parse string to check presence of RETURNING keyword regardless of case.
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parseReturningKeyword(final char[] query, int offset) {
  final String keyword = "returning";
  if (offset + keyword.length() > query.length) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Parse string to check presence of SELECT keyword regardless of case.
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parseSelectKeyword(final char[] query, int offset) {
  final String keyword = "select";
  if (offset + keyword.length() > query.length) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Parse string to check presence of UPDATE keyword regardless of case.
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parseUpdateKeyword(final char[] query, int offset) {
  final String keyword = "update";
  if (offset + keyword.length() > query.length) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Parse string to check presence of VALUES keyword regardless of case.
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parseValuesKeyword(final char[] query, int offset) {
  final String keyword = "values";
  if (offset + keyword.length() > query.length) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Faster version of {@link Long#parseLong(String)} when parsing a substring is required.
 *
 * <p>Substrings longer than 16 characters are delegated to {@link Long#parseLong(String)};
 * shorter ones are accumulated digit by digit through {@link #digitAt(String, int)}.</p>
 *
 * @param s string to parse
 * @param beginIndex begin index
 * @param endIndex end index
 * @return long value
 */
public static long parseLong(String s, int beginIndex, int endIndex) {
  final int length = endIndex - beginIndex;
  if (length > 16) {
    // Fallback to default implementation in case the string is long
    return Long.parseLong(s.substring(beginIndex, endIndex));
  }
  long value = digitAt(s, beginIndex);
  int pos = beginIndex + 1;
  while (pos < endIndex) {
    value = value * 10 + digitAt(s, pos);
    pos++;
  }
  return value;
}
/**
 * Parse string to check presence of WITH keyword regardless of case.
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parseWithKeyword(final char[] query, int offset) {
  final String keyword = "with";
  if (offset + keyword.length() > query.length) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Parse string to check presence of AS keyword regardless of case.
 *
 * @param query char[] of the query statement
 * @param offset position of query to start checking
 * @return boolean indicates presence of word
 */
public static boolean parseAsKeyword(final char[] query, int offset) {
  final String keyword = "as";
  if (offset + keyword.length() > query.length) {
    return false;
  }
  for (int k = 0; k < keyword.length(); k++) {
    // OR-ing with 32 folds an ASCII upper-case letter onto its lower-case form
    if ((query[offset + k] | 32) != keyword.charAt(k)) {
      return false;
    }
  }
  return true;
}
/**
 * Returns true if a given string {@code s} has digit at position {@code pos}.
 *
 * <p>Positions outside of the string (negative, or at/after its length) yield {@code false}
 * instead of an exception.</p>
 *
 * @param s input string
 * @param pos position (0-based)
 * @return true if input string s has digit at position pos
 */
public static boolean isDigitAt(String s, int pos) {
  // pos is 0-based, so index 0 is a valid position (the former "pos > 0" check excluded it)
  return pos >= 0 && pos < s.length() && Character.isDigit(s.charAt(pos));
}
/**
 * Converts digit at position {@code pos} in string {@code s} to integer or throws.
 *
 * @param s input string
 * @param pos position (0-based)
 * @return integer value of a digit at position pos
 * @throws NumberFormatException if character at position pos is not one of {@code 0}-{@code 9}
 */
public static int digitAt(String s, int pos) {
  final char ch = s.charAt(pos);
  if (ch >= '0' && ch <= '9') {
    return ch - '0';
  }
  throw new NumberFormatException("Input string: \"" + s + "\", position: " + pos);
}
/**
 * Tests for the whitespace characters recognized here: space, tab, line feed, carriage return
 * and form feed.
 *
 * @param c character
 * @return true if the character is a whitespace character as defined in the backend's parser
 */
public static boolean isSpace(char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
      return true;
    default:
      return false;
  }
}
/**
 * Tests whether a character belongs to the fixed set of operator characters.
 *
 * @param c character
 * @return true if the given character is a valid character for an operator in the backend's
 *         parser
 */
public static boolean isOperatorChar(char c) {
  /*
   * Extracted from operators defined by {self} and {op_chars}
   * in pgsql/src/backend/parser/scan.l.
   */
  final String operatorChars = ",()[].;:+-*/%^<>=~!@#&|`?";
  return operatorChars.indexOf(c) >= 0;
}
/**
 * Checks if a character is valid as the start of an identifier.
 * PostgreSQL 9.4 allows column names like _, ‿, ⁀, ⁔, ︳, ︴, ﹍, ﹎, ﹏, ＿, so
 * it is assumed that Java's notion of an identifier start character is good enough for
 * Redshift.
 *
 * @param c the character to check
 * @return true if valid as first character of an identifier; false if not
 * @see "Identifiers and Key Words (PostgreSQL SQL syntax documentation)"
 */
public static boolean isIdentifierStartChar(char c) {
  /*
   * The backend's implementation is located in
   * pgsql/src/backend/parser/scan.l:
   * ident_start    [A-Za-z\200-\377_]
   * ident_cont     [A-Za-z\200-\377_0-9\$]
   * however it is not clear how that interacts with unicode, so we just use Java's
   * implementation.
   */
  final boolean startsIdentifier = Character.isJavaIdentifierStart(c);
  return startsIdentifier;
}
/**
 * Checks if a character is valid as the second or later character of an identifier, using
 * Java's notion of an identifier part character.
 *
 * @param c the character to check
 * @return true if valid as second or later character of an identifier; false if not
 */
public static boolean isIdentifierContChar(char c) {
  final boolean continuesIdentifier = Character.isJavaIdentifierPart(c);
  return continuesIdentifier;
}
/**
 * Tests whether a character ends an identifier: a double quote, a whitespace character
 * (see {@link #isSpace(char)}) or an operator character (see {@link #isOperatorChar(char)}).
 *
 * @param c character
 * @return true if the character terminates an identifier
 */
public static boolean charTerminatesIdentifier(char c) {
  if (c == '"') {
    return true;
  }
  return isSpace(c) || isOperatorChar(c);
}
/**
 * Checks if a character is valid as the start of a dollar quoting tag.
 *
 * @param c the character to check
 * @return true if valid as first character of a dollar quoting tag; false if not
 */
public static boolean isDollarQuoteStartChar(char c) {
  /*
   * The allowed dollar quote start and continuation characters
   * must stay in sync with what the backend defines in
   * pgsql/src/backend/parser/scan.l
   *
   * The quoted string starts with $foo$ where "foo" is an optional string
   * in the form of an identifier, except that it may not contain "$",
   * and extends to the first occurrence of an identical string.
   * There is *no* processing of the quoted text.
   */
  if (c == '$') {
    return false;
  }
  return isIdentifierStartChar(c);
}
/**
 * Checks if a character is valid as the second or later character of a dollar quoting tag,
 * i.e. it is an identifier continuation character other than {@code $}.
 *
 * @param c the character to check
 * @return true if valid as second or later character of a dollar quoting tag; false if not
 */
public static boolean isDollarQuoteContChar(char c) {
  if (c == '$') {
    return false;
  }
  return isIdentifierContChar(c);
}
/**
 * Compares two sub-arrays of the given character array for equality. If the length is zero, the
 * result is true if and only if the offsets are within the bounds of the array.
 *
 * @param arr a char array
 * @param offA first sub-array start offset
 * @param offB second sub-array start offset
 * @param len length of the sub arrays to compare
 * @return true if both sub-arrays lie inside the array and are equal; false if not
 */
private static boolean subArraysEqual(final char[] arr,
    final int offA, final int offB,
    final int len) {
  final int size = arr.length;
  final boolean startsInside = offA >= 0 && offB >= 0 && offA < size && offB < size;
  final boolean endsInside = offA + len <= size && offB + len <= size;
  if (!(startsInside && endsInside)) {
    return false;
  }

  // Walk both ranges in lock-step until the first difference
  int k = 0;
  while (k < len && arr[offA + k] == arr[offB + k]) {
    k++;
  }
  return k >= len;
}
/**
* Converts JDBC-specific callable statement escapes {@code { [? =] call [(?,
* [?,..])] }} into the Redshift format which is {@code select (?, [?, ...]) as
* result} or {@code select * from (?, [?, ...]) as result} (7.3)
*
* @param jdbcSql sql text with JDBC escapes
* @param stdStrings if backslash in single quotes should be regular character or escape one
* @param serverVersion server version
* @param protocolVersion protocol version
* @param escapeSyntaxCallMode mode specifying whether JDBC escape call syntax is transformed into a CALL/SELECT statement
* @return SQL in appropriate for given server format
* @throws SQLException if given SQL is malformed
*/
public static JdbcCallParseInfo modifyJdbcCall(String jdbcSql, boolean stdStrings,
    int serverVersion, int protocolVersion, EscapeSyntaxCallMode escapeSyntaxCallMode) throws SQLException {
  // Mini-parser for JDBC function-call syntax (only)
  // TODO: Merge with escape processing (and parameter parsing?) so we only parse each query once.
  // RE: frequently used statements are cached (see {@link com.amazon.redshift.jdbc.PgConnection#borrowQuery}), so this "merge" is not that important.
  //
  // serverVersion and protocolVersion are part of the signature but are not read by this method.
  //
  // Scanner states:
  //   1 - looking for the opening '{'
  //   2 - after '{', expecting '?' or the 'c' of "call"
  //   3 - after '?', expecting '='
  //   4 - after '? =', expecting the 'c' of "call"
  //   5 - positioned on "call"
  //   6 - expecting whitespace right after "call"
  //   7 - inside the call body, looking for the closing '}'
  //   8 - after the closing '}', only whitespace is allowed
  boolean isFunction = false;
  boolean outParamBeforeFunc = false;
  final int len = jdbcSql.length();
  int state = 1;
  boolean inQuotes = false;
  boolean inEscape = false;
  int startIndex = -1;
  int endIndex = -1;
  boolean syntaxError = false;
  int i = 0;
  while (i < len && !syntaxError) {
    char ch = jdbcSql.charAt(i);
    switch (state) {
      case 1: // Looking for { at start of query
        if (ch == '{') {
          ++i;
          ++state;
        } else if (Character.isWhitespace(ch)) {
          ++i;
        } else {
          // Not function-call syntax. Skip the rest of the string.
          i = len;
        }
        break;
      case 2: // After {, looking for ? or =, skipping whitespace
        if (ch == '?') {
          // { ? = call ... } -- function with one out parameter
          outParamBeforeFunc = true;
          isFunction = true;
          ++i;
          ++state;
        } else if (ch == 'c' || ch == 'C') { // { call ... } -- proc with no out parameters
          state += 3; // Don't increase 'i'
        } else if (Character.isWhitespace(ch)) {
          ++i;
        } else {
          // "{ foo ...", doesn't make sense, complain.
          syntaxError = true;
        }
        break;
      case 3: // Looking for = after ?, skipping whitespace
        if (ch == '=') {
          ++i;
          ++state;
        } else if (Character.isWhitespace(ch)) {
          ++i;
        } else {
          syntaxError = true;
        }
        break;
      case 4: // Looking for 'call' after '? =' skipping whitespace
        if (ch == 'c' || ch == 'C') {
          ++state; // Don't increase 'i'.
        } else if (Character.isWhitespace(ch)) {
          ++i;
        } else {
          syntaxError = true;
        }
        break;
      case 5: // Should be at 'call ' either at start of string or after ?=
        // regionMatches is bounds-safe, so no explicit length check is needed.
        if (jdbcSql.regionMatches(true, i, "call", 0, 4)) {
          isFunction = true;
          i += 4;
          ++state;
        } else if (Character.isWhitespace(ch)) {
          ++i;
        } else {
          syntaxError = true;
        }
        break;
      case 6: // Looking for whitespace char after 'call'
        if (Character.isWhitespace(ch)) {
          // Ok, we found the start of the real call.
          ++i;
          ++state;
          startIndex = i;
        } else {
          syntaxError = true;
        }
        break;
      case 7: // In "body" of the query (after "{ [? =] call ")
        if (ch == '\'') {
          inQuotes = !inQuotes;
          ++i;
        } else if (inQuotes && ch == '\\' && !stdStrings) {
          // Backslash in string constant, skip next character. Clamp to the end of the
          // input so that a trailing backslash is reported as a syntax error below
          // instead of slipping past the end-of-input checks.
          i = Math.min(i + 2, len);
        } else if (!inQuotes && ch == '{') {
          inEscape = !inEscape;
          ++i;
        } else if (!inQuotes && ch == '}') {
          if (!inEscape) {
            // Should be end of string.
            endIndex = i;
            ++i;
            ++state;
          } else {
            // This brace closes a nested escape; consume it so that it is not
            // re-read as the end of the call.
            inEscape = false;
            ++i;
          }
        } else if (!inQuotes && ch == ';') {
          syntaxError = true;
        } else {
          // Everything else is ok.
          ++i;
        }
        break;
      case 8: // At trailing end of query, eating whitespace
        if (Character.isWhitespace(ch)) {
          ++i;
        } else {
          syntaxError = true;
        }
        break;
      default:
        throw new IllegalStateException("somehow got into bad state " + state);
    }
  }
  // We can only legally end in a couple of states here.
  if (i == len && !syntaxError) {
    if (state == 1) {
      // Not an escaped syntax.
      // Detect Redshift native CALL.
      // (OUT parameter registration, needed for stored procedures with INOUT arguments, will fail without this)
      i = 0;
      while (i < len && Character.isWhitespace(jdbcSql.charAt(i))) {
        i++; // skip any preceding whitespace
      }
      if (i < len - 5) { // 5 == length of "call" + 1 whitespace
        // Check for CALL followed by whitespace
        if (jdbcSql.regionMatches(true, i, "call", 0, 4)
            && Character.isWhitespace(jdbcSql.charAt(i + 4))) {
          isFunction = true;
        }
      }
      // The text is returned untouched; only the isFunction flag is derived.
      return new JdbcCallParseInfo(jdbcSql, isFunction);
    }
    if (state != 8) {
      syntaxError = true; // Ran out of query while still parsing
    }
  }
  if (syntaxError) {
    throw new RedshiftException(
        GT.tr("Malformed function or procedure escape syntax at offset {0}.", i),
        RedshiftState.STATEMENT_NOT_ALLOWED_IN_FUNCTION_CALL);
  }
  // "select" is used when explicitly requested, or when there is a leading "? =" and the
  // mode only allows CALL for statements without a return value; otherwise "call".
  final String prefix;
  if (escapeSyntaxCallMode == EscapeSyntaxCallMode.SELECT /* || serverVersion < 110000 */
      || (outParamBeforeFunc && escapeSyntaxCallMode == EscapeSyntaxCallMode.CALL_IF_NO_RETURN)) {
    prefix = "select ";
  } else {
    prefix = "call ";
  }
  String s = jdbcSql.substring(startIndex, endIndex);
  int prefixLength = prefix.length();
  StringBuilder sb = new StringBuilder(prefixLength + jdbcSql.length() + 10);
  sb.append(prefix);
  sb.append(s);
  // Index just after the first '(' of the call body, or 0 when there is none.
  int opening = s.indexOf('(') + 1;
  if (opening == 0) {
    // here the function call has no parameters declaration eg : "{ ? = call pack_getValue}"
    sb.append(outParamBeforeFunc ? "(?)" : "()");
  } else if (outParamBeforeFunc) {
    // move the single out parameter into the function call
    // so that it can be treated like all other parameters
    boolean needComma = false;
    // the following loop will check if the function call has parameters
    // eg "{ ? = call pack_getValue(?) }" vs "{ ? = call pack_getValue() }
    for (int j = opening + prefixLength; j < sb.length(); j++) {
      char c = sb.charAt(j);
      if (c == ')') {
        break;
      }
      if (!Character.isWhitespace(c)) {
        needComma = true;
        break;
      }
    }
    // insert the return parameter as the first parameter of the function call
    sb.insert(opening + prefixLength, needComma ? "?," : "?");
  }
  return new JdbcCallParseInfo(sb.toString(), isFunction);
}
/**
 * Rewrites the JDBC escape clauses found in a SQL string.
 *
 * <p>The text is scanned for {@code {d xxx}}, {@code {t xxx}}, {@code {ts xxx}},
 * {@code {oj xxx}}, {@code {fn xxx}} and {@code {escape xxx}} clauses that occur in SQL code
 * (not inside string literals). The braces and the escape keyword are removed and the
 * {@code xxx} part is kept; for the date, time, timestamp and escape-character clauses the
 * keyword {@code DATE}, {@code TIME}, {@code TIMESTAMP} or {@code ESCAPE} is emitted in front
 * of it. For example {@code where d={d '2001-10-09'}} becomes
 * {@code where d=DATE '2001-10-09'}.
 *
 * @param sql the original query text
 * @param replaceProcessingEnabled whether replace_processing_enabled is on; when it is off the
 *        input is returned unchanged
 * @param standardConformingStrings whether standard_conforming_strings is on
 * @return Redshift-compatible SQL
 * @throws SQLException if given SQL is wrong
 */
public static String replaceProcessing(String sql, boolean replaceProcessingEnabled,
    boolean standardConformingStrings) throws SQLException {
  if (!replaceProcessingEnabled) {
    return sql;
  }
  // Escape codes can only appear in SQL code, so the scanner tracks whether it is
  // currently inside a string.
  final int length = sql.length();
  final char[] input = sql.toCharArray();
  final StringBuilder out = new StringBuilder(length);
  int pos = 0;
  while (pos < length) {
    pos = parseSql(input, pos, out, false, standardConformingStrings);
    // parseSql also stops at an unmatched ')', e.g. in the invalid statement
    //   SELECT a FROM t WHERE (1 > 0)) ORDER BY a
    // Replacement must not end there, because dropping the remainder would turn a syntax
    // error into a valid query the user never wrote. Copy the character and carry on.
    if (pos < length) {
      out.append(input[pos]);
      pos++;
    }
  }
  return out.toString();
}
/**
* Parses the given sql from index i, appending it to the given buffer until we hit an unmatched
* right parenthesis or end of string. When the stopOnComma flag is set we also stop processing
* when a comma is found in sql text that isn't inside nested parentheses.
*
* <p>Single-quoted strings, double-quoted identifiers, dollar-quoted text and comments are copied
* to the output as a whole, so escape clauses are only recognised in plain SQL code. A recognised
* escape clause is replaced: its opening brace and keyword are dropped, the state's replacement
* keyword (if any) is written, and the closing brace is dropped.
*
* @param sql the original query text
* @param i starting position for replacing
* @param newsql where to write the replaced output
* @param stopOnComma should we stop after hitting the first comma in sql text?
* @param stdStrings whether standard_conforming_strings is on
* @return the position we stopped processing at: the index of the unmatched ')' or of the
*         stopping comma (neither is appended to newsql), or a value not less than sql.length
*         when the input was exhausted
* @throws SQLException if given SQL is wrong
*/
private static int parseSql(char[] sql, int i, StringBuilder newsql, boolean stopOnComma,
boolean stdStrings) throws SQLException {
SqlParseState state = SqlParseState.IN_SQLCODE;
int len = sql.length;
// Depth of '(' opened since this call started; going below zero means an unmatched ')'.
int nestedParenthesis = 0;
boolean endOfNested = false;
// The loop condition pre-increments i, so step back one to begin at the requested index.
// When endOfNested is set the increment is short-circuited and i stays on the stop character.
i--;
while (!endOfNested && ++i < len) {
char c = sql[i];
// The label lets the escape-keyword search below leave the switch from inside its for loop.
state_switch:
switch (state) {
case IN_SQLCODE:
// For each quoted/commented construct: let the helper return the position of its end,
// then copy sql[i0..i] (inclusive) unchanged.
if (c == '$') {
int i0 = i;
i = parseDollarQuotes(sql, i);
checkParsePosition(i, len, i0, sql,
"Unterminated dollar quote started at position {0} in SQL {1}. Expected terminating $$");
newsql.append(sql, i0, i - i0 + 1);
break;
} else if (c == '\'') {
// start of a string?
int i0 = i;
i = parseSingleQuotes(sql, i, stdStrings);
checkParsePosition(i, len, i0, sql,
"Unterminated string literal started at position {0} in SQL {1}. Expected ' char");
newsql.append(sql, i0, i - i0 + 1);
break;
} else if (c == '"') {
// start of a identifier?
int i0 = i;
i = parseDoubleQuotes(sql, i);
checkParsePosition(i, len, i0, sql,
"Unterminated identifier started at position {0} in SQL {1}. Expected \" char");
newsql.append(sql, i0, i - i0 + 1);
break;
} else if (c == '/') {
int i0 = i;
i = parseBlockComment(sql, i);
checkParsePosition(i, len, i0, sql,
"Unterminated block comment started at position {0} in SQL {1}. Expected */ sequence");
newsql.append(sql, i0, i - i0 + 1);
break;
} else if (c == '-') {
// Unlike the cases above, the returned position is not passed to checkParsePosition.
int i0 = i;
i = parseLineComment(sql, i);
newsql.append(sql, i0, i - i0 + 1);
break;
} else if (c == '(') { // begin nested sql
nestedParenthesis++;
} else if (c == ')') { // end of nested sql
nestedParenthesis--;
if (nestedParenthesis < 0) {
// Unmatched ')': stop here without appending it; the caller decides what to do with it.
endOfNested = true;
break;
}
} else if (stopOnComma && c == ',' && nestedParenthesis == 0) {
// Top-level comma while parsing a function argument: stop without appending it.
endOfNested = true;
break;
} else if (c == '{') { // start of an escape code?
if (i + 1 < len) {
SqlParseState[] availableStates = SqlParseState.VALUES;
// skip first state, it's not a escape code state
for (int j = 1; j < availableStates.length; j++) {
SqlParseState availableState = availableStates[j];
int matchedPosition = availableState.getMatchedPosition(sql, i + 1);
if (matchedPosition == 0) {
continue;
}
// Move to the character just before the escape's value; the loop's ++i then lands on
// the first character of the value. The '{' and the keyword are not copied.
i += matchedPosition;
if (availableState.replacementKeyword != null) {
newsql.append(availableState.replacementKeyword);
}
state = availableState;
break state_switch;
}
}
// No escape keyword matched: the '{' is ordinary text and is appended below.
}
// Reached by '(' , a matched ')', an unrecognised '{' and every other plain character.
newsql.append(c);
break;
case ESC_FUNCTION:
// extract function name
// escapeFunction writes the translated call and returns the position to resume after.
i = escapeFunction(sql, i, newsql, stdStrings);
state = SqlParseState.IN_SQLCODE; // end of escaped function (or query)
break;
case ESC_DATE:
case ESC_TIME:
case ESC_TIMESTAMP:
case ESC_OUTERJOIN:
case ESC_ESCAPECHAR:
// Copy the escape's content verbatim up to the closing '}', which itself is dropped.
if (c == '}') {
state = SqlParseState.IN_SQLCODE; // end of escape code.
} else {
newsql.append(c);
}
break;
} // end switch
}
return i;
}
/**
 * Finds the first opening parenthesis at or after the given position.
 *
 * @param sql text to scan
 * @param i index to start scanning from
 * @return index of the first {@code '('} found, or the position at which scanning ran off
 *         the end of {@code sql} when there is none
 */
private static int findOpenBrace(char[] sql, int i) {
  int cursor = i;
  for (; cursor < sql.length; cursor++) {
    if (sql[cursor] == '(') {
      break;
    }
  }
  return cursor;
}
/**
 * Verifies that a quote/comment scan finished inside the input.
 *
 * @param i position reached by the scan
 * @param len length of the SQL text
 * @param i0 position at which the scanned construct started (reported in the error)
 * @param sql the SQL text (reported in the error)
 * @param message message pattern with placeholders {0} (start position) and {1} (SQL text)
 * @throws RedshiftException with state SYNTAX_ERROR when {@code i} is not less than {@code len}
 */
private static void checkParsePosition(int i, int len, int i0, char[] sql,
    String message)
    throws RedshiftException {
  final boolean ranOffEnd = i >= len;
  if (ranOffEnd) {
    throw new RedshiftException(
        GT.tr(message, i0, new String(sql)),
        RedshiftState.SYNTAX_ERROR);
  }
}
/**
 * Translates the content of a {@code {fn ...}} escape, starting at the function name.
 *
 * <p>When the name is followed by an argument list, name and arguments are handed to
 * {@code escapeFunctionArguments}. Whatever remains up to the closing brace is copied
 * verbatim. A name without an argument list (for example {@code {fn CURRENT_DATE}}) is
 * copied verbatim as a whole.
 *
 * @param sql the original query text
 * @param i position of the first character of the function name
 * @param newsql where to write the translated output
 * @param stdStrings whether standard_conforming_strings is on
 * @return position of the closing {@code '}'} of the escape, or a value not less than
 *         {@code sql.length} when the input ended first
 * @throws SQLException if the arguments cannot be parsed or translated
 */
private static int escapeFunction(char[] sql, int i, StringBuilder newsql, boolean stdStrings) throws SQLException {
  // Locate the end of this escape so that a '(' belonging to SQL after the closing brace
  // is not mistaken for the start of this function's argument list.
  int closePos = i;
  while (closePos < sql.length && sql[closePos] != '}') {
    closePos++;
  }
  int argPos = findOpenBrace(sql, i);
  if (argPos < closePos) {
    String functionName = new String(sql, i, argPos - i).trim();
    // extract arguments
    i = argPos + 1; // we start the scan after the first (
    i = escapeFunctionArguments(newsql, functionName, sql, i, stdStrings);
    // step over the ')' that ended the argument list
    i++;
  }
  // go to the end of the function copying anything found
  // (without an argument list this copies the bare function name, first character included)
  while (i < sql.length && sql[i] != '}') {
    newsql.append(sql[i++]);
  }
  return i;
}
/**
 * Generate sql for escaped functions.
 *
 * <p>The comma-separated arguments are parsed one by one (nested JDBC escapes inside them are
 * processed as well). If {@code EscapedFunctions2} knows the function, its translator writes
 * the replacement; otherwise the call is written as {@code name(arg1,arg2,...)}.
 *
 * @param newsql destination StringBuilder
 * @param functionName the escaped function name
 * @param sql input SQL text (containing arguments of a function call with possible JDBC escapes)
 * @param i position in the input SQL, just after the opening parenthesis
 * @param stdStrings whether standard_conforming_strings is on
 * @return the position in {@code sql} at which argument parsing stopped
 * @throws SQLException if something goes wrong
 */
private static int escapeFunctionArguments(StringBuilder newsql, String functionName, char[] sql, int i,
    boolean stdStrings)
    throws SQLException {
  // Maximum arity of functions in EscapedFunctions is 3
  List<CharSequence> parsedArgs = new ArrayList<CharSequence>(3);
  while (true) {
    StringBuilder arg = new StringBuilder();
    int lastPos = i;
    i = parseSql(sql, i, arg, true, stdStrings);
    // No progress means an empty argument list such as "()"; do not record an argument.
    if (i != lastPos) {
      parsedArgs.add(arg);
    }
    if (i >= sql.length // should not happen
        || sql[i] != ',') {
      break;
    }
    // step over the comma and parse the next argument
    i++;
  }
  Method method = EscapedFunctions2.getFunction(functionName);
  if (method == null) {
    // Unknown function: pass it through with its (already processed) arguments.
    newsql.append(functionName);
    EscapedFunctions2.appendCall(newsql, "(", ",", ")", parsedArgs);
    return i;
  }
  try {
    method.invoke(null, newsql, parsedArgs);
  } catch (InvocationTargetException e) {
    Throwable targetException = e.getTargetException();
    if (targetException instanceof SQLException) {
      throw (SQLException) targetException;
    } else {
      // keep the original failure as the cause so its stack trace is not lost
      throw new RedshiftException(targetException.getMessage(), RedshiftState.SYSTEM_ERROR,
          targetException);
    }
  } catch (IllegalAccessException e) {
    throw new RedshiftException(e.getMessage(), RedshiftState.SYSTEM_ERROR, e);
  }
  return i;
}
// Sets of characters that may start the value of an escape clause; they are passed to the
// SqlParseState constants and checked in SqlParseState#getMatchedPosition.
// The '0' entry is a marker, not a literal: getMatchedPosition treats it as "any letter"
// (Character.isLetter).
// Double quote or any letter.
private static final char[] QUOTE_OR_ALPHABETIC_MARKER = {'\"', '0'};
// Double quote, any letter, or an opening parenthesis.
private static final char[] QUOTE_OR_ALPHABETIC_MARKER_OR_PARENTHESIS = {'\"', '0', '('};
// Single quote only.
private static final char[] SINGLE_QUOTE = {'\''};
// Static variables for parsing SQL when replaceProcessing is true.
/**
 * Scanner states used while rewriting JDBC escape clauses.
 *
 * <p>{@code IN_SQLCODE} is the plain-SQL state and must stay first: the escape lookup iterates
 * over {@link #VALUES} starting at index 1. Every other constant describes one escape clause:
 * the keyword that follows the opening brace, the characters that may start the clause's
 * value, and an optional keyword written to the output in place of the escape keyword.
 */
private enum SqlParseState {
  IN_SQLCODE,
  ESC_DATE("d", SINGLE_QUOTE, "DATE "),
  ESC_TIME("t", SINGLE_QUOTE, "TIME "),
  ESC_TIMESTAMP("ts", SINGLE_QUOTE, "TIMESTAMP "),
  ESC_FUNCTION("fn", QUOTE_OR_ALPHABETIC_MARKER, null),
  ESC_OUTERJOIN("oj", QUOTE_OR_ALPHABETIC_MARKER_OR_PARENTHESIS, null),
  ESC_ESCAPECHAR("escape", SINGLE_QUOTE, "ESCAPE ");

  /** Cached result of {@code values()}, which would otherwise allocate a new array per call. */
  private static final SqlParseState[] VALUES = values();

  /** Lower-case keyword expected right after the opening brace. */
  private final char[] escapeKeyword;
  /** Characters that may start the value; {@code '0'} stands for "any letter". */
  private final char[] allowedValues;
  /** Text emitted instead of the escape keyword, or {@code null} for none. */
  private final String replacementKeyword;

  SqlParseState() {
    this("", new char[0], null);
  }

  SqlParseState(String escapeKeyword, char[] allowedValues, String replacementKeyword) {
    this.escapeKeyword = escapeKeyword.toCharArray();
    this.allowedValues = allowedValues;
    this.replacementKeyword = replacementKeyword;
  }

  /**
   * Checks whether the escape keyword occurs at {@code pos}, in lower or upper case, and is
   * followed by at least one more character.
   */
  private boolean startMatches(char[] sql, int pos) {
    // check for the keyword
    for (char c : escapeKeyword) {
      if (pos >= sql.length) {
        return false;
      }
      char curr = sql[pos++];
      if (curr != c && curr != Character.toUpperCase(c)) {
        return false;
      }
    }
    return pos < sql.length;
  }

  /**
   * Tests whether this escape clause starts at {@code pos} (the character after the opening
   * brace).
   *
   * @param sql the query text
   * @param pos index of the first character of the candidate keyword
   * @return the offset from {@code pos} to the first character of the clause's value, or 0
   *         when the keyword does not match or is not followed by an allowed value start
   */
  private int getMatchedPosition(char[] sql, int pos) {
    // check for the keyword
    if (!startMatches(sql, pos)) {
      return 0;
    }
    int newPos = pos + escapeKeyword.length;
    // check for the beginning of the value (startMatches guarantees newPos is in range)
    char curr = sql[newPos];
    // ignore any in-between whitespace; tabs and line breaks count as well, not just blanks
    while (Character.isWhitespace(curr)) {
      newPos++;
      if (newPos >= sql.length) {
        return 0;
      }
      curr = sql[newPos];
    }
    for (char c : allowedValues) {
      // '0' is the "any letter" marker
      if (curr == c || (c == '0' && Character.isLetter(curr))) {
        return newPos - pos;
      }
    }
    return 0;
  }
}
}