// com.scalar.db.sql.util.Tokenizer (Maven / Gradle / Ivy)
package com.scalar.db.sql.util;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.List;
/**
 * A small hand-written tokenizer that splits SQL text into a flat list of string tokens.
 *
 * <p>Rules implemented by {@link #tokenize(String, int)}:
 *
 * <ul>
 *   <li>Space, tab, line feed and carriage return separate tokens and are never part of one
 *       (outside of string literals and quoted object names).
 *   <li>{@code ; . , ? ( ) =} are each emitted as a one-character token.
 *   <li>{@code >}, {@code >=}, {@code <} and {@code <=} are emitted as operator tokens.
 *   <li>A single-quoted string literal is one token that keeps its surrounding quotes; a doubled
 *       single quote inside the literal is collapsed to one single quote.
 *   <li>A double-quoted object name is one token that keeps its surrounding double quotes.
 *   <li>Line comments (introduced by two hyphens, two slashes, or {@code #}) are skipped up to
 *       the next line feed; C-style block comments are skipped up to their terminator.
 *   <li>A lone {@code /} or {@code -} that does not start a comment is kept as ordinary token
 *       text.
 * </ul>
 *
 * <p>This class only has static methods and cannot be instantiated.
 */
public final class Tokenizer {

  private Tokenizer() {}

  /**
   * Tokenizes the whole SQL text.
   *
   * @param sql the SQL text; leading and trailing whitespace is trimmed before scanning
   * @return the tokens in the order they appear in {@code sql}
   * @throws NullPointerException if {@code sql} is {@code null}
   */
  public static List<String> tokenize(String sql) {
    return tokenize(sql, Integer.MAX_VALUE);
  }

  /**
   * Tokenizes the SQL text, stopping early once enough tokens have been collected.
   *
   * <p>Scanning stops as soon as the number of collected tokens is no longer smaller than {@code
   * limit}. Because one input character can complete more than one token, and text that is still
   * pending when scanning stops is emitted as a final token, the returned list may contain a few
   * more than {@code limit} entries.
   *
   * @param sql the SQL text; leading and trailing whitespace is trimmed before scanning
   * @param limit the token count at which scanning stops; a value of zero or less yields an empty
   *     list
   * @return the tokens in the order they appear in {@code sql}
   * @throws NullPointerException if {@code sql} is {@code null}
   */
  public static List<String> tokenize(String sql, int limit) {
    CharBuffer buffer = CharBuffer.wrap(sql.trim());
    List<String> tokens = new ArrayList<>();
    StringBuilder builder = new StringBuilder();

    // Scanner state. At most one of the "inside ..." flags is set at any time.
    boolean stringLiteral = false; // inside '...'
    boolean singleQuoteInStringLiteral = false; // previous char inside the literal was '
    boolean objectName = false; // inside "..."
    boolean lineComment = false; // skipping until '\n'
    boolean blockComment = false; // skipping until the block comment terminator

    // One-character look-behind flags: the previous char was seen but not yet emitted.
    boolean slash = false;
    boolean hyphen = false;
    boolean gt = false;
    boolean lt = false;

    while (buffer.hasRemaining() && tokens.size() < limit) {
      char c = buffer.get();

      if (stringLiteral) {
        if (!singleQuoteInStringLiteral) {
          // Ordinary literal content. A quote is appended too; whether it closes the literal
          // or starts an escaped quote is decided when the next character is seen.
          builder.append(c);
          if (c == '\'') {
            singleQuoteInStringLiteral = true;
          }
          continue;
        }
        singleQuoteInStringLiteral = false;
        if (c == '\'') {
          // Doubled quote: the first one is already in the builder, drop the second.
          continue;
        }
        // The previous quote closed the literal; the current char is handled below as usual.
        stringLiteral = false;
        flush(tokens, builder);
      }

      if (objectName) {
        builder.append(c);
        if (c == '"') {
          objectName = false;
          flush(tokens, builder);
        }
        continue;
      }

      if (lineComment) {
        if (c == '\n') {
          lineComment = false;
        }
        continue;
      }

      if (blockComment) {
        // Only peek at the char after '*': consuming it unconditionally would swallow the
        // second '*' of a "**/" terminator and the comment would never be closed.
        if (c == '*' && buffer.hasRemaining() && buffer.get(buffer.position()) == '/') {
          buffer.get(); // consume the '/'
          blockComment = false;
        }
        continue;
      }

      if (slash) {
        slash = false;
        if (c == '*') {
          blockComment = true;
          flush(tokens, builder);
          continue;
        } else if (c == '/') {
          lineComment = true;
          flush(tokens, builder);
          continue;
        } else {
          // Not a comment after all: the pending '/' is ordinary token text.
          builder.append('/');
        }
      }

      if (hyphen) {
        hyphen = false;
        if (c == '-') {
          lineComment = true;
          flush(tokens, builder);
          continue;
        } else {
          // Not a comment after all: the pending '-' is ordinary token text.
          builder.append('-');
        }
      }

      if (gt) {
        gt = false;
        if (c == '=') {
          tokens.add(">=");
          continue;
        } else {
          tokens.add(">");
        }
      }

      if (lt) {
        lt = false;
        if (c == '=') {
          tokens.add("<=");
          continue;
        } else {
          tokens.add("<");
        }
      }

      switch (c) {
        case '\'':
          builder.append(c);
          stringLiteral = true;
          break;
        case '"':
          builder.append(c);
          objectName = true;
          break;
        case '/':
          slash = true;
          break;
        case '-':
          hyphen = true;
          break;
        case '#':
          lineComment = true;
          flush(tokens, builder);
          break;
        case '\n':
        case '\r':
        case '\t':
        case ' ':
          flush(tokens, builder);
          break;
        case ';':
        case '.':
        case ',':
        case '?':
        case '(':
        case ')':
        case '=':
          flush(tokens, builder);
          tokens.add(String.valueOf(c));
          break;
        case '>':
          gt = true;
          flush(tokens, builder);
          break;
        case '<':
          lt = true;
          flush(tokens, builder);
          break;
        default:
          builder.append(c);
          break;
      }
    }

    // Emit whatever was still pending when scanning stopped.
    if (slash) {
      builder.append('/');
    }
    if (hyphen) {
      builder.append('-');
    }
    if (gt) {
      builder.append('>');
    }
    if (lt) {
      builder.append('<');
    }
    flush(tokens, builder);
    return tokens;
  }

  /** Appends the builder's content to {@code tokens} as one token and clears it; no-op if empty. */
  private static void flush(List<String> tokens, StringBuilder builder) {
    if (builder.length() > 0) {
      tokens.add(builder.toString());
      builder.setLength(0);
    }
  }
}