// studio.raptor.sqlparser.dialect.odps.parser.OdpsLexer (Maven / Gradle / Ivy)
/*
* Copyright 1999-2017 Alibaba Group Holding Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package studio.raptor.sqlparser.dialect.odps.parser;
import static studio.raptor.sqlparser.parser.CharTypes.isFirstIdentifierChar;
import static studio.raptor.sqlparser.parser.CharTypes.isIdentifierChar;
import static studio.raptor.sqlparser.parser.LayoutCharacters.EOI;
import java.util.HashMap;
import java.util.Map;
import studio.raptor.sqlparser.parser.Keywords;
import studio.raptor.sqlparser.parser.Lexer;
import studio.raptor.sqlparser.parser.NotAllowCommentException;
import studio.raptor.sqlparser.parser.ParserException;
import studio.raptor.sqlparser.parser.Token;
/**
 * Lexer for the ODPS SQL dialect.
 *
 * <p>Builds on the generic {@link Lexer} by installing an ODPS keyword table and by
 * overriding the scanning of comments/hints, identifiers (including back-quoted ones),
 * numbers and the {@code ':'} variable prefix.
 */
public class OdpsLexer extends Lexer {

  /**
   * Keyword table used by every {@code OdpsLexer}: all entries of
   * {@link Keywords#DEFAULT_KEYWORDS} plus the ODPS-specific words registered in the
   * static initializer below.
   */
  public static final Keywords DEFAULT_ODPS_KEYWORDS;

  static {
    Map<String, Token> map = new HashMap<String, Token>();

    map.putAll(Keywords.DEFAULT_KEYWORDS.getKeywords());

    map.put("SHOW", Token.SHOW);
    map.put("PARTITION", Token.PARTITION);
    map.put("PARTITIONED", Token.PARTITIONED);
    map.put("OVERWRITE", Token.OVERWRITE);
    map.put("OVER", Token.OVER);
    map.put("LIMIT", Token.LIMIT);
    map.put("IF", Token.IF);
    map.put("DISTRIBUTE", Token.DISTRIBUTE);
    map.put("TRUE", Token.TRUE);
    map.put("FALSE", Token.FALSE);

    DEFAULT_ODPS_KEYWORDS = new Keywords(map);
  }

  /**
   * Creates a lexer over {@code input} using the ODPS keyword table.
   *
   * @param input the SQL text to tokenize
   */
  public OdpsLexer(String input) {
    super(input);
    super.keywods = DEFAULT_ODPS_KEYWORDS;
  }

  /**
   * Creates a lexer over {@code input} with explicit comment handling flags.
   *
   * @param input the SQL text to tokenize
   * @param skipComment value passed to the superclass constructor and stored in
   *     {@code skipComment}
   * @param keepComments when {@code true}, scanned comments are recorded via
   *     {@code addComment}
   */
  public OdpsLexer(String input, boolean skipComment, boolean keepComments) {
    super(input, skipComment);
    this.skipComment = skipComment;
    this.keepComments = keepComments;
    super.keywods = DEFAULT_ODPS_KEYWORDS;
  }

  /**
   * Creates a lexer over {@code input} that reports comments to {@code commentHandler}.
   *
   * @param input the SQL text to tokenize
   * @param commentHandler handler passed to the superclass constructor
   */
  public OdpsLexer(String input, CommentHandler commentHandler) {
    super(input, commentHandler);
    super.keywods = DEFAULT_ODPS_KEYWORDS;
  }

  /**
   * Scans a comment or hint starting at the current character.
   *
   * <p>Recognized forms are the block comment {@code /* ... *}{@code /}, the hint
   * {@code /*+ ... *}{@code /} (spaces may precede the {@code '+'}) and line comments
   * whose second character is {@code '/'} or {@code '-'}.
   *
   * @throws IllegalStateException if the current character is neither {@code '/'} nor
   *     {@code '-'}
   * @throws NotAllowCommentException if a comment (not a hint) is found while
   *     {@code isAllowComment()} is {@code false}
   * @throws ParserException if a block comment or hint is not closed before end of input
   */
  public void scanComment() {
    if (ch != '/' && ch != '-') {
      throw new IllegalStateException();
    }

    Token lastToken = this.token;

    mark = pos;
    bufPos = 0;
    scanChar();

    if (ch == '*') {
      scanOdpsBlockComment(lastToken);
      return;
    }

    if (!isAllowComment()) {
      throw new NotAllowCommentException();
    }

    if (ch == '/' || ch == '-') {
      scanOdpsLineComment(lastToken);
    }
  }

  /**
   * Scans the remainder of a block comment or hint; the current character is the
   * {@code '*'} that follows the opening {@code '/'}.
   */
  private void scanOdpsBlockComment(Token lastToken) {
    scanChar();
    bufPos++;

    while (ch == ' ') {
      scanChar();
      bufPos++;
    }

    boolean isHint = false;
    // Offset (relative to mark) used below to slice the hint text out of the input.
    int startHintSp = bufPos + 1;
    if (ch == '+') {
      isHint = true;
      scanChar();
      bufPos++;
    }

    for (; ; ) {
      // Without this check an unclosed "/* ..." would scan past the end of the input
      // forever; the line-comment scanner already stops on EOI in the same way.
      if (ch == EOI) {
        throw new ParserException("unterminated comment");
      }

      if (ch == '*' && charAt(pos + 1) == '/') {
        bufPos += 2;
        scanChar();
        scanChar();
        break;
      }

      scanChar();
      bufPos++;
    }

    if (isHint) {
      stringVal = subString(mark + startHintSp, (bufPos - startHintSp) - 1);
      token = Token.HINT;
    } else {
      stringVal = subString(mark, bufPos + 1);
      token = Token.MULTI_LINE_COMMENT;
      commentCount++;
      if (keepComments) {
        addComment(stringVal);
      }
    }

    // A handler returning true takes over: the "comments not allowed" check is skipped.
    if (commentHandler != null && commentHandler.handle(lastToken, stringVal)) {
      return;
    }

    if (token != Token.HINT && !isAllowComment()) {
      throw new NotAllowCommentException();
    }
  }

  /**
   * Scans the remainder of a line comment; the current character is the second
   * character of the comment opener.
   */
  private void scanOdpsLineComment(Token lastToken) {
    scanChar();
    bufPos++;

    for (; ; ) {
      if (ch == '\r') {
        if (charAt(pos + 1) == '\n') {
          line++;
          bufPos += 2;
          scanChar();
          break;
        }
        bufPos++;
        break;
      } else if (ch == EOI) {
        break;
      }

      if (ch == '\n') {
        line++;
        scanChar();
        bufPos++;
        break;
      }

      scanChar();
      bufPos++;
    }

    stringVal = subString(mark, ch != EOI ? bufPos : bufPos + 1);
    token = Token.LINE_COMMENT;
    commentCount++;
    if (keepComments) {
      addComment(stringVal);
    }
    endOfComment = isEOF();

    // The handler is notified for its side effects; its result does not change the flow.
    if (commentHandler != null) {
      commentHandler.handle(lastToken, stringVal);
    }
  }

  /**
   * Scans an identifier or keyword starting at the current character.
   *
   * <p>A back-quoted identifier always yields {@link Token#IDENTIFIER}. An unquoted
   * word may continue after an {@code '@'} with identifier characters, {@code '-'} and
   * {@code '.'}; the resulting text is looked up in the keyword table and yields the
   * keyword token when found, otherwise {@link Token#IDENTIFIER}.
   *
   * @throws ParserException if a back-quoted identifier is not closed before end of
   *     input, or if the current character cannot start an identifier
   */
  public void scanIdentifier() {
    final char first = ch;

    if (first == '`') {
      mark = pos;
      bufPos = 1;

      for (; ; ) {
        char c = charAt(++pos);

        if (c == '`') {
          // Count the closing quote and step past it.
          bufPos++;
          ++pos;
          break;
        } else if (c == EOI) {
          throw new ParserException("illegal identifier");
        }

        bufPos++;
      }

      this.ch = charAt(pos);

      stringVal = subString(mark, bufPos);
      token = Token.IDENTIFIER;
      return;
    }

    if (!isFirstIdentifierChar(first)) {
      throw new ParserException("illegal identifier");
    }

    mark = pos;
    bufPos = 1;

    char c;
    for (; ; ) {
      c = charAt(++pos);
      if (!isIdentifierChar(c)) {
        break;
      }
      bufPos++;
    }

    this.ch = charAt(pos);

    if (c == '@') { // user-style identifier that looks like an e-mail, e.g. user@example.com
      bufPos++;
      for (; ; ) {
        c = charAt(++pos);
        if (c != '-' && c != '.' && !isIdentifierChar(c)) {
          break;
        }
        bufPos++;
      }
    }

    this.ch = charAt(pos);

    stringVal = addSymbol();
    Token tok = keywods.getKeyword(stringVal);
    token = (tok != null) ? tok : Token.IDENTIFIER;
  }

  /**
   * Scans a numeric literal starting at the current character.
   *
   * <p>Sets {@link Token#LITERAL_FLOAT} when a fraction or exponent is present and
   * {@link Token#LITERAL_INT} otherwise. Digits immediately followed by an identifier
   * start character are scanned as an {@link Token#IDENTIFIER} instead, except for a
   * lone {@code 0} followed by {@code b}. When the digits are followed by {@code ".."},
   * scanning stops before the dots with {@link Token#LITERAL_INT}.
   */
  public void scanNumber() {
    mark = pos;

    if (ch == '-') {
      bufPos++;
      ch = charAt(++pos);
    }

    scanOdpsDigits();

    boolean isDouble = false;

    if (ch == '.') {
      if (charAt(pos + 1) == '.') {
        // "1..": leave both dots for the caller, the number ends here.
        token = Token.LITERAL_INT;
        return;
      }
      bufPos++;
      ch = charAt(++pos);
      isDouble = true;

      scanOdpsDigits();
    }

    if (ch == 'e' || ch == 'E') {
      bufPos++;
      ch = charAt(++pos);

      if (ch == '+' || ch == '-') {
        bufPos++;
        ch = charAt(++pos);
      }

      scanOdpsDigits();

      isDouble = true;
    }

    if (isDouble) {
      token = Token.LITERAL_FLOAT;
      return;
    }

    boolean zeroFollowedByB = ch == 'b' && bufPos == 1 && charAt(pos - 1) == '0';
    if (isFirstIdentifierChar(ch) && !zeroFollowedByB) {
      bufPos++;
      for (; ; ) {
        ch = charAt(++pos);
        if (!isIdentifierChar(ch)) {
          break;
        }
        bufPos++;
      }

      stringVal = addSymbol();
      token = Token.IDENTIFIER;
    } else {
      token = Token.LITERAL_INT;
    }
  }

  /** Consumes a run of ASCII digits, advancing {@code pos} and counting them in {@code bufPos}. */
  private void scanOdpsDigits() {
    while (ch >= '0' && ch <= '9') {
      bufPos++;
      ch = charAt(++pos);
    }
  }

  /**
   * Scans a variable reference. A {@code ':'} is emitted as {@link Token#COLON} on its
   * own; any other starting character is delegated to the superclass.
   */
  public void scanVariable() {
    if (ch == ':') {
      token = Token.COLON;
      ch = charAt(++pos);
      return;
    }

    super.scanVariable();
  }

  /** Scans a string literal by delegating to {@code scanString2()}. */
  protected final void scanString() {
    scanString2();
  }

  /** Scans an alias by delegating to {@code scanAlias2()}. */
  protected final void scanAlias() {
    scanAlias2();
  }
}