// All Downloads are FREE. Search and download functionality uses the official Maven repository.

// studio.raptor.sqlparser.dialect.odps.parser.OdpsLexer Maven / Gradle / Ivy

/*
 * Copyright 1999-2017 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package studio.raptor.sqlparser.dialect.odps.parser;

import static studio.raptor.sqlparser.parser.CharTypes.isFirstIdentifierChar;
import static studio.raptor.sqlparser.parser.CharTypes.isIdentifierChar;
import static studio.raptor.sqlparser.parser.LayoutCharacters.EOI;

import java.util.HashMap;
import java.util.Map;
import studio.raptor.sqlparser.parser.Keywords;
import studio.raptor.sqlparser.parser.Lexer;
import studio.raptor.sqlparser.parser.NotAllowCommentException;
import studio.raptor.sqlparser.parser.ParserException;
import studio.raptor.sqlparser.parser.Token;

public class OdpsLexer extends Lexer {

  /**
   * Keyword table installed by every constructor of this lexer: a copy of the entries of
   * {@code Keywords.DEFAULT_KEYWORDS} plus the additional entries registered in the static
   * initializer below.
   */
  public final static Keywords DEFAULT_ODPS_KEYWORDS;

  static {
    // Parameterized map (instead of a raw Map/HashMap) so the compiler checks that only
    // String -> Token pairs are registered.
    Map<String, Token> map = new HashMap<String, Token>();

    // Start from the shared defaults; the puts below add to (or replace) those entries.
    map.putAll(Keywords.DEFAULT_KEYWORDS.getKeywords());

    map.put("SHOW", Token.SHOW);
    map.put("PARTITION", Token.PARTITION);
    map.put("PARTITIONED", Token.PARTITIONED);
    map.put("OVERWRITE", Token.OVERWRITE);
    map.put("OVER", Token.OVER);
    map.put("LIMIT", Token.LIMIT);
    map.put("IF", Token.IF);
    map.put("DISTRIBUTE", Token.DISTRIBUTE);
    map.put("TRUE", Token.TRUE);
    map.put("FALSE", Token.FALSE);

    DEFAULT_ODPS_KEYWORDS = new Keywords(map);
  }

  /**
   * Creates a lexer over {@code input} whose keyword table is {@link #DEFAULT_ODPS_KEYWORDS}.
   *
   * @param input the text to scan; passed unchanged to the {@code Lexer} constructor
   */
  public OdpsLexer(String input) {
    super(input);
    // "keywods" (sic) is the inherited field consulted by scanIdentifier().
    keywods = DEFAULT_ODPS_KEYWORDS;
  }

  /**
   * Creates a lexer over {@code input} with explicit comment settings and
   * {@link #DEFAULT_ODPS_KEYWORDS} as its keyword table.
   *
   * @param input the text to scan
   * @param skipComment forwarded to the {@code Lexer} constructor and also stored in the
   *     {@code skipComment} field
   * @param keepComments stored in the {@code keepComments} field; when set, scanned comments
   *     are handed to {@code addComment}
   */
  public OdpsLexer(String input, boolean skipComment, boolean keepComments) {
    super(input, skipComment);
    // "keywods" (sic) is the inherited field consulted by scanIdentifier().
    keywods = DEFAULT_ODPS_KEYWORDS;
    this.keepComments = keepComments;
    this.skipComment = skipComment;
  }

  /**
   * Creates a lexer over {@code input} that uses the given comment handler and
   * {@link #DEFAULT_ODPS_KEYWORDS} as its keyword table.
   *
   * @param input the text to scan
   * @param commentHandler forwarded to the {@code Lexer} constructor
   */
  public OdpsLexer(String input, CommentHandler commentHandler) {
    super(input, commentHandler);
    // "keywods" (sic) is the inherited field consulted by scanIdentifier().
    keywods = DEFAULT_ODPS_KEYWORDS;
  }

  /**
   * Scans a comment or hint that starts at the current character and records the result in
   * {@code token} and {@code stringVal}.
   *
   * <p>The current character must be {@code '/'} or {@code '-'}. The character after it
   * selects the form:
   * <ul>
   *   <li>{@code '*'}: a block comment ({@code Token.MULTI_LINE_COMMENT}), or a hint
   *       ({@code Token.HINT}) when the first non-blank character after the opener is
   *       {@code '+'};</li>
   *   <li>{@code '/'} or {@code '-'}: a line comment ({@code Token.LINE_COMMENT}) running to
   *       the end of the line or of the input;</li>
   *   <li>anything else: after the allow-comment check nothing more is scanned and
   *       {@code token} is left as it was.</li>
   * </ul>
   *
   * <p>Comments (not hints) increment {@code commentCount} and, when {@code keepComments} is
   * set, are passed to {@code addComment}. If a {@code commentHandler} is installed it is
   * called with the value {@code token} had on entry and the scanned text.
   *
   * @throws IllegalStateException if the current character is neither {@code '/'} nor
   *     {@code '-'}
   * @throws NotAllowCommentException if comments are not allowed ({@code isAllowComment()}
   *     is false): for line comments before anything is scanned; for block comments after
   *     scanning, unless the comment handler returned {@code true}; never for hints
   * @throws ParserException if a block comment or hint is not terminated before the end of
   *     the input
   */
  public void scanComment() {
    if (ch != '/' && ch != '-') {
      throw new IllegalStateException();
    }

    // Kept for the comment handler, because 'token' is overwritten below.
    Token lastToken = this.token;

    mark = pos;
    bufPos = 0;
    scanChar();

    // Block comment or hint: /* ... */ or /*+ ... */
    if (ch == '*') {
      scanChar();
      bufPos++;

      // Blanks between the opener and a possible '+' are skipped.
      while (ch == ' ') {
        scanChar();
        bufPos++;
      }

      boolean isHint = false;
      // Offset, relative to mark, from which the hint text is extracted further down.
      int startHintSp = bufPos + 1;
      if (ch == '+') {
        isHint = true;
        scanChar();
        bufPos++;
      }

      for (; ; ) {
        if (ch == '*' && charAt(pos + 1) == '/') {
          bufPos += 2;
          scanChar();
          scanChar();
          break;
        }

        // Without this check an unterminated comment would never leave the loop, because
        // reading past the end keeps yielding EOI. isEOF() is consulted as well so that an
        // EOI character occurring literally inside the comment is not mistaken for the end.
        if (ch == EOI && isEOF()) {
          throw new ParserException("unterminated /* comment");
        }

        scanChar();
        bufPos++;
      }

      if (isHint) {
        stringVal = subString(mark + startHintSp, (bufPos - startHintSp) - 1);
        token = Token.HINT;
      } else {
        // bufPos trails the consumed length by one (the first scanChar() above was not
        // counted), hence the + 1 to cover the whole comment including its delimiters.
        stringVal = subString(mark, bufPos + 1);
        token = Token.MULTI_LINE_COMMENT;
        commentCount++;
        if (keepComments) {
          addComment(stringVal);
        }
      }

      // A handler that returns true suppresses the allow-comment check below.
      if (commentHandler != null && commentHandler.handle(lastToken, stringVal)) {
        return;
      }

      if (token != Token.HINT && !isAllowComment()) {
        throw new NotAllowCommentException();
      }

      return;
    }

    if (!isAllowComment()) {
      throw new NotAllowCommentException();
    }

    // Line comment: second character is '/' or '-'.
    if (ch == '/' || ch == '-') {
      scanChar();
      bufPos++;

      for (; ; ) {
        if (ch == '\r') {
          if (charAt(pos + 1) == '\n') {
            line++;
            bufPos += 2;
            // NOTE(review): only the '\r' is consumed here, so the current character is the
            // '\n' afterwards - confirm the caller does not count that line break again.
            scanChar();
            break;
          }
          bufPos++;
          break;
        } else if (ch == EOI) {
          break;
        }

        if (ch == '\n') {
          line++;
          scanChar();
          bufPos++;
          break;
        }

        scanChar();
        bufPos++;
      }

      // When the scan stopped on EOI, bufPos is one short of the consumed length.
      stringVal = subString(mark, ch != EOI ? bufPos : bufPos + 1);
      token = Token.LINE_COMMENT;
      commentCount++;
      if (keepComments) {
        addComment(stringVal);
      }
      endOfComment = isEOF();

      // The handler's result is irrelevant here: nothing else happens after the call.
      if (commentHandler != null) {
        commentHandler.handle(lastToken, stringVal);
      }
    }
  }

  /**
   * Scans an identifier or keyword that starts at the current character.
   *
   * <p>Two forms are handled:
   * <ul>
   *   <li>a back-quoted name: everything up to and including the closing back-quote is
   *       consumed, {@code stringVal} receives the text including both quotes and
   *       {@code token} becomes {@code Token.IDENTIFIER};</li>
   *   <li>a plain name: identifier characters are consumed, optionally followed by
   *       {@code '@'} and a tail of identifier characters, {@code '-'} and {@code '.'};
   *       the text is then looked up in the keyword table and {@code token} becomes the
   *       matching keyword token, or {@code Token.IDENTIFIER} when there is none.</li>
   * </ul>
   *
   * @throws ParserException if a back-quoted name is not closed before the end of the
   *     input, or if the current character cannot start an identifier
   */
  public void scanIdentifier() {
    final char head = ch;

    if (head == '`') {
      mark = pos;
      bufPos = 1; // the opening back-quote

      char quoted = charAt(++pos);
      while (quoted != '`') {
        if (quoted == EOI) {
          throw new ParserException("illegal identifier");
        }
        bufPos++;
        quoted = charAt(++pos);
      }

      // Count the closing back-quote and step past it.
      bufPos++;
      this.ch = charAt(++pos);

      stringVal = subString(mark, bufPos);
      token = Token.IDENTIFIER;
      return;
    }

    if (!isFirstIdentifierChar(head)) {
      throw new ParserException("illegal identifier");
    }

    mark = pos;
    bufPos = 1; // the first character, already validated above

    char c = charAt(++pos);
    while (isIdentifierChar(c)) {
      bufPos++;
      c = charAt(++pos);
    }

    // User identifier containing '@', e.g. an e-mail-like name such as user@example.com.
    if (c == '@') {
      bufPos++;
      c = charAt(++pos);
      while (c == '-' || c == '.' || isIdentifierChar(c)) {
        bufPos++;
        c = charAt(++pos);
      }
    }

    // Publish the first character that is not part of the name as the current character.
    this.ch = charAt(pos);

    stringVal = addSymbol();
    final Token keyword = keywods.getKeyword(stringVal);
    token = keyword != null ? keyword : Token.IDENTIFIER;
  }


  /**
   * Scans a numeric literal, or an identifier that begins with digits, starting at the
   * current character.
   *
   * <p>Result by input shape:
   * <ul>
   *   <li>digits followed by {@code ".."}: scanning stops before the first dot and
   *       {@code token} is {@code Token.LITERAL_INT};</li>
   *   <li>a fraction part and/or an exponent ({@code e}/{@code E} with optional sign):
   *       {@code Token.LITERAL_FLOAT};</li>
   *   <li>digits directly followed by a character that can start an identifier: the whole
   *       run is consumed as {@code Token.IDENTIFIER} and stored in {@code stringVal}. The
   *       exception is the single digit {@code 0} followed by {@code b}, where scanning
   *       stops before the {@code b};</li>
   *   <li>otherwise: {@code Token.LITERAL_INT}.</li>
   * </ul>
   *
   * <p>Only the identifier case assigns {@code stringVal}; for literals just {@code mark},
   * {@code bufPos} and {@code token} are updated.
   *
   * <p>NOTE(review): unlike scanComment() and scanIdentifier(), this method does not reset
   * {@code bufPos}; presumably the caller zeroes it before dispatching here - confirm.
   */
  public void scanNumber() {
    mark = pos;

    // Optional leading minus sign.
    if (ch == '-') {
      bufPos++;
      ch = charAt(++pos);
    }

    // Integer part.
    while (ch >= '0' && ch <= '9') {
      bufPos++;
      ch = charAt(++pos);
    }

    boolean floating = false;

    if (ch == '.') {
      // Two dots in a row: the number ends here and the dots are left unread.
      if (charAt(pos + 1) == '.') {
        token = Token.LITERAL_INT;
        return;
      }

      floating = true;
      bufPos++;
      ch = charAt(++pos);

      // Fraction digits.
      while (ch >= '0' && ch <= '9') {
        bufPos++;
        ch = charAt(++pos);
      }
    }

    if (ch == 'e' || ch == 'E') {
      floating = true;
      bufPos++;
      ch = charAt(++pos);

      // Optional exponent sign.
      if (ch == '+' || ch == '-') {
        bufPos++;
        ch = charAt(++pos);
      }

      // Exponent digits.
      while (ch >= '0' && ch <= '9') {
        bufPos++;
        ch = charAt(++pos);
      }
    }

    if (floating) {
      token = Token.LITERAL_FLOAT;
      return;
    }

    // Digits running straight into an identifier character form an identifier, unless the
    // text so far is exactly "0" and the next character is 'b'.
    if (isFirstIdentifierChar(ch) && (ch != 'b' || bufPos != 1 || charAt(pos - 1) != '0')) {
      bufPos++;
      ch = charAt(++pos);
      while (isIdentifierChar(ch)) {
        bufPos++;
        ch = charAt(++pos);
      }

      stringVal = addSymbol();
      token = Token.IDENTIFIER;
      return;
    }

    token = Token.LITERAL_INT;
  }

  /**
   * Scans a variable starting at the current character, except that a {@code ':'} is
   * consumed on its own as {@code Token.COLON}; every other input is handled by the
   * inherited {@code scanVariable()}.
   */
  public void scanVariable() {
    if (ch != ':') {
      super.scanVariable();
      return;
    }

    // Consume the colon and report it as its own token.
    ch = charAt(++pos);
    token = Token.COLON;
  }

  /**
   * Delegates to the inherited {@code scanString2()}; declared {@code final}, so subclasses
   * cannot change this choice.
   */
  protected final void scanString() {
    scanString2();
  }

  /**
   * Delegates to the inherited {@code scanAlias2()}; declared {@code final}, so subclasses
   * cannot change this choice.
   */
  protected final void scanAlias() {
    scanAlias2();
  }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy