All downloads are free. Search and download functionality uses the official Maven repository.

studio.raptor.sqlparser.parser.Lexer Maven / Gradle / Ivy

There is a newer version: 3.0.3
Show newest version
/*
 * Copyright 1999-2017 Alibaba Group Holding Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package studio.raptor.sqlparser.parser;

import static studio.raptor.sqlparser.parser.CharTypes.isFirstIdentifierChar;
import static studio.raptor.sqlparser.parser.CharTypes.isIdentifierChar;
import static studio.raptor.sqlparser.parser.CharTypes.isWhitespace;
import static studio.raptor.sqlparser.parser.LayoutCharacters.EOI;
import static studio.raptor.sqlparser.parser.Token.COLONCOLON;
import static studio.raptor.sqlparser.parser.Token.COLONEQ;
import static studio.raptor.sqlparser.parser.Token.COMMA;
import static studio.raptor.sqlparser.parser.Token.EOF;
import static studio.raptor.sqlparser.parser.Token.ERROR;
import static studio.raptor.sqlparser.parser.Token.LBRACE;
import static studio.raptor.sqlparser.parser.Token.LBRACKET;
import static studio.raptor.sqlparser.parser.Token.LITERAL_ALIAS;
import static studio.raptor.sqlparser.parser.Token.LITERAL_CHARS;
import static studio.raptor.sqlparser.parser.Token.LPAREN;
import static studio.raptor.sqlparser.parser.Token.RBRACE;
import static studio.raptor.sqlparser.parser.Token.RBRACKET;
import static studio.raptor.sqlparser.parser.Token.RPAREN;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import studio.raptor.sqlparser.util.JdbcConstants;
import studio.raptor.sqlparser.util.StringUtils;

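/**
 * Hand-written SQL lexer. It walks the input string one character at a time, publishes the most
 * recently scanned token through {@link #token()} and the token text through
 * {@link #stringVal()}.
 */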
public class Lexer {

  // Long.MIN_VALUE / 10: overflow threshold used by integerValue() when the literal is negative.
  private static final long MULTMIN_RADIX_TEN = Long.MIN_VALUE / 10;
  // -Long.MAX_VALUE / 10: overflow threshold used by integerValue() for non-negative literals.
  private static final long N_MULTMAX_RADIX_TEN = -Long.MAX_VALUE / 10;
  // Lookup table indexed by character code; only the entries for '0'..'9' are populated.
  private final static int[] digits = new int[(int) '9' + 1];

  static {
    // Map each decimal digit character to its numeric value.
    for (int i = '0'; i <= '9'; ++i) {
      digits[i] = i - '0';
    }
  }

  // The complete input being tokenized.
  protected final String text;
  // Index into text of the current character (ch).
  protected int pos;
  // Start offset of the token being scanned; set by the individual scanXxx methods.
  protected int mark;
  // Current character, i.e. charAt(pos); EOI once pos is past the end of text.
  protected char ch;
  // Scratch buffer used when a literal has to be rebuilt (e.g. doubled or escaped quotes).
  protected char[] buf;
  // Number of characters counted/buffered for the current token.
  protected int bufPos;
  // Most recently scanned token.
  protected Token token;
  // Keyword table consulted by scanIdentifier().
  protected Keywords keywods = Keywords.DEFAULT_KEYWORDS;
  // Text of the most recently scanned identifier, string, variable or comment.
  protected String stringVal;
  // Number of comments scanned so far.
  protected int commentCount = 0;
  // Comments collected while keepComments is true; may be null.
  protected List comments = new ArrayList(2);
  // When true, nextToken() silently skips comment tokens.
  protected boolean skipComment = true;
  // Optional callback invoked for every scanned comment; may be null.
  protected CommentHandler commentHandler;
  // Exposed through isEndOfComment(); never written inside this class.
  protected boolean endOfComment = false;
  // When true, scanned comments are stored in the comments list.
  protected boolean keepComments = false;
  // Running count of line breaks seen by nextToken() and the single-line comment scanner.
  protected int line = 0;
  // Number of line breaks skipped as whitespace by the latest nextToken() call.
  protected int lines = 0;
  // Database dialect identifier; may be null.
  protected String dbType;
  // State captured by mark() and restored by reset(); null until mark() is called.
  private SavePoint savePoint = null;
  /*
   * anti sql injection
   */
  private boolean allowComment = true;
  // Last value handed out by nextVarIndex(); -1 means none yet.
  private int varIndex = -1;

  /**
   * Creates a lexer over {@code input} without a comment handler.
   *
   * @param input the SQL text to tokenize
   */
  public Lexer(String input) {
    this(input, (CommentHandler) null);
  }

  /**
   * Creates a comment-skipping lexer over {@code input} that reports comments to the given
   * handler.
   *
   * @param input the SQL text to tokenize
   * @param commentHandler callback for scanned comments; may be {@code null}
   */
  public Lexer(String input, CommentHandler commentHandler) {
    this(input, true);

    this.commentHandler = commentHandler;
  }

  public Lexer(String input, CommentHandler commentHandler, String dbType) {
    this(input, true);
    this.commentHandler = commentHandler;
    this.dbType = dbType;

    if (JdbcConstants.SQLITE.equals(dbType)) {
      this.keywods = Keywords.SQLITE_KEYWORDS;
    }
  }

  /**
   * Creates a lexer over {@code input} and positions it on the first character.
   *
   * @param input the SQL text to tokenize
   * @param skipComment whether {@code nextToken()} should skip comment tokens
   */
  public Lexer(String input, boolean skipComment) {
    this.text = input;
    this.skipComment = skipComment;

    // Start one position before the input so the first scanChar() lands on index 0.
    this.pos = -1;
    scanChar();
  }

  /**
   * Creates a lexer over the first {@code inputLength} characters of {@code input}.
   *
   * @param input character array holding the SQL text
   * @param inputLength number of leading characters to use
   * @param skipComment whether {@code nextToken()} should skip comment tokens
   */
  public Lexer(char[] input, int inputLength, boolean skipComment) {
    this(new String(input, 0, inputLength),
        skipComment);
  }

  /**
   * @return {@code true} when scanned comments are collected
   */
  public boolean isKeepComments() {
    return this.keepComments;
  }

  /**
   * Enables or disables collection of scanned comments.
   *
   * @param keepComments {@code true} to store each scanned comment
   */
  public void setKeepComments(boolean keepComments) {
    this.keepComments = keepComments;
  }

  /**
   * @return the comment callback, or {@code null} if none is set
   */
  public CommentHandler getCommentHandler() {
    return this.commentHandler;
  }

  /**
   * Replaces the comment callback.
   *
   * @param commentHandler the new callback; may be {@code null}
   */
  public void setCommentHandler(CommentHandler commentHandler) {
    this.commentHandler = commentHandler;
  }

  /**
   * Returns the input character at {@code index}, or {@code EOI} when the index lies outside the
   * input.
   *
   * <p>Negative indexes are treated as out of range as well. They are reachable from
   * {@code unscan()} at position 0 and from the {@code charAt(pos - 2)} look-behind in
   * {@code nextToken()} when the input starts with a dot; previously they surfaced as a
   * {@code StringIndexOutOfBoundsException}.
   *
   * @param index zero-based offset into the input
   * @return the character at that offset, or {@code EOI}
   */
  public final char charAt(int index) {
    if (index < 0 || index >= text.length()) {
      return EOI;
    }

    return text.charAt(index);
  }

  /**
   * Returns the text of the current token: {@code bufPos} characters starting at {@code mark}.
   */
  public final String addSymbol() {
    final int begin = mark;
    return text.substring(begin, begin + bufPos);
  }

  /**
   * Returns {@code count} characters of the input starting at {@code offset}.
   *
   * @param offset index of the first character
   * @param count number of characters (not an end index)
   */
  public final String subString(int offset, int count) {
    final int end = offset + count;
    return text.substring(offset, end);
  }

  /**
   * Makes sure the scratch buffer exists and can hold at least {@code size} characters. A fresh
   * buffer has 32 characters when {@code size < 32}, otherwise {@code size + 32}; an existing
   * buffer that is too small is grown to exactly {@code size}.
   *
   * @param size minimum capacity required
   */
  protected void initBuff(int size) {
    if (buf != null) {
      if (buf.length < size) {
        buf = Arrays.copyOf(buf, size);
      }
      return;
    }

    buf = new char[size < 32 ? 32 : size + 32];
  }

  /**
   * Copies {@code length} input characters starting at {@code srcPos} into {@code dest}.
   *
   * @param srcPos index of the first input character to copy
   * @param dest destination array
   * @param destPos index in {@code dest} of the first copied character
   * @param length number of characters to copy
   */
  public void arraycopy(int srcPos, char[] dest, int destPos, int length) {
    final int srcEnd = srcPos + length;
    text.getChars(srcPos, srcEnd, dest, destPos);
  }

  /**
   * @return {@code true} when comments are permitted in the input
   */
  public boolean isAllowComment() {
    return this.allowComment;
  }

  /**
   * Sets whether comments are permitted; when disallowed, {@code scanComment()} throws.
   *
   * @param allowComment {@code true} to permit comments
   */
  public void setAllowComment(boolean allowComment) {
    this.allowComment = allowComment;
  }

  /**
   * Advances the variable counter and returns its new value; the first call returns 0.
   */
  public int nextVarIndex() {
    varIndex += 1;
    return varIndex;
  }

  /**
   * @return the keyword table used to classify identifiers
   */
  public Keywords getKeywods() {
    return this.keywods;
  }

  /**
   * Records the current position, buffer counter, token start, character and token so that a
   * later {@code reset()} can return to this point. Only one snapshot is kept.
   */
  public void mark() {
    final SavePoint snapshot = new SavePoint();
    snapshot.token = token;
    snapshot.ch = ch;
    snapshot.np = mark;
    snapshot.sp = bufPos;
    snapshot.bp = pos;
    this.savePoint = snapshot;
  }

  /**
   * Restores the state captured by the most recent {@code mark()}. Calling this before any
   * {@code mark()} fails with a {@code NullPointerException}.
   */
  public void reset() {
    final SavePoint saved = this.savePoint;
    pos = saved.bp;
    bufPos = saved.sp;
    mark = saved.np;
    ch = saved.ch;
    token = saved.token;
  }

  /**
   * Moves one character forward and loads it into {@code ch}.
   */
  protected final void scanChar() {
    pos++;
    ch = charAt(pos);
  }

  /**
   * Moves one character back and loads it into {@code ch}.
   */
  protected void unscan() {
    pos--;
    ch = charAt(pos);
  }

  /**
   * @return {@code true} once the position has reached or passed the end of the input
   */
  public boolean isEOF() {
    return text.length() <= pos;
  }

  /**
   * Flags a lexical error by setting the current token to {@code ERROR}. The message key and its
   * arguments are accepted but not used by this implementation.
   *
   * @param key identifier of the error message
   * @param args arguments for the error message
   */
  protected void lexError(String key, Object... args) {
    this.token = ERROR;
  }

  /**
   * Returns the token produced by the most recent scan.
   */
  public final Token token() {
    return this.token;
  }

  /**
   * @return the dialect identifier supplied at construction, or {@code null}
   */
  public final String getDbType() {
    return dbType;
  }

  /**
   * Returns the current token followed by a space and its string value, for diagnostics.
   */
  public String info() {
    final String value = this.stringVal();
    return this.token + " " + value;
  }

  /**
   * Fast path for positions where a comma or a closing parenthesis is expected. Skips at most one
   * leading space, recognizes {@code ,} (ASCII or fullwidth U+FF0C) and {@code )} directly, and
   * otherwise delegates to {@code nextToken()}.
   */
  public final void nextTokenComma() {
    if (ch == ' ') {
      scanChar();
    }

    // The second operand is the fullwidth comma; the source had it collapsed to a duplicate ','.
    if (ch == ',' || ch == '\uFF0C') {
      scanChar();
      token = COMMA;
      return;
    }

    if (ch == ')') {
      scanChar();
      token = RPAREN;
      return;
    }

    nextToken();
  }

  /**
   * Fast path for positions where an opening parenthesis is expected. Skips at most one leading
   * space, recognizes {@code (} directly and otherwise delegates to {@code nextToken()}.
   */
  public final void nextTokenLParen() {
    if (ch == ' ') {
      scanChar();
    }

    if (ch != '(') {
      nextToken();
      return;
    }

    scanChar();
    token = LPAREN;
  }

  /**
   * Fast path for positions where a value is expected. Skips at most one leading space, then
   * dispatches on the current character: a quote starts a string, a digit starts a number,
   * {@code ?} is a placeholder, and an identifier start other than {@code N} starts an identifier.
   * Everything else goes through {@code nextToken()}.
   */
  public final void nextTokenValue() {
    if (ch == ' ') {
      scanChar();
    }

    final char first = ch;
    if (first == '\'') {
      bufPos = 0;
      scanString();
    } else if (isDigit(first)) {
      bufPos = 0;
      scanNumber();
    } else if (first == '?') {
      scanChar();
      token = Token.QUES;
    } else if (first != 'N' && isFirstIdentifierChar(first)) {
      // 'N' is left to nextToken(), which also handles the N'...' national string form.
      scanIdentifier();
    } else {
      nextToken();
    }
  }

  /**
   * Scans the next token from the input and stores it in {@code token}.
   *
   * <p>Whitespace is skipped (line breaks are counted in {@code line} and {@code lines}); comment
   * tokens are skipped as well when {@code skipComment} is set. A non-empty comment list left
   * from the previous token is discarded before scanning starts.
   *
   * @throws ParserException for a backtick, which is not supported yet
   */
  public final void nextToken() {
    bufPos = 0;
    if (comments != null && comments.size() > 0) {
      comments = null;
    }

    this.lines = 0;
    int startLine = line;

    for (; ; ) {
      if (isWhitespace(ch)) {
        if (ch == '\n') {
          line++;

          lines = line - startLine;
        }

        scanChar();
        continue;
      }

      if (ch == '$' && charAt(pos + 1) == '{') {
        scanVariable();
        return;
      }

      if (isFirstIdentifierChar(ch)) {
        // N'...' is a national character string literal rather than an identifier.
        if (ch == 'N') {
          if (charAt(pos + 1) == '\'') {
            ++pos;
            ch = '\'';
            scanString();
            token = Token.LITERAL_NCHARS;
            return;
          }
        }

        scanIdentifier();
        return;
      }

      switch (ch) {
        case '0':
          if (charAt(pos + 1) == 'x') {
            // Skip the "0x" prefix before scanning the hex digits.
            scanChar();
            scanChar();
            scanHexaDecimal();
          } else {
            scanNumber();
          }
          return;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          scanNumber();
          return;
        case ',':
        case '\uFF0C': // fullwidth comma; a second plain ',' label here cannot compile
          scanChar();
          token = COMMA;
          return;
        case '(':
          scanChar();
          token = LPAREN;
          return;
        case ')':
          scanChar();
          token = RPAREN;
          return;
        case '[':
          scanLBracket();
          return;
        case ']':
          scanChar();
          token = RBRACKET;
          return;
        case '{':
          scanChar();
          token = LBRACE;
          return;
        case '}':
          scanChar();
          token = RBRACE;
          return;
        case ':':
          scanChar();
          if (ch == '=') {
            scanChar();
            token = COLONEQ;
          } else if (ch == ':') {
            scanChar();
            token = COLONCOLON;
          } else {
            // A lone ':' introduces a variable; step back so scanVariable() sees the colon.
            unscan();
            scanVariable();
          }
          return;
        case '#':
          scanSharp();
          if ((token() == Token.LINE_COMMENT || token() == Token.MULTI_LINE_COMMENT)
              && skipComment) {
            bufPos = 0;
            continue;
          }
          return;
        case '.':
          scanChar();
          // pos - 2 is the character in front of the dot. It does not exist when the dot is the
          // first character of the input, in which case the dot starts a number such as ".5".
          if (isDigit(ch) && (pos < 2 || !isFirstIdentifierChar(charAt(pos - 2)))) {
            unscan();
            scanNumber();
            return;
          } else if (ch == '.') {
            scanChar();
            if (ch == '.') {
              scanChar();
              token = Token.DOTDOTDOT;
            } else {
              token = Token.DOTDOT;
            }
          } else {
            token = Token.DOT;
          }
          return;
        case '\'':
          scanString();
          return;
        case '\"':
          scanAlias();
          return;
        case '*':
          scanChar();
          token = Token.STAR;
          return;
        case '?':
          scanChar();
          if (ch == '?' && JdbcConstants.POSTGRESQL.equals(dbType)) {
            scanChar();
            token = Token.QUESQUES;
          } else if (ch == '|' && JdbcConstants.POSTGRESQL.equals(dbType)) {
            scanChar();
            token = Token.QUESBAR;
          } else if (ch == '&' && JdbcConstants.POSTGRESQL.equals(dbType)) {
            scanChar();
            token = Token.QUESAMP;
          } else {
            token = Token.QUES;
          }
          return;
        case ';':
          scanChar();
          token = Token.SEMI;
          return;
        case '`':
          throw new ParserException("TODO"); // TODO
        case '@':
          scanVariable();
          return;
        case '-':
          if (charAt(pos + 1) == '-') {
            scanComment();
            if ((token() == Token.LINE_COMMENT || token() == Token.MULTI_LINE_COMMENT)
                && skipComment) {
              bufPos = 0;
              continue;
            }
          } else {
            scanOperator();
          }
          return;
        case '/':
          int nextChar = charAt(pos + 1);
          if (nextChar == '/' || nextChar == '*') {
            scanComment();
            if ((token() == Token.LINE_COMMENT || token() == Token.MULTI_LINE_COMMENT)
                && skipComment) {
              bufPos = 0;
              continue;
            }
          } else {
            token = Token.SLASH;
            scanChar();
          }
          return;
        default:
          if (Character.isLetter(ch)) {
            scanIdentifier();
            return;
          }

          if (isOperator(ch)) {
            scanOperator();
            return;
          }

          // QS_TODO ?
          if (isEOF()) { // JLS
            token = EOF;
          } else {
            lexError("illegal.char", String.valueOf((int) ch));
            scanChar();
          }

          return;
      }
    }

  }

  /**
   * Consumes a {@code [} and reports it as {@code LBRACKET}.
   */
  protected void scanLBracket() {
    token = LBRACKET;
    scanChar();
  }

  /**
   * Scans an operator starting at the current character, always taking the longest operator
   * that matches, and stores the result in {@code token}.
   *
   * @throws ParserException if the current character does not start a known operator
   */
  private final void scanOperator() {
    switch (ch) {
      case '+':
        scanChar();
        token = Token.PLUS;
        return;

      case '-': // -, ->, ->>
        scanChar();
        if (ch != '>') {
          token = Token.SUB;
          return;
        }
        scanChar();
        if (ch == '>') {
          scanChar();
          token = Token.SUBGTGT;
        } else {
          token = Token.SUBGT;
        }
        return;

      case '*':
        scanChar();
        token = Token.STAR;
        return;

      case '/':
        scanChar();
        token = Token.SLASH;
        return;

      case '&': // &, &&
        scanChar();
        if (ch != '&') {
          token = Token.AMP;
          return;
        }
        scanChar();
        token = Token.AMPAMP;
        return;

      case '|': // |, |/, ||, ||/
        scanChar();
        if (ch == '/') {
          scanChar();
          token = Token.BARSLASH;
          return;
        }
        if (ch != '|') {
          token = Token.BAR;
          return;
        }
        scanChar();
        if (ch == '/') {
          scanChar();
          token = Token.BARBARSLASH;
        } else {
          token = Token.BARBAR;
        }
        return;

      case '^':
        scanChar();
        token = Token.CARET;
        return;

      case '%':
        scanChar();
        token = Token.PERCENT;
        return;

      case '=': // =, ==
        scanChar();
        if (ch != '=') {
          token = Token.EQ;
          return;
        }
        scanChar();
        token = Token.EQEQ;
        return;

      case '>': // >, >=, >>
        scanChar();
        switch (ch) {
          case '=':
            scanChar();
            token = Token.GTEQ;
            return;
          case '>':
            scanChar();
            token = Token.GTGT;
            return;
          default:
            token = Token.GT;
            return;
        }

      case '<': // <, <=, <=>, <>, <<, <@
        scanChar();
        switch (ch) {
          case '=':
            scanChar();
            if (ch == '>') {
              scanChar();
              token = Token.LTEQGT;
            } else {
              token = Token.LTEQ;
            }
            return;
          case '>':
            scanChar();
            token = Token.LTGT;
            return;
          case '<':
            scanChar();
            token = Token.LTLT;
            return;
          case '@':
            scanChar();
            token = Token.LT_MONKEYS_AT;
            return;
          default:
            token = Token.LT;
            return;
        }

      case '!': // !, !=, !>, !<, !!, !~, !~*
        scanChar();
        switch (ch) {
          case '=':
            scanChar();
            token = Token.BANGEQ;
            return;
          case '>':
            scanChar();
            token = Token.BANGGT;
            return;
          case '<':
            scanChar();
            token = Token.BANGLT;
            return;
          case '!':
            scanChar();
            token = Token.BANGBANG; // postsql
            return;
          case '~':
            scanChar();
            if (ch == '*') {
              scanChar();
              token = Token.BANG_TILDE_STAR; // postsql
            } else {
              token = Token.BANG_TILDE; // postsql
            }
            return;
          default:
            token = Token.BANG;
            return;
        }

      case '?':
        scanChar();
        token = Token.QUES;
        return;

      case '~': // ~, ~*, ~=
        scanChar();
        switch (ch) {
          case '*':
            scanChar();
            token = Token.TILDE_STAR;
            return;
          case '=':
            scanChar();
            token = Token.TILDE_EQ; // postsql
            return;
          default:
            token = Token.TILDE;
            return;
        }

      default:
        throw new ParserException("TODO");
    }
  }

  /**
   * Scans a single-quoted string literal starting at the opening quote. A doubled quote
   * ({@code ''}) inside the literal stands for one quote character. On success the token is
   * {@code LITERAL_CHARS} and {@code stringVal} holds the unquoted text; if the input ends first,
   * an error is flagged through {@code lexError} and {@code stringVal} is left untouched.
   */
  protected void scanString() {
    mark = pos;
    // Becomes true once a doubled quote forces the text to be rebuilt in buf.
    boolean buffered = false;

    while (true) {
      if (isEOF()) {
        lexError("unclosed.str.lit");
        return;
      }

      scanChar();

      if (ch != '\'') {
        if (buffered) {
          putChar(ch);
        } else {
          // Plain run so far: only count, the text is sliced from the input at the end.
          bufPos++;
        }
        continue;
      }

      // A quote: either the end of the literal or the first half of a doubled quote.
      scanChar();
      if (ch != '\'') {
        token = LITERAL_CHARS;
        break;
      }

      if (!buffered) {
        // Switch to buffered mode, carrying over everything counted so far.
        initBuff(bufPos);
        arraycopy(mark + 1, buf, 0, bufPos);
        buffered = true;
      }
      putChar('\'');
    }

    stringVal = buffered ? new String(buf, 0, bufPos) : subString(mark + 1, bufPos);
  }

  /**
   * Scans a single-quoted string literal that may contain backslash escapes as well as doubled
   * quotes.
   *
   * <p>A first pass looks for the next quote; when no backslash was seen before it and the quote
   * is not followed by another quote, the literal is taken directly from the input. Otherwise a
   * second, character-by-character pass decodes escapes into {@code buf}.
   *
   * @throws ParserException if no quote follows the current position
   */
  protected final void scanString2() {
    {
      // Fast path: find the next quote, remembering whether a backslash was seen on the way.
      boolean hasSpecial = false;
      int startIndex = pos + 1;
      int endIndex = -1; // text.indexOf('\'', startIndex);
      for (int i = startIndex; i < text.length(); ++i) {
        final char ch = text.charAt(i);
        if (ch == '\\') {
          hasSpecial = true;
          continue;
        }
        if (ch == '\'') {
          endIndex = i;
          break;
        }
      }

      if (endIndex == -1) {
        throw new ParserException("unclosed str");
      }

      String stringVal = subString(startIndex, endIndex - startIndex);
      // hasSpecial = stringVal.indexOf('\\') != -1;

      if (!hasSpecial) {
        this.stringVal = stringVal;
        int pos = endIndex + 1;
        char ch = charAt(pos);
        // A quote right after the closing quote means a doubled quote: fall through to the
        // slow path instead of accepting the literal here.
        if (ch != '\'') {
          this.pos = pos;
          this.ch = ch;
          token = LITERAL_CHARS;
          return;
        }
      }
    }

    // Slow path: rescan from the opening quote, decoding escapes and doubled quotes.
    mark = pos;
    boolean hasSpecial = false;
    for (; ; ) {
      if (isEOF()) {
        lexError("unclosed.str.lit");
        return;
      }

      ch = charAt(++pos);

      if (ch == '\\') {
        scanChar();
        if (!hasSpecial) {
          // First escape: switch to buffered mode, carrying over the characters counted so far.
          initBuff(bufPos);
          arraycopy(mark + 1, buf, 0, bufPos);
          hasSpecial = true;
        }

        // Translate the escaped character; unknown escapes yield the character itself.
        switch (ch) {
          case '0':
            putChar('\0');
            break;
          case '\'':
            putChar('\'');
            break;
          case '"':
            putChar('"');
            break;
          case 'b':
            putChar('\b');
            break;
          case 'n':
            putChar('\n');
            break;
          case 'r':
            putChar('\r');
            break;
          case 't':
            putChar('\t');
            break;
          case '\\':
            putChar('\\');
            break;
          case 'Z':
            putChar((char) 0x1A); // ctrl + Z
            break;
          default:
            putChar(ch);
            break;
        }

        continue;
      }
      if (ch == '\'') {
        scanChar();
        if (ch != '\'') {
          // Closing quote.
          token = LITERAL_CHARS;
          break;
        } else {
          // Doubled quote: stands for a single quote character.
          if (!hasSpecial) {
            initBuff(bufPos);
            arraycopy(mark + 1, buf, 0, bufPos);
            hasSpecial = true;
          }
          putChar('\'');
          continue;
        }
      }

      if (!hasSpecial) {
        // No rewriting needed yet: just count the character.
        bufPos++;
        continue;
      }

      if (bufPos == buf.length) {
        putChar(ch);
      } else {
        buf[bufPos++] = ch;
      }
    }

    if (!hasSpecial) {
      stringVal = subString(mark + 1, bufPos);
    } else {
      stringVal = new String(buf, 0, bufPos);
    }
  }

  /**
   * Scans a double-quoted alias starting at the opening quote. The sequence {@code \"} inside the
   * alias contributes a plain double quote. On success the token is {@code LITERAL_ALIAS} and
   * {@code stringVal} holds the alias text; if the input ends first, an error is flagged through
   * {@code lexError}.
   */
  protected void scanAlias() {
    mark = pos;

    if (buf == null) {
      buf = new char[32];
    }

    // True once an escaped quote was seen, i.e. the buffered text differs from the raw input.
    boolean sawEscapedQuote = false;
    while (true) {
      if (isEOF()) {
        lexError("unclosed.str.lit");
        return;
      }

      scanChar();

      final boolean closingQuote = ch == '\"' && charAt(pos - 1) != '\\';
      if (closingQuote) {
        scanChar();
        token = LITERAL_ALIAS;
        break;
      }

      if (ch == '\\') {
        // Peek at the character after the backslash; keep it only if it is a quote.
        scanChar();
        if (ch == '"') {
          sawEscapedQuote = true;
        } else {
          unscan();
        }
      }

      putChar(ch);
    }

    stringVal = sawEscapedQuote
        ? new String(buf, 0, bufPos)
        : subString(mark + 1, bufPos);
  }

  /**
   * Scans a double-quoted literal that may contain backslash escapes.
   *
   * <p>A first pass looks for the next double quote; when no backslash was seen before it, the
   * text is taken directly from the input. Otherwise a second, character-by-character pass
   * decodes escapes into {@code buf}. The resulting token is {@code LITERAL_CHARS}.
   *
   * @throws ParserException if no double quote follows the current position
   */
  protected final void scanAlias2() {
    {
      // Fast path: find the next double quote, remembering whether a backslash was seen.
      boolean hasSpecial = false;
      int startIndex = pos + 1;
      int endIndex = -1; // text.indexOf('\'', startIndex);
      for (int i = startIndex; i < text.length(); ++i) {
        final char ch = text.charAt(i);
        if (ch == '\\') {
          hasSpecial = true;
          continue;
        }
        if (ch == '"') {
          endIndex = i;
          break;
        }
      }

      if (endIndex == -1) {
        throw new ParserException("unclosed str");
      }

      String stringVal = subString(startIndex, endIndex - startIndex);
      // hasSpecial = stringVal.indexOf('\\') != -1;

      if (!hasSpecial) {
        this.stringVal = stringVal;
        int pos = endIndex + 1;
        char ch = charAt(pos);
        // NOTE(review): this tests for a single quote although the literal is delimited by
        // double quotes; it looks carried over from scanString2() - confirm it is intended.
        if (ch != '\'') {
          this.pos = pos;
          this.ch = ch;
          // NOTE(review): reports LITERAL_CHARS rather than LITERAL_ALIAS - confirm with callers.
          token = LITERAL_CHARS;
          return;
        }
      }
    }

    // Slow path: rescan from the opening quote, decoding escapes.
    mark = pos;
    boolean hasSpecial = false;
    for (; ; ) {
      if (isEOF()) {
        lexError("unclosed.str.lit");
        return;
      }

      ch = charAt(++pos);

      if (ch == '\\') {
        scanChar();
        if (!hasSpecial) {
          // First escape: switch to buffered mode, carrying over the characters counted so far.
          initBuff(bufPos);
          arraycopy(mark + 1, buf, 0, bufPos);
          hasSpecial = true;
        }

        // Translate the escaped character; unknown escapes yield the character itself.
        switch (ch) {
          case '0':
            putChar('\0');
            break;
          case '\'':
            putChar('\'');
            break;
          case '"':
            putChar('"');
            break;
          case 'b':
            putChar('\b');
            break;
          case 'n':
            putChar('\n');
            break;
          case 'r':
            putChar('\r');
            break;
          case 't':
            putChar('\t');
            break;
          case '\\':
            putChar('\\');
            break;
          case 'Z':
            putChar((char) 0x1A); // ctrl + Z
            break;
          default:
            putChar(ch);
            break;
        }

        continue;
      }
      if (ch == '\"') {
        // Closing quote.
        scanChar();
        token = LITERAL_CHARS;
        break;
      }

      if (!hasSpecial) {
        // No rewriting needed yet: just count the character.
        bufPos++;
        continue;
      }

      if (bufPos == buf.length) {
        putChar(ch);
      } else {
        buf[bufPos++] = ch;
      }
    }

    if (!hasSpecial) {
      stringVal = subString(mark + 1, bufPos);
    } else {
      stringVal = new String(buf, 0, bufPos);
    }
  }

  /**
   * Handles a {@code #} character. This implementation treats it as the start of a variable and
   * delegates to {@code scanVariable()}.
   */
  public void scanSharp() {
    this.scanVariable();
  }

  /**
   * Scans a variable reference that starts with {@code @}, {@code :}, {@code #} or {@code $}.
   *
   * <p>Recognized forms are a prefix followed by identifier characters, a doubled {@code @@}
   * prefix, and a brace-delimited form such as <code>${name}</code>. For the PostgreSQL dialect a
   * prefix followed by {@code @} or {@code >} is reported as the operator token
   * {@code MONKEYS_AT_AT} or {@code MONKEYS_AT_GT} instead. In all other cases the token is
   * {@code VARIANT} and {@code stringVal} holds the variable text including its prefix.
   *
   * @throws ParserException if the current character is not a variable prefix, or if a
   *     brace-delimited variable is not closed before the end of the input
   */
  public void scanVariable() {
    if (ch != '@' && ch != ':' && ch != '#' && ch != '$') {
      throw new ParserException("illegal variable");
    }

    mark = pos;
    bufPos = 1;
    char ch;

    final char c1 = charAt(pos + 1);
    if (c1 == '@') {
      if (JdbcConstants.POSTGRESQL.equalsIgnoreCase(dbType)) {
        // NOTE(review): pos += 2 followed by ++pos ends three characters past the prefix, so the
        // character right after the operator is skipped - confirm this is intended.
        pos += 2;
        token = Token.MONKEYS_AT_AT;
        this.ch = charAt(++pos);
        return;
      }
      ch = charAt(++pos);

      bufPos++;
    } else if (c1 == '>' && JdbcConstants.POSTGRESQL.equalsIgnoreCase(dbType)) {
      pos += 2;
      token = Token.MONKEYS_AT_GT;
      this.ch = charAt(++pos);
      return;
    } else if (c1 == '{') {
      pos++;
      bufPos++;

      for (; ; ) {
        ch = charAt(++pos);

        // Stop at the closing brace, or at the end of the input. Without the second condition
        // an unterminated "${..." never leaves this loop because charAt() keeps returning EOI.
        if (ch == '}' || pos >= text.length()) {
          break;
        }

        bufPos++;
      }

      if (ch != '}') {
        throw new ParserException("syntax error");
      }
      ++pos;
      bufPos++;

      this.ch = charAt(pos);

      stringVal = addSymbol();
      token = Token.VARIANT;
      return;
    }

    // Plain form: consume identifier characters after the prefix.
    for (; ; ) {
      ch = charAt(++pos);

      if (!isIdentifierChar(ch)) {
        break;
      }

      bufPos++;
    }

    this.ch = charAt(pos);

    stringVal = addSymbol();
    token = Token.VARIANT;
  }

  /**
   * Scans a comment starting at the current character: {@code //} or {@code --} starts a
   * single-line comment, {@code /*} a multi-line comment.
   *
   * @throws NotAllowCommentException if comments are not allowed
   * @throws IllegalStateException if the current position does not start a comment
   */
  public void scanComment() {
    if (!allowComment) {
      throw new NotAllowCommentException();
    }

    final char next = charAt(pos + 1);
    final boolean singleLine = (ch == '/' && next == '/') || (ch == '-' && next == '-');

    if (singleLine) {
      scanSingleLineComment();
      return;
    }
    if (ch == '/' && next == '*') {
      scanMultiLineComment();
      return;
    }
    throw new IllegalStateException();
  }

  /**
   * Scans a multi-line comment. On return the token is {@code MULTI_LINE_COMMENT} and
   * {@code stringVal} holds the text between the opening and closing delimiters. The comment is
   * counted, optionally stored, and offered to the comment handler.
   *
   * @throws ParserException if the input ends before the closing delimiter
   * @throws NotAllowCommentException if comments are disallowed and the text is not considered safe
   */
  private void scanMultiLineComment() {
    final Token previous = this.token;

    // Step over the two-character opening delimiter.
    scanChar();
    scanChar();
    mark = pos;
    bufPos = 0;

    while (!(ch == '*' && charAt(pos + 1) == '/')) {
      // The comment was never closed.
      if (ch == EOI) {
        throw new ParserException("unterminated /* comment.");
      }
      scanChar();
      bufPos++;
    }
    // Step over the two-character closing delimiter.
    scanChar();
    scanChar();

    stringVal = subString(mark, bufPos);
    token = Token.MULTI_LINE_COMMENT;
    commentCount++;
    if (keepComments) {
      addComment(stringVal);
    }

    final boolean handled = commentHandler != null && commentHandler.handle(previous, stringVal);
    if (handled) {
      return;
    }

    if (!isAllowComment() && !isSafeComment(stringVal)) {
      throw new NotAllowCommentException();
    }
  }

  /**
   * Scans a single-line comment. On return the token is {@code LINE_COMMENT} and
   * {@code stringVal} holds the text after the two-character marker. The comment is counted,
   * optionally stored, and offered to the comment handler.
   *
   * @throws ParserException if the input ends before a line break terminates the comment
   * @throws NotAllowCommentException if comments are disallowed and the text is not considered safe
   */
  private void scanSingleLineComment() {
    final Token previous = this.token;

    // Step over the two-character comment marker.
    scanChar();
    scanChar();
    mark = pos;
    bufPos = 0;

    while (ch != '\r' && ch != '\n') {
      // The comment was not terminated by a line break.
      if (ch == EOI) {
        throw new ParserException("syntax error at end of input.");
      }
      scanChar();
      bufPos++;
    }

    if (ch == '\n' || charAt(pos + 1) == '\n') {
      // "\n", or the "\r" of a "\r\n" pair: count the line and consume one character.
      line++;
      scanChar();
    } else {
      // A lone "\r" is counted as part of the comment text and is not consumed.
      bufPos++;
    }

    stringVal = subString(mark, bufPos);
    token = Token.LINE_COMMENT;
    commentCount++;
    if (keepComments) {
      addComment(stringVal);
    }

    final boolean handled = commentHandler != null && commentHandler.handle(previous, stringVal);
    if (handled) {
      return;
    }

    if (!isAllowComment() && !isSafeComment(stringVal)) {
      throw new NotAllowCommentException();
    }
  }

  /**
   * Scans an identifier starting at the current character. The text is stored in
   * {@code stringVal}; the token is the matching keyword if the keyword table knows the text,
   * otherwise {@code IDENTIFIER}.
   *
   * @throws ParserException if the current character cannot start an identifier
   */
  public void scanIdentifier() {
    if (!isFirstIdentifierChar(ch)) {
      throw new ParserException("illegal identifier");
    }

    mark = pos;
    bufPos = 1;
    // Consume the remaining identifier characters, counting each one.
    while (isIdentifierChar(charAt(++pos))) {
      bufPos++;
    }

    this.ch = charAt(pos);

    stringVal = addSymbol();
    final Token keyword = keywods.getKeyword(stringVal);
    token = (keyword != null) ? keyword : Token.IDENTIFIER;
  }

  /**
   * Scans a numeric literal: an optional leading minus, digits, an optional fraction and an
   * optional exponent. The token is {@code LITERAL_FLOAT} when a fraction or exponent is present
   * and {@code LITERAL_INT} otherwise. Digits followed by {@code ..} end the literal before the
   * dots. The literal text is available through {@code numberString()}.
   */
  public void scanNumber() {
    mark = pos;

    if (ch == '-') {
      bufPos++;
      scanChar();
    }

    while (isDigit(ch)) {
      bufPos++;
      scanChar();
    }

    boolean floating = false;

    if (ch == '.') {
      // "1..5" style range: the integer ends here and the dots are left for the next token.
      if (charAt(pos + 1) == '.') {
        token = Token.LITERAL_INT;
        return;
      }
      floating = true;
      bufPos++;
      scanChar();

      while (isDigit(ch)) {
        bufPos++;
        scanChar();
      }
    }

    if (ch == 'e' || ch == 'E') {
      floating = true;
      bufPos++;
      scanChar();

      if (ch == '+' || ch == '-') {
        bufPos++;
        scanChar();
      }

      while (isDigit(ch)) {
        bufPos++;
        scanChar();
      }
    }

    token = floating ? Token.LITERAL_FLOAT : Token.LITERAL_INT;
  }

  /**
   * Scans the digits of a hexadecimal literal (an optional leading minus followed by hex digits)
   * and sets the token to {@code LITERAL_HEX}. The digits are available through
   * {@code hexString()}.
   */
  public void scanHexaDecimal() {
    mark = pos;

    if (ch == '-') {
      bufPos++;
      scanChar();
    }

    while (CharTypes.isHex(ch)) {
      bufPos++;
      scanChar();
    }

    token = Token.LITERAL_HEX;
  }

  /**
   * Returns the text of the current token: {@code bufPos} characters starting at {@code mark}.
   */
  public String hexString() {
    final int start = mark;
    final int length = bufPos;
    return subString(start, length);
  }

  /**
   * @param ch the character to test
   * @return {@code true} if {@code ch} is one of the ASCII digits {@code 0}-{@code 9}
   */
  public final boolean isDigit(char ch) {
    return '0' <= ch && ch <= '9';
  }

  /**
   * Appends a character to the scratch buffer, doubling the buffer's size first when it is full.
   *
   * @param ch the character to append
   */
  protected final void putChar(char ch) {
    if (bufPos == buf.length) {
      buf = Arrays.copyOf(buf, buf.length * 2);
    }
    buf[bufPos] = ch;
    bufPos++;
  }

  /**
   * Returns the lexer's current position as a 0-based offset into the input.
   */
  public final int pos() {
    return this.pos;
  }

  /**
   * Returns the text recorded by the most recent identifier, string, alias, variable or comment
   * scan.
   */
  public final String stringVal() {
    return this.stringVal;
  }

  /**
   * Hands over the comments collected so far and clears the lexer's own reference.
   *
   * @return the collected comments; may be {@code null}
   */
  public final List readAndResetComments() {
    final List pending = this.comments;
    this.comments = null;
    return pending;
  }

  /**
   * @param ch the character to test
   * @return {@code true} if {@code ch} is one of {@code ! % & * + - < = > ^ | ~ ;}
   */
  private boolean isOperator(char ch) {
    return "!%&*+-<=>^|~;".indexOf(ch) >= 0;
  }

  // QS_TODO negative number is invisible for lexer
  /**
   * Converts the current integer literal ({@code bufPos} characters starting at {@code mark})
   * into a number.
   *
   * @return an {@code Integer} when the value fits in an int, a {@code Long} when it fits in a
   *     long, otherwise a {@code BigInteger}
   * @throws NumberFormatException if the literal consists of a minus sign only
   */
  public Number integerValue() {
    long result = 0;
    boolean negative = false;
    int i = mark, max = mark + bufPos;
    long limit;
    long multmin;
    int digit;

    if (charAt(mark) == '-') {
      negative = true;
      limit = Long.MIN_VALUE;
      i++;
    } else {
      limit = -Long.MAX_VALUE;
    }
    // Smallest accumulator value that can still be multiplied by 10 without overflowing.
    multmin = negative ? MULTMIN_RADIX_TEN : N_MULTMAX_RADIX_TEN;
    if (i < max) {
      digit = digits[charAt(i++)];
      result = -digit;
    }
    while (i < max) {
      // Accumulating negatively avoids surprises near MAX_VALUE
      digit = digits[charAt(i++)];
      if (result < multmin) {
        // The next multiplication would overflow a long.
        return new BigInteger(numberString());
      }
      result *= 10;
      if (result < limit + digit) {
        // Subtracting the digit would overflow a long.
        return new BigInteger(numberString());
      }
      result -= digit;
    }

    if (negative) {
      if (i > mark + 1) {
        if (result >= Integer.MIN_VALUE) {
          return (int) result;
        }
        return result;
      } else { /* Only got "-" */
        throw new NumberFormatException(numberString());
      }
    } else {
      // The value was accumulated as a negative number; flip the sign back.
      result = -result;
      if (result <= Integer.MAX_VALUE) {
        return (int) result;
      }
      return result;
    }
  }

  /**
   * @return the current position in the input
   */
  public int bp() {
    return pos;
  }

  /**
   * @return the character the lexer is currently positioned on
   */
  public char current() {
    return ch;
  }

  /**
   * Moves the lexer to an explicitly supplied state.
   *
   * @param mark the position to restore
   * @param markChar the character to restore as the current character
   * @param token the token to restore as the current token
   */
  public void reset(int mark, char markChar, Token token) {
    this.token = token;
    this.ch = markChar;
    this.pos = mark;
  }

  /**
   * Returns the text of the current token: {@code bufPos} characters starting at {@code mark}.
   */
  public final String numberString() {
    final int start = mark;
    return subString(start, bufPos);
  }

  /**
   * Converts the current numeric literal into a {@code BigDecimal}.
   *
   * @return the literal's value
   * @throws ParserException if {@code StringUtils.isNumber} rejects the literal text
   */
  public BigDecimal decimalValue() {
    final String literal = numberString();
    if (StringUtils.isNumber(literal)) {
      return new BigDecimal(literal.toCharArray());
    }
    throw new ParserException(literal + " is not a number!");
  }

  /**
   * @return {@code true} if the comment list is currently non-null
   */
  public boolean hasComment() {
    return this.comments != null;
  }

  /**
   * @return the number of comments scanned so far
   */
  public int getCommentCount() {
    return this.commentCount;
  }

  /**
   * Jumps to the end of the input and sets the token to {@code EOF}.
   *
   * <p>The current character is set to {@code EOI} as well, so it agrees with the new position.
   * Otherwise a later {@code nextToken()} would try to lex the stale character that was current
   * before the jump.
   */
  public void skipToEOF() {
    pos = text.length();
    ch = EOI;
    this.token = Token.EOF;
  }

  /**
   * @return the value of the {@code endOfComment} flag
   */
  public boolean isEndOfComment() {
    return this.endOfComment;
  }

  /**
   * Decides whether a comment is harmless, i.e. contains none of the SQL keywords or operator
   * characters that could be used to smuggle SQL into a comment.
   *
   * <p>The comparison is case-insensitive. Lower-casing uses {@code Locale.ROOT} so the result
   * does not depend on the JVM's default locale; with a Turkish default locale an upper-case
   * {@code I} would otherwise become a dotless i and words such as {@code INSERT}, {@code UNION}
   * or {@code INTO} would go undetected.
   *
   * @param comment the comment text; {@code null} is considered safe
   * @return {@code false} if the comment contains a suspicious keyword or character
   */
  protected boolean isSafeComment(String comment) {
    if (comment == null) {
      return true;
    }

    final String lowered = comment.toLowerCase(java.util.Locale.ROOT);

    final String[] keywords = {
        "select", "delete", "insert", "update", "into", "where", "or", "and", "union"
    };
    for (String keyword : keywords) {
      if (lowered.indexOf(keyword) != -1) {
        return false;
      }
    }

    final String symbols = "'=><&|^";
    for (int i = 0; i < symbols.length(); i++) {
      if (lowered.indexOf(symbols.charAt(i)) != -1) {
        return false;
      }
    }

    return true;
  }

  /**
   * Appends a comment to the collected comments, creating the list on first use.
   *
   * <p>The {@code comment} argument itself is stored. The earlier code stored {@code stringVal}
   * and ignored the argument, which only worked because the callers in this class happen to pass
   * {@code stringVal}.
   *
   * @param comment the comment text to store
   */
  protected void addComment(String comment) {
    if (comments == null) {
      comments = new ArrayList(2);
    }
    comments.add(comment);
  }

  /**
   * @return the running line-break count
   */
  public int getLine() {
    return this.line;
  }

  public static interface CommentHandler {

    boolean handle(Token lastToken, String comment);
  }

  /**
   * Snapshot of the scanner state, filled by {@code mark()} and read back by {@code reset()}.
   */
  private static class SavePoint {

    // Saved value of pos.
    int bp;
    // Saved value of bufPos.
    int sp;
    // Saved value of mark.
    int np;
    // Saved current character.
    char ch;
    // Saved current token.
    Token token;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy