All downloads are free. The search and download functionality uses the official Maven repository.

com.scalar.db.sql.util.Tokenizer Maven / Gradle / Ivy

The newest version!
package com.scalar.db.sql.util;

import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.List;

/**
 * A small, best-effort splitter that breaks a SQL string into textual tokens.
 *
 * <p>The rules implemented by {@link #tokenize(String, int)} are:
 *
 * <ul>
 *   <li>Whitespace (space, tab, form feed, CR, LF) separates tokens and is never emitted.
 *   <li>Each of {@code ; . , ? ( ) =} is emitted as a single-character token.
 *   <li>{@code >}, {@code >=}, {@code <} and {@code <=} are emitted as operator tokens.
 *   <li>A single-quoted string literal is kept together with its surrounding quotes; a doubled
 *       quote ({@code ''}) inside the literal is collapsed to one quote in the token text.
 *   <li>A double-quoted object name is emitted as one token, including the quotes.
 *   <li>Line comments introduced by {@code --}, {@code //} or {@code #} are skipped up to the next
 *       line feed; block comments delimited by {@code /*} and {@code *}{@code /} are skipped.
 * </ul>
 *
 * <p>This class only has static methods and cannot be instantiated.
 */
public final class Tokenizer {

  private Tokenizer() {}

  /**
   * Tokenizes the whole SQL string.
   *
   * @param sql the SQL text to split; must not be {@code null}
   * @return the tokens in order of appearance (a new, mutable list)
   * @throws NullPointerException if {@code sql} is {@code null}
   */
  public static List<String> tokenize(String sql) {
    return tokenize(sql, Integer.MAX_VALUE);
  }

  /**
   * Tokenizes the SQL string, stopping the scan once {@code limit} tokens have been collected.
   *
   * <p>The limit is a soft cap: the scan stops reading input as soon as the number of collected
   * tokens reaches {@code limit}, but a single step may add two tokens (for example a word
   * followed by {@code =}) and text that is still pending when the scan stops is flushed
   * afterwards, so the returned list can be slightly longer than {@code limit}. A non-positive
   * limit yields an empty list.
   *
   * @param sql the SQL text to split; leading and trailing whitespace is trimmed first; must not
   *     be {@code null}
   * @param limit the number of tokens after which scanning stops
   * @return the tokens in order of appearance (a new, mutable list)
   * @throws NullPointerException if {@code sql} is {@code null}
   */
  public static List<String> tokenize(String sql, int limit) {
    CharBuffer buffer = CharBuffer.wrap(sql.trim());

    List<String> tokens = new ArrayList<>();
    StringBuilder builder = new StringBuilder();

    // Modes that span several characters.
    boolean inStringLiteral = false;
    boolean quoteSeenInLiteral = false; // previous char inside the literal was a single quote
    boolean inObjectName = false;
    boolean inLineComment = false;
    boolean inBlockComment = false;

    // One-character look-behind flags: the previous char may start a two-character construct.
    boolean pendingSlash = false;
    boolean pendingHyphen = false;
    boolean pendingGt = false;
    boolean pendingLt = false;

    while (buffer.hasRemaining() && tokens.size() < limit) {
      char c = buffer.get();

      if (inStringLiteral) {
        if (!quoteSeenInLiteral) {
          builder.append(c);
          if (c == '\'') {
            // Either the closing quote or the first half of an escaped quote; decided next step.
            quoteSeenInLiteral = true;
          }
          continue;
        }

        quoteSeenInLiteral = false;
        if (c == '\'') {
          // Escaped quote (''): the first quote is already in the builder, drop the second.
          continue;
        }

        // The previous quote really closed the literal; c is handled as ordinary input below.
        inStringLiteral = false;
        flush(tokens, builder);
      }

      if (inObjectName) {
        builder.append(c);
        if (c == '"') {
          inObjectName = false;
          flush(tokens, builder);
        }
        continue;
      }

      if (inLineComment) {
        if (c == '\n') {
          inLineComment = false;
        }
        continue;
      }

      if (inBlockComment) {
        // Peek at the next char instead of consuming it, so that a run of asterisks such as
        // "**/" still terminates the comment.
        if (c == '*' && buffer.hasRemaining() && buffer.get(buffer.position()) == '/') {
          buffer.get(); // consume the '/'
          inBlockComment = false;
        }
        continue;
      }

      if (pendingSlash) {
        pendingSlash = false;
        if (c == '*' || c == '/') {
          if (c == '*') {
            inBlockComment = true;
          } else {
            inLineComment = true;
          }
          flush(tokens, builder);
          continue;
        }
        // Not a comment opener: the slash is ordinary token text.
        builder.append('/');
      }

      if (pendingHyphen) {
        pendingHyphen = false;
        if (c == '-') {
          inLineComment = true;
          flush(tokens, builder);
          continue;
        }
        // Not a comment opener: the hyphen is ordinary token text.
        builder.append('-');
      }

      if (pendingGt) {
        pendingGt = false;
        if (c == '=') {
          tokens.add(">=");
          continue;
        }
        tokens.add(">");
      }

      if (pendingLt) {
        pendingLt = false;
        if (c == '=') {
          tokens.add("<=");
          continue;
        }
        tokens.add("<");
      }

      switch (c) {
        case '\'':
          builder.append(c);
          inStringLiteral = true;
          break;
        case '"':
          builder.append(c);
          inObjectName = true;
          break;
        case '/':
          pendingSlash = true;
          break;
        case '-':
          pendingHyphen = true;
          break;
        case '#':
          inLineComment = true;
          flush(tokens, builder);
          break;
        case '\n':
        case '\r':
        case '\t':
        case '\f':
        case ' ':
          flush(tokens, builder);
          break;
        case ';':
        case '.':
        case ',':
        case '?':
        case '(':
        case ')':
        case '=':
          flush(tokens, builder);
          tokens.add(String.valueOf(c));
          break;
        case '>':
          pendingGt = true;
          flush(tokens, builder);
          break;
        case '<':
          pendingLt = true;
          flush(tokens, builder);
          break;
        default:
          builder.append(c);
          break;
      }
    }

    // A look-behind flag still set here means its character was the last one read; emit it.
    if (pendingSlash) {
      builder.append('/');
    }
    if (pendingHyphen) {
      builder.append('-');
    }
    if (pendingGt) {
      builder.append('>');
    }
    if (pendingLt) {
      builder.append('<');
    }
    flush(tokens, builder);

    return tokens;
  }

  /** Appends the pending text, if any, to {@code tokens} as one token and clears the builder. */
  private static void flush(List<String> tokens, StringBuilder builder) {
    if (builder.length() > 0) {
      tokens.add(builder.toString());
      builder.setLength(0);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy