All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.googlecode.paradox.parser.Scanner Maven / Gradle / Ivy

/*
 * Copyright (c) 2009 Leonardo Alves da Costa
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any
 * later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
 * License for more details. You should have received a copy of the GNU General Public License along with this
 * program. If not, see <https://www.gnu.org/licenses/>.
 */
package com.googlecode.paradox.parser;

import com.googlecode.paradox.exceptions.ParadoxSyntaxErrorException;
import com.googlecode.paradox.exceptions.SyntaxError;

import java.nio.CharBuffer;
import java.sql.SQLException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.Queue;

/**
 * SQL Scanner (reads tokens from a SQL string).
 *
 * <p>Characters are pulled from a {@link CharBuffer}. Both single characters and whole tokens can be pushed back;
 * pushed-back items are returned again before any new input is consumed.</p>
 *
 * @version 1.5
 * @since 1.0
 */
public class Scanner {

    /**
     * Separators char.
     */
    private static final char[] SEPARATORS = {' ', '\b', '\t', '\n', '\0', '\r'};

    /**
     * Special chars.
     */
    private static final char[] SPECIAL = {'(', ')', '+', '-', ',', '.', '=', ';', '*'};

    /**
     * Character buffer used to parse the SQL.
     */
    private final CharBuffer buffer;

    /**
     * Pushed-back chars. Used as a stack (last pushed, first read) so that un-reading several chars in reverse
     * reading order replays them in their original order.
     */
    private final Deque<Character> preloaded = new ArrayDeque<>();

    /**
     * Pushed-back tokens. The most recently pushed token is the first one returned by {@link #nextToken()}.
     */
    private final List<Token> tokens = new ArrayList<>();

    /**
     * Value buffer (text of the token currently being scanned).
     */
    private final StringBuilder value = new StringBuilder(299);

    /**
     * The SQL current parser position.
     */
    private final ScannerPosition position = new ScannerPosition();

    /**
     * Start position of the token currently being scanned.
     */
    private ScannerPosition startPosition;

    /**
     * Creates a new instance.
     *
     * @param buffer the buffer to read of.
     * @throws SQLException in case of a null, empty or blank SQL.
     */
    Scanner(final String buffer) throws SQLException {
        final String sql = buffer == null ? "" : buffer.trim();
        if (sql.isEmpty()) {
            throw new ParadoxSyntaxErrorException(SyntaxError.EMPTY_SQL);
        }

        this.buffer = CharBuffer.wrap(sql);
    }

    /**
     * Checks for maximum number dots allowed.
     *
     * @param dotCount the dot count.
     * @throws SQLException in case of invalid dot count.
     */
    private static void checkDotCount(final int dotCount) throws SQLException {
        if (dotCount > 1) {
            throw new ParadoxSyntaxErrorException(SyntaxError.NUMBER_FORMAT);
        }
    }

    /**
     * Check if the delimiter starts a character literal (single quote) rather than a quoted identifier.
     *
     * @param c the char to verify.
     * @return true if c is a single quote.
     */
    private static boolean isCharacters(final char c) {
        return c == '\'';
    }

    /**
     * If the char is a separator.
     *
     * @param value the char to identify.
     * @return true if the char is a separator.
     */
    private static boolean isSeparator(final char value) {
        for (final char c : Scanner.SEPARATORS) {
            if (c == value) {
                return true;
            }
        }

        return false;
    }

    /**
     * if the value is a special char.
     *
     * @param value the value to identify.
     * @return true if the value is a special char.
     */
    private static boolean isSpecial(final char value) {
        for (final char c : Scanner.SPECIAL) {
            if (c == value) {
                return true;
            }
        }

        return false;
    }

    /**
     * Creates a token by value.
     *
     * @param value to convert.
     * @return a new {@link Token} typed by {@link TokenType#get(String)} when it resolves the value, an
     * {@link TokenType#IDENTIFIER} token otherwise, or {@code null} if the value is empty.
     */
    private Token getToken(final String value) {
        if (value.isEmpty()) {
            return null;
        }

        final TokenType token = TokenType.get(value);
        if (token != null) {
            return new Token(token, value, startPosition);
        }

        return new Token(TokenType.IDENTIFIER, value, startPosition);
    }

    /**
     * Gets the next value in buffer.
     *
     * <p>Pushed-back chars are returned first. Otherwise one char is read from the buffer and the backslash
     * escapes {@code \n}, {@code \b}, {@code \r}, {@code \t} and {@code \\} are translated to a single char.</p>
     *
     * @return the next char.
     */
    private char nextChar() {
        if (!preloaded.isEmpty()) {
            return preloaded.poll();
        }

        char c = this.buffer.get();
        position.setTrackingPosition(c);

        // Handles escape characters. A backslash that is the very last char of the input has nothing to escape,
        // so it is returned as is instead of reading past the end of the buffer.
        if (c == '\\' && this.buffer.hasRemaining()) {
            final char next = this.buffer.get();
            switch (next) {
                case 'n':
                    c = '\n';
                    break;
                case 'b':
                    c = '\b';
                    break;
                case 'r':
                    c = '\r';
                    break;
                case 't':
                    c = '\t';
                    break;
                case '\\':
                    // Keep the \ char.
                    break;
                default:
                    // Not an escape sequence: un-read the char so it is scanned normally.
                    this.buffer.position(this.buffer.position() - 1);
                    break;
            }
        }
        return c;
    }

    /**
     * Parses identifier tokens, appending chars to the value buffer until a separator (dropped), a special char
     * (pushed back) or the end of the input is found.
     */
    private void parseIdentifier() {
        do {
            final char c = nextChar();
            if (isSeparator(c)) {
                return;
            }
            if (isSpecial(c)) {
                pushBack(c);
                return;
            }

            this.value.append(c);
        } while (this.hasNextChar());
    }

    /**
     * Parses a numeric value into the value buffer.
     *
     * @param start the char to start of.
     * @throws SQLException in case of parse errors (more than one dot in the number).
     */
    private void parseNumber(final char start) throws SQLException {
        this.value.append(start);
        int dotCount = start == '.' ? 1 : 0;

        while (this.hasNextChar()) {
            final char c = this.nextChar();
            if (c == '.') {
                dotCount++;

                // Only one dot per numeric value
                Scanner.checkDotCount(dotCount);
            } else if (Scanner.isSeparator(c)) {
                // The separator ends the number and is discarded.
                return;
            } else if (Scanner.isSpecial(c)) {
                // Read but not part of the number: give it back. Only chars that were NOT appended are pushed
                // back, so a dot ending the input is never emitted twice.
                this.pushBack(c);
                return;
            }

            this.value.append(c);
        }
    }

    /**
     * Parses a character stream value. A doubled delimiter inside the string is read as one literal delimiter.
     *
     * @param type the string type (special char used to start the string).
     * @throws ParadoxSyntaxErrorException if the input ends before the closing delimiter.
     */
    private void parseString(final char type) throws ParadoxSyntaxErrorException {
        char c = '\0';

        while (this.hasNextChar() && c != type) {
            c = this.nextChar();

            if (c != type) {
                this.value.append(c);
            } else if (this.hasNextChar()) {
                final char lookAhead = this.nextChar();
                // Escaped string
                if (lookAhead == type) {
                    this.value.append(c);
                    // Prevent breaking.
                    c = '\0';
                } else {
                    pushBack(lookAhead);
                }
            }
        }

        if (c != type) {
            throw new ParadoxSyntaxErrorException(SyntaxError.UNTERMINATED_STRING, position);
        }
    }

    /**
     * Push back the read char. The char pushed last is the first one returned by {@link #nextChar()}.
     *
     * @param character the character to push back.
     */
    private void pushBack(char character) {
        preloaded.addFirst(character);
    }

    /**
     * If there are still chars to scan (pushed back or unread in the buffer).
     *
     * @return true if at least one more char can be read.
     */
    private boolean hasNextChar() {
        return !this.preloaded.isEmpty() || this.buffer.hasRemaining();
    }

    /**
     * If buffer has tokens.
     *
     * @return true if there are pushed-back tokens or chars still to be scanned.
     */
    boolean hasNext() {
        return !this.tokens.isEmpty() || this.hasNextChar();
    }

    /**
     * Test for empty statement.
     *
     * @throws ParadoxSyntaxErrorException in case of empty statement.
     */
    private void assertNotEmptyStatement() throws ParadoxSyntaxErrorException {
        if (!this.hasNext()) {
            throw new ParadoxSyntaxErrorException(SyntaxError.UNEXPECTED_END_OF_STATEMENT);
        }
    }

    /**
     * Gets the next {@link Token} in buffer.
     *
     * <p>Pushed-back tokens are returned first (most recently pushed first). Comments ({@code --} up to the end
     * of the line and {@code /* ... *}{@code /}) are skipped.</p>
     *
     * @return the next {@link Token}.
     * @throws SQLException in case of parse errors.
     */
    public Token nextToken() throws SQLException {
        final int size = this.tokens.size();
        if (size > 0) {
            return this.tokens.remove(size - 1);
        }

        assertNotEmptyStatement();

        this.value.setLength(0);

        // Ignore separators
        final char c = nextNonSeparatorChar();
        Token ret = null;
        if ((c == '"') || (c == '\'')) {
            ret = parseIdentifier(c);
        } else if (c == '/') {
            // Test for multiline comment. A slash ending the input has no look-ahead char to inspect.
            if (this.hasNextChar()) {
                final char lookAhead = this.nextChar();

                if (lookAhead == '*') {
                    parseMultilineComment();

                    // Redo this from beginning.
                    ret = nextToken();
                } else {
                    // Restore the original scanner state and treat it as a normal identifier.
                    pushBack(lookAhead);
                }
            }
        } else if (Character.isDigit(c)) {
            parseNumber(c);
            ret = new Token(TokenType.NUMERIC, this.value.toString(), startPosition);
        } else if (c == '-') {
            ret = parseMinusSign(c);
        } else if (isSpecial(c)) {
            ret = getToken(Character.toString(c));
        }

        // The token is already handled?
        if (ret == null) {
            pushBack(c);
            parseIdentifier();

            ret = getToken(this.value.toString());
        }

        return ret;
    }

    /**
     * Handle the minus sign, which may start a negative number, start a {@code --} comment or stand alone.
     *
     * @param c the current char.
     * @return the token.
     * @throws SQLException in case of failures.
     */
    private Token parseMinusSign(char c) throws SQLException {
        // A minus sign ending the input can only be a minus sign.
        if (this.hasNextChar()) {
            // Can be a minus sign only or a negative number.
            final char lookAhead = this.nextChar();
            // Restore the original scanner state.
            pushBack(lookAhead);

            if (Character.isDigit(lookAhead)) {
                // It is a number.
                parseNumber(c);
                return new Token(TokenType.NUMERIC, this.value.toString(), startPosition);
            } else if (lookAhead == '-') {
                // It is a comment.
                parseComment();

                // Redo this from beginning.
                return nextToken();
            }
        }

        // Only a minus sign.
        return getToken(Character.toString(c));
    }

    /**
     * Skips a single line comment: consumes chars up to and including the next line feed, or to the end of the
     * input.
     */
    private void parseComment() {
        char c;

        do {
            c = nextChar();
        } while (hasNextChar() && c != '\n');
    }

    /**
     * Skips a multiline comment: consumes chars up to and including the closing star-slash pair, or to the end of
     * the input if the comment is never closed.
     */
    private void parseMultilineComment() {
        char last;
        char c = '\0';

        do {
            last = c;
            c = nextChar();
        } while (hasNextChar() && (last != '*' || c != '/'));
    }

    /**
     * Parses a quoted value.
     *
     * @param c the opening delimiter (single or double quote).
     * @return a {@link TokenType#CHARACTER} token for single quotes, an {@link TokenType#IDENTIFIER} token
     * otherwise.
     * @throws ParadoxSyntaxErrorException if the quoted value is not terminated.
     */
    private Token parseIdentifier(char c) throws ParadoxSyntaxErrorException {
        // identifiers with special chars
        this.parseString(c);

        final TokenType type = Scanner.isCharacters(c) ? TokenType.CHARACTER : TokenType.IDENTIFIER;
        return new Token(type, this.value.toString(), startPosition);
    }

    /**
     * Skips separators and returns the first char that is not one, recording where the token starts.
     *
     * @return the next non separator char.
     * @throws ParadoxSyntaxErrorException if the input ends before a non separator char is found.
     */
    private char nextNonSeparatorChar() throws ParadoxSyntaxErrorException {
        char c;
        do {
            // Only separators may be left (e.g. an escaped line feed ending the input).
            assertNotEmptyStatement();
            c = this.nextChar();
        } while (isSeparator(c));

        startPosition = position.lastPosition();

        return c;
    }

    /**
     * Push back the given token in buffer.
     *
     * @param token the token to push back.
     */
    public void pushBack(final Token token) {
        this.tokens.add(token);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy