All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.netbeans.modules.el.lexer.ELLexer Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.netbeans.modules.el.lexer;

import java.util.logging.Level;
import java.util.logging.Logger;
import org.netbeans.api.lexer.Token;
import org.netbeans.modules.el.lexer.api.ELTokenId;
import org.netbeans.spi.lexer.Lexer;
import org.netbeans.spi.lexer.LexerInput;
import org.netbeans.spi.lexer.LexerRestartInfo;
import org.netbeans.spi.lexer.TokenFactory;

/**
 * Lexical analyzer for Expression Language.
 * It does NOT recognizes the EL delimiters ${ } and #{ }
 *
 * @author Petr Pisl
 * @author Marek Fukala
 *
 * @version 1.00
 */

public class ELLexer implements Lexer {
    
    private static final Logger LOGGER = Logger.getLogger(ELLexer.class.getName());
    private static final boolean LOG = Boolean.getBoolean("j2ee_lexer_debug"); //NOI18N
    
    private static final int EOF = LexerInput.EOF;
    
    private final LexerInput input;
    
    private final TokenFactory tokenFactory;
    
    @Override
    public Object state() {
        return new ELState(lexerState, conditionalOperatorCount);
    }
    
    /** Internal state of the lexical analyzer before entering subanalyzer of
     * character references. It is initially set to INIT, but before first usage,
     * this will be overwritten with state, which originated transition to
     * charref subanalyzer.
     */
    private int lexerState = INIT;
    
    /*
     * Contains counter of possible start of conditional operator "a ? b :c".   
     */
    private int conditionalOperatorCount;
    
    
    /* Internal states used internally by analyzer. There
     * can be any number of them declared by the analyzer.
     */
    private static final int INIT = 1; //initial lexer state
    private static final int ISI_IDENTIFIER = 2;
    private static final int ISI_CHAR = 3; // inside char constant
    private static final int ISI_CHAR_A_BSLASH = 4; // inside char constant after backslash
    private static final int ISI_STRING = 5; // inside a string " ... "
    private static final int ISI_STRING_A_BSLASH = 6; // inside string "..." constant after backslash
    private static final int ISI_CHAR_STRING = 7;  // inside a string '...'
    private static final int ISI_CHAR_STRING_A_BSLASH = 8; // inside string '...'contant after backslash
    private static final int ISA_ZERO = 9; // after '0'
    private static final int ISI_INT = 10; // integer number
    private static final int ISI_OCTAL = 11; // octal number
    private static final int ISI_DOUBLE = 12; // double number
    private static final int ISI_DOUBLE_EXP = 13; // double number
    private static final int ISI_HEX = 14; // hex number
    private static final int ISA_DOT = 15; // after '.'
    private static final int ISI_WHITESPACE = 16; // inside white space
    private static final int ISA_EQ = 17; // after '='
    private static final int ISA_GT = 18; // after '>'
    private static final int ISA_LT = 19; // after '<'
    private static final int ISA_PLUS = 20; // after '+'
    private static final int ISA_MINUS = 21; // after '-'
    //private static final int ISA_STAR = 22; // after '*'
    private static final int ISA_PIPE = 23; // after '|'
    private static final int ISA_AND = 24; // after '&'
    private static final int ISA_EXCLAMATION = 25; // after '!'
    private static final int ISI_BRACKET = 26; // after '['
    private static final int ISI_BRACKET_A_WHITESPACE = 27;
    private static final int ISI_BRACKET_A_IDENTIFIER = 28;
    private static final int ISI_BRACKET_ISA_EQ = 29;
    private static final int ISI_BRACKET_ISA_GT = 30;
    private static final int ISI_BRACKET_ISA_LT = 31;
    private static final int ISI_BRACKET_ISA_PIPE = 32; // after '|'
    private static final int ISI_BRACKET_ISA_AND = 33; // after '&'
    private static final int ISI_BRACKET_ISA_ZERO = 34; // after '0'
    private static final int ISI_BRACKET_ISA_DOT = 35; // after '.'
    private static final int ISI_BRACKET_ISI_INT = 36; // after '.'
    private static final int ISI_BRACKET_ISI_OCTAL = 37; // octal number
    private static final int ISI_BRACKET_ISI_DOUBLE = 38; // double number
    private static final int ISI_BRACKET_ISI_DOUBLE_EXP = 39; // double number
    private static final int ISI_BRACKET_ISI_HEX = 40; // hex number
    private static final int ISI_DOULE_EXP_ISA_SIGN = 41;
    private static final int ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN = 42;
    //private static final int ISA_PERCENT = 24; // after '%'
    private static final int ISI_BRACKET_ISA_MINUS = 43;
    private static final int ISI_BRACKET_ISA_PLUS = 44;
    
    
    public ELLexer(LexerRestartInfo info) {
        this.input = info.input();
        this.tokenFactory = info.tokenFactory();
        if (info.state() == null) {
            lexerState = INIT;
            conditionalOperatorCount = 0;
        } else {
            ELState current = (ELState) info.state();
            lexerState = current.getState();
            conditionalOperatorCount = current.getConditionalCount();
        }
    }
    
    /** This is core function of analyzer and it returns one of following numbers:
     * a) token number of next token from scanned text
     * b) EOL when end of line was found in scanned buffer
     * c) EOT when there is no more chars available in scanned buffer.
     *
     * The function scans the active character and does one or more
     * of the following actions:
     * 1. change internal analyzer state (state = new-state)
     * 2. return token ID (return token-ID)
     * 3. adjust current position to signal different end of token;
     *    the character that offset points to is not included in the token
     */
    @Override
    public Token nextToken() {
        
        int actChar;
        
        while (true) {
            actChar = input.read();
            
            if (actChar == EOF) {
                if(input.readLengthEOF() == 1) {
                    return null; //just EOL is read
                } else {
                    //there is something else in the buffer except EOL
                    //we will return last token now
                    input.backup(1); //backup the EOL, we will return null in next nextToken() call
                    break;
                }
            }
            
            switch (lexerState) { // switch by the current internal state
                case INIT:
                    
                    switch (actChar) {
                        case '"':
                            lexerState = ISI_STRING;
                            break;
                        case '\'':
                            lexerState = ISI_CHAR;
                            break;
                        case '/':
                            return token(ELTokenId.DIV);
                        case '=':
                            lexerState = ISA_EQ;
                            break;
                        case '>':
                            lexerState = ISA_GT;
                            break;
                        case '<':
                            lexerState = ISA_LT;
                            break;
                        case '+':
                            lexerState = ISA_PLUS;
                            break;
                        case '-':
                            lexerState = ISA_MINUS;
                            break;
                        case '*':
                            return token(ELTokenId.MUL);
                        case '|':
                            lexerState = ISA_PIPE;
                            break;
                        case '&':
                            lexerState = ISA_AND;
                            break;
                        case '[':
                            return token(ELTokenId.LBRACKET);
                        case ']':
                            return token(ELTokenId.RBRACKET);
                        case '%':
                            return token(ELTokenId.MOD);
                        case ':':
                            conditionalOperatorCount--;
                            return token(ELTokenId.COLON);
                        case ';':
                            return token(ELTokenId.SEMICOLON);
                        case '!':
                            lexerState = ISA_EXCLAMATION;
                            break;
                        case '(':
                            return token(ELTokenId.LPAREN);
                        case ')':
                            return token(ELTokenId.RPAREN);
                        case ',':
                            return token(ELTokenId.COMMA);
                        case '?':
                            conditionalOperatorCount++;
                            return token(ELTokenId.QUESTION);
                        case '\n':
                            return token(ELTokenId.EOL);
                        case '0':
                            lexerState = ISA_ZERO;
                            break;
                        case '.':
                            lexerState = ISA_DOT;
                            break;
                        case '\\':
                            // issue #242361 - coloring in case of EL inside quoted JSP attr_value
                            int nextChar = input.read();
                            input.backup(1);
                            if (nextChar == '"') {
                                return token(ELTokenId.STRING_LITERAL);
                            }
                            break;
                        default:
                            // Check for whitespace
                            if (Character.isWhitespace(actChar)) {
                                lexerState = ISI_WHITESPACE;
                                break;
                            }
                            
                            // check whether it can be identifier
                            if (Character.isJavaIdentifierStart(actChar)){
                                lexerState = ISI_IDENTIFIER;
                                break;
                            }
                            // Check for digit
                            if (Character.isDigit(actChar)) {
                                lexerState = ISI_INT;
                                break;
                            }
                            return token(ELTokenId.INVALID_CHAR);
                            //break;
                    }
                    break;
                    
                    
                case ISI_WHITESPACE: // white space
                    if (!Character.isWhitespace(actChar)) {
                        lexerState = INIT;
                        input.backup(1);
                        return token(ELTokenId.WHITESPACE);
                    }
                    break;
                    
                case ISI_BRACKET:
                    switch (actChar){
                        case ']':
                            lexerState = INIT;
                            input.backup(1);
                            return token(ELTokenId.IDENTIFIER);
                        case '"':
                            return token(ELTokenId.LBRACKET);
                        case '\'':
                            return token(ELTokenId.LBRACKET);
                        case '/':
                            return token(ELTokenId.DIV);
                        case '+':
                            lexerState = ISI_BRACKET_ISA_PLUS;
                            break;
                        case '-':
                            lexerState = ISI_BRACKET_ISA_MINUS;
                            break;
                        case '*':
                            return token(ELTokenId.MUL);
                        case '[':
                            return token(ELTokenId.LBRACKET);
                        case '%':
                            return token(ELTokenId.MOD);
                        case ':':
                            return token(ELTokenId.COLON);
                        case ';':
                            return token(ELTokenId.SEMICOLON);
                        case '(':
                            return token(ELTokenId.LPAREN);
                        case ')':
                            return token(ELTokenId.RPAREN);
                        case ',':
                            return token(ELTokenId.COMMA);
                        case '?':
                            return token(ELTokenId.QUESTION);
                        case '=':
                            lexerState = ISI_BRACKET_ISA_EQ;
                            break;
                        case '>':
                            lexerState = ISI_BRACKET_ISA_GT;
                            break;
                        case '<':
                            lexerState = ISI_BRACKET_ISA_LT;
                            break;
                        case '|':
                            lexerState = ISI_BRACKET_ISA_PIPE;
                            break;
                        case '&':
                            lexerState = ISI_BRACKET_ISA_AND;
                            break;
                        case '0':
                            lexerState = ISI_BRACKET_ISA_ZERO;
                            break;
                        case '.':
                            lexerState = ISI_BRACKET_ISA_DOT;
                            break;
                        default :
                            // Check for whitespace
                            if (Character.isWhitespace(actChar)) {
                                lexerState = ISI_BRACKET_A_WHITESPACE;
                                break;
                            }
                            if (Character.isJavaIdentifierStart(actChar)){
                                // - System.out.print(" state->ISI_IDENTIFIER ");
                                lexerState = ISI_BRACKET_A_IDENTIFIER;
                                break;
                            }
                            // Check for digit
                            if (Character.isDigit(actChar)) {
                                lexerState = ISI_BRACKET_ISI_INT;
                                break;
                            }
                            return token(ELTokenId.INVALID_CHAR);
                            //break;
                    }
                    break;
                    
                case ISI_BRACKET_A_WHITESPACE:
                    if (!Character.isWhitespace(actChar)) {
                        lexerState = ISI_BRACKET;
                        input.backup(1);
                        return token(ELTokenId.WHITESPACE);
                    }
                    break;
                    
                case ISI_BRACKET_ISA_EQ:
                case ISA_EQ:
                    switch (actChar) {
                        case '=':
                            lexerState = INIT;
                            return token(ELTokenId.EQ_EQ);
                        default:
                            lexerState = (lexerState == ISI_BRACKET_ISA_EQ) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            return token(ELTokenId.EQ);
                    }

                case ISI_BRACKET_ISA_MINUS:
                case ISA_MINUS:
                    switch (actChar) {
                        case '>':
                            lexerState = INIT;
                            return token(ELTokenId.LAMBDA);
                        default:
                            lexerState = (lexerState == ISI_BRACKET_ISA_MINUS) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            return token(ELTokenId.MINUS);
                    }

                case ISI_BRACKET_ISA_PLUS:
                case ISA_PLUS:
                    switch (actChar) {
                        case '=':
                            lexerState = INIT;
                            return token(ELTokenId.CONCAT);
                        default:
                            lexerState = (lexerState == ISI_BRACKET_ISA_PLUS) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            return token(ELTokenId.PLUS);
                    }

                case ISI_BRACKET_ISA_GT:
                case ISA_GT:
                    switch (actChar) {
                        case '=':
                            lexerState = INIT;
                            return token(ELTokenId.GT_EQ);
                        default:
                            lexerState = (lexerState == ISI_BRACKET_ISA_GT) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            return token(ELTokenId.GT);
                    }
                    //break;
                case ISI_BRACKET_ISA_LT:
                case ISA_LT:
                    switch (actChar) {
                        case '=':
                            lexerState = INIT;
                            return token(ELTokenId.LT_EQ);
                        default:
                            lexerState = (lexerState == ISI_BRACKET_ISA_LT) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            return token(ELTokenId.LT);
                    }
                    //break;
                case ISI_BRACKET_ISA_PIPE:
                case ISA_PIPE:
                    switch (actChar) {
                        case '|':
                            lexerState = INIT;
                            return token(ELTokenId.OR_OR);
                        default:
                            lexerState = (lexerState == ISI_BRACKET_ISA_PIPE) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            break;
                    }
                    break;
                case ISI_BRACKET_ISA_AND:
                case ISA_AND:
                    switch (actChar) {
                        case '&':
                            lexerState = INIT;
                            return token(ELTokenId.AND_AND);
                        default:
                            lexerState = (lexerState == ISI_BRACKET_ISA_AND) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            break;
                    }
                    break;
                case ISA_EXCLAMATION:
                    switch (actChar) {
                        case '=':
                            lexerState = INIT;
                            return token(ELTokenId.NOT_EQ);
                        default:
                            lexerState = INIT;
                            input.backup(1);
                            return token(ELTokenId.NOT);
                    }
                case ISI_STRING:
                    switch (actChar) {
                        case '\\':
                            lexerState = ISI_STRING_A_BSLASH;
                            break;
                        case '\n':
                            lexerState = INIT;
                            input.backup(1);
                            return token(ELTokenId.STRING_LITERAL);
                        case '"': // NOI18N
                            lexerState = INIT;
                            return token(ELTokenId.STRING_LITERAL);
                    }
                    break;
                case ISI_STRING_A_BSLASH:
                    lexerState = ISI_STRING;
                    break;
                case ISI_BRACKET_A_IDENTIFIER:
                case ISI_IDENTIFIER:
                    if (!(Character.isJavaIdentifierPart(actChar))){
                        switch (lexerState){
                            case ISI_IDENTIFIER:
                                lexerState = INIT; break;
                            case ISI_BRACKET_A_IDENTIFIER:
                                lexerState = ISI_BRACKET;
                                break;
                        }
                        input.backup(1);
                        Token tid = matchKeyword(input);
                        if (tid == null){
                            if (actChar == ':'){
                                if ( conditionalOperatorCount >0 ){
                                    tid = token(ELTokenId.IDENTIFIER);
                                }
                                else {
                                    tid = token(ELTokenId.TAG_LIB_PREFIX);
                                }
                            } else{
                                tid = token(ELTokenId.IDENTIFIER);
                            }
                        }
                        return tid;
                    }
                    break;
                    
                case ISI_CHAR:
                    switch (actChar) {
                        case '\\':
                            lexerState = ISI_CHAR_A_BSLASH;
                            break;
                        case '\n':
                            lexerState = INIT;
                            input.backup(1);
                            return token(ELTokenId.CHAR_LITERAL);
                        case '\'':
                            lexerState = INIT;
                            return token(ELTokenId.CHAR_LITERAL);
                        default :
                            char prevChar = input.readText().charAt(input.readLength() - 1);
                            if (prevChar != '\'' && prevChar != '\\'){
                                lexerState = ISI_CHAR_STRING;
                            }
                    }
                    break;
                    
                case ISI_CHAR_A_BSLASH:
                    switch (actChar) {
                        case '\'':
                        case '\\':
                            break;
                        default:
                            input.backup(1);
                            break;
                    }
                    lexerState = ISI_CHAR;
                    break;
                    
                case ISI_CHAR_STRING:
                    // - System.out.print(" ISI_CHAR_STRING (");
                    switch (actChar) {
                        case '\\':
                            // - System.out.print(" state->ISI_CHAR_A_BSLASH )");
                            lexerState = ISI_CHAR_STRING_A_BSLASH;
                            break;
                        case '\n':
                            lexerState = INIT;
                            input.backup(1);
                            return token(ELTokenId.STRING_LITERAL);
                        case '\'':
                            lexerState = INIT;
                            return token(ELTokenId.STRING_LITERAL);
                    }
                    // - System.out.print(")");
                    break;
                    
                case ISI_CHAR_STRING_A_BSLASH:
                    switch (actChar) {
                        case '\'':
                        case '\\':
                            break;
                        default:
                            input.backup(1);
                            break;
                    }
                    lexerState = ISI_CHAR_STRING;
                    break;
                    
                case ISI_BRACKET_ISA_ZERO:
                case ISA_ZERO:
                    switch (actChar) {
                        case '.':
                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE : ISI_DOUBLE;
                            break;
                        case 'x':
                        case 'X':
                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_HEX : ISI_HEX;
                            break;
                        case 'l':
                        case 'L':
                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.LONG_LITERAL);
                        case 'f':
                        case 'F':
                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.FLOAT_LITERAL);
                        case 'd':
                        case 'D':
                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.DOUBLE_LITERAL);
                        case '8': // it's error to have '8' and '9' in octal number
                        case '9':
                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.INVALID_OCTAL_LITERAL);
                        case 'e':
                        case 'E':
                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE_EXP : ISI_DOUBLE_EXP;
                            break;
                        default:
                            if (Character.isDigit(actChar)) { // '8' and '9' already handled
                                lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_OCTAL : ISI_OCTAL;
                                break;
                            }
                            lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
                            input.backup(1);
                            return token(ELTokenId.INT_LITERAL);
                    }
                    break;
                    
                case ISI_BRACKET_ISI_INT:
                case ISI_INT:
                    switch (actChar) {
                        case 'l':
                        case 'L':
                            lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.LONG_LITERAL);
                        case '.':
                            lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET_ISI_DOUBLE : ISI_DOUBLE;
                            break;
                        case 'f':
                        case 'F':
                            lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.FLOAT_LITERAL);
                        case 'd':
                        case 'D':
                            lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.DOUBLE_LITERAL);
                        case 'e':
                        case 'E':
                            lexerState = ISI_DOUBLE_EXP;
                            break;
                        default:
                            if (!(actChar >= '0' && actChar <= '9')) {
                                lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
                                input.backup(1);
                                return token(ELTokenId.INT_LITERAL);
                            }
                    }
                    break;
                    
                case ISI_BRACKET_ISI_OCTAL:
                case ISI_OCTAL:
                    if (!(actChar >= '0' && actChar <= '7')) {
                        lexerState = (lexerState == ISI_BRACKET_ISI_OCTAL) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        return token(ELTokenId.OCTAL_LITERAL);
                    }
                    break;
                    
                case ISI_BRACKET_ISI_DOUBLE:
                case ISI_DOUBLE:
                    switch (actChar) {
                        case 'f':
                        case 'F':
                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.FLOAT_LITERAL);
                        case 'd':
                        case 'D':
                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.DOUBLE_LITERAL);
                        case 'e':
                        case 'E':
                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET_ISI_DOUBLE_EXP : ISI_DOUBLE_EXP;
                            break;
                        default:
                            if (!((actChar >= '0' && actChar <= '9')
                            || actChar == '.')) {
                                lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET : INIT;
                                input.backup(1);
                                return token(ELTokenId.DOUBLE_LITERAL);
                            }
                    }
                    break;
                    
                case ISI_DOUBLE_EXP:
                case ISI_BRACKET_ISI_DOUBLE_EXP:
                    switch (actChar) {
                        case 'f':
                        case 'F':
                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.FLOAT_LITERAL);
                        case 'd':
                        case 'D':
                            lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET : INIT;
                            return token(ELTokenId.DOUBLE_LITERAL);
                        case '-':
                        case '+':
                            lexerState = ISI_DOULE_EXP_ISA_SIGN;
                            break;
                        default:
                            if (!Character.isDigit(actChar)){
                                //|| ch == '-' || ch == '+')) {
                                lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET : INIT;
                                input.backup(1);
                                return token(ELTokenId.DOUBLE_LITERAL);
                            }
                    }
                    break;
                    
                case ISI_DOULE_EXP_ISA_SIGN:
                case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
                    if (!Character.isDigit(actChar)){
                        lexerState = (lexerState == ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN) ? ISI_BRACKET : INIT;
                        switch (actChar) {
                            case 'f':
                            case 'F':
                                return token(ELTokenId.FLOAT_LITERAL);
                            case 'd':
                            case 'D':
                                return token(ELTokenId.DOUBLE_LITERAL);
                            default:
                                input.backup(1);
                                return token(ELTokenId.DOUBLE_LITERAL);
                        }
                    }
                    break;
                    
                case ISI_BRACKET_ISI_HEX:
                case ISI_HEX:
                    if (!((actChar >= 'a' && actChar <= 'f')
                    || (actChar >= 'A' && actChar <= 'F')
                    || Character.isDigit(actChar))
                    ) {
                        lexerState = (lexerState == ISI_BRACKET_ISI_HEX) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        return token(ELTokenId.HEX_LITERAL);
                    }
                    break;
                    
                case ISI_BRACKET_ISA_DOT:
                case ISA_DOT:
                    if (Character.isDigit(actChar)) {
                        lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET_ISI_DOUBLE : ISI_DOUBLE;
                        
                    } else { // only single dot
                        lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET : INIT;
                        input.backup(1);
                        return token(ELTokenId.DOT);
                    }
                    break;
                    
            } // end of switch(state)
            
        } //end of big while
        
        /** At this stage there's no more text in the scanned buffer.
         * Scanner first checks whether this is completely the last
         * available buffer.
         */
        switch (lexerState) {
            case INIT:
                if (input.readLength() == 0) {
                    return null;
                }
                break;
            case ISI_WHITESPACE:
                lexerState = INIT;
                return token(ELTokenId.WHITESPACE);
            case ISI_IDENTIFIER:
                lexerState = INIT;
                Token kwd = matchKeyword(input);
                return (kwd != null) ? kwd : token(ELTokenId.IDENTIFIER);
            case ISI_STRING:
            case ISI_STRING_A_BSLASH:
                return token(ELTokenId.STRING_LITERAL); // hold the state
            case ISI_CHAR:
            case ISI_CHAR_A_BSLASH:
                return token(ELTokenId.CHAR_LITERAL);
            case ISI_CHAR_STRING :
            case ISI_CHAR_STRING_A_BSLASH :
                return token(ELTokenId.STRING_LITERAL);
            case ISA_ZERO:
            case ISI_INT:
                lexerState = INIT;
                return token(ELTokenId.INT_LITERAL);
            case ISI_OCTAL:
                lexerState = INIT;
                return token(ELTokenId.OCTAL_LITERAL);
            case ISI_DOUBLE:
            case ISI_DOUBLE_EXP:
            case ISI_DOULE_EXP_ISA_SIGN:
            case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
                lexerState = INIT;
                return token(ELTokenId.DOUBLE_LITERAL);
            case ISI_HEX:
                lexerState = INIT;
                return token(ELTokenId.HEX_LITERAL);
            case ISA_DOT:
                lexerState = INIT;
                return token(ELTokenId.DOT);
            case ISA_EQ:
                lexerState = INIT;
                return token(ELTokenId.EQ);
            case ISA_MINUS:
                lexerState = INIT;
                return token(ELTokenId.MINUS);
            case ISA_PLUS:
                lexerState = INIT;
                return token(ELTokenId.PLUS);
            case ISA_GT:
                lexerState = INIT;
                return token(ELTokenId.GT);
            case ISA_LT:
                lexerState = INIT;
                return token(ELTokenId.LT);
            case ISA_PIPE:
                lexerState = INIT;
                return token(ELTokenId.OR_OR);
            case ISA_AND:
                lexerState = INIT;
                return token(ELTokenId.AND_AND);
            case ISA_EXCLAMATION:
                lexerState = INIT;
                return token(ELTokenId.NOT);
            case ISI_BRACKET:
            case ISI_BRACKET_A_IDENTIFIER:
                lexerState = INIT;
                return token(ELTokenId.IDENTIFIER);
            case ISI_BRACKET_A_WHITESPACE:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.WHITESPACE);
            case ISI_BRACKET_ISA_EQ:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.EQ);
            case ISI_BRACKET_ISA_MINUS:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.MINUS);
            case ISI_BRACKET_ISA_PLUS:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.PLUS);
            case ISI_BRACKET_ISA_GT:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.GT_EQ);
            case ISI_BRACKET_ISA_LT:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.LT_EQ);
            case ISI_BRACKET_ISA_AND:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.AND_AND);
            case ISI_BRACKET_ISA_PIPE:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.OR_OR);
            case ISI_BRACKET_ISA_DOT:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.DOT);
            case ISI_BRACKET_ISA_ZERO:
            case ISI_BRACKET_ISI_INT:
                lexerState = ISI_BRACKET;
                return token(ELTokenId.INT_LITERAL);
        }
        
        
        return null;
    }
    
    
    public Token matchKeyword(LexerInput lexerInput) {
        int len = lexerInput.readLength();
        char[] buffer = new char[len];
        String read = lexerInput.readText().toString();
        read.getChars(0, read.length(), buffer, 0);
        int offset = 0;

        if (len > 10)
            return null;
        if (len <= 1)
            return null;
        switch (buffer[offset++]) {
            case 'a':
                if (len <= 2) return null;
                return (len == 3
                        && buffer[offset++] == 'n'
                        && buffer[offset++] == 'd')
                        ? token(ELTokenId.AND_KEYWORD) : null;
            case 'd':
                if (len <= 2) return null;
                return (len == 3
                        && buffer[offset++] == 'i'
                        && buffer[offset++] == 'v')
                        ? token(ELTokenId.DIV_KEYWORD) : null;
            case 'e':
                switch (buffer[offset++]) {
                    case 'q':
                        return (len == 2) ? token(ELTokenId.EQ_KEYWORD) : null;
                    case 'm':
                        return (len == 5
                                && buffer[offset++] == 'p'
                                && buffer[offset++] == 't'
                                && buffer[offset++] == 'y')
                                ? token(ELTokenId.EMPTY_KEYWORD) : null;
                    default:
                        return null;
                }
            case 'f':
                return (len == 5
                        && buffer[offset++] == 'a'
                        && buffer[offset++] == 'l'
                        && buffer[offset++] == 's'
                        && buffer[offset++] == 'e')
                        ? token(ELTokenId.FALSE_KEYWORD) : null;
            case 'g':
                switch (buffer[offset++]){
                    case 'e':
                        return (len == 2) ? token(ELTokenId.GE_KEYWORD) : null;
                    case 't':
                        return (len == 2) ? token(ELTokenId.GT_KEYWORD) : null;
                    default:
                        return null;
                }
            case 'l':
                switch (buffer[offset++]){
                    case 'e':
                        return (len == 2) ? token(ELTokenId.LE_KEYWORD) : null;
                    case 't':
                        return (len == 2) ? token(ELTokenId.LT_KEYWORD) : null;
                    default:
                        return null;
                }
            case 'i':
                if (len <= 9) return null;
                return (len == 10
                        && buffer[offset++] == 'n'
                        && buffer[offset++] == 's'
                        && buffer[offset++] == 't'
                        && buffer[offset++] == 'a'
                        && buffer[offset++] == 'n'
                        && buffer[offset++] == 'c'
                        && buffer[offset++] == 'e'
                        && buffer[offset++] == 'o'
                        && buffer[offset++] == 'f')
                        ? token(ELTokenId.INSTANCEOF_KEYWORD) : null;
            case 'm':
                if (len <= 2) return null;
                return (len == 3
                        && buffer[offset++] == 'o'
                        && buffer[offset++] == 'd')
                        ? token(ELTokenId.MOD_KEYWORD) : null;
            case 'n':
                switch (buffer[offset++]){
                    case 'e':
                        return (len == 2) ? token(ELTokenId.NE_KEYWORD) : null;
                    case 'o':
                        return (len == 3
                                && buffer[offset++] == 't')
                                ? token(ELTokenId.NOT_KEYWORD) : null;
                    case 'u':
                        return (len == 4
                                && buffer[offset++] == 'l'
                                && buffer[offset++] == 'l')
                                ? token(ELTokenId.NULL_KEYWORD) : null;
                    default:
                        return null;
                }
            case 'o':
                return (len == 2
                        && buffer[offset++] == 'r')
                        ? token(ELTokenId.OR_KEYWORD) : null;
            case 't':
                return (len == 4
                        && buffer[offset++] == 'r'
                        && buffer[offset++] == 'u'
                        && buffer[offset++] == 'e')
                        ? token(ELTokenId.TRUE_KEYWORD) : null;
                
            default :
                return null;
        }
    }
    
    private Token token(ELTokenId tokenId) {
        if(LOG) {
            if(input.readLength() == 0) {
                LOGGER.log(Level.INFO, "[" + this.getClass().getSimpleName() + "] Found zero length token: "); //NOI18N
            }
            LOGGER.log(Level.INFO, "[" + this.getClass().getSimpleName() + "] token ('" + input.readText().toString() + "'; id=" + tokenId + ")\n"); //NOI18N
        }
        return tokenFactory.createToken(tokenId);
    }
    
    @Override
    public void release() {
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy