All downloads are free. Search and download functionality uses the official Maven repository.

org.bidib.wizard.common.highlight.BidibScriptScanner Maven / Gradle / Ivy

package org.bidib.wizard.common.highlight;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * BidibScriptScanner
 */
public class BidibScriptScanner extends Scanner {

    /** Logger used for the debug-level token trace. */
    private static final Logger LOGGER = LoggerFactory.getLogger(BidibScriptScanner.class);

    // ---- Primary keywords: registered with type KEYWORD in initSymbolTable() ----

    public static final String KEY_DEFINE = "define";

    public static final String KEY_SET = "set";

    public static final String KEY_SELECT = "select";

    public static final String KEY_DELETE = "delete";

    public static final String KEY_ADD = "add";

    public static final String KEY_ASSERT = "assert";

    public static final String KEY_RESET = "reset";

    public static final String KEY_RESTART = "restart";

    public static final String KEY_WAIT = "wait";

    public static final String KEY_RESELECT = "reselect";

    // ---- Secondary keywords: registered with type KEYWORD2 in initSymbolTable() ----

    // Port types (registered together under "register port types").
    public static final String KEY2_ANALOG = "analog";

    public static final String KEY2_MACRO = "macro";

    public static final String KEY2_SWITCH = "switch";

    public static final String KEY2_SWITCHPAIR = "switchpair";

    public static final String KEY2_SERVO = "servo";

    public static final String KEY2_SOUND = "sound";

    public static final String KEY2_LIGHT = "light";

    public static final String KEY2_MOTOR = "motor";

    public static final String KEY2_BACKLIGHT = "backlight";

    public static final String KEY2_INPUT = "input";

    public static final String KEY2_FEEDBACK = "feedback";

    public static final String KEY2_FLAG = "flag";

    // public static final String KEY2_LABEL = "label";

    public static final String KEY2_CV = "cv";

    public static final String KEY2_STEP = "step";

    public static final String KEY2_ACCESSORY = "accessory";

    public static final String KEY2_ASPECT = "aspect";

    public static final String KEY2_ASPECTNAME = "aspectname";

    public static final String KEY2_DELAY = "delay";

    public static final String KEY2_PTYPE = "ptype";

    public static final String KEY2_PNUM = "pnum";

    public static final String KEY2_ACTION = "action";

    public static final String KEY2_TARGET = "target";

    public static final String KEY2_NAME = "name";

    public static final String KEY2_NUMBER = "number";

    public static final String KEY2_MACRONAME = "macroname";

    public static final String KEY2_MACRONUMBER = "macronumber";

    // Declared among the secondary keywords, but registered as a primary KEYWORD in initSymbolTable().
    public static final String KEY_CONFIG = "config";

    public static final String KEY2_PORT = "port";

    public static final String KEY2_MACROTIME = "macrotime";

    public static final String KEY2_MOVESERVOQUERY = "moveservoquery";

    public static final String KEY2_LOWERLIMIT = "lowerlimit";

    public static final String KEY2_UPPERLIMIT = "upperlimit";

    public static final String KEY2_TURNTIME = "turntime";

    public static final String KEY2_REPEAT = "repeat";

    public static final String KEY2_SLOWDOWN = "slowdown";

    public static final String KEY2_VALUEOFF = "valueoff";

    public static final String KEY2_VALUEON = "valueon";

    public static final String KEY2_DIMMOFF = "dimmoff";

    public static final String KEY2_DIMMON = "dimmon";

    public static final String KEY2_DIMMOFF88 = "dimmoff88";

    public static final String KEY2_DIMMON88 = "dimmon88";

    public static final String KEY2_MAPPING = "mapping";

    // NOTE(review): the only keyword literal containing an upper-case letter. The constructor
    // switches the scanner to case-insensitive mode; confirm the symbol table normalises case.
    public static final String KEY2_SWITCH_CONTROL = "switchControl";

    public static final String KEY2_TICKS = "ticks";

    public static final String KEY2_LOADTYPE = "loadtype";

    public static final String KEY2_DAY = "day";

    public static final String KEY2_HOUR = "hour";

    public static final String KEY2_MINUTE = "minute";

    public static final String KEY2_STARTUP = "startup";

    public static final String KEY2_RESTORE = "restore";

    public static final String KEY2_NONE = "none";

    public static final String KEY2_PENDINGONLY = "pendingonly";

    // DO NOT FORGET TO REGISTER THE NEW KEYWORDS BELOW !!!!
    // (a constant only becomes a highlighted keyword once initSymbolTable() passes it to lookup())

    /** Debug switch, hard-wired to off. */
    private final boolean debug = false;

    /** Scratch symbol created in initSymbolTable() and reused by lookup() as the probe key. */
    private Symbol tempSymbol;

    /**
     * Creates the scanner, enables case-insensitive matching and runs the two character
     * classification initialisers.
     */
    public BidibScriptScanner() {
        // The no-argument superclass constructor runs implicitly before the statements below.
        setCaseInsensitive(true);
        initKind();
        initUniKind();
    }

    /**
     * Populates the symbol table with the BiDiB script vocabulary: first the primary keywords
     * (type {@code KEYWORD}), then the secondary keywords (type {@code KEYWORD2}). Also creates
     * the scratch symbol that {@code lookup} reuses when resolving identifiers.
     */
    @Override
    protected void initSymbolTable() {

        tempSymbol = new Symbol(0, null);

        // Registration order is kept exactly as listed.
        final String[] primaryWords = {
            KEY_DEFINE, KEY_SET, KEY_SELECT, KEY_DELETE, KEY_ADD, KEY_ASSERT, KEY_RESET, KEY_RESTART,
            KEY_WAIT, KEY_RESELECT, KEY_CONFIG };
        for (String word : primaryWords) {
            lookup(KEYWORD, word);
        }

        final String[] secondaryWords = {
            KEY2_FEEDBACK, KEY2_INPUT, KEY2_FLAG, KEY2_CV, KEY2_STEP, KEY2_DELAY, KEY2_PTYPE, KEY2_PNUM,
            KEY2_ACTION, KEY2_TARGET, KEY2_MACRO, KEY2_ACCESSORY, KEY2_ASPECT, KEY2_ASPECTNAME,
            KEY2_PORT, KEY2_MACROTIME, KEY2_MOVESERVOQUERY,

            KEY2_LOWERLIMIT, KEY2_UPPERLIMIT, KEY2_TURNTIME,

            KEY2_NUMBER, KEY2_NAME, KEY2_MACRONAME, KEY2_MACRONUMBER,

            KEY2_REPEAT, KEY2_SLOWDOWN,

            KEY2_VALUEOFF, KEY2_VALUEON, KEY2_DIMMOFF, KEY2_DIMMON, KEY2_DIMMOFF88, KEY2_DIMMON88,
            KEY2_MAPPING, KEY2_SWITCH_CONTROL, KEY2_TICKS, KEY2_LOADTYPE,

            KEY2_DAY, KEY2_HOUR, KEY2_MINUTE,

            KEY2_STARTUP, KEY2_RESTORE, KEY2_NONE,

            // port types
            KEY2_ANALOG, KEY2_BACKLIGHT, KEY2_LIGHT, KEY2_MOTOR, KEY2_SERVO, KEY2_SOUND, KEY2_SWITCH,
            KEY2_SWITCHPAIR,

            KEY2_PENDINGONLY };
        for (String word : secondaryWords) {
            lookup(KEYWORD2, word);
        }
    }

    /**
     * Scans one token beginning at {@code start} and advances {@code start} past it.
     * <p>
     * While {@code state} is {@code MID_COMMENT} or {@code END_COMMENT} the scanner is inside a
     * block comment and delegates to {@code readComment}; that path returns without writing the
     * debug trace. Otherwise the first character is classified through {@code KIND} (ASCII) or
     * {@code UNIKIND} (by Unicode category) and the matching reader is invoked. A character whose
     * kind has no case below is returned with that kind and is not consumed.
     *
     * @return the token type; {@code WHITESPACE} if {@code start} has reached {@code end}
     */
    @Override
    protected int read() {
        // Always remember where the token begins: the debug trace below reports this offset and
        // the token length. Capturing it only behind the hard-wired 'debug' flag made the trace
        // print 0 as the offset and the absolute position as the length.
        final int tokenStart = start;

        if (start >= end) {
            return WHITESPACE;
        }

        int type;
        switch (state) {
            case MID_COMMENT:
            case END_COMMENT:
                // Continue a block comment that was opened on an earlier line.
                type = readComment(MID_COMMENT);
                if (type == END_COMMENT) {
                    state = WHITESPACE;
                }
                else {
                    state = MID_COMMENT;
                }
                return type;
            default:
                char c = buffer[start];
                if (c == '\\') {
                    c = next();
                }
                if (c < 128) {
                    type = KIND[c];
                }
                else {
                    type = UNIKIND[Character.getType(c)];
                }
                switch (type) {
                    case WHITESPACE:
                        start = start + charlength;
                        charlength = 1;
                        // Swallow the whole run of whitespace as a single token.
                        while (start < end) {
                            c = buffer[start];
                            if (c == '\\') {
                                c = next();
                            }
                            int k;
                            if (c < 128) {
                                k = KIND[c];
                            }
                            else {
                                k = UNIKIND[Character.getType(c)];
                            }
                            if (k != WHITESPACE) {
                                break;
                            }
                            start = start + charlength;
                            charlength = 1;
                        }
                        break;
                    case UNRECOGNIZED:
                    case BRACKET:
                    case SEPARATOR:
                        // Single-character tokens.
                        start = start + charlength;
                        charlength = 1;
                        break;
                    case OPERATOR:
                        start = start + charlength;
                        charlength = 1;
                        type = readOperator(c);
                        break;
                    case CHARACTER:
                        start = start + charlength;
                        charlength = 1;
                        type = readCharLiteral();
                        break;
                    case STRING:
                        start = start + charlength;
                        charlength = 1;
                        type = readStringLiteral();
                        break;
                    case IDENTIFIER:
                        start = start + charlength;
                        charlength = 1;
                        // An identifier continues over identifier and number characters.
                        while (start < end) {
                            c = buffer[start];
                            if (c == '\\') {
                                c = next();
                            }
                            int k;
                            if (c < 128) {
                                k = KIND[c];
                            }
                            else {
                                k = UNIKIND[Character.getType(c)];
                            }
                            if (k != IDENTIFIER && k != NUMBER) {
                                break;
                            }
                            start = start + charlength;
                            charlength = 1;
                        }
                        break;
                    case NUMBER:
                        start = start + charlength;
                        charlength = 1;
                        type = readNumber(c);
                        break;
                    case PUNCTUATION:
                        start = start + charlength;
                        charlength = 1;
                        type = readDot();
                        break;
                    case COMMENT:
                        start = start + charlength;
                        charlength = 1;
                        type = readSlash();
                        if (type == START_COMMENT) {
                            // The block comment is still open at the end of this line.
                            state = MID_COMMENT;
                        }
                        break;
                    case VARIABLE:
                        start = start + charlength;
                        charlength = 1;
                        type = readVariable();
                        break;
                    default:
                        break;
                }
        }
        if (LOGGER.isDebugEnabled()) {
            if (type > -1 && type < TokenTypes.TYPENAMES.length) {
                // Same text as before: "<typename> <tokenStart>,<end>(<length>)"
                LOGGER.debug("{} {},{}({})", TokenTypes.TYPENAMES[type], tokenStart, end, start - tokenStart);
            }
            else {
                LOGGER.debug("Invalid type: {}", type);
            }
        }
        return type;
    }

    /**
     * Extends an operator whose first character {@code c} has already been consumed to its
     * longest form: doubled ({@code ++ -- && ||}), compound assignment ({@code += == != ...}),
     * and the shift family ({@code << >> >>> <<= >>= >>>=}).
     *
     * @param c
     *            the operator character that was just consumed
     * @return always {@code OPERATOR}
     */
    private int readOperator(char c) {
        if (start >= end) {
            return OPERATOR;
        }

        final boolean mayDouble = c == '+' || c == '-' || c == '&' || c == '|';
        final boolean mayAssign = c == '=' || c == '*' || c == '!' || c == '^' || c == '%' || c == '/';
        final boolean mayShift = c == '<' || c == '>';
        if (!(mayDouble || mayAssign || mayShift)) {
            // '~', '?', ':' and anything else stay single-character operators.
            return OPERATOR;
        }

        char second = buffer[start];
        if (second == '\\') {
            second = next();
        }

        if (!mayShift) {
            final boolean extend = second == '=' || (mayDouble && second == c);
            if (extend) {
                start = start + charlength;
                charlength = 1;
            }
            return OPERATOR;
        }

        // '<' or '>'
        if (second == '=') {
            start = start + charlength;
            charlength = 1;
            return OPERATOR;
        }
        if (second != c) {
            return OPERATOR;
        }
        start = start + charlength;
        charlength = 1;
        if (start >= end) {
            return OPERATOR;
        }

        char third = buffer[start];
        if (third == '\\') {
            third = next();
        }
        if (third == '=') {
            start = start + charlength;
            charlength = 1;
            return OPERATOR;
        }
        if (c != '>' || third != '>') {
            return OPERATOR;
        }

        // ">>>", optionally followed by '='
        start = start + charlength;
        charlength = 1;
        if (start >= end) {
            return OPERATOR;
        }
        char fourth = buffer[start];
        if (fourth == '\\') {
            fourth = next();
        }
        if (fourth == '=') {
            start = start + charlength;
            charlength = 1;
        }
        return OPERATOR;
    }

    /**
     * Reads the remainder of a character literal: one character or escape sequence followed by
     * the closing single quote.
     *
     * @return {@code CHARACTER}, or the negated type if the literal is empty, spans a line break,
     *         has an invalid escape, or is not closed
     */
    private int readCharLiteral() {
        if (start >= end) {
            return bad(CHARACTER);
        }
        char content = buffer[start];
        if (content == '\\') {
            content = next();
        }
        if (content == '\'' || content == '\n') {
            return bad(CHARACTER);
        }

        // Accept the content character; for a backslash the escape body follows.
        start = start + charlength;
        charlength = 1;
        if (content == '\\' && !readEscapeSequence()) {
            return bad(CHARACTER);
        }

        if (start >= end) {
            return bad(CHARACTER);
        }
        char closing = buffer[start];
        if (closing == '\\') {
            closing = next();
        }
        if (closing == '\'') {
            start = start + charlength;
            charlength = 1;
            return CHARACTER;
        }
        return bad(CHARACTER);
    }

    /**
     * Reads the remainder of a string literal up to and including the closing quote.
     * <p>
     * Either quote character ({@code "} or {@code '}) ends the literal, regardless of which one
     * opened it, because the opening character is not passed to this method.
     *
     * @return {@code STRING}, or the negated type if the literal contains a line break, an
     *         invalid escape sequence, or is still open when the end of the buffer is reached
     */
    private int readStringLiteral() {
        if (start >= end) {
            return bad(STRING);
        }
        char c = buffer[start];
        if (c == '\\') {
            c = next();
        }

        while (c != '"' && c != '\'') {
            if (c == '\n') {
                return bad(STRING);
            }
            start = start + charlength;
            charlength = 1;
            if (c == '\\' && !readEscapeSequence()) {
                return bad(STRING);
            }
            // Checked after escape sequences as well: an escape that ends exactly at the end of
            // the buffer must not be followed by a read of buffer[start].
            if (start >= end) {
                return bad(STRING);
            }
            c = buffer[start];
            if (c == '\\') {
                c = next();
            }
        }

        // The loop only exits on a quote character; consume it.
        start = start + charlength;
        charlength = 1;
        return STRING;
    }

    /**
     * Decides what a just-consumed {@code '/'} starts: a line comment ({@code //}), a block
     * comment ({@code /*}), or a plain operator.
     *
     * @return {@code COMMENT} for a line comment, the result of {@code readComment} for a block
     *         comment, otherwise {@code OPERATOR}
     */
    private int readSlash() {
        if (start >= end) {
            return OPERATOR;
        }
        char following = buffer[start];
        if (following == '\\') {
            following = next();
        }

        if (following == '*') {
            start = start + charlength;
            charlength = 1;
            return readComment(START_COMMENT);
        }
        if (following != '/') {
            return readOperator('/');
        }

        // Line comment: consume through the line break, or up to the end of the buffer.
        do {
            start = start + charlength;
            charlength = 1;
            if (start >= end) {
                return COMMENT;
            }
            following = buffer[start];
            if (following == '\\') {
                following = next();
            }
        }
        while (following != '\n');
        start = start + charlength;
        charlength = 1;
        return COMMENT;
    }

    /**
     * Reads one line of a block comment.
     *
     * @param type
     *            the type to report if the comment does not end on this line
     * @return {@code type} if the line break or the end of the buffer is reached first; when the
     *         closing star-slash is found, {@code COMMENT} if {@code type} is
     *         {@code START_COMMENT}, otherwise {@code END_COMMENT}
     */
    int readComment(int type) {
        if (start >= end) {
            return type;
        }
        char current = buffer[start];
        if (current == '\\') {
            current = next();
        }

        for (;;) {
            // Skip ahead to the next '*' or line break.
            while (current != '*' && current != '\n') {
                start = start + charlength;
                charlength = 1;
                if (start >= end) {
                    return type;
                }
                current = buffer[start];
                if (current == '\\') {
                    current = next();
                }
            }

            // Consume that '*' or line break.
            start = start + charlength;
            charlength = 1;
            if (current == '\n' || start >= end) {
                return type;
            }

            // A '*' was consumed: the comment ends only if a '/' follows directly.
            current = buffer[start];
            if (current == '\\') {
                current = next();
            }
            if (current == '/') {
                start = start + charlength;
                charlength = 1;
                return type == START_COMMENT ? COMMENT : END_COMMENT;
            }
        }
    }

    /**
     * Reads the remainder of a numeric literal without checking whether it is out of range.
     * <p>
     * Two call shapes exist. From {@code read()} the argument is the first digit, which has
     * already been consumed. From {@code readDot()} the argument is {@code '.'}: the dot has
     * already been consumed and {@code start} points at the first fraction digit.
     *
     * @param c
     *            the already-consumed first character of the literal (a digit or {@code '.'})
     * @return {@code NUMBER}, or the negated type for a malformed literal
     */
    private int readNumber(char c) {
        // When entered with '.', the dot is already behind us. It must not be "accepted" a second
        // time below, otherwise the first fraction digit is swallowed unchecked and inputs such
        // as ".5 " or ".5e3" are reported as malformed.
        final boolean leadingDot = c == '.';

        if (c == '0') {
            int saveStart = start;
            int saveLength = charlength;
            charlength = 1;
            if (start >= end) {
                return NUMBER;
            }
            char c2 = buffer[start];
            if (c2 == '\\') {
                c2 = next();
            }
            switch (c2) {
                case 'x':
                case 'X':
                    // Hexadecimal: at least one hex digit is required after the prefix.
                    start = start + charlength;
                    charlength = 1;
                    boolean ok = readDigits(16);
                    if (!ok) {
                        return bad(NUMBER);
                    }
                    readSuffix();
                    return NUMBER;
                // NOTE(review): these labels are the control characters U+0000..U+0007, not the
                // digits '0'..'7'. A number with a leading zero therefore falls through to the
                // decimal path below. Confirm whether octal scanning was intended before changing
                // the labels, since that would alter how e.g. "08" is tokenised.
                case 0:
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                    readDigits(8);
                    readSuffix();
                    return NUMBER;
                case '.':
                case 'e':
                case 'E':
                    // Floating point starting with 0: continue on the decimal path.
                    start = saveStart;
                    charlength = saveLength;
                    break;
                case 'f':
                case 'F':
                case 'd':
                case 'D':
                case 'l':
                case 'L':
                    // "0" directly followed by a type suffix.
                    start = start + charlength; // NOSONAR
                    charlength = 1;
                    return NUMBER;
                default:
                    break;
            }
        }

        boolean hasDigits = false;
        if ('0' <= c && c <= '9') {
            hasDigits = true;
            readDigits(10);
            if (start >= end) {
                return NUMBER;
            }
            c = buffer[start];
            if (c == '\\') {
                c = next();
            }
            if (c == 'l' || c == 'L') {
                start = start + charlength;
                charlength = 1;
                return NUMBER;
            }
        }

        if (c == '.') {
            if (!leadingDot) {
                // The dot is the current character: accept it.
                start = start + charlength;
                charlength = 1;
            }
            if (start >= end) {
                return NUMBER;
            }
            c = buffer[start];
            if (c == '\\') {
                c = next();
            }
            if ('0' <= c && c <= '9') {
                hasDigits = true;
                readDigits(10);
                if (start >= end) {
                    return NUMBER;
                }
                c = buffer[start];
                if (c == '\\') {
                    c = next();
                }
            }
        }
        if (!hasDigits) {
            return bad(NUMBER);
        }

        switch (c) {
            case 'e':
            case 'E':
                // Exponent with optional sign.
                start = start + charlength;
                charlength = 1;
                if (start >= end) {
                    return bad(NUMBER);
                }
                c = buffer[start];
                if (c == '\\') {
                    c = next();
                }
                if (c == '+' || c == '-') {
                    start = start + charlength;
                    charlength = 1;
                    if (start >= end) {
                        return bad(NUMBER);
                    }
                }
                // readDigits performs its own look-ahead at the current position.
                readDigits(10);
                break;
            case 'f':
            case 'F':
            case 'd':
            case 'D':
                start = start + charlength;
                charlength = 1;
                return NUMBER;
            default:
                break;
        }
        return NUMBER;
    }

    /**
     * Consumes a run of digits of the given radix starting at the current position.
     *
     * @param radix
     *            the radix passed to {@link Character#digit(char, int)}
     * @return {@code true} if at least one digit was consumed, {@code false} if the buffer is
     *         exhausted or the current character is not a digit of that radix
     */
    boolean readDigits(int radix) {
        if (start >= end) {
            return false;
        }
        char ch = buffer[start];
        if (ch == '\\') {
            ch = next();
        }
        if (Character.digit(ch, radix) < 0) {
            return false;
        }

        do {
            start = start + charlength;
            charlength = 1;
            if (start >= end) {
                break;
            }
            ch = buffer[start];
            if (ch == '\\') {
                ch = next();
            }
        }
        while (Character.digit(ch, radix) >= 0);
        return true;
    }

    /**
     * Consumes a single numeric type suffix ({@code f F d D l L}) if one is the current
     * character; otherwise leaves the position unchanged.
     */
    void readSuffix() {
        if (start >= end) {
            return;
        }
        char tail = buffer[start];
        if (tail == '\\') {
            tail = next();
        }
        final boolean isSuffix = "fFdDlL".indexOf(tail) >= 0;
        if (isSuffix) {
            start = start + charlength;
            charlength = 1;
        }
    }

    /**
     * Decides what a just-consumed {@code '.'} starts: a number when a digit follows, a
     * three-dot sequence when two more dots follow, or a lone separator.
     *
     * @return the result of {@code readNumber('.')} when a digit follows, otherwise
     *         {@code SEPARATOR}
     */
    private int readDot() {
        if (start >= end) {
            return SEPARATOR;
        }
        char following = buffer[start];
        if (following == '\\') {
            following = next();
        }
        if (Character.isDigit(following)) {
            return readNumber('.');
        }

        // "..." : take the two remaining dots as part of this separator.
        final boolean twoMoreDots = start + 1 < end && following == '.' && buffer[start + 1] == '.';
        if (twoMoreDots) {
            start = start + 2;
        }
        return SEPARATOR;
    }

    /**
     * Reads the body of an escape sequence; the introducing backslash has already been consumed.
     * Accepts the single-character escapes {@code b t n f r " ' \} and octal escapes.
     *
     * @return {@code true} if a valid escape body was consumed
     */
    private boolean readEscapeSequence() {
        if (start >= end) {
            return false;
        }
        char code = buffer[start];
        if (code == '\\') {
            code = next();
        }

        if ("btnfr\"'\\".indexOf(code) >= 0) {
            start = start + charlength;
            charlength = 1;
            return true;
        }
        if ('0' <= code && code <= '3') {
            // up to three octal digits
            return readOctal(3);
        }
        if ('4' <= code && code <= '7') {
            // up to two octal digits
            return readOctal(2);
        }
        return false;
    }

    /**
     * Consumes up to {@code maxlength} octal digits starting at the current position.
     *
     * @param maxlength
     *            the maximum number of digits to consume
     * @return {@code true} if at least one digit was consumed and the value does not exceed
     *         {@code 0xFF}
     */
    boolean readOctal(int maxlength) {
        if (start >= end) {
            return false;
        }
        char c = buffer[start];
        if (c == '\\') {
            c = next();
        }

        int digits = 0;
        int val = 0;
        while (digits < maxlength) {
            final int d = Character.digit(c, 8);
            if (d == -1) {
                break;
            }
            val = 8 * val + d;
            start = start + charlength;
            charlength = 1;
            // Count the digit before the end-of-buffer check: a digit that happens to be the last
            // character of the buffer is still a consumed digit.
            digits++;
            if (start >= end) {
                break;
            }
            c = buffer[start];
            if (c == '\\') {
                c = next();
            }
        }
        return digits > 0 && val <= 0xFF;
    }

    /**
     * Reads the remainder of a variable token. The name may consist of letters, digits,
     * {@code _}, {@code -} and {@code +}; a {@code %} found directly after a name character is
     * consumed as the end of the token.
     *
     * @return {@code VARIABLE}, or the negated type if nothing follows the introducing character
     */
    int readVariable() {
        if (start >= end) {
            return bad(VARIABLE);
        }
        char ch = buffer[start];
        if (ch == '\\') {
            ch = next();
        }

        while (ch == '_' || ch == '-' || ch == '+' || Character.isLetterOrDigit(ch)) {
            start = start + charlength;
            charlength = 1;
            if (start >= end) {
                return VARIABLE;
            }
            ch = buffer[start];
            if (ch == '\\') {
                ch = next();
                continue;
            }
            if (ch == '%') {
                start = start + 1;
                return VARIABLE;
            }
        }
        return VARIABLE;
    }

    /**
     * Marks a token as malformed or incomplete by negating its type.
     *
     * @param type
     *            the token type that was being read
     * @return {@code -type}
     */
    private int bad(final int type) {
        return -type;
    }

    // Look ahead at the next character or unicode escape.
    // For efficiency, replace c = next(); with
    // c = buffer[start]; if (c == '\\') c = next();
    // To accept the character after looking at it, use:
    // start = start + charlength; charlength = 1;

    // Record the number of source code characters used up. To deal with an
    // odd or even number of backslashes preceding a unicode escape, whenever a
    // second backslash is coming up, mark its position as a pair.

    /** Marker for "no second backslash pending"; must not be a valid buffer index. */
    private static final int NO_PAIR = -1;

    /** Number of source characters occupied by the character last returned by the look-ahead. */
    private int charlength = 1;

    /**
     * Index of a backslash that is the second half of a {@code \\} pair, or {@link #NO_PAIR}.
     * Index 0 is a legal position, so it cannot serve as the "none" value: with 0 as the marker
     * a backslash at offset 0 was always treated as a paired backslash and a unicode escape
     * there was never decoded.
     */
    private int pair = NO_PAIR;

    /**
     * Looks at the character at {@code start} without consuming it, decoding a unicode escape
     * (backslash, one or more {@code u}, four hex digits) if one begins there. Sets
     * {@code charlength} to the number of source characters an escape occupies.
     *
     * @return the (decoded) character; 26 at the end of the buffer; {@code '\0'} for a truncated
     *         or malformed unicode escape
     */
    private char next() {
        if (start >= end) {
            return 26; // EOF
        }
        char c = buffer[start];
        if (c != '\\') {
            return c;
        }
        if (start == pair) {
            // Second backslash of a pair: a plain backslash, never the start of an escape.
            pair = NO_PAIR;
            return '\\';
        }
        if (start + 1 >= end) {
            return '\\';
        }

        c = buffer[start + 1];
        if (c == '\\') {
            pair = start + 1;
        }
        if (c != 'u') {
            return '\\';
        }

        // Any number of 'u' characters may follow the backslash.
        int pos = start + 2;
        while (pos < end && buffer[pos] == 'u') {
            pos++;
        }
        if (pos + 4 > end) {
            // Truncated escape: it occupies the rest of the buffer.
            charlength = end - start;
            return '\0';
        }

        c = 0;
        for (int j = 0; j < 4; j++) {
            int d = Character.digit(buffer[pos + j], 16);
            if (d < 0) {
                // Malformed escape: it occupies everything up to the offending character.
                charlength = pos + j - start;
                return '\0';
            }
            c = (char) (c * 16 + d);
        }
        charlength = pos + 4 - start;
        return c;
    }

    /**
     * Resolves a symbol. For identifiers the symbol table is probed first for an entry of the
     * same name registered as {@code KEYWORD}, then {@code KEYWORD2}, then {@code LITERAL}; the
     * first hit is returned. Everything else, and identifiers without such an entry, is passed
     * to the superclass.
     * <p>
     * Open question carried over from the original comment: what about unicode escape
     * translation of the name?
     *
     * @param type
     *            the token type of the symbol
     * @param name
     *            the symbol text
     * @return the matching symbol
     */
    @Override
    protected Symbol lookup(int type, String name) {
        if (type == IDENTIFIER) {
            // Reuse the scratch symbol as the probe key for all three attempts.
            tempSymbol.name = name;

            tempSymbol.type = KEYWORD;
            Symbol known = symbolTable.get(tempSymbol);
            if (known == null) {
                tempSymbol.type = KEYWORD2;
                known = symbolTable.get(tempSymbol);
            }
            if (known == null) {
                tempSymbol.type = LITERAL;
                known = symbolTable.get(tempSymbol);
            }
            if (known != null) {
                return known;
            }
        }
        return super.lookup(type, name);
    }

    // Classify the ascii characters using an array of kinds, and classify all
    // other unicode characters using an array indexed by unicode category.
    // See the source file java/lang/Character.java for the categories.
    // To find the classification of a character, use:
    // if (c < 128) k = kind[c]; else k = unikind[Character.getType(c)];

    private static final byte[] KIND = new byte[128];

    private static final byte[] UNIKIND = new byte[31];

    // Initialise the two classification arrays. Although the arrays are static, they are filled
    // by methods invoked from the constructor, not by a static initializer.
    // Token types from the TokenTypes class are used to classify characters.

    /**
     * Fills {@code KIND} with a token type for each of the 128 ASCII code points.
     * <p>
     * Every slot is first set to {@code -1}. After classification, any slot that still holds
     * {@code -1} is reported at debug level so that gaps in the table are noticed.
     */
    private void initKind() {
        // Sentinel: lets the final pass detect characters that no rule below covered.
        for (char c = 0; c < 128; c++) {
            KIND[c] = -1;
        }
        for (char c = 0; c < 128; c++) {
            if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
                KIND[c] = IDENTIFIER; // NOSONAR
            } else if (c >= '0' && c <= '9') {
                KIND[c] = NUMBER;
            } else {
                switch (c) {
                    case '\t':
                    case '\n':
                    case '\f':
                    case '\r':
                    case 26: // ASCII SUB (Ctrl-Z) is classified as whitespace
                    case ' ':
                        KIND[c] = WHITESPACE;
                        break;
                    case '$':
                    case '_':
                        KIND[c] = IDENTIFIER; // NOSONAR
                        break;
                    case '!':
                    case '&':
                    case '*':
                    case '+':
                    case '-':
                    case ':':
                    case '<':
                    case '=':
                    case '>':
                    case '?':
                    case '^':
                    case '|':
                    case '~':
                        KIND[c] = OPERATOR;
                        break;
                    case '"':
                    case '\'':
                        KIND[c] = STRING; // NOSONAR
                        break;
                    case '.':
                        KIND[c] = PUNCTUATION;
                        break;
                    case '/':
                        KIND[c] = COMMENT;
                        break;
                    case '(':
                    case ')':
                    case '[':
                    case ']':
                    case '{':
                    case '}':
                        KIND[c] = BRACKET;
                        break;
                    case ',':
                    case ';':
                        KIND[c] = SEPARATOR;
                        break;
                    case '%':
                        KIND[c] = VARIABLE;
                        break;
                    case '#':
                    case '@':
                    case '`':
                    case '\\':
                    case 127: // DEL
                        KIND[c] = UNRECOGNIZED;
                        break;
                    default:
                        // Only the remaining control characters (below the space character)
                        // reach this point; anything else stays at -1 and is reported below.
                        if (c < ' ') {
                            KIND[c] = UNRECOGNIZED;
                        }
                        break;
                }
            }
        }
        for (char c = 0; c < 128; c++) {
            if (KIND[c] == -1) {
                LOGGER.debug("Char {} hasn't been classified", (int) c);
            }
        }
    }

    /**
     * Fills {@code UNIKIND} with a token type for each Unicode general category.
     * <p>
     * The table is indexed by the category constants of {@link Character}, which range from
     * {@link Character#UNASSIGNED} (0) to {@link Character#FINAL_QUOTE_PUNCTUATION} (30); value
     * 17 is not assigned to any category. Every slot is first set to {@code -1}; any slot that
     * is still {@code -1} after classification is reported at debug level.
     */
    private void initUniKind() {
        // Sentinel: lets the final pass detect categories that no rule below covered.
        for (byte b = 0; b < 31; b++) {
            UNIKIND[b] = -1;
        }
        for (byte b = 0; b < 31; b++) {
            switch (b) {
                case Character.UNASSIGNED:
                case Character.ENCLOSING_MARK:
                case Character.OTHER_NUMBER:
                case Character.SPACE_SEPARATOR:
                case Character.LINE_SEPARATOR:
                case Character.PARAGRAPH_SEPARATOR:
                case Character.CONTROL:
                case 17: // category 17 is unused
                case Character.PRIVATE_USE:
                case Character.SURROGATE:
                case Character.DASH_PUNCTUATION:
                case Character.START_PUNCTUATION:
                case Character.END_PUNCTUATION:
                case Character.OTHER_PUNCTUATION:
                case Character.MATH_SYMBOL:
                case Character.MODIFIER_SYMBOL:
                case Character.OTHER_SYMBOL:
                case Character.INITIAL_QUOTE_PUNCTUATION:
                case Character.FINAL_QUOTE_PUNCTUATION:
                    UNIKIND[b] = UNRECOGNIZED;
                    break;
                // Letter-like categories, plus connector punctuation and currency symbols.
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.OTHER_LETTER:
                case Character.LETTER_NUMBER:
                case Character.CONNECTOR_PUNCTUATION: // maybe NUMBER
                case Character.CURRENCY_SYMBOL:
                    UNIKIND[b] = IDENTIFIER;
                    break;
                // Marks, decimal digits and format characters share the NUMBER type.
                case Character.NON_SPACING_MARK:
                case Character.COMBINING_SPACING_MARK:
                case Character.DECIMAL_DIGIT_NUMBER:
                case Character.FORMAT:
                    UNIKIND[b] = NUMBER;
                    break;
                default:
                    break;
            }
        }
        for (byte b = 0; b < 31; b++) {
            if (UNIKIND[b] == -1) {
                LOGGER.debug("Unicode cat {} hasn't been classified", b);
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy