All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.projectnessie.nessie.cli.jsongrammar.JsonCLexer Maven / Gradle / Ivy

There is a newer version: 0.101.3
Show newest version
/* Generated by: CongoCC Parser Generator. JsonCLexer.java  */
package org.projectnessie.nessie.cli.jsongrammar;

import org.projectnessie.nessie.cli.jsongrammar.Token.TokenType;
import static org.projectnessie.nessie.cli.jsongrammar.Token.TokenType.*;
import java.util.*;


public class JsonCLexer extends TokenSource {
    // Optional hook that lets external code intercept/override token matching
    // in getMatchInfo(); remains null unless set reflectively or by tooling.
    private static MatcherHook MATCHER_HOOK;

    // this cannot be initialized here, since the hook must be set afterwards
    public enum LexicalState {
        JSON
    }

    // Current lexical state; JSON is the only state, so values()[0] is the default.
    LexicalState lexicalState = LexicalState.values()[0];
    // When non-null, tokenization is restricted to this subset of token types.
    EnumSet activeTokenTypes = null;
    // Token types that are "regular" tokens that participate in parsing,
    // i.e. declared as TOKEN
    static final EnumSet regularTokens = EnumSet.of(EOF, COLON, COMMA, OPEN_BRACKET, CLOSE_BRACKET, OPEN_BRACE, CLOSE_BRACE, TRUE, FALSE, NULL, STRING_LITERAL, NUMBER);
    // Token types that do not participate in parsing
    // i.e. declared as UNPARSED (or SPECIAL_TOKEN)
    static final EnumSet unparsedTokens = EnumSet.of(SINGLE_LINE_COMMENT, MULTI_LINE_COMMENT);
    // Tokens that are skipped, i.e. SKIP
    static final EnumSet skippedTokens = EnumSet.of(WHITESPACE);
    // Tokens that correspond to a MORE, i.e. that are pending
    // additional input (none in this grammar)
    static final EnumSet moreTokens = EnumSet.noneOf(TokenType.class);

    /** Convenience constructor that labels the input source simply "input". */
    public JsonCLexer(CharSequence input) {
        this("input", input);
    }

    /**
    * Creates a lexer starting in the default (JSON) lexical state at line 1, column 1.
    * @param inputSource just the name of the input source (typically the filename)
    * that will be used in error messages and so on.
    * @param input the input
    */
    public JsonCLexer(String inputSource, CharSequence input) {
        this(inputSource, input, LexicalState.JSON, 1, 1);
    }

    /**
    * @param inputSource just the name of the input source (typically the filename) that
    * will be used in error messages and so on.
    * @param input the input
    * @param lexState The starting lexical state, may be null to indicate the default
    * starting state
    * @param startingLine The line number at which we are starting for the purposes of
    * location/error messages. In most normal usage, this is 1.
    * @param startingColumn The column number at which we are starting for the purposes of
    * location/error messages. In most normal usages this is 1.
    */
    public JsonCLexer(String inputSource, CharSequence input, LexicalState lexState, int startingLine, int startingColumn) {
        super(inputSource, input, startingLine, startingColumn, 1, true, false, false, "");
        // Fix: test the lexState PARAMETER (documented as nullable), not the
        // lexicalState field, which is always non-null here (initialized at its
        // declaration). The old check `lexicalState != null` was always true, so a
        // null lexState would reach switchTo(null) and null out the lexical state
        // instead of keeping the documented default.
        if (lexState != null) switchTo(lexState);
    }

    /** Returns the token following {@code tok}, honoring the instance's active token-type filter. */
    public Token getNextToken(Token tok) {
        return getNextToken(tok, this.activeTokenTypes);
    }

    /**
    * The public method for getting the next token, that is
    * called by JsonCParser.
    * It checks whether we have already cached
    * the token after this one. If not, it finally goes
    * to the NFA machinery
    * @param tok the token to advance from; null means tokenize from offset 0
    * @param activeTokenTypes restricts matching to these types; null means all types are active
    */
    public Token getNextToken(Token tok, EnumSet activeTokenTypes) {
        if (tok == null) {
            // No predecessor: tokenize from the very start of the input.
            tok = tokenizeAt(0, null, activeTokenTypes);
            cacheToken(tok);
            return tok;
        }
        Token cachedToken = tok.nextCachedToken();
        // If the cached next token is not currently active, we
        // throw it away and go back to the JsonCLexer
        if (cachedToken != null && activeTokenTypes != null && !activeTokenTypes.contains(cachedToken.getType())) {
            reset(tok);
            cachedToken = null;
        }
        if (cachedToken == null) {
            // Cache miss (or invalidated cache): run the NFA from the end of tok.
            Token token = tokenizeAt(tok.getEndOffset(), null, activeTokenTypes);
            cacheToken(token);
            return token;
        }
        return cachedToken;
    }


    /**
    * Simple value holder for the outcome of one NFA match attempt:
    * which token type matched and how many chars it consumed.
    */
    static class MatchInfo {
        TokenType matchedType;
        int matchLength;

        @Override
        public int hashCode() {
            return Objects.hash(matchLength, matchedType);
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj == null || obj.getClass() != getClass()) {
                return false;
            }
            MatchInfo that = (MatchInfo) obj;
            return this.matchLength == that.matchLength && this.matchedType == that.matchedType;
        }

    }


    /**
    * Hook interface allowing an external matcher to run before (or instead of)
    * the built-in NFA; returning null falls back to {@link #getMatchInfo}.
    */
    @FunctionalInterface
    private interface MatcherHook {

        MatchInfo apply(LexicalState lexicalState, CharSequence input, int position, EnumSet activeTokenTypes, NfaFunction[] nfaFunctions, BitSet currentStates, BitSet nextStates, MatchInfo matchInfo);

    }


    /**
    * Core tokenization method. Note that this can be called from a static context.
    * Hence the extra parameters that need to be passed in.
    * Runs the NFA from {@code position} and records, in {@code matchInfo}, the
    * longest (then lowest-ordinal) token type matched. The two BitSets are
    * scratch state sets, reused across calls to avoid allocation.
    */
    static MatchInfo getMatchInfo(CharSequence input, int position, EnumSet activeTokenTypes, NfaFunction[] nfaFunctions, BitSet currentStates, BitSet nextStates, MatchInfo matchInfo) {
        if (matchInfo == null) {
            matchInfo = new MatchInfo();
        }
        if (position >= input.length()) {
            // At end of input: report EOF with zero length.
            matchInfo.matchedType = EOF;
            matchInfo.matchLength = 0;
            return matchInfo;
        }
        int start = position;
        int matchLength = 0;
        TokenType matchedType = TokenType.INVALID;
        EnumSet alreadyMatchedTypes = EnumSet.noneOf(TokenType.class);
        if (currentStates == null) currentStates = new BitSet(51);
        else currentStates.clear();
        if (nextStates == null) nextStates = new BitSet(51);
        else nextStates.clear();
        // the core NFA loop
        do {
            // Holder for the new type (if any) matched on this iteration
            if (position > start) {
                // What was nextStates on the last iteration
                // is now the currentStates!
                BitSet temp = currentStates;
                currentStates = nextStates;
                nextStates = temp;
                nextStates.clear();
            } else {
                // First iteration: begin in the NFA's start state (index 0).
                currentStates.set(0);
            }
            if (position >= input.length()) {
                break;
            }
            int curChar = Character.codePointAt(input, position++);
            if (curChar > 0xFFFF) position++;
            // supplementary code point occupies two chars
            int nextActive = currentStates.nextSetBit(0);
            while (nextActive != -1) {
                TokenType returnedType = nfaFunctions[nextActive].apply(curChar, nextStates, activeTokenTypes, alreadyMatchedTypes);
                // Prefer the longer match; on equal length, the type with the
                // lower ordinal (earlier declaration) wins.
                if (returnedType != null && (position - start > matchLength || returnedType.ordinal() < matchedType.ordinal())) {
                    matchedType = returnedType;
                    matchLength = position - start;
                    alreadyMatchedTypes.add(returnedType);
                }
                nextActive = currentStates.nextSetBit(nextActive + 1);
            }
            if (position >= input.length()) break;
        }
        while (!nextStates.isEmpty());
        matchInfo.matchedType = matchedType;
        matchInfo.matchLength = matchLength;
        return matchInfo;
    }

    /**
    * @param position The position at which to tokenize.
    * @param lexicalState The lexical state in which to tokenize. If this is null, it is the instance variable #lexicalState
    * @param activeTokenTypes The active token types. If this is null, they are all active.
    * @return the Token at position
    */
    final Token tokenizeAt(int position, LexicalState lexicalState, EnumSet activeTokenTypes) {
        if (lexicalState == null) lexicalState = this.lexicalState;
        int tokenBeginOffset = position;
        boolean inMore = false;
        // Start offset of a run of unmatched characters, or -1 if none pending.
        int invalidRegionStart = -1;
        Token matchedToken = null;
        TokenType matchedType = null;
        // The core tokenization loop
        MatchInfo matchInfo = new MatchInfo();
        BitSet currentStates = new BitSet(51);
        BitSet nextStates = new BitSet(51);
        while (matchedToken == null) {
            if (!inMore) tokenBeginOffset = position;
            if (MATCHER_HOOK != null) {
                // Give the hook first crack; fall back to the NFA if it declines.
                matchInfo = MATCHER_HOOK.apply(lexicalState, this, position, activeTokenTypes, nfaFunctions, currentStates, nextStates, matchInfo);
                if (matchInfo == null) {
                    matchInfo = getMatchInfo(this, position, activeTokenTypes, nfaFunctions, currentStates, nextStates, matchInfo);
                }
            } else {
                matchInfo = getMatchInfo(this, position, activeTokenTypes, nfaFunctions, currentStates, nextStates, matchInfo);
            }
            matchedType = matchInfo.matchedType;
            inMore = moreTokens.contains(matchedType);
            position += matchInfo.matchLength;
            if (matchedType == TokenType.INVALID) {
                // Nothing matched here: extend (or start) the invalid region and
                // skip one code point (two chars if supplementary).
                if (invalidRegionStart == -1) {
                    invalidRegionStart = tokenBeginOffset;
                }
                int cp = Character.codePointAt(this, position);
                ++position;
                if (cp > 0xFFFF) ++position;
                continue;
            }
            if (invalidRegionStart != -1) {
                // A valid match ends a pending invalid region; report that first.
                return new InvalidToken(this, invalidRegionStart, tokenBeginOffset);
            }
            if (skippedTokens.contains(matchedType)) {
                skipTokens(tokenBeginOffset, position);
            } else if (regularTokens.contains(matchedType) || unparsedTokens.contains(matchedType)) {
                matchedToken = Token.newToken(matchedType, this, tokenBeginOffset, position);
                matchedToken.setUnparsed(!regularTokens.contains(matchedType));
            }
        }
        return matchedToken;
    }

    /**
    * Switch to specified lexical state.
    * @param lexState the lexical state to switch to
    * @return whether we switched (i.e. we weren't already in the desired lexical state)
    */
    public boolean switchTo(LexicalState lexState) {
        if (lexState == this.lexicalState) {
            return false;
        }
        this.lexicalState = lexState;
        return true;
    }

    // Reset the token source input
    // to just after the Token passed in.
    // Optionally also switches lexical state (no-op here since JSON is the only state).
    void reset(Token t, LexicalState state) {
        uncacheTokens(t);
        if (state != null) {
            switchTo(state);
        }
    }

    /** Reset to just after {@code t} without changing the lexical state. */
    void reset(Token t) {
        reset(t, null);
    }


    // NFA related code follows.
    // The functional interface that represents
    // the acceptance method of an NFA state:
    // given the current code point, it sets follow-up states in `bs` and
    // returns the TokenType accepted at this point, or null if none.
    @FunctionalInterface
    interface NfaFunction {

        TokenType apply(int ch, BitSet bs, EnumSet validTypes, EnumSet alreadyMatchedTypes);

    }

    // Table of NFA state functions, indexed by state number (0 is the start state).
    private static NfaFunction[] nfaFunctions;
    // Initialize the various NFA method tables
    static {
        JSON.NFA_FUNCTIONS_init();
    }

    //The Nitty-gritty of the NFA code follows.
    /**
    * Holder class for NFA code related to JSON lexical state
    */
    private static class JSON {

        // NFA start state: dispatches on the first code point of a token.
        // Single-char tokens (braces, brackets, comma, colon) accept immediately;
        // multi-char tokens seed their follow-up state in nextStates.
        private static TokenType getNfaNameJSONIndex0(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            TokenType type = null;
            if (ch == '"') {
                // Opening quote of a STRING_LITERAL -> in-string state 6.
                if (validTypes == null || validTypes.contains(STRING_LITERAL)) {
                    nextStates.set(6);
                }
            } else if (ch == '-') {
                // Leading minus of a NUMBER -> state 1 (expects first digit).
                if (validTypes == null || validTypes.contains(NUMBER)) {
                    nextStates.set(1);
                }
            } else if (ch == '/') {
                // Could start either comment kind; seed both branches.
                if (validTypes == null || validTypes.contains(SINGLE_LINE_COMMENT)) {
                    nextStates.set(10);
                }
                if (validTypes == null || validTypes.contains(MULTI_LINE_COMMENT)) {
                    nextStates.set(29);
                }
            } else if (ch == 'f') {
                // "false" keyword path.
                if (validTypes == null || validTypes.contains(FALSE)) {
                    nextStates.set(24);
                }
            } else if (ch == 'n') {
                // "null" keyword path.
                if (validTypes == null || validTypes.contains(NULL)) {
                    nextStates.set(27);
                }
            } else if (ch == 't') {
                // "true" keyword path.
                if (validTypes == null || validTypes.contains(TRUE)) {
                    nextStates.set(22);
                }
            } else if (ch == '0') {
                // A lone '0' is already a NUMBER; state 15 allows '.'/exponent.
                if (validTypes == null || validTypes.contains(NUMBER)) {
                    nextStates.set(15);
                    type = NUMBER;
                }
            } else if (ch >= '1' && ch <= '9') {
                // Nonzero leading digit: NUMBER, more digits via state 2.
                if (validTypes == null || validTypes.contains(NUMBER)) {
                    nextStates.set(2);
                    type = NUMBER;
                }
            } else if (ch == '\t') {
                if (validTypes == null || validTypes.contains(WHITESPACE)) {
                    nextStates.set(7);
                    type = WHITESPACE;
                }
            } else if (ch == '\n') {
                if (validTypes == null || validTypes.contains(WHITESPACE)) {
                    nextStates.set(7);
                    type = WHITESPACE;
                }
            } else if (ch == '\r') {
                if (validTypes == null || validTypes.contains(WHITESPACE)) {
                    nextStates.set(7);
                    type = WHITESPACE;
                }
            } else if (ch == ' ') {
                if (validTypes == null || validTypes.contains(WHITESPACE)) {
                    nextStates.set(7);
                    type = WHITESPACE;
                }
            } else if (ch == '}') {
                if (validTypes == null || validTypes.contains(CLOSE_BRACE)) {
                    type = CLOSE_BRACE;
                }
            } else if (ch == '{') {
                if (validTypes == null || validTypes.contains(OPEN_BRACE)) {
                    type = OPEN_BRACE;
                }
            } else if (ch == ']') {
                if (validTypes == null || validTypes.contains(CLOSE_BRACKET)) {
                    type = CLOSE_BRACKET;
                }
            } else if (ch == '[') {
                if (validTypes == null || validTypes.contains(OPEN_BRACKET)) {
                    type = OPEN_BRACKET;
                }
            } else if (ch == ',') {
                if (validTypes == null || validTypes.contains(COMMA)) {
                    type = COMMA;
                }
            } else if (ch == ':') {
                if (validTypes == null || validTypes.contains(COLON)) {
                    type = COLON;
                }
            }
            return type;
        }

        // State 1: just after a leading '-'; expects the first digit of a NUMBER.
        private static TokenType getNfaNameJSONIndex1(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch == '0') {
                nextStates.set(15);
                return NUMBER;
            }
            if (ch >= '1' && ch <= '9') {
                nextStates.set(2);
                return NUMBER;
            }
            return null;
        }

        // State 2: integer-part digits of a NUMBER; '.' or 'e'/'E' continue it.
        private static TokenType getNfaNameJSONIndex2(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch >= '0' && ch <= '9') {
                nextStates.set(2);
                return NUMBER;
            }
            if (ch == '.') {
                nextStates.set(4);
            } else if (ch == 'E' || ch == 'e') {
                nextStates.set(14);
            }
            return null;
        }

        // State 3: exponent digits after the sign.
        // NOTE(review): accepts only '1'-'9' here, so a '0' inside an exponent
        // would not match — presumably mirrors the .ccc grammar; verify there.
        private static TokenType getNfaNameJSONIndex3(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            boolean nonZeroDigit = '1' <= ch && ch <= '9';
            if (!nonZeroDigit) {
                return null;
            }
            nextStates.set(3);
            return NUMBER;
        }

        // State 4: just after the decimal point; requires at least one digit.
        private static TokenType getNfaNameJSONIndex4(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            boolean digit = '0' <= ch && ch <= '9';
            if (!digit) {
                return null;
            }
            nextStates.set(5);
            return NUMBER;
        }

        // State 5: fractional digits; 'e'/'E' starts the exponent part.
        private static TokenType getNfaNameJSONIndex5(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch >= '0' && ch <= '9') {
                nextStates.set(5);
                return NUMBER;
            }
            if (ch == 'E' || ch == 'e') {
                nextStates.set(14);
            }
            return null;
        }

        // State 6: inside a string literal.
        // Ordinary chars (anything printable except '"' and '\\') loop here;
        // '\\' forks into the escape states; a closing '"' accepts the literal.
        private static TokenType getNfaNameJSONIndex6(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            TokenType type = null;
            if ((ch == ' ' || ch == '!' || (ch >= '#' && ch <= '[' || ch >= ']'))) {
                nextStates.set(6);
            } else if (ch == '\\') {
                // 16 = single-char escape, 17 = \ uXXXX unicode escape.
                nextStates.set(16);
                nextStates.set(17);
            } else if (ch == '"') {
                type = STRING_LITERAL;
            }
            return type;
        }

        // State 7: a run of whitespace; any further whitespace char extends it.
        // (All four original branches were identical, so they are folded into one.)
        private static TokenType getNfaNameJSONIndex7(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ') {
                nextStates.set(7);
                return WHITESPACE;
            }
            return null;
        }

        // State 8: final 'e' of "true".
        private static TokenType getNfaNameJSONIndex8(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            return ch == 'e' ? TRUE : null;
        }

        // State 9: final 'e' of "false".
        private static TokenType getNfaNameJSONIndex9(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            return ch == 'e' ? FALSE : null;
        }

        // State 10: second '/' of "//" — a SINGLE_LINE_COMMENT starts.
        private static TokenType getNfaNameJSONIndex10(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch != '/') {
                return null;
            }
            nextStates.set(11);
            return SINGLE_LINE_COMMENT;
        }

        // State 11: body of a single-line comment.
        // The range test admits every code point except 0x0a ('\n'), assuming
        // non-negative input code points — TODO confirm against codePointAt caller.
        private static TokenType getNfaNameJSONIndex11(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if ((ch >= 0x0 && ch <= '\t' || ch >= 0xb)) {
                nextStates.set(11);
                return SINGLE_LINE_COMMENT;
            }
            return null;
        }

        // State 12: final 'l' of "null".
        private static TokenType getNfaNameJSONIndex12(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            return ch == 'l' ? NULL : null;
        }

        // State 13: closing '/' of "*/" — accepts MULTI_LINE_COMMENT, but only
        // once per match attempt (guarded via alreadyMatchedTypes).
        private static TokenType getNfaNameJSONIndex13(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch == '/' && !alreadyMatchedTypes.contains(MULTI_LINE_COMMENT)) {
                return MULTI_LINE_COMMENT;
            }
            return null;
        }

        // State 14: exponent sign after 'e'/'E'.
        private static TokenType getNfaNameJSONIndex14(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch == '+' || ch == '-') {
                nextStates.set(3);
            }
            return null;
        }

        // State 15: after a leading '0'; only '.' or an exponent may follow.
        private static TokenType getNfaNameJSONIndex15(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            switch (ch) {
                case '.':
                    nextStates.set(4);
                    break;
                case 'E':
                case 'e':
                    nextStates.set(14);
                    break;
                default:
                    break;
            }
            return null;
        }

        // State 16: single-character escape after '\\' in a string.
        // NFA_MOVES_JSON_40 holds the allowed escape chars as interval pairs;
        // checkIntervals is inherited from the TokenSource machinery.
        private static TokenType getNfaNameJSONIndex16(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (checkIntervals(NFA_MOVES_JSON_40, ch)) {
                nextStates.set(6);
            }
            return null;
        }

        // State 17: 'u' introducing a unicode escape in a string.
        private static TokenType getNfaNameJSONIndex17(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch == 'u') {
                nextStates.set(18);
            }
            return null;
        }

        // State 18: first hex digit of a \ uXXXX escape.
        private static TokenType getNfaNameJSONIndex18(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            boolean hexDigit = (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
            if (hexDigit) {
                nextStates.set(19);
            }
            return null;
        }

        // State 19: second hex digit of a \ uXXXX escape.
        private static TokenType getNfaNameJSONIndex19(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            boolean hexDigit = (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
            if (hexDigit) {
                nextStates.set(20);
            }
            return null;
        }

        // State 20: third hex digit of a \ uXXXX escape.
        private static TokenType getNfaNameJSONIndex20(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            boolean hexDigit = (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
            if (hexDigit) {
                nextStates.set(21);
            }
            return null;
        }

        // State 21: fourth hex digit of a \ uXXXX escape; back to string body.
        private static TokenType getNfaNameJSONIndex21(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            boolean hexDigit = (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
            if (hexDigit) {
                nextStates.set(6);
            }
            return null;
        }

        // State 22: 'r' of "true".
        private static TokenType getNfaNameJSONIndex22(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch != 'r') {
                return null;
            }
            nextStates.set(23);
            return null;
        }

        // State 23: 'u' of "true".
        private static TokenType getNfaNameJSONIndex23(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch != 'u') {
                return null;
            }
            nextStates.set(8);
            return null;
        }

        // State 24: 'a' of "false".
        private static TokenType getNfaNameJSONIndex24(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch != 'a') {
                return null;
            }
            nextStates.set(25);
            return null;
        }

        // State 25: 'l' of "false".
        private static TokenType getNfaNameJSONIndex25(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch != 'l') {
                return null;
            }
            nextStates.set(26);
            return null;
        }

        // State 26: 's' of "false".
        private static TokenType getNfaNameJSONIndex26(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch != 's') {
                return null;
            }
            nextStates.set(9);
            return null;
        }

        // State 27: 'u' of "null".
        private static TokenType getNfaNameJSONIndex27(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch != 'u') {
                return null;
            }
            nextStates.set(28);
            return null;
        }

        // State 28: first 'l' of "null".
        private static TokenType getNfaNameJSONIndex28(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch != 'l') {
                return null;
            }
            nextStates.set(12);
            return null;
        }

        // State 29: '*' of "/*" opening a multi-line comment (skipped if a
        // MULTI_LINE_COMMENT was already matched in this attempt).
        private static TokenType getNfaNameJSONIndex29(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (ch == '*' && !alreadyMatchedTypes.contains(MULTI_LINE_COMMENT)) {
                nextStates.set(30);
            }
            return null;
        }

        // State 30: body of a multi-line comment. Every char loops here; a '*'
        // additionally moves toward the closing "*/" via state 13. Note both ifs
        // can fire for '*' — that nondeterminism is what lets the comment contain
        // '*' characters that are not part of the terminator.
        private static TokenType getNfaNameJSONIndex30(int ch, BitSet nextStates, EnumSet validTypes, EnumSet alreadyMatchedTypes) {
            if (alreadyMatchedTypes.contains(MULTI_LINE_COMMENT)) return null;
            if (ch >= 0x0) {
                nextStates.set(30);
            }
            if (ch == '*') {
                nextStates.set(13);
            }
            return null;
        }

        // Interval table (inclusive lo/hi pairs) of the characters that may
        // follow '\\' as a single-char string escape: " / \ b f n r t.
        private static final int[] NFA_MOVES_JSON_40 = NFA_MOVES_JSON_40_init();

        private static int[] NFA_MOVES_JSON_40_init() {
            return new int[] {'"', '"', '/', '/', '\\', '\\', 'b', 'b', 'f', 'f',
            'n', 'n', 'r', 'r', 't', 't'};
        }

        // Populates the outer class's nfaFunctions table; the array index is the
        // NFA state number, so the order of these method references is significant.
        private static void NFA_FUNCTIONS_init() {
            nfaFunctions = new NfaFunction[] {JSON::getNfaNameJSONIndex0, JSON::getNfaNameJSONIndex1,
            JSON::getNfaNameJSONIndex2, JSON::getNfaNameJSONIndex3, JSON::getNfaNameJSONIndex4,
            JSON::getNfaNameJSONIndex5, JSON::getNfaNameJSONIndex6, JSON::getNfaNameJSONIndex7,
            JSON::getNfaNameJSONIndex8, JSON::getNfaNameJSONIndex9, JSON::getNfaNameJSONIndex10,
            JSON::getNfaNameJSONIndex11, JSON::getNfaNameJSONIndex12, JSON::getNfaNameJSONIndex13,
            JSON::getNfaNameJSONIndex14, JSON::getNfaNameJSONIndex15, JSON::getNfaNameJSONIndex16,
            JSON::getNfaNameJSONIndex17, JSON::getNfaNameJSONIndex18, JSON::getNfaNameJSONIndex19,
            JSON::getNfaNameJSONIndex20, JSON::getNfaNameJSONIndex21, JSON::getNfaNameJSONIndex22,
            JSON::getNfaNameJSONIndex23, JSON::getNfaNameJSONIndex24, JSON::getNfaNameJSONIndex25,
            JSON::getNfaNameJSONIndex26, JSON::getNfaNameJSONIndex27, JSON::getNfaNameJSONIndex28,
            JSON::getNfaNameJSONIndex29, JSON::getNfaNameJSONIndex30};
        }

    }

}






© 2015 - 2025 Weber Informatics LLC | Privacy Policy