/* Generated by: CongoCC Parser Generator. JsonCLexer.java */
package org.projectnessie.nessie.cli.jsongrammar;
import org.projectnessie.nessie.cli.jsongrammar.Token.TokenType;
import static org.projectnessie.nessie.cli.jsongrammar.Token.TokenType.*;
import java.util.*;
public class JsonCLexer extends TokenSource {
private static MatcherHook MATCHER_HOOK;
// this cannot be initialized here, since the hook must be set afterwards
public enum LexicalState {
JSON
}
LexicalState lexicalState = LexicalState.values()[0];
EnumSet<TokenType> activeTokenTypes = null;
// Token types that are "regular" tokens that participate in parsing,
// i.e. declared as TOKEN
static final EnumSet<TokenType> regularTokens = EnumSet.of(EOF, COLON, COMMA, OPEN_BRACKET, CLOSE_BRACKET, OPEN_BRACE, CLOSE_BRACE, TRUE, FALSE, NULL, STRING_LITERAL, NUMBER);
// Token types that do not participate in parsing,
// i.e. declared as UNPARSED (or SPECIAL_TOKEN)
static final EnumSet<TokenType> unparsedTokens = EnumSet.of(SINGLE_LINE_COMMENT, MULTI_LINE_COMMENT);
// Tokens that are skipped, i.e. SKIP
static final EnumSet<TokenType> skippedTokens = EnumSet.of(WHITESPACE);
// Tokens that correspond to a MORE, i.e. that are pending
// additional input
static final EnumSet<TokenType> moreTokens = EnumSet.noneOf(TokenType.class);
public JsonCLexer(CharSequence input) {
this("input", input);
}
/**
* @param inputSource just the name of the input source (typically the filename)
* that will be used in error messages and so on.
* @param input the input
*/
public JsonCLexer(String inputSource, CharSequence input) {
this(inputSource, input, LexicalState.JSON, 1, 1);
}
/**
* @param inputSource just the name of the input source (typically the filename) that
* will be used in error messages and so on.
* @param input the input
* @param lexState The starting lexical state; may be null to indicate the default
* starting state
* @param startingLine The line number at which we are starting, for the purposes of
* location/error messages. In most normal usage, this is 1.
* @param startingColumn The column number at which we are starting, for the purposes of
* location/error messages. In most normal usage, this is 1.
*/
public JsonCLexer(String inputSource, CharSequence input, LexicalState lexState, int startingLine, int startingColumn) {
super(inputSource, input, startingLine, startingColumn, 1, true, false, false, "");
if (lexState != null) switchTo(lexState);
}
public Token getNextToken(Token tok) {
return getNextToken(tok, this.activeTokenTypes);
}
/**
* The public method for getting the next token; it is
* called by JsonCParser.
* It first checks whether the token following the one
* passed in is already cached. If not, it falls through
* to the NFA machinery.
*/
public Token getNextToken(Token tok, EnumSet<TokenType> activeTokenTypes) {
if (tok == null) {
tok = tokenizeAt(0, null, activeTokenTypes);
cacheToken(tok);
return tok;
}
Token cachedToken = tok.nextCachedToken();
// If the cached next token is not currently active, we
// throw it away and go back to the JsonCLexer
if (cachedToken != null && activeTokenTypes != null && !activeTokenTypes.contains(cachedToken.getType())) {
reset(tok);
cachedToken = null;
}
if (cachedToken == null) {
Token token = tokenizeAt(tok.getEndOffset(), null, activeTokenTypes);
cacheToken(token);
return token;
}
return cachedToken;
}
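// Illustrative usage (a sketch, not part of the generated source): driving the
// lexer directly rather than through JsonCParser. Token.getType() is the only
// accessor assumed here beyond what this file already uses.
//
//   JsonCLexer lexer = new JsonCLexer("{\"a\": [1, 2.5e3, true, null]}");
//   Token t = null;
//   do {
//       t = lexer.getNextToken(t);
//       System.out.println(t.getType());
//   } while (t.getType() != TokenType.EOF);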
static class MatchInfo {
TokenType matchedType;
int matchLength;
@Override
public int hashCode() {
return Objects.hash(matchLength, matchedType);
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
MatchInfo other = (MatchInfo) obj;
return matchLength == other.matchLength && matchedType == other.matchedType;
}
}
@FunctionalInterface
private interface MatcherHook {
MatchInfo apply(LexicalState lexicalState, CharSequence input, int position, EnumSet<TokenType> activeTokenTypes, NfaFunction[] nfaFunctions, BitSet currentStates, BitSet nextStates, MatchInfo matchInfo);
}
/**
* Core tokenization method. Note that this can be called from a static context.
* Hence the extra parameters that need to be passed in.
*/
static MatchInfo getMatchInfo(CharSequence input, int position, EnumSet<TokenType> activeTokenTypes, NfaFunction[] nfaFunctions, BitSet currentStates, BitSet nextStates, MatchInfo matchInfo) {
if (matchInfo == null) {
matchInfo = new MatchInfo();
}
if (position >= input.length()) {
matchInfo.matchedType = EOF;
matchInfo.matchLength = 0;
return matchInfo;
}
int start = position;
int matchLength = 0;
TokenType matchedType = TokenType.INVALID;
EnumSet<TokenType> alreadyMatchedTypes = EnumSet.noneOf(TokenType.class);
if (currentStates == null) currentStates = new BitSet(51);
else currentStates.clear();
if (nextStates == null) nextStates = new BitSet(51);
else nextStates.clear();
// the core NFA loop
do {
if (position > start) {
// What was nextStates on the last iteration
// is now the currentStates!
BitSet temp = currentStates;
currentStates = nextStates;
nextStates = temp;
nextStates.clear();
} else {
currentStates.set(0);
}
if (position >= input.length()) {
break;
}
int curChar = Character.codePointAt(input, position++);
if (curChar > 0xFFFF) position++;
int nextActive = currentStates.nextSetBit(0);
while (nextActive != -1) {
TokenType returnedType = nfaFunctions[nextActive].apply(curChar, nextStates, activeTokenTypes, alreadyMatchedTypes);
if (returnedType != null && (position - start > matchLength || returnedType.ordinal() < matchedType.ordinal())) {
matchedType = returnedType;
matchLength = position - start;
alreadyMatchedTypes.add(returnedType);
}
nextActive = currentStates.nextSetBit(nextActive + 1);
}
if (position >= input.length()) break;
} while (!nextStates.isEmpty());
matchInfo.matchedType = matchedType;
matchInfo.matchLength = matchLength;
return matchInfo;
}
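// Disambiguation rule, spelled out (inferred from the loop above): the NFA keeps
// running while any state is live, and a longer match always replaces a shorter
// one (position - start > matchLength). For input "123e+" the NFA stays live
// through 'e' and '+' but reaches no accepting state there, so the recorded match
// remains NUMBER with matchLength == 3. On a tie in length, the token type with
// the lower ordinal wins (returnedType.ordinal() < matchedType.ordinal()).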
/**
* @param position The position at which to tokenize.
* @param lexicalState The lexical state in which to tokenize. If this is null, the
* instance field #lexicalState is used.
* @param activeTokenTypes The active token types. If this is null, all token types are active.
* @return the Token at the given position
*/
final Token tokenizeAt(int position, LexicalState lexicalState, EnumSet<TokenType> activeTokenTypes) {
if (lexicalState == null) lexicalState = this.lexicalState;
int tokenBeginOffset = position;
boolean inMore = false;
int invalidRegionStart = -1;
Token matchedToken = null;
TokenType matchedType = null;
// The core tokenization loop
MatchInfo matchInfo = new MatchInfo();
BitSet currentStates = new BitSet(51);
BitSet nextStates = new BitSet(51);
while (matchedToken == null) {
if (!inMore) tokenBeginOffset = position;
if (MATCHER_HOOK != null) {
matchInfo = MATCHER_HOOK.apply(lexicalState, this, position, activeTokenTypes, nfaFunctions, currentStates, nextStates, matchInfo);
if (matchInfo == null) {
matchInfo = getMatchInfo(this, position, activeTokenTypes, nfaFunctions, currentStates, nextStates, matchInfo);
}
} else {
matchInfo = getMatchInfo(this, position, activeTokenTypes, nfaFunctions, currentStates, nextStates, matchInfo);
}
matchedType = matchInfo.matchedType;
inMore = moreTokens.contains(matchedType);
position += matchInfo.matchLength;
if (matchedType == TokenType.INVALID) {
if (invalidRegionStart == -1) {
invalidRegionStart = tokenBeginOffset;
}
int cp = Character.codePointAt(this, position);
++position;
if (cp > 0xFFFF) ++position;
continue;
}
if (invalidRegionStart != -1) {
return new InvalidToken(this, invalidRegionStart, tokenBeginOffset);
}
if (skippedTokens.contains(matchedType)) {
skipTokens(tokenBeginOffset, position);
} else if (regularTokens.contains(matchedType) || unparsedTokens.contains(matchedType)) {
matchedToken = Token.newToken(matchedType, this, tokenBeginOffset, position);
matchedToken.setUnparsed(!regularTokens.contains(matchedType));
}
}
return matchedToken;
}
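// Invalid input handling, illustrated (not generated documentation): consecutive
// unmatched characters are folded into one InvalidToken. For input "@@true", both
// '@' characters extend the region starting at invalidRegionStart; only when TRUE
// matches at offset 2 does the method return an InvalidToken spanning offsets 0-2,
// and the TRUE token itself is produced by the following call.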
/**
* Switch to specified lexical state.
* @param lexState the lexical state to switch to
* @return whether we switched (i.e. we weren't already in the desired lexical state)
*/
public boolean switchTo(LexicalState lexState) {
if (this.lexicalState != lexState) {
this.lexicalState = lexState;
return true;
}
return false;
}
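// Note: this grammar has a single lexical state (JSON), so in practice this never
// switches anything; the method is part of the common lexer skeleton that CongoCC
// generates for grammars with multiple lexical states.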
// Reset the token source input
// to just after the Token passed in.
void reset(Token t, LexicalState state) {
uncacheTokens(t);
if (state != null) {
switchTo(state);
}
}
void reset(Token t) {
reset(t, null);
}
// NFA related code follows.
// The functional interface that represents
// the acceptance method of an NFA state
@FunctionalInterface
interface NfaFunction {
TokenType apply(int ch, BitSet bs, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes);
}
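// The contract, spelled out: each NfaFunction is the transition function of one
// NFA state. Given the current code point it sets the bits of its successor
// states in 'bs' and returns a non-null TokenType only if the state accepts on
// that character. For example, state 0 on '"' sets state 6 (the string body) and
// returns null, while state 6 on a closing '"' returns STRING_LITERAL.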
private static NfaFunction[] nfaFunctions;
// Initialize the various NFA method tables
static {
JSON.NFA_FUNCTIONS_init();
}
// The nitty-gritty of the NFA code follows.
/**
* Holder class for NFA code related to JSON lexical state
*/
private static class JSON {
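// Rough map of the state indices below (reconstructed from the transitions; not
// present in the generated source): 0 is the start state; 1-5, 14, 15 and 3
// recognize NUMBER; 6 is the STRING_LITERAL body with 16-21 handling escapes;
// 7 is a WHITESPACE run; 10-11 SINGLE_LINE_COMMENT; 29, 30 and 13
// MULTI_LINE_COMMENT; 22, 23, 8 spell "true"; 24-26, 9 "false"; 27, 28, 12 "null".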
private static TokenType getNfaNameJSONIndex0(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
TokenType type = null;
if (ch == '"') {
if (validTypes == null || validTypes.contains(STRING_LITERAL)) {
nextStates.set(6);
}
} else if (ch == '-') {
if (validTypes == null || validTypes.contains(NUMBER)) {
nextStates.set(1);
}
} else if (ch == '/') {
if (validTypes == null || validTypes.contains(SINGLE_LINE_COMMENT)) {
nextStates.set(10);
}
if (validTypes == null || validTypes.contains(MULTI_LINE_COMMENT)) {
nextStates.set(29);
}
} else if (ch == 'f') {
if (validTypes == null || validTypes.contains(FALSE)) {
nextStates.set(24);
}
} else if (ch == 'n') {
if (validTypes == null || validTypes.contains(NULL)) {
nextStates.set(27);
}
} else if (ch == 't') {
if (validTypes == null || validTypes.contains(TRUE)) {
nextStates.set(22);
}
} else if (ch == '0') {
if (validTypes == null || validTypes.contains(NUMBER)) {
nextStates.set(15);
type = NUMBER;
}
} else if (ch >= '1' && ch <= '9') {
if (validTypes == null || validTypes.contains(NUMBER)) {
nextStates.set(2);
type = NUMBER;
}
} else if (ch == '\t') {
if (validTypes == null || validTypes.contains(WHITESPACE)) {
nextStates.set(7);
type = WHITESPACE;
}
} else if (ch == '\n') {
if (validTypes == null || validTypes.contains(WHITESPACE)) {
nextStates.set(7);
type = WHITESPACE;
}
} else if (ch == '\r') {
if (validTypes == null || validTypes.contains(WHITESPACE)) {
nextStates.set(7);
type = WHITESPACE;
}
} else if (ch == ' ') {
if (validTypes == null || validTypes.contains(WHITESPACE)) {
nextStates.set(7);
type = WHITESPACE;
}
} else if (ch == '}') {
if (validTypes == null || validTypes.contains(CLOSE_BRACE)) {
type = CLOSE_BRACE;
}
} else if (ch == '{') {
if (validTypes == null || validTypes.contains(OPEN_BRACE)) {
type = OPEN_BRACE;
}
} else if (ch == ']') {
if (validTypes == null || validTypes.contains(CLOSE_BRACKET)) {
type = CLOSE_BRACKET;
}
} else if (ch == '[') {
if (validTypes == null || validTypes.contains(OPEN_BRACKET)) {
type = OPEN_BRACKET;
}
} else if (ch == ',') {
if (validTypes == null || validTypes.contains(COMMA)) {
type = COMMA;
}
} else if (ch == ':') {
if (validTypes == null || validTypes.contains(COLON)) {
type = COLON;
}
}
return type;
}
private static TokenType getNfaNameJSONIndex1(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
TokenType type = null;
if (ch == '0') {
nextStates.set(15);
type = NUMBER;
} else if (ch >= '1' && ch <= '9') {
nextStates.set(2);
type = NUMBER;
}
return type;
}
private static TokenType getNfaNameJSONIndex2(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
TokenType type = null;
if (ch == '.') {
nextStates.set(4);
} else if ((ch == 'E' || ch == 'e')) {
nextStates.set(14);
} else if (ch >= '0' && ch <= '9') {
nextStates.set(2);
type = NUMBER;
}
return type;
}
private static TokenType getNfaNameJSONIndex3(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch >= '1' && ch <= '9') {
nextStates.set(3);
return NUMBER;
}
return null;
}
private static TokenType getNfaNameJSONIndex4(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch >= '0' && ch <= '9') {
nextStates.set(5);
return NUMBER;
}
return null;
}
private static TokenType getNfaNameJSONIndex5(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
TokenType type = null;
if ((ch == 'E' || ch == 'e')) {
nextStates.set(14);
} else if (ch >= '0' && ch <= '9') {
nextStates.set(5);
type = NUMBER;
}
return type;
}
private static TokenType getNfaNameJSONIndex6(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
TokenType type = null;
if ((ch == ' ' || ch == '!' || (ch >= '#' && ch <= '[' || ch >= ']'))) {
nextStates.set(6);
} else if (ch == '\\') {
nextStates.set(16);
nextStates.set(17);
} else if (ch == '"') {
type = STRING_LITERAL;
}
return type;
}
private static TokenType getNfaNameJSONIndex7(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
TokenType type = null;
if (ch == '\t') {
nextStates.set(7);
type = WHITESPACE;
} else if (ch == '\n') {
nextStates.set(7);
type = WHITESPACE;
} else if (ch == '\r') {
nextStates.set(7);
type = WHITESPACE;
} else if (ch == ' ') {
nextStates.set(7);
type = WHITESPACE;
}
return type;
}
private static TokenType getNfaNameJSONIndex8(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'e') {
return TRUE;
}
return null;
}
private static TokenType getNfaNameJSONIndex9(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'e') {
return FALSE;
}
return null;
}
private static TokenType getNfaNameJSONIndex10(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == '/') {
nextStates.set(11);
return SINGLE_LINE_COMMENT;
}
return null;
}
private static TokenType getNfaNameJSONIndex11(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if ((ch >= 0x0 && ch <= '\t' || ch >= 0xb)) {
nextStates.set(11);
return SINGLE_LINE_COMMENT;
}
return null;
}
private static TokenType getNfaNameJSONIndex12(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'l') {
return NULL;
}
return null;
}
private static TokenType getNfaNameJSONIndex13(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (alreadyMatchedTypes.contains(MULTI_LINE_COMMENT)) return null;
if (ch == '/') {
return MULTI_LINE_COMMENT;
}
return null;
}
private static TokenType getNfaNameJSONIndex14(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if ((ch == '+' || ch == '-')) {
nextStates.set(3);
}
return null;
}
private static TokenType getNfaNameJSONIndex15(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == '.') {
nextStates.set(4);
} else if ((ch == 'E' || ch == 'e')) {
nextStates.set(14);
}
return null;
}
private static TokenType getNfaNameJSONIndex16(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (checkIntervals(NFA_MOVES_JSON_40, ch)) {
nextStates.set(6);
}
return null;
}
private static TokenType getNfaNameJSONIndex17(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'u') {
nextStates.set(18);
}
return null;
}
private static TokenType getNfaNameJSONIndex18(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if ((ch >= '0' && ch <= '9' || (ch >= 'A' && ch <= 'F' || ch >= 'a' && ch <= 'f'))) {
nextStates.set(19);
}
return null;
}
private static TokenType getNfaNameJSONIndex19(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if ((ch >= '0' && ch <= '9' || (ch >= 'A' && ch <= 'F' || ch >= 'a' && ch <= 'f'))) {
nextStates.set(20);
}
return null;
}
private static TokenType getNfaNameJSONIndex20(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if ((ch >= '0' && ch <= '9' || (ch >= 'A' && ch <= 'F' || ch >= 'a' && ch <= 'f'))) {
nextStates.set(21);
}
return null;
}
private static TokenType getNfaNameJSONIndex21(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if ((ch >= '0' && ch <= '9' || (ch >= 'A' && ch <= 'F' || ch >= 'a' && ch <= 'f'))) {
nextStates.set(6);
}
return null;
}
private static TokenType getNfaNameJSONIndex22(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'r') {
nextStates.set(23);
}
return null;
}
private static TokenType getNfaNameJSONIndex23(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'u') {
nextStates.set(8);
}
return null;
}
private static TokenType getNfaNameJSONIndex24(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'a') {
nextStates.set(25);
}
return null;
}
private static TokenType getNfaNameJSONIndex25(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'l') {
nextStates.set(26);
}
return null;
}
private static TokenType getNfaNameJSONIndex26(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 's') {
nextStates.set(9);
}
return null;
}
private static TokenType getNfaNameJSONIndex27(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'u') {
nextStates.set(28);
}
return null;
}
private static TokenType getNfaNameJSONIndex28(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (ch == 'l') {
nextStates.set(12);
}
return null;
}
private static TokenType getNfaNameJSONIndex29(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (alreadyMatchedTypes.contains(MULTI_LINE_COMMENT)) return null;
if (ch == '*') {
nextStates.set(30);
}
return null;
}
private static TokenType getNfaNameJSONIndex30(int ch, BitSet nextStates, EnumSet<TokenType> validTypes, EnumSet<TokenType> alreadyMatchedTypes) {
if (alreadyMatchedTypes.contains(MULTI_LINE_COMMENT)) return null;
if (ch >= 0x0) {
nextStates.set(30);
}
if (ch == '*') {
nextStates.set(13);
}
return null;
}
private static final int[] NFA_MOVES_JSON_40 = NFA_MOVES_JSON_40_init();
private static int[] NFA_MOVES_JSON_40_init() {
return new int[] {'"', '"', '/', '/', '\\', '\\', 'b', 'b', 'f', 'f',
'n', 'n', 'r', 'r', 't', 't'};
}
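// The array above is a flat list of inclusive [lo, hi] code point pairs consumed
// by checkIntervals (presumably inherited from TokenSource, which is not shown
// here). Each pair here is a single character: the legal one-character escapes
// after a backslash in a JSON string, i.e. " / \ b f n r t.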
private static void NFA_FUNCTIONS_init() {
nfaFunctions = new NfaFunction[] {JSON::getNfaNameJSONIndex0, JSON::getNfaNameJSONIndex1,
JSON::getNfaNameJSONIndex2, JSON::getNfaNameJSONIndex3, JSON::getNfaNameJSONIndex4,
JSON::getNfaNameJSONIndex5, JSON::getNfaNameJSONIndex6, JSON::getNfaNameJSONIndex7,
JSON::getNfaNameJSONIndex8, JSON::getNfaNameJSONIndex9, JSON::getNfaNameJSONIndex10,
JSON::getNfaNameJSONIndex11, JSON::getNfaNameJSONIndex12, JSON::getNfaNameJSONIndex13,
JSON::getNfaNameJSONIndex14, JSON::getNfaNameJSONIndex15, JSON::getNfaNameJSONIndex16,
JSON::getNfaNameJSONIndex17, JSON::getNfaNameJSONIndex18, JSON::getNfaNameJSONIndex19,
JSON::getNfaNameJSONIndex20, JSON::getNfaNameJSONIndex21, JSON::getNfaNameJSONIndex22,
JSON::getNfaNameJSONIndex23, JSON::getNfaNameJSONIndex24, JSON::getNfaNameJSONIndex25,
JSON::getNfaNameJSONIndex26, JSON::getNfaNameJSONIndex27, JSON::getNfaNameJSONIndex28,
JSON::getNfaNameJSONIndex29, JSON::getNfaNameJSONIndex30};
}
}
}