Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
// Copyright 2017-present Strumenta and contributors, licensed under Apache 2.0.
// Copyright 2024-present Strumenta and contributors, licensed under BSD 3-Clause.
package org.antlr.v4.kotlinruntime.atn
import com.strumenta.antlrkotlin.runtime.System
import com.strumenta.antlrkotlin.runtime.assert
import com.strumenta.antlrkotlin.runtime.synchronized
import org.antlr.v4.kotlinruntime.*
import org.antlr.v4.kotlinruntime.dfa.DFA
import org.antlr.v4.kotlinruntime.dfa.DFAState
import org.antlr.v4.kotlinruntime.misc.Interval
/**
 * "dup" of [ParserInterpreter].
 *
 * @param recog The lexer this simulator works for, or `null` when no recognizer is provided
 *   (predicates are then assumed to be `true`)
 * @param atn The ATN to simulate
 * @param decisionToDFA The DFA cache; [match] looks up the entry for the current lexer mode
 * @param sharedContextCache Prediction context cache handed to [ATNSimulator]
 */
public open class LexerATNSimulator(
    protected val recog: Lexer?,
    atn: ATN,
    public val decisionToDFA: Array<DFA>,
    sharedContextCache: PredictionContextCache,
) : ATNSimulator(atn, sharedContextCache) {
public companion object {
    /** When `true`, the simulation steps are traced through `System.out`. */
    public const val debug: Boolean = false

    /** DFA tracing switch; not read anywhere in the visible part of this class. */
    public const val dfa_debug: Boolean = false

    /** Smallest input symbol for which a DFA edge is cached. */
    public const val MIN_DFA_EDGE: Int = 0

    /** Largest input symbol for which a DFA edge is cached; forces unicode to stay in ATN. */
    public const val MAX_DFA_EDGE: Int = 127
}
/**
 * The current token's starting index into the character stream.
 *
 * Shared across DFA to ATN simulation in case the ATN fails and the
 * DFA did not have a previous accept state. In this case, we use the
 * ATN-generated exception object.
 */
protected var startIndex: Int = -1

/**
 * Line number `1..n` within the input.
 */
public var line: Int = 1

/**
 * The index of the character relative to the beginning of the line `0..n-1`.
 */
public var charPositionInLine: Int = 0

/**
 * The lexer mode currently being simulated; used as the index into [decisionToDFA].
 */
protected var mode: Int = Lexer.DEFAULT_MODE

/**
 * Used during DFA/ATN exec to record the most recent accept configuration info.
 */
protected val prevAccept: SimState = SimState()
/**
 * When we hit an accept state in either the DFA or the ATN, we
 * have to notify the character stream to start buffering characters
 * via [IntStream.mark] and record the current state. The current sim state
 * includes the current index into the input, the current line,
 * and current character position in that line. Note that the Lexer is
 * tracking the starting line and character position of the token. These
 * variables track the "state" of the simulator when it hits an accept state.
 *
 * We track these variables separately for the DFA and ATN simulation
 * because the DFA simulation often has to fail over to the ATN
 * simulation. If the ATN simulation fails, we need the DFA to fall
 * back to its previously accepted state, if any. If the ATN succeeds,
 * then the ATN does the accept and the DFA simulator that invoked it
 * can simply return the predicted token type.
 */
protected class SimState {
    /** Input index recorded at the accept state, or `-1` when nothing has been captured. */
    public var index: Int = -1

    /** Line recorded at the accept state. */
    public var line: Int = 0

    /** Character position in line recorded at the accept state, or `-1` when nothing has been captured. */
    public var charPos: Int = -1

    /** The accepting DFA state, or `null` when no accept state has been seen. */
    public var dfaState: DFAState? = null

    /** Puts every field back to its initial "nothing captured" value. */
    public fun reset() {
        index = -1
        line = 0
        charPos = -1
        dfaState = null
    }
}
/**
 * Creates a simulator without a recognizer; delegates to the primary
 * constructor with `recog = null`.
 */
public constructor(
    atn: ATN,
    decisionToDFA: Array<DFA>,
    sharedContextCache: PredictionContextCache,
) : this(null, atn, decisionToDFA, sharedContextCache)
/**
 * Copies the position-tracking state (character position, line, mode and
 * token start index) of [simulator] into this simulator.
 *
 * @param simulator The simulator whose state is copied
 */
public open fun copyState(simulator: LexerATNSimulator) {
    charPositionInLine = simulator.charPositionInLine
    line = simulator.line
    mode = simulator.mode
    startIndex = simulator.startIndex
}
/**
 * Matches one token starting at the current position of [input].
 *
 * @param input The character stream to read from
 * @param mode The lexer mode to match in; selects the DFA in [decisionToDFA]
 * @return The value produced by [matchATN] or [execATN] for the matched token
 */
public open fun match(input: CharStream, mode: Int): Int {
    this.mode = mode
    val mark = input.mark()
    try {
        this.startIndex = input.index()
        // Forget the accept state of the previous token; otherwise failOrAccept
        // could "accept" stale information when this token matches nothing.
        this.prevAccept.reset()
        val dfa = decisionToDFA[mode]
        val s0 = dfa.s0
        return if (s0 == null) {
            // No start state cached for this mode yet: build it through the ATN
            matchATN(input)
        } else {
            execATN(input, s0)
        }
    } finally {
        // Always give the marker back, even when matching throws
        input.release(mark)
    }
}
/**
 * Returns the simulator to its initial state: no recorded accept state,
 * no token start, line 1, column 0 and the default lexer mode.
 */
override fun reset() {
    prevAccept.reset()
    startIndex = -1
    line = 1
    charPositionInLine = 0
    mode = Lexer.DEFAULT_MODE
}
/**
 * Replaces every entry of [decisionToDFA] with a fresh, empty [DFA]
 * built for the decision state with the same index.
 */
override fun clearDFA() {
    for (d in decisionToDFA.indices) {
        decisionToDFA[d] = DFA(atn.getDecisionState(d)!!, d)
    }
}
/**
 * Matches a token when the current mode has no cached DFA start state:
 * computes the start configuration set, turns it into a DFA state and
 * continues with [execATN].
 *
 * @param input The character stream to read from
 * @return The result of [execATN] for the computed start state
 */
protected open fun matchATN(input: CharStream): Int {
    val startState = atn.modeToStartState[mode]

    if (debug) {
        System.out.println("matchATN mode $mode start: $startState")
    }

    // Remember the mode for the debug output below
    val oldMode = mode
    val s0Closure = computeStartState(input, startState)

    // A start closure that traversed a predicate must not be cached as the
    // DFA start state; the flag is cleared before the DFA state is created.
    val suppressEdge = s0Closure.hasSemanticContext
    s0Closure.hasSemanticContext = false

    val next = addDFAState(s0Closure)
    if (!suppressEdge) {
        decisionToDFA[mode].s0 = next
    }

    val predict = execATN(input, next)

    if (debug) {
        System.out.println("DFA after matchATN: ${decisionToDFA[oldMode].toLexerString()}")
    }

    return predict
}
/**
 * Simulates the lexer starting from DFA state [ds0], reusing cached DFA edges
 * where they exist and computing new target states otherwise, until no
 * further input symbol can be matched.
 *
 * @param input The character stream to read from
 * @param ds0 The DFA state to start from
 * @return The result of [failOrAccept] for the most recent accept state
 */
protected open fun execATN(input: CharStream, ds0: DFAState): Int {
    if (debug) {
        System.out.println("start state closure=${ds0.configs}")
    }

    if (ds0.isAcceptState) {
        // Allow zero-length tokens
        captureSimState(prevAccept, input, ds0)
    }

    var t = input.LA(1)
    // s is current/from DFA state
    var s = ds0

    while (true) {
        if (debug) {
            System.out.println("execATN loop starting closure: ${s.configs}")
        }

        // As we move src->trg, src->trg, we keep track of the previous trg to
        // avoid looking up the DFA state again, which is expensive.
        // If the previous target was already part of the DFA, we might
        // be able to avoid doing a reach operation upon t. If s!=null,
        // it means that semantic predicates didn't prevent us from
        // creating a DFA state. Once we know s!=null, we check to see if
        // the DFA state has an edge already for t. If so, we can just reuse
        // its configuration set; there's no point in re-computing it.
        // This is kind of like doing DFA simulation within the ATN
        // simulation because DFA simulation is really just a way to avoid
        // computing reach/closure sets. Technically, once we know that
        // we have a previously added DFA state, we could jump over to
        // the DFA simulator. But, that would mean popping back and forth
        // a lot and making things more complicated algorithmically.
        // This optimization makes a lot of sense for loops within DFA.
        // A character will take us back to an existing DFA state
        // that already has lots of edges out of it. e.g., .* in comments.
        val target = getExistingTargetState(s, t) ?: computeTargetState(input, s, t)

        if (target === ERROR) {
            // Nothing can be matched on t: stop and decide below
            break
        }

        // If this is a consumable input element, make sure to consume before
        // capturing the accept state so the input index, line, and char
        // position accurately reflect the state of the interpreter at the
        // end of the token.
        if (t != IntStream.EOF) {
            consume(input)
        }

        if (target.isAcceptState) {
            captureSimState(prevAccept, input, target)
            if (t == IntStream.EOF) {
                // EOF was matched as part of the token; there is nothing after it
                break
            }
        }

        t = input.LA(1)
        // Flip, current DFA target becomes new src/from state
        s = target
    }

    return failOrAccept(prevAccept, input, s.configs, t)
}
/**
 * Get an existing target state for an edge in the DFA.
 *
 * If the target state for the edge has not yet been computed
 * or is otherwise not available, this method returns `null`.
 *
 * @param s The current DFA state
 * @param t The next input symbol
 * @return The existing target DFA state for the given input symbol [t],
 *   or `null` if the target state for this edge is not already cached
 */
public open fun getExistingTargetState(s: DFAState, t: Int): DFAState? {
    // Read the edge table once so the null check and the lookup see the same array
    val edges = s.edges
    if (edges == null || t < MIN_DFA_EDGE || t > MAX_DFA_EDGE) {
        return null
    }

    val target = edges[t - MIN_DFA_EDGE]
    if (debug && target != null) {
        System.out.println("reuse state ${s.stateNumber} edge to ${target.stateNumber}")
    }

    return target
}
/**
 * Compute a target state for an edge in the DFA, and attempt to add the
 * computed state and corresponding edge to the DFA.
 *
 * @param input The input stream
 * @param s The current DFA state
 * @param t The next input symbol
 * @return The computed target DFA state for the given input symbol [t].
 *   If [t] does not lead to a valid DFA state, this method returns [ATNSimulator.ERROR]
 */
public open fun computeTargetState(input: CharStream, s: DFAState, t: Int): DFAState {
    val reach = OrderedATNConfigSet()

    // If we don't find an existing DFA state
    // Fill reach starting from closure, following t transitions
    getReachableConfigSet(input, s.configs, reach, t)

    if (reach.isEmpty()) {
        // We got nowhere on t from s
        if (!reach.hasSemanticContext) {
            // We got nowhere on t, don't throw out this knowledge; it'd
            // cause a failover from DFA later.
            addDFAEdge(s, t, ERROR)
        }

        // Stop when we can't match any more char
        return ERROR
    }

    // Add an edge from s to target DFA found/created for reach
    return addDFAEdge(s, t, reach)
}
/**
 * Finishes a match attempt: accepts the most recently recorded accept state,
 * reports EOF, or fails.
 *
 * @param prevAccept The most recent accept state information
 * @param input The character stream being matched
 * @param reach The configuration set reported in the exception on failure
 * @param t The input symbol at which matching stopped
 * @return The prediction of the recorded accept state, or [Token.EOF] when
 *   nothing was accepted and EOF is the first symbol of the token
 * @throws LexerNoViableAltException If nothing was accepted and the input is not at EOF
 */
protected open fun failOrAccept(
    prevAccept: SimState,
    input: CharStream,
    reach: ATNConfigSet,
    t: Int,
): Int {
    val acceptState = prevAccept.dfaState
    if (acceptState != null) {
        // NOTE(review): the callee name is not visible in this copy of the file;
        // `accept` matches the named arguments and upstream ANTLR — confirm.
        accept(
            input = input,
            lexerActionExecutor = acceptState.lexerActionExecutor,
            startIndex = startIndex,
            index = prevAccept.index,
            line = prevAccept.line,
            charPos = prevAccept.charPos,
        )
        return acceptState.prediction
    }

    // If no accept and EOF is first char, return EOF
    if (t == IntStream.EOF && input.index() == startIndex) {
        return Token.EOF
    }

    throw LexerNoViableAltException(recog!!, input, startIndex, reach)
}
/**
 * Given a starting configuration set, figure out all ATN configurations
 * we can reach upon input [t].
 *
 * Parameter [reach] is a return parameter.
 */
protected open fun getReachableConfigSet(input: CharStream, closure: ATNConfigSet, reach: ATNConfigSet, t: Int) {
// NOTE(review): this function is truncated in this copy of the file. Closing braces,
// the declaration of `skipAlt` and everything after the transition loop header are
// missing; restore them from the project history before relying on this code.
// This is used to skip processing for configs which have a lower priority
// than a config that already reached an accept state for the same rule
for (c in closure) {
// True when this config's alternative equals `skipAlt` (declared in the missing part).
val currentAltReachedAcceptState = c.alt == skipAlt
// Such a config is only singled out when it has also passed through a non-greedy decision.
// NOTE(review): the body of this `if` is missing — presumably it skips the config; confirm.
if (currentAltReachedAcceptState && (c as LexerATNConfig).hasPassedThroughNonGreedyDecision()) {
if (debug) {
System.out.println("testing ${getTokenName(t)} at ${c.toString(recog, true)}")
val n = c.state.numberOfTransitions
// NOTE(review): the next line is garbled — the upper bound of the range and the loop body were lost.
for (ti in 0.. {
val ruleTransition = t as RuleTransition
val newContext = SingletonPredictionContext.create(config.context, ruleTransition.followState.stateNumber)
c = LexerATNConfig(config,, newContext)
Transition.PRECEDENCE -> throw UnsupportedOperationException("Precedence predicates are not supported in lexers.")
Transition.PREDICATE -> {
// Track traversing semantic predicates. If we traverse,
// we cannot add a DFA state for this "reach" computation
// because the DFA would not test the predicate again in the
// future. Rather than creating collections of semantic predicates
// like v3 and testing them on prediction, v4 will test them on the
// fly all the time using the ATN not the DFA. This is slower, but
// semantically it's not used that often. One of the key elements to
// this predicate mechanism is not adding DFA states that see
// predicates immediately afterward in the ATN. For example,
// a : ID {p1}? | ID {p2}? ;
// should create the start state for rule 'a' (to save start state
// competition), but should not create target of ID state. The
// collection of ATN states the following ID references includes
// states reached by traversing predicates. Since this is when we
// test them, we cannot cache the DFA state target of ID.
val pt = t as PredicateTransition
if (debug) {
System.out.println("EVAL rule " + pt.ruleIndex + ":" + pt.predIndex)
configs.hasSemanticContext = true
if (evaluatePredicate(input, pt.ruleIndex, pt.predIndex, speculative)) {
c = LexerATNConfig(config,
Transition.ACTION -> {
if (config.context == null || config.context!!.hasEmptyPath()) {
// Execute actions anywhere in the start rule for a token.
// TODO: if the entry rule is invoked recursively, some
// actions may be executed during the recursive call. The
// problem can appear when hasEmptyPath() is true but
// isEmpty() is false. In this case, the config needs to be
// split into two contexts - one with just the empty path
// and another with everything but the empty path.
// Unfortunately, the current algorithm does not allow
// getEpsilonTarget to return two configurations, so
// additional modifications are needed before we can support
// the split operation.
val lexerActionExecutor = LexerActionExecutor.append(
atn.lexerActions!![(t as ActionTransition).actionIndex],
c = LexerATNConfig(config,, lexerActionExecutor)
} else {
// Ignore actions in referenced rules
c = LexerATNConfig(config,
Transition.EPSILON -> {
c = LexerATNConfig(config,
Transition.SET -> {
if (treatEofAsEpsilon) {
if (t.matches(CharStream.EOF, Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE)) {
c = LexerATNConfig(config,
return c
/**
 * Evaluate a predicate specified in the lexer.
 *
 * If [speculative] is `true`, this method was called before
 * [consume] for the matched character. This method should call
 * [consume] before evaluating the predicate to ensure position
 * sensitive values, including [Lexer.text], [Lexer.line],
 * and [Lexer.charPositionInLine], properly reflect the current
 * lexer state. This method should restore [input] and the simulator
 * to the original state before returning (i.e., undo the actions made by the
 * call to [consume]).
 *
 * @param input The input stream
 * @param ruleIndex The rule containing the predicate
 * @param predIndex The index of the predicate within the rule
 * @param speculative `true` if the current index in [input] is
 *   one character before the predicate's location
 * @return `true` if the specified predicate evaluates to `true`
 */
protected open fun evaluatePredicate(
    input: CharStream,
    ruleIndex: Int,
    predIndex: Int,
    speculative: Boolean
): Boolean {
    // Assume true if no recognizer was provided
    if (recog == null) {
        return true
    }

    if (!speculative) {
        return recog.sempred(null, ruleIndex, predIndex)
    }

    // Snapshot everything consume() changes so it can be undone afterwards
    val savedCharPositionInLine = charPositionInLine
    val savedLine = line
    val index = input.index()
    val marker = input.mark()

    try {
        // Step over the matched character so the predicate sees the position after it
        consume(input)
        return recog.sempred(null, ruleIndex, predIndex)
    } finally {
        charPositionInLine = savedCharPositionInLine
        line = savedLine
        input.seek(index)
        input.release(marker)
    }
}
/**
 * Records the current simulator position together with [dfaState] in [settings].
 *
 * @param settings The state holder to fill
 * @param input The character stream whose current index is recorded
 * @param dfaState The DFA state to remember
 */
protected open fun captureSimState(settings: SimState, input: CharStream, dfaState: DFAState) {
    settings.index = input.index()
    settings.line = line
    settings.charPos = charPositionInLine
    settings.dfaState = dfaState
}
/**
 * Creates (or finds) the DFA state for configuration set [q] and, unless a
 * predicate was traversed while computing [q], links it to [from] on symbol [t].
 *
 * @param from The source DFA state
 * @param t The input symbol of the edge
 * @param q The configuration set of the target state; its `hasSemanticContext` flag is cleared
 * @return The DFA state for [q]
 */
protected open fun addDFAEdge(from: DFAState, t: Int, q: ATNConfigSet): DFAState {
    // Leading to this call, ATNConfigSet.hasSemanticContext is used as a
    // marker indicating dynamic predicate evaluation makes this edge
    // dependent on the specific input sequence, so the static edge in the
    // DFA should be omitted. The target DFAState is still created since
    // execATN has the ability to resynchronize with the DFA state cache
    // following the predicate evaluation step.
    //
    // TJP notes: next time through the DFA, we see a pred again and eval.
    // If that gets us to a previously created (but dangling) DFA
    // state, we can continue in pure DFA mode from there.
    val suppressEdge = q.hasSemanticContext
    q.hasSemanticContext = false

    val to = addDFAState(q)
    if (!suppressEdge) {
        addDFAEdge(from, t, to)
    }

    return to
}
/**
 * Stores the edge `p -> q` on symbol [t] in the edge table of [p].
 * Symbols outside `MIN_DFA_EDGE..MAX_DFA_EDGE` are not cached.
 *
 * @param p The source DFA state
 * @param t The input symbol of the edge
 * @param q The target DFA state
 */
protected open fun addDFAEdge(p: DFAState, t: Int, q: DFAState) {
    if (t < MIN_DFA_EDGE || t > MAX_DFA_EDGE) {
        // Only track edges within the DFA bounds
        return
    }

    if (debug) {
        System.out.println("EDGE $p -> $q upon ${t.toChar()}")
    }

    synchronized(p) {
        if (p.edges == null) {
            // One slot per symbol in MIN_DFA_EDGE..MAX_DFA_EDGE
            p.edges = arrayOfNulls(MAX_DFA_EDGE - MIN_DFA_EDGE + 1)
        }

        // Connect
        p.edges!![t - MIN_DFA_EDGE] = q
    }
}
/**
 * Add a new DFA state if there isn't one with this set of
 * configurations already.
 *
 * This method also detects the first configuration containing
 * an ATN rule stop state. Later, when traversing the DFA,
 * we will know which rule to accept.
 *
 * @param configs The configuration set of the state; marked read-only when a new state is stored
 * @return The already cached state equal to the proposed one, or the newly stored state
 */
protected open fun addDFAState(configs: ATNConfigSet): DFAState {
    // The lexer evaluates predicates on-the-fly; by this point configs
    // should not contain any configurations with unevaluated predicates
    val proposed = DFAState(configs)

    var firstConfigWithRuleStopState: ATNConfig? = null
    for (c in configs) {
        if (c.state is RuleStopState) {
            firstConfigWithRuleStopState = c
            // Stop at the first match; later configs must not override it
            break
        }
    }

    if (firstConfigWithRuleStopState != null) {
        proposed.isAcceptState = true
        proposed.lexerActionExecutor = (firstConfigWithRuleStopState as LexerATNConfig).lexerActionExecutor
        proposed.prediction = atn.ruleToTokenType!![firstConfigWithRuleStopState.state.ruleIndex]
    }

    val dfa = decisionToDFA[mode]

    synchronized(dfa.states) {
        val existing = dfa.states[proposed]
        if (existing != null) {
            return existing
        }

        proposed.stateNumber = dfa.states.size
        configs.isReadonly = true
        proposed.configs = configs
        dfa.states[proposed] = proposed
    }

    return proposed
}
/**
 * Returns the DFA stored in [decisionToDFA] for the given lexer [mode].
 */
public open fun getDFA(mode: Int): DFA =
    decisionToDFA[mode]
/**
 * Returns the text matched so far for the token that is currently being recognized.
 *
 * @param input The character stream the token is read from
 */
public open fun getText(input: CharStream): String {
    // input.index() is the first lookahead character, which is not part of the token
    val lastMatchedIndex = input.index() - 1
    val matched = Interval.of(startIndex, lastMatchedIndex)
    return input.getText(matched)
}
/**
 * Consumes one symbol from [input] and updates [line] and
 * [charPositionInLine]: a newline starts the next line at column 0,
 * any other symbol advances the column by one.
 *
 * @param input The character stream to advance
 */
public open fun consume(input: CharStream) {
    val curChar = input.LA(1)
    // Compare code points as Int: toChar() truncates to 16 bits and would
    // mistake a supplementary code point such as U+1000A for '\n'.
    if (curChar == '\n'.code) {
        line++
        charPositionInLine = 0
    } else {
        charPositionInLine++
    }

    input.consume()
}
public open fun getTokenName(t: Int): String =
if (t == -1) {
} else {