// Copyright 2017-present Strumenta and contributors, licensed under Apache 2.0.
// Copyright 2024-present Strumenta and contributors, licensed under BSD 3-Clause.
package org.antlr.v4.kotlinruntime

import com.strumenta.antlrkotlin.runtime.System
import org.antlr.v4.kotlinruntime.atn.LexerATNSimulator
import org.antlr.v4.kotlinruntime.misc.IntegerStack
import org.antlr.v4.kotlinruntime.misc.Interval

/**
* A lexer is a recognizer that draws input symbols from a character stream.
*
* Lexer grammars result in a subclass of this object.
*
* A Lexer object uses simplified `match()` and error recovery mechanisms
* in the interest of speed.
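*
* A minimal usage sketch, assuming a generated subclass named `MyLexer`
* (the class name is hypothetical; `CharStreams.fromString` is the runtime
* helper for in-memory input):
*
* ```
* val lexer = MyLexer(CharStreams.fromString("1 + 2"))
* for (t in lexer.allTokens) {
*   println("${t.type} '${t.text}'")
* }
* ```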
*/
@Suppress("MemberVisibilityCanBePrivate", "PropertyName")
public abstract class Lexer(input: CharStream) : Recognizer<Int, LexerATNSimulator>(), TokenSource {
public companion object {
public const val DEFAULT_MODE: Int = 0
public const val MORE: Int = -2
public const val SKIP: Int = -3
public const val DEFAULT_TOKEN_CHANNEL: Int = Token.DEFAULT_CHANNEL
public const val HIDDEN: Int = Token.HIDDEN_CHANNEL
public const val MIN_CHAR_VALUE: Int = 0x0000
public const val MAX_CHAR_VALUE: Int = 0x10FFFF
}
protected var _input: CharStream = input
@Suppress("LeakingThis")
protected var _tokenFactorySourcePair: Pair<TokenSource, CharStream?> =
Pair(this, input)
public override var inputStream: CharStream
get() = _input
set(value) {
_input = DummyCharStream
_tokenFactorySourcePair = Pair(this, null)
reset()
_input = value
_tokenFactorySourcePair = Pair(this, _input)
}
/**
* How to create token objects.
*/
override var tokenFactory: TokenFactory<*> = CommonTokenFactory.DEFAULT
/**
* The goal of all lexer rules/methods is to create a token object.
* This is an instance variable as multiple rules may collaborate to
* create a single token.
*
* [nextToken] will return this object after matching lexer rule(s).
*
* If you subclass to allow multiple token emissions, then set this
* to the last token to be matched or something non-`null` so that
* the auto token emit mechanism will not emit another token.
*/
public open var token: Token? = null
/**
* What character index in the stream did the current token start at?
*
* Needed, for example, to get the text for the current token.
*
* Set at the start of [nextToken].
*/
public var _tokenStartCharIndex: Int = -1
/**
* The line on which the first character of the token resides.
*/
public var _tokenStartLine: Int = 0
/**
* The character position of the first character within the line.
*/
public var _tokenStartCharPositionInLine: Int = 0
/**
* Once we see `EOF` on char stream, next token will be `EOF`.
*
* If you have `DONE : EOF ;` then you see `DONE EOF`.
*/
public var _hitEOF: Boolean = false
/**
* The channel number for the current token.
*/
public var channel: Int = 0
/**
* The token type for the current token.
*/
public var type: Int = 0
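/**
* The stack of enclosing lexer modes, managed by [pushMode] and [popMode].
*/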
public val _modeStack: IntegerStack = IntegerStack()
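/**
* The current lexer mode. Defaults to [DEFAULT_MODE].
*/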
public var _mode: Int = DEFAULT_MODE
/**
* You can set the text for the current token to override what is in
* the input char buffer.
*
* Use [text] `= ...`, or you can set this instance variable directly.
*/
public var _text: String? = null
override val sourceName: String
get() = _input.sourceName
override var line: Int
get() = interpreter.line
set(line) {
interpreter.line = line
}
override var charPositionInLine: Int
get() = interpreter.charPositionInLine
set(charPositionInLine) {
interpreter.charPositionInLine = charPositionInLine
}
/**
* What is the index of the current character of lookahead?
*/
public val charIndex: Int
get() = _input.index()
/**
* The text matched so far for the current token or any text override.
*/
public var text: String
get() = _text ?: interpreter.getText(_input)
set(text) {
this._text = text
}
public open val channelNames: Array<String> =
emptyArray()
public open val modeNames: Array<String> =
emptyArray()
/**
* Used to print out token names like ID during debugging and error reporting.
*
* The generated parsers implement a method that overrides this
* to point to their `Array<String>` of token names.
*/
@Deprecated("Use vocabulary instead", ReplaceWith("vocabulary"))
override val tokenNames: Array<String> =
emptyArray()
/**
* Return a list of all [Token] objects in input char stream.
*
* Forces load of all tokens. Does not include `EOF` token.
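*
* For example, `lexer.allTokens.map { it.text }` collects the text of
* every token up to, but not including, `EOF`.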
*/
public val allTokens: List<Token>
get() {
val tokens = ArrayList<Token>()
var t = nextToken()
while (t.type != Token.EOF) {
tokens.add(t)
t = nextToken()
}
return tokens
}
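/**
* Resets the lexer state and rewinds the input stream to the beginning,
* so this instance can tokenize its input again from the start.
*/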
public open fun reset() {
// Reset lexer state variables
// Rewind the input
_input.seek(0)
token = null
type = Token.INVALID_TYPE
channel = Token.DEFAULT_CHANNEL
_tokenStartCharIndex = -1
_tokenStartCharPositionInLine = -1
_tokenStartLine = -1
_text = null
_hitEOF = false
_mode = DEFAULT_MODE
_modeStack.clear()
// TODO(Edoardo): 'interpreter' is abstract and might be initialized
// only after this method has been called for the first time
// (e.g., at instance construction time), so here we explicitly
// 'reset()' it only if it's not null.
// We might want to find a better way to initialize 'interpreter'
@Suppress("UNNECESSARY_SAFE_CALL")
interpreter?.reset()
}
/**
* Return a token from this source, i.e., match a token on the char stream.
*/
override fun nextToken(): Token {
// Mark start location in char stream so unbuffered streams are
// guaranteed to have at least the text of the current token
val tokenStartMarker = _input.mark()
try {
outer@ while (true) {
if (_hitEOF) {
emitEOF()
return token!!
}
token = null
channel = Token.DEFAULT_CHANNEL
_tokenStartCharIndex = _input.index()
_tokenStartCharPositionInLine = interpreter.charPositionInLine
_tokenStartLine = interpreter.line
_text = null
do {
type = Token.INVALID_TYPE
var ttype: Int
try {
ttype = interpreter.match(_input, _mode)
} catch (e: LexerNoViableAltException) {
// Report error
notifyListeners(e)
recover(e)
ttype = SKIP
}
if (_input.LA(1) == IntStream.EOF) {
_hitEOF = true
}
if (type == Token.INVALID_TYPE) {
type = ttype
}
if (type == SKIP) {
continue@outer
}
} while (type == MORE)
if (token == null) {
emit()
}
return token!!
}
} finally {
// Make sure we release the marker after the match,
// or an unbuffered char stream will keep buffering
_input.release(tokenStartMarker)
}
}
/**
* Instruct the lexer to skip creating a token for the current
* lexer rule and look for another token.
*
* [nextToken] knows to keep looking when a lexer rule finishes
* with token set to [SKIP].
*
* Recall that if `token == null` at the end of any token rule,
* [nextToken] creates one for you and emits it.
*/
public open fun skip() {
type = SKIP
}
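/**
* Instruct the lexer to keep the text matched so far and continue
* matching it as part of the next token, instead of emitting one now.
*
* [nextToken] keeps matching within the same token when a lexer rule
* finishes with the type set to [MORE].
*/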
public open fun more() {
type = MORE
}
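/**
* Set the current lexer mode to [m].
*/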
public open fun mode(m: Int) {
_mode = m
}
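/**
* Push the current mode onto [_modeStack] and switch to mode [m].
*
* This backs the `pushMode(...)` action in lexer grammars.
*/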
public open fun pushMode(m: Int) {
if (LexerATNSimulator.debug) {
System.out.println("pushMode $m")
}
_modeStack.push(_mode)
mode(m)
}
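/**
* Switch back to the most recently pushed mode and return it.
*
* @throws EmptyStackException If no mode has been pushed
*/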
public open fun popMode(): Int {
if (_modeStack.isEmpty) {
throw EmptyStackException()
}
if (LexerATNSimulator.debug) {
System.out.println("popMode back to ${_modeStack.peek()}")
}
mode(_modeStack.pop())
return _mode
}
/**
* By default, does not support multiple emits per [nextToken] invocation
* for efficiency reasons.
*
* Subclass and override this method, [nextToken], and [token]
* to push tokens into a list and pull from that list, rather
* than using the single [token] variable as this implementation does.
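*
* A minimal sketch of such a subclass, using a FIFO queue
* (the names are illustrative only):
*
* ```
* private val pending = ArrayDeque<Token>()
*
* override fun emit(token: Token) {
*   super.emit(token)
*   pending.addLast(token)
* }
*
* override fun nextToken(): Token {
*   if (pending.isEmpty()) {
*     super.nextToken() // Enqueues one or more tokens via emit()
*   }
*   return pending.removeFirst()
* }
* ```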
*/
public open fun emit(token: Token) {
this.token = token
}
/**
* The standard method called to automatically emit a token at the
* outermost lexical rule.
*
* The token object should point into the char buffer `start..stop`.
* If there is a text override in [text], use that to set the token's text.
*
* Override this method to emit custom [Token] objects or provide a new factory.
*/
public open fun emit(): Token {
val t = tokenFactory.create(
_tokenFactorySourcePair,
type,
_text,
channel,
_tokenStartCharIndex,
charIndex - 1,
_tokenStartLine,
_tokenStartCharPositionInLine,
)
emit(t)
return t
}
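/**
* Create and emit the `EOF` token at the current input position,
* using the current [line] and [charPositionInLine].
*/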
public open fun emitEOF(): Token {
val cPos = charPositionInLine
val eof = tokenFactory.create(
_tokenFactorySourcePair,
Token.EOF,
null,
Token.DEFAULT_CHANNEL,
_input.index(),
_input.index() - 1,
line,
cPos,
)
emit(eof)
return eof
}
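/**
* Default error recovery: consume a single character and let
* [nextToken] try to match again, unless we are already at `EOF`.
*/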
public open fun recover(e: LexerNoViableAltException) {
if (_input.LA(1) != IntStream.EOF) {
// Skip a char and try again
interpreter.consume(_input)
}
}
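/**
* Report a token recognition error, spanning from the start of the
* current token to the current input position, to all registered
* error listeners.
*/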
public open fun notifyListeners(e: LexerNoViableAltException) {
val text = _input.getText(Interval.of(_tokenStartCharIndex, _input.index()))
val msg = "token recognition error at: '${getErrorDisplay(text)}'"
errorListenerDispatch.syntaxError(this, null, _tokenStartLine, _tokenStartCharPositionInLine, msg, e)
}
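/**
* Return [s] with special characters escaped for error messages,
* e.g., a newline is rendered as the two characters `\n`.
*/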
public open fun getErrorDisplay(s: String): String {
val buf = StringBuilder()
for (c in s) {
buf.append(getErrorDisplay(c.code))
}
return buf.toString()
}
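/**
* Return a printable representation of the character code [c],
* escaping `\n`, `\t`, and `\r`, and rendering [Token.EOF] as `<EOF>`.
*/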
public open fun getErrorDisplay(c: Int): String =
when (c) {
Token.EOF -> ""
'\n'.code -> "\\n"
'\t'.code -> "\\t"
'\r'.code -> "\\r"
else -> c.toChar().toString()
}
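/**
* Like `getErrorDisplay`, but wraps the result in single quotes.
*/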
public open fun getCharErrorDisplay(c: Int): String {
val s = getErrorDisplay(c)
return "'$s'"
}
/**
* Lexers can normally match any char in their vocabulary after matching
* a token, so do the easy thing and just kill a character and hope
* it all works out.
*
* You can instead use the rule invocation stack to do sophisticated
* error recovery if you are in a fragment rule.
*/
public open fun recover(re: RecognitionException) {
// TODO: do we lose character or line position information?
_input.consume()
}
}