commonMain.org.antlr.v4.kotlinruntime.BufferedTokenStream.kt Maven / Gradle / Ivy
The newest version!
// Copyright 2017-present Strumenta and contributors, licensed under Apache 2.0.
// Copyright 2024-present Strumenta and contributors, licensed under BSD 3-Clause.
package org.antlr.v4.kotlinruntime
import com.strumenta.antlrkotlin.runtime.assert
import org.antlr.v4.kotlinruntime.misc.Interval
/**
* This implementation of [TokenStream] loads tokens from a
* [TokenSource] on-demand, and places the tokens in a buffer to provide
* access to any previous token by index.
*
* This token stream ignores the value of [Token.channel].
* If your parser requires the token stream filter tokens to only
* those on a particular channel, such as [Token.DEFAULT_CHANNEL] or
* [Token.HIDDEN_CHANNEL], use a filtering token stream such a [CommonTokenStream].
*
* @param tokenSource The [TokenSource] from which tokens for this stream are fetched
*/
@Suppress("MemberVisibilityCanBePrivate")
public open class BufferedTokenStream(tokenSource: TokenSource) : TokenStream {
/**
* A collection of all tokens fetched from the token source.
*
* The list is considered a complete view of the input once [fetchedEOF]
* is set to `true`.
*/
public var tokens: MutableList = ArrayList(100)
/**
* The index into [tokens] of the current token (next token to [consume]).
* `tokens[p]` should be `LT(1)`.
*
* This field is set to `-1` when the stream is first constructed or when
* [tokenSource] is assigned, indicating that the first token has
* not yet been fetched from the token source.
*
* For additional information, see the documentation of [IntStream]
* for a description of *initializing mMethods*.
*/
protected var p: Int = -1
/**
* Indicates whether the [Token.EOF] token has been fetched from
* [tokenSource] and added to [tokens].
*
* This field improves performance for the following cases:
*
* - [consume]: the lookahead check in [consume] to prevent
* consuming the `EOF` symbol is optimized by checking the
* values of [fetchedEOF] and [p] instead of calling [LA]
* - [fetch]: the check to prevent adding multiple `EOF` symbols
* into [tokens] is trivial with this field
*/
protected var fetchedEOF: Boolean = false
override var tokenSource: TokenSource = tokenSource
set(value) {
field = value
tokens.clear()
p = -1
fetchedEOF = false
}
override val sourceName: String
get() = tokenSource.sourceName
/**
* Get the text of all tokens in this buffer.
*/
override val text: String
get() = getText(Interval.of(0, size() - 1))
override fun index(): Int =
p
override fun mark(): Int =
0
override fun release(marker: Int) {
// No resources to release
}
override fun seek(index: Int) {
lazyInit()
p = adjustSeekIndex(index)
}
override fun size(): Int =
tokens.size
override fun consume() {
val skipEofCheck =
if (p >= 0) {
if (fetchedEOF) {
// The last token in tokens is EOF.
// Skip check if p indexes any fetched token except the last
p < tokens.size - 1
} else {
// No EOF token in tokens. Skip check if p indexes a fetched token.
p < tokens.size
}
} else {
// Not yet initialized
false
}
if (!skipEofCheck && LA(1) == IntStream.EOF) {
throw IllegalStateException("cannot consume EOF")
}
if (sync(p + 1)) {
p = adjustSeekIndex(p + 1)
}
}
/**
* Make sure index [i] in tokens has a token.
*
* @return `true` if a token is located at index [i], otherwise `false`
*/
protected fun sync(i: Int): Boolean {
assert(i >= 0)
val n = i - tokens.size + 1 // How many more elements we need?
if (n > 0) {
val fetched = fetch(n)
return fetched >= n
}
return true
}
/**
* Add [n] elements to buffer.
*
* @return The actual number of elements added to the buffer
*/
protected fun fetch(n: Int): Int {
if (fetchedEOF) {
return 0
}
for (i in 0..= tokens.size) {
throw IndexOutOfBoundsException("token index $index out of range 0..${tokens.size - 1}")
}
return tokens[index]
}
/**
* Get all tokens from [start]..[stop] inclusively.
*/
public operator fun get(start: Int, stop: Int): List? {
if (start < 0 || stop < 0) {
return null
}
lazyInit()
val subset = ArrayList()
var tempStop = stop
if (tempStop >= tokens.size) {
tempStop = tokens.size - 1
}
for (i in start..tempStop) {
val t = tokens[i]
if (t.type == Token.EOF) {
break
}
subset.add(t)
}
return subset
}
override fun LA(i: Int): Int =
LT(i)!!.type
@Suppress("FunctionName")
protected open fun LB(k: Int): Token? =
if (p - k < 0) {
null
} else {
tokens[p - k]
}
override fun LT(k: Int): Token? {
lazyInit()
if (k == 0) {
return null
}
if (k < 0) {
return LB(-k)
}
val i = p + k - 1
sync(i)
return if (i >= tokens.size) {
// Return EOF token. EOF must be last token
tokens[tokens.size - 1]
} else {
tokens[i]
}
}
/**
* Allow derived classes to modify the behavior of operations which change
* the current stream position by adjusting the target token index of a seek
* operation.
*
* The default implementation simply returns [i].
*
* If an exception is thrown in this method, the current stream index
* should not be changed.
*
* For example, [CommonTokenStream] overrides this method to ensure that
* the seek target is always an on-channel token.
*
* @param i The target token index
* @return The adjusted target token index
*/
protected open fun adjustSeekIndex(i: Int): Int =
i
protected fun lazyInit() {
if (p == -1) {
setup()
}
}
protected fun setup() {
sync(0)
p = adjustSeekIndex(0)
}
/**
* Given a [start] and [stop] index, return a list of all tokens
* in the token type `BitSet`.
*
* Return `null` if no tokens were found.
*
* This method looks at both on and off channel tokens.
*/
public fun getTokens(start: Int, stop: Int, types: Set? = null): List? {
lazyInit()
if (start < 0 || stop >= tokens.size || stop < 0 || start >= tokens.size) {
throw IndexOutOfBoundsException("start $start or stop $stop not in 0..${tokens.size - 1}")
}
if (start > stop) {
return null
}
// list = tokens[start:stop]:{T t, t.getType() in types}
val filteredTokens = ArrayList()
for (i in start..stop) {
val t = tokens[i]
if (types == null || types.contains(t.type)) {
filteredTokens.add(t)
}
}
return filteredTokens.ifEmpty { null }
}
public fun getTokens(start: Int, stop: Int, ttype: Int): List? {
val s = HashSet(ttype)
s.add(ttype)
return getTokens(start, stop, s)
}
/**
* Given a starting index, return the index of the next token on channel.
*
* Return [i] if `tokens[i]` is on channel.
*
* Return the index of the `EOF` token if there are no tokens on channel
* between [i] and `EOF`.
*/
protected fun nextTokenOnChannel(i: Int, channel: Int): Int {
sync(i)
if (i >= size()) {
return size() - 1
}
var token = tokens[i]
var ii = i
while (token.channel != channel) {
if (token.type == Token.EOF) {
return ii
}
ii++
sync(ii)
token = tokens[ii]
}
return ii
}
/**
* Given a starting index, return the index of the previous token on channel.
*
* Return [i] if `tokens[i]` is on channel.
*
* Return `-1` if there are no tokens on channel between [i] and `0`.
*
* If [i] specifies an index at or after the `EOF` token, the `EOF` token
* index is returned. This is due to the fact that the `EOF` token is treated
* as though it were on every channel.
*/
protected fun previousTokenOnChannel(i: Int, channel: Int): Int {
sync(i)
if (i >= size()) {
// The EOF token is on every channel
return size() - 1
}
var ii = i
while (ii >= 0) {
val token = tokens[ii]
if (token.type == Token.EOF || token.channel == channel) {
return ii
}
ii--
}
return ii
}
/**
* Collect all tokens on specified channel to the right of the current token
* up until we see a token on [Lexer.DEFAULT_TOKEN_CHANNEL] or `EOF`.
*
* If channel is `-1`, find any non default channel token.
*/
public fun getHiddenTokensToRight(tokenIndex: Int, channel: Int = -1): List? {
lazyInit()
if (tokenIndex < 0 || tokenIndex >= tokens.size) {
throw IndexOutOfBoundsException("$tokenIndex not in 0..${tokens.size - 1}")
}
val nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL)
val from = tokenIndex + 1
// If none on channel to right, nextOnChannel=-1 so set to = last token
val to = if (nextOnChannel == -1) {
size() - 1
} else {
nextOnChannel
}
return filterForChannel(from, to, channel)
}
/**
* Collect all tokens on specified channel to the left of
* the current token up until we see a token on [Lexer.DEFAULT_TOKEN_CHANNEL].
*
* If channel is `-1`, find any non default channel token.
*/
public fun getHiddenTokensToLeft(tokenIndex: Int, channel: Int = -1): List? {
lazyInit()
if (tokenIndex < 0 || tokenIndex >= tokens.size) {
throw IndexOutOfBoundsException("$tokenIndex not in 0..${tokens.size - 1}")
}
if (tokenIndex == 0) {
// Obviously no tokens can appear before the first token
return null
}
val prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL)
if (prevOnChannel == tokenIndex - 1) {
return null
}
// If none on channel to left, prevOnChannel=-1 then from=0
val from = prevOnChannel + 1
val to = tokenIndex - 1
return filterForChannel(from, to, channel)
}
protected fun filterForChannel(from: Int, to: Int, channel: Int): List? {
val hidden = ArrayList()
for (i in from..to) {
val t = tokens[i]
if (channel == -1) {
if (t.channel != Lexer.DEFAULT_TOKEN_CHANNEL) hidden.add(t)
} else {
if (t.channel == channel) hidden.add(t)
}
}
return if (hidden.size == 0) {
null
} else {
hidden
}
}
override fun getText(interval: Interval): String {
val start = interval.a
var stop = interval.b
if (start < 0 || stop < 0) {
return ""
}
sync(stop)
if (stop >= tokens.size) {
stop = tokens.size - 1
}
val buf = StringBuilder()
for (i in start..stop) {
val t = tokens[i]
if (t.type == Token.EOF) {
break
}
buf.append(t.text)
}
return buf.toString()
}
override fun getText(ctx: RuleContext): String =
getText(ctx.sourceInterval)
override fun getText(start: Token?, stop: Token?): String? =
if (start != null && stop != null) {
getText(Interval.of(start.tokenIndex, stop.tokenIndex))
} else {
""
}
/**
* Get all tokens from lexer until `EOF`.
*/
public fun fill() {
lazyInit()
val blockSize = 1000
while (true) {
val fetched = fetch(blockSize)
if (fetched < blockSize) {
return
}
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy