/*
 * commonMain.piacenti.dslmaker.dsl.antlr.AntlrCompletionSuggester.kt (artifact: dsl-maker-js; Maven / Gradle / Ivy)
 *
 * Kotlin multiplatform library to facilitate the creation of DSLs with ANTLR or a simple built-in parser.
 * Note: there is a newer version of this artifact: 1.1.55
 */
package piacenti.dslmaker.dsl.antlr

import org.antlr.v4.kotlinruntime.CommonTokenStream
import org.antlr.v4.kotlinruntime.Parser
import org.antlr.v4.kotlinruntime.Token
import org.antlr.v4.kotlinruntime.atn.AtomTransition
import org.antlr.v4.kotlinruntime.atn.BasicBlockStartState
import org.antlr.v4.kotlinruntime.atn.BasicState
import org.antlr.v4.kotlinruntime.atn.BlockEndState
import org.antlr.v4.kotlinruntime.atn.EpsilonTransition
import org.antlr.v4.kotlinruntime.atn.LoopEndState
import org.antlr.v4.kotlinruntime.atn.PlusBlockStartState
import org.antlr.v4.kotlinruntime.atn.PlusLoopbackState
import org.antlr.v4.kotlinruntime.atn.RuleStartState
import org.antlr.v4.kotlinruntime.atn.RuleStopState
import org.antlr.v4.kotlinruntime.atn.RuleTransition
import org.antlr.v4.kotlinruntime.atn.SetTransition
import org.antlr.v4.kotlinruntime.atn.StarBlockStartState
import org.antlr.v4.kotlinruntime.atn.StarLoopEntryState
import org.antlr.v4.kotlinruntime.atn.StarLoopbackState
import org.antlr.v4.kotlinruntime.atn.Transition
import piacenti.dslmaker.pop
/**
 * A single expectation (something the parser could accept next) together with the rule context it was found in.
 *
 * @property id the expected token type when the entry describes a token, or the rule index when it describes a rule
 * @property ruleStack rule indices of the rules that were on the simulation stack (outermost first) when the
 * expectation was recorded
 */
data class ExpectationData(val id: Int, val ruleStack: List<Int>)
/**
 * Result of a completion simulation.
 *
 * @property tokens token expectations collected at the furthest token index the simulation reached
 * @property rules rule expectations collected at the furthest token index the simulation reached
 * @property highestTokenIndexReached index (in the default-channel token list) of the furthest position reached
 * @property highestTextIndexReached character stop index in the input text that corresponds to the furthest
 * matched position, or 0 when nothing was matched
 */
data class AntlrCompletionData(
        val tokens: Set<ExpectationData>,
        val rules: Set<ExpectationData>,
        val highestTokenIndexReached: Int,
        val highestTextIndexReached: Int
)
class AntlrCompletionSuggester {
    /**
     * Computes the tokens and rules that could follow the input currently held by [parser]'s token stream.
     *
     * Only tokens on channel 0 take part in the simulation. The last of them (normally EOF) is replaced by a fake
     * token type (-10) that no transition is expected to match, so the simulation exhaustively explores every
     * alternative that tries to match at the end of the input. Rule 0 of the ATN is used as the entry rule.
     *
     * @param parser parser whose token stream must be a [CommonTokenStream]
     * @return the simulation result with [AntlrCompletionData.highestTextIndexReached] filled in
     */
    fun suggestCompletions(parser: Parser): AntlrCompletionData {
        val tokenStream = parser.tokenStream!! as CommonTokenStream
        val allTokens: List<Token> = (0 until tokenStream.size()).map { tokenStream[it] }
        val allDefaultChannelTokens = allTokens.filter { it.channel == 0 }
        val allDefaultChannelTokenTypes: MutableList<Int> = allDefaultChannelTokens.map { it.type }.toMutableList()
        // Inject a fake token in place of EOF to make sure nothing will match it, so that we can exhaustively
        // search for all rules that attempt to match at that index.
        if (allDefaultChannelTokenTypes.isEmpty()) {
            // No default-channel tokens at all (e.g. the stream holds nothing): writing to lastIndex (-1) would
            // throw, so the sentinel becomes the only token instead.
            allDefaultChannelTokenTypes.add(-10)
        } else {
            allDefaultChannelTokenTypes[allDefaultChannelTokenTypes.lastIndex] = -10
        }
        val startRuleState = parser.atn.ruleToStartState!![0]!!

        val result = runSimulation(parser, startRuleState, allDefaultChannelTokenTypes)

        // We want the stop text index of whatever came before the highest index reached, because that token is
        // the last one that actually got matched; the fake token / EOF itself is never wanted.
        val lastMatchedToken = allDefaultChannelTokens.getOrNull(result.highestTokenIndexReached - 1)
        val highestReachedTextIndex = lastMatchedToken?.let { matched ->
            // Hidden-channel tokens directly to the right of the matched token extend the consumed text range.
            tokenStream.getHiddenTokensToRight(matched.tokenIndex)?.lastOrNull()?.stopIndex ?: matched.stopIndex
        } ?: 0

        return result.copy(highestTextIndexReached = highestReachedTextIndex)
    }

    /**
     * A rule invocation on the simulation stack.
     *
     * @property ruleIndex index of the grammar rule being simulated
     * @property pathStack branches of this rule that are still being explored; the last element is the active one.
     * It must contain at least one [RulePath] at construction time because [startTokenIndex] is read from its
     * first element (an empty list makes the constructor throw `NoSuchElementException`).
     */
    data class Rule(val ruleIndex: Int, val pathStack: MutableList<RulePath> = mutableListOf()) {
        // Disabled bookkeeping kept for reference (element types reconstructed, confirm before re-enabling):
        //   val pathsToFailure: MutableList<MutableList<Int>> = mutableListOf()
        //   val pathToMatch: MutableList<Int> = mutableListOf()
        //   val stopTokenIndex: Int get() = pathStack.last().tokenIndex

        /** State numbers visited while simulating this rule invocation. */
        val statesSeen = mutableSetOf<Int>()

        /** Token index at which this rule invocation started (taken once from the first path). */
        val startTokenIndex: Int = pathStack.first().tokenIndex
    }

    /** Kind of branch stored in a rule's path stack. */
    enum class TransitionType {
        /** Default kind, used for branches that iterate over a state's own transitions. */
        REGULAR,

        /** Branch holding the fabricated epsilon transition to the follow state of a rule transition. */
        FOLLOWSET
    }

    /**
     * One branch being explored inside a [Rule].
     *
     * @property iterator remaining transitions to try from the state this branch is positioned at
     * @property tokenIndex index of the next input token this branch will try to match (mutable: it is advanced
     * when a child rule matched further input)
     * @property statesPathOfMatch state numbers traversed to get here; should be a copy of the previous branch's
     * list so each branch owns its own path. Must not be empty.
     * @property pathOfMatch rule indices entered along this branch; should be a copy specific to each branch
     * @property tokensMatched token types matched along this branch
     * @property transitionType whether this is a regular branch or a fabricated follow-state branch
     */
    data class RulePath(val iterator: Iterator<Transition>,
                        var tokenIndex: Int,
                        val statesPathOfMatch: MutableList<Int>,
                        val pathOfMatch: MutableList<Int>,
                        val tokensMatched: MutableList<Int>,
                        val transitionType: TransitionType = TransitionType.REGULAR) {
        /** First state number of [statesPathOfMatch], captured at construction time. */
        val startState = statesPathOfMatch.first()

        /** Last state number of [statesPathOfMatch], captured at construction time (not updated afterwards). */
        val stopState = statesPathOfMatch.last()
    }

    /**
     * Key identifying "ATN state [stateNumber] reached while positioned at token [index]"; used to avoid
     * processing the same combination more than once.
     */
    data class StateIndexPair(
            val stateNumber: Int,
            val index: Int
    )

    /**
     * Simulates the parser's ATN depth-first over [allTokens] and collects what was expected at the furthest
     * token index any path reached.
     *
     * The simulation keeps a stack of [Rule]s; each rule keeps a stack of [RulePath] branches. The innermost
     * rule's top branch supplies the next transition to try. Every (target state, token index) combination that
     * was processed successfully is remembered so it is not explored again. Whenever a token match pushes the
     * position past the previous maximum, the expectations gathered so far are discarded.
     *
     * @param parser parser providing the ATN
     * @param startRuleState start state of the entry rule
     * @param allTokens token types of the input; the token type -10 is treated as the "end of input" sentinel
     * @return expected tokens and rules plus the highest token index reached; the text index is left at 0
     * @throws UnsupportedOperationException for transition or state kinds the simulation does not handle
     */
    private fun runSimulation(parser: Parser, startRuleState: RuleStartState, allTokens: MutableList<Int>): AntlrCompletionData {
        var highestReachedIndex = 0
        val expectedTokens = mutableSetOf<ExpectationData>()
        val expectedRules = mutableSetOf<ExpectationData>()
        val ruleStack = mutableListOf<Rule>()
        ruleStack.add(Rule(startRuleState.ruleIndex, mutableListOf(RulePath(startRuleState.transitions.iterator(), 0,
                mutableListOf(startRuleState.stateNumber), mutableListOf(), mutableListOf()))))
        val stateIndexPairs = mutableSetOf<StateIndexPair>()
        while (ruleStack.isNotEmpty()) {
            val currentRule = ruleStack.last()
            val branchStack = currentRule.pathStack
            if (branchStack.isEmpty()) {
                val failedRule = ruleStack.pop()
                // The rule failed if all branches were traversed and it is still not matched (had it matched, it
                // would have been popped during state processing). The parent's branch that invoked it is
                // dropped as well.
                if (ruleStack.isNotEmpty())
                    ruleStack.last().pathStack.pop()
                if (startingRulePointMatchesEndToken(ruleStack, failedRule, highestReachedIndex))
                    expectedRules.add(ExpectationData(failedRule.ruleIndex, ruleStack.map { it.ruleIndex }))
                continue
            }
            inner@ while (branchStack.isNotEmpty()) {
                var succeededTransition = true
                val currentBranch = branchStack.last()
                if (currentBranch.iterator.hasNext()) {
                    val transition = currentBranch.iterator.next()
                    val target = transition.target!!
                    // Each new branch gets its own copies so sibling branches do not share history.
                    val statesPathOfMatch = currentBranch.statesPathOfMatch.toMutableList()
                    val pathOfMatch = currentBranch.pathOfMatch.toMutableList()
                    val tokensMatched = currentBranch.tokensMatched.toMutableList()
                    statesPathOfMatch.add(target.stateNumber)
                    currentRule.statesSeen.add(target.stateNumber)
                    val startState = parser.atn.states[currentBranch.startState]!!
                    // Transitions deal with the path stack.
                    var currentTokenIndex = currentBranch.tokenIndex
                    val stateIndexPair = StateIndexPair(target.stateNumber, currentTokenIndex)
                    if (!stateIndexPairs.contains(stateIndexPair)) {
                        when (transition) {
                            is EpsilonTransition -> {
                                branchStack.add(RulePath(target.transitions.iterator(),
                                        currentTokenIndex,
                                        statesPathOfMatch, pathOfMatch, tokensMatched))
                            }
                            is RuleTransition -> {
                                // Fabricate an epsilon transition to the follow state so that exploration
                                // continues there once the invoked rule is done.
                                val fabricatedTransition = EpsilonTransition(transition.followState)
                                branchStack.add(RulePath(listOf(fabricatedTransition).iterator(),
                                        currentTokenIndex,
                                        statesPathOfMatch, pathOfMatch, tokensMatched, TransitionType.FOLLOWSET))
                                pathOfMatch.add(transition.ruleIndex)
                            }
                            is AtomTransition -> {
                                val currentToken = allTokens[currentTokenIndex]
                                val expectedToken = transition.label
                                if (currentToken == -10) {
                                    // Reached the end-of-input sentinel: the branch is complete, so clear
                                    // states for other branches and record what was expected here.
                                    currentRule.statesSeen.clear()
                                    expectedTokens.add(ExpectationData(expectedToken, ruleStack.map { it.ruleIndex }))
                                    succeededTransition = false
                                } else if (expectedToken == currentToken) {
                                    tokensMatched.add(currentToken)
                                    // If we advanced the input we can allow ourselves to revisit states.
                                    currentRule.statesSeen.clear()
                                    currentTokenIndex++
                                    if (currentTokenIndex > highestReachedIndex) {
                                        // New furthest position: earlier expectations are obsolete.
                                        highestReachedIndex = currentTokenIndex
                                        expectedTokens.clear()
                                        expectedRules.clear()
                                    }
                                    branchStack.add(RulePath(target.transitions.iterator(),
                                            currentTokenIndex,
                                            statesPathOfMatch, pathOfMatch, tokensMatched))
                                } else if (currentTokenIndex == highestReachedIndex) {
                                    expectedTokens.add(ExpectationData(expectedToken, ruleStack.map { it.ruleIndex }))
                                    succeededTransition = false
                                }
                            }
                            is SetTransition -> {
                                val currentToken = allTokens[currentTokenIndex]
                                val expectedRuleTokens = transition.set.toList()
                                if (currentToken == -10) {
                                    // Reached the end-of-input sentinel: the branch is complete, so clear
                                    // states for other branches and record every member of the set.
                                    currentRule.statesSeen.clear()
                                    val currentStack = ruleStack.map { it.ruleIndex }
                                    expectedTokens.addAll(expectedRuleTokens.map { ExpectationData(it, currentStack) })
                                    succeededTransition = false
                                } else if (expectedRuleTokens.contains(currentToken)) {
                                    tokensMatched.add(currentToken)
                                    // If we advanced the input we can allow ourselves to revisit states.
                                    currentRule.statesSeen.clear()
                                    currentTokenIndex++
                                    if (currentTokenIndex > highestReachedIndex) {
                                        // New furthest position: earlier expectations are obsolete.
                                        highestReachedIndex = currentTokenIndex
                                        expectedTokens.clear()
                                        expectedRules.clear()
                                    }
                                    branchStack.add(RulePath(target.transitions.iterator(),
                                            currentTokenIndex,
                                            statesPathOfMatch, pathOfMatch, tokensMatched))
                                } else if (currentTokenIndex == highestReachedIndex) {
                                    val currentStack = ruleStack.map { it.ruleIndex }
                                    expectedTokens.addAll(expectedRuleTokens.map { ExpectationData(it, currentStack) })
                                    succeededTransition = false
                                }
                            }
                            // Report the transition kind that is not handled (not the kind of its target state).
                            else -> throw UnsupportedOperationException(transition::class.simpleName)
                        }
                        // No point in processing the target of the transition if it failed.
                        if (succeededTransition) {
                            // The target state deals with matching of a rule.
                            when (target) {
                                /*
                                 * There may be issues when rules use PlusBlockStartState and StarLoopEntryState,
                                 * since those are simply branching blocks rather than an indication that a new
                                 * rule will be parsed. RuleStartState still seems to be the reliable way to
                                 * decide when to push something on the stack. The other state kinds are kept
                                 * because they came from someone else's code that processed them together; this
                                 * approach may well differ from theirs.
                                 * For reference see
                                 * https://www.antlr.org/api/Java/org/antlr/v4/runtime/atn/ATNState.html
                                 */
                                is RuleStartState -> {
                                    ruleStack.add(Rule(target.ruleIndex, mutableListOf(RulePath(target.transitions.iterator(),
                                            currentTokenIndex,
                                            mutableListOf(target.stateNumber), mutableListOf(), mutableListOf()))))
                                    // Continue with the newly pushed rule in the outer loop.
                                    break@inner
                                }
                                is RuleStopState -> {
                                    if (startState is RuleStartState && startState.stopState == target) {
                                        val matched = ruleStack.pop()
                                        // Since the rule fully matched, clear all seen states for it in case it
                                        // is matched from a different context for the same index. This also
                                        // clears it for other indexes, since we don't keep an accurate
                                        // state-to-index representation (the tokenIndex of a parent rule is
                                        // updated once a child matches). This results in additional
                                        // computations but should still avoid infinite loops.
                                        matched.pathStack.last().statesPathOfMatch.forEach { stateNumber ->
                                            stateIndexPairs.removeAll { it.stateNumber == stateNumber }
                                        }
                                        updateParentLastTokenParsed(ruleStack, matched, currentTokenIndex)
                                        break@inner
                                    }
                                }
                                is BasicState,
                                is LoopEndState,
                                is BasicBlockStartState,
                                is BlockEndState,
                                is StarLoopbackState,
                                is PlusLoopbackState,
                                is StarBlockStartState,
                                is PlusBlockStartState,
                                is StarLoopEntryState -> {
                                    // Nothing to do for plain/branching states.
                                }
                                else -> throw UnsupportedOperationException(target::class.simpleName)
                            }
                            stateIndexPairs.add(stateIndexPair)
                        }
                    }

                } else {
                    // A branch with no transitions left to try has failed; discard it.
                    branchStack.pop()
                }
            }

        }
        return AntlrCompletionData(expectedTokens, expectedRules, highestReachedIndex, 0)
    }

    /**
     * Tells whether [failedRule] started exactly at the furthest token index reached so far.
     *
     * In case of recursive rule injection, the same rule can appear several times in a row at the top of
     * [ruleStack]; the earliest of those consecutive entries is the one whose start index is compared.
     *
     * @param ruleStack rules still on the stack (the failed rule has already been removed)
     * @param failedRule rule whose branches were all exhausted
     * @param highestReachedIndex furthest token index reached by the simulation
     */
    private fun startingRulePointMatchesEndToken(ruleStack: List<Rule>, failedRule: Rule, highestReachedIndex: Int): Boolean {
        // Walk from the top of the stack down while the entries are invocations of the same rule.
        val earliestSameRule = ruleStack.asReversed()
                .takeWhile { it.ruleIndex == failedRule.ruleIndex }
                .lastOrNull() ?: failedRule
        return earliestSameRule.startTokenIndex == highestReachedIndex
    }

    /**
     * Propagates the progress of a matched child rule to its parent.
     *
     * If the active branch of the rule now on top of [ruleStack] is positioned before [currentTokenIndex], its
     * position is moved forward and the tokens matched by the child's active branch are appended to it. Nothing
     * happens when the stack is empty.
     *
     * @param ruleStack rules still on the stack (the matched rule has already been removed)
     * @param matched rule that just matched
     * @param currentTokenIndex token index reached by the matched rule
     */
    private fun updateParentLastTokenParsed(ruleStack: List<Rule>, matched: Rule, currentTokenIndex: Int) {
        val parentBranch = ruleStack.lastOrNull()?.pathStack?.last() ?: return
        if (parentBranch.tokenIndex < currentTokenIndex) {
            parentBranch.tokenIndex = currentTokenIndex
            parentBranch.tokensMatched.addAll(matched.pathStack.last().tokensMatched)
        }
    }
}