commonMain.piacenti.dslmaker.ExpressionMatcher2.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of dsl-maker-js Show documentation
Show all versions of dsl-maker-js Show documentation
Kotlin multiplatform library to facilitate creation of DSLs with ANTLR or a simple built in parser
package piacenti.dslmaker
import piacenti.dslmaker.ExpressionMatcher2.MatchStatus.*
import piacenti.dslmaker.abstraction.ProductionStep
import piacenti.dslmaker.errors.ParserException
import piacenti.dslmaker.structures.*
import piacenti.dslmaker.structures.strategies.pretokenizestrategy.Token
import piacenti.dslmaker.structures.strategies.pretokenizestrategy.Tokenizer
/**
* @param
* @author Piacenti
*/
open class ExpressionMatcher2(var tokenize: Boolean = false) {
var trackData: TrackData = TrackData()
data class TrackData(
var highestSuccessfulIndex: Int = 0,
val fullPath: MutableList> = mutableListOf(),
val expectedTokenNotMatched: MutableList = mutableListOf(),
val possibleValidTokensIfContinuingParsing: MutableList = mutableListOf(),
val matchCache: MutableMap = mutableMapOf()
)
fun match(grammar: Grammar<*>, text: String, matchAllTokens: Boolean,
startIndex: Int = 0): ASTNode {
trackData = TrackData()
val result: ASTNode
val inputSequence = getInputSequence(text, grammar)
try {
result = findPath(startIndex, inputSequence, grammar, text)
finalizeAST(result, text, inputSequence)
if (result.children.isEmpty() || (nothingRemainingToBeParsed(text, result, inputSequence) && matchAllTokens)) {
throw ParserException("")
}
trackData.expectedTokenNotMatched.clear()
return result
} catch (e: ParserException) {
throw throwException(inputSequence, text, e)
}
}
private fun nothingRemainingToBeParsed(text: String, result: ASTNode, inputSequence: Any) =
((!tokenize && text.substring(result.endIndex+1).isNotBlank()) || (tokenize && trackData.highestSuccessfulIndex < (inputSequence as List).lastIndex))
private fun throwException(inputSequence: Any, text: String, e: ParserException?): ParserException {
val subList: String
val index: Int = if (tokenize) (inputSequence as List)[trackData.highestSuccessfulIndex].endIndex else trackData.highestSuccessfulIndex
subList = if (index < text.length) {
text.substring(index, text.length)
} else {
""
}
val fullPath = trackData.fullPath.map {
if (!it.first.isProduction)
"${it.first} " + 27.toChar() + "[32mmatched: ${it.second}" + 27.toChar() + "[31m"
else
it.first.toString()
}
return ParserException(
(if (e != null) e.message + "\n" else "") + "Not all tokens were matched, highest index reached " + index + "\nhighest success result index: " + index +
"\nexecution path: " + fullPath + "" + "\nexpected: " + trackData.expectedTokenNotMatched + "" + "\nremaining text: " + subList)
}
enum class MatchStatus {
NOT_EVALUATED, FAILED, MATCHED
}
class Branch(stepSequence: List) {
var matched: MatchStatus = NOT_EVALUATED
val activeStep: LinkedNodes = LinkedNodes(stepSequence)
}
class ProductionEntry(val step: ProductionStep,
branches: List,
startIndex: Int,
originalText: String) {
val activeBranch: LinkedNodes = LinkedNodes(branches)
var astNode = ASTNode(step, startIndex = startIndex, endIndex = startIndex, originalText = originalText)
}
data class CacheKey(val stepName: String, val startIndex: Int) {
override fun equals(other: Any?): Boolean {
if (this === other) return true
if (other == null || this::class != other::class) return false
other as CacheKey
if (stepName != other.stepName) return false
if (startIndex != other.startIndex) return false
return true
}
override fun hashCode(): Int {
var result = stepName.hashCode()
result = 92821 * result + startIndex
return result
}
}
data class CacheValue(val astNode: ASTNode, val endIndex: Int, val matched: Boolean, val evalTextIsBlank: Boolean? = null)
/**
* This method finds the path through the graph that matches the string.
*/
protected open fun findPath(startIndex: Int, inputSequence: Any, grammar: Grammar<*>, originalText: String): ASTNode {
val startTime = getCurrentTimeInMilliSeconds()
//for very long inputs constant resizing of hashmap can be expensive
//hashmaps resize at around 75% capacity so here we are making sure we don't see any resize
val matchCache: MutableMap = LinkedHashMap((originalText.length * 1.4).toInt())
val start = grammar.productions[grammar.startProduction]
?: throw ParserException("Production not defined for " + grammar.startProduction.name)
val productionStack: MutableList = mutableListOf()
val startEntry = ProductionEntry(grammar.startProduction, start.expressions.map { Branch(it.steps) }, startIndex, originalText)
productionStack.add(startEntry)
while (productionStack.isNotEmpty()) {
val currentEntry = productionStack.peek()
if (currentEntry != null) {
val currentIndex = currentEntry.astNode.endIndex
val activeBranchNode = currentEntry.activeBranch
var branch = activeBranchNode.node?.value
if (branch != null) {
if (branch.matched == MATCHED) {
matchProduction(matchCache, currentEntry, productionStack)
} else {
if (branch.matched == FAILED) {
branch = moveToNextBranch(activeBranchNode, currentEntry)
}
if (branch == null) {
failProduction(matchCache, currentEntry, productionStack)
} else {
processStep(branch, grammar, matchCache, currentIndex, currentEntry, productionStack, inputSequence)
}
}
}
}
}
LOG.debug("path finding done in " + (getCurrentTimeInMilliSeconds() - startTime).toString() + "ms")
return startEntry.astNode
}
protected open fun getInputSequence(text: String, grammar: Grammar<*>): Any {
return if (tokenize) {
Tokenizer(grammar).tokenize(text)
} else {
text
}
}
private fun processStep(branch: Branch, grammar: Grammar<*>, matchCache: MutableMap, currentIndex: Int, currentEntry: ProductionEntry, productionStack: MutableList, inputSequence: Any) {
val stepNode = branch.activeStep.node
val step = stepNode?.value
if (step == null) {
branch.matched = MATCHED
} else {
if (step.isProduction) {
handleProductionStep(grammar, step, matchCache, currentIndex, currentEntry, productionStack)
} else {
handleTerminal(inputSequence, currentIndex, step, productionStack, branch, matchCache)
}
}
}
private fun finalizeAST(astNode: ASTNode, text: String, inputSequence: Any) {
astNode.forEachRecursive({ node ->
if (tokenize) {
inputSequence as List
if (node.endIndex > node.startIndex) {
node.startIndex = inputSequence[node.startIndex].startIndex
node.endIndex = inputSequence[node.endIndex - 1].endIndex
} else {
val tokenStartIndex = node.startIndex
node.startIndex = inputSequence[tokenStartIndex].startIndex
node.endIndex = inputSequence[tokenStartIndex].startIndex
}
}
else
node.endIndex = node.endIndex - 1
node.children.forEach {
it.parent = node
//regex endIndex is one over real index. It works for substringing but
// causes problems for other things like styling and text modifications
}
})
}
private fun handleTerminal(inputSequence: Any, currentIndex: Int, step: ProductionStep, productionStack: MutableList, branch: Branch, matchCache: MutableMap) {
val resultCache = matchCache[CacheKey(step.name, currentIndex)]
var matchEndIndexOffset = currentIndex
var evalTextIsBlank = if (resultCache?.evalTextIsBlank != null)
resultCache.evalTextIsBlank
else
false
val matched = if (resultCache != null) {
resultCache.matched
} else {
when (inputSequence) {
is String -> {
val m = Tokenizer.matchTokenRegex(step, inputSequence, currentIndex)
evalTextIsBlank = remainderIsBlank(inputSequence, currentIndex)
matchEndIndexOffset = (m?.groups?.first()?.endIndex ?: 0)
m != null
}
is List<*> -> {
inputSequence as List
evalTextIsBlank = inputSequence.lastIndex <= currentIndex
if (step.regexDefinition.isNotBlank())
matchEndIndexOffset = currentIndex + 1
//fail matches past last index
if (currentIndex > inputSequence.lastIndex)
false
else
inputSequence[currentIndex].definition == step || step.regexDefinition.isBlank()
}
else -> {
error("Bad input sequence")
}
}
}
if (matched) {
val endIndex = resultCache?.endIndex ?: matchEndIndexOffset
handleTerminalMatch(productionStack, endIndex, step, currentIndex, evalTextIsBlank, branch, matchCache)
} else {
failBranch(step, evalTextIsBlank, productionStack, currentIndex, matchCache)
}
}
private fun remainderIsBlank(inputSequence: String, currentIndex: Int): Boolean {
for (x in currentIndex until inputSequence.length) {
if (!inputSequence[x].isWhitespace())
return false
}
return true
}
private fun handleTerminalMatch(productionStack: MutableList, endIndex: Int, step: ProductionStep, currentIndex: Int, evalTextIsBlank: Boolean, branch: Branch, matchCache: MutableMap) {
val astNode = ASTNode(step, startIndex = currentIndex, endIndex = endIndex, originalText = productionStack.peek()?.astNode?.originalText
?: "")
productionStack.peek()?.let {
it.astNode.endIndex = endIndex
it.astNode.children.add(astNode)
}
if (endIndex > trackData.highestSuccessfulIndex) {
trackData.expectedTokenNotMatched.clear()
trackData.highestSuccessfulIndex = endIndex
if (!evalTextIsBlank)
trackData.possibleValidTokensIfContinuingParsing.clear()
}
matchCache[CacheKey(step.name, currentIndex)] = CacheValue(astNode, endIndex, true, evalTextIsBlank)
//advance to next step in branch
branch.activeStep.next()
}
private fun failBranch(step: ProductionStep, evalTextIsBlank: Boolean, productionStack: MutableList, currentIndex: Int, matchCache: MutableMap) {
val parentsStartingWithSameIndex = getParentsStartingWithIndex(currentIndex, productionStack)
if (currentIndex == trackData.highestSuccessfulIndex) {
trackData.expectedTokenNotMatched.add(step)
parentsStartingWithSameIndex.forEach {
trackData.expectedTokenNotMatched.add(it)
}
if (evalTextIsBlank) {
trackData.possibleValidTokensIfContinuingParsing.add(step)
parentsStartingWithSameIndex.forEach {
trackData.possibleValidTokensIfContinuingParsing.add(it)
}
}
}
matchCache[CacheKey(step.name, currentIndex)] = CacheValue(ASTNode(originalText = productionStack.peek()?.astNode?.originalText
?: ""), currentIndex, false, evalTextIsBlank)
productionStack.peek()?.let {
//reset matching
it.astNode.endIndex = it.astNode.startIndex
it.activeBranch.node?.value?.matched = FAILED
it.astNode.children.clear()
}
}
private fun getParentsStartingWithIndex(currentIndex: Int, productionStack: MutableList): List {
val result = mutableListOf()
for (index in (productionStack.size - 1) downTo 0) {
val productionEntry = productionStack[index]
if (productionEntry.astNode.startIndex == currentIndex)
result.add(productionEntry.step)
else
break
}
return result
}
private fun handleProductionStep(grammar: Grammar<*>, step: ProductionStep, matchCache: MutableMap, currentIndex: Int, currentEntry: ProductionEntry, productionStack: MutableList) {
val production = grammar.productions[step]
if (production != null) {
val cachedResult = matchCache[CacheKey(step.name, currentIndex)]
if (cachedResult != null) {
if (cachedResult.matched) {
currentEntry.astNode.children.add(cachedResult.astNode)
currentEntry.astNode.endIndex = cachedResult.astNode.endIndex
currentEntry.activeBranch.node?.value?.activeStep?.next()
} else {
productionStack.peek()?.let {
//reset matching
it.astNode.endIndex = it.astNode.startIndex
it.activeBranch.node?.value?.matched = FAILED
it.astNode.children.clear()
}
}
} else
productionStack.add(ProductionEntry(step, production.expressions.map { Branch(it.steps) }, currentIndex, productionStack.peek()?.astNode?.originalText
?: ""))
}
}
private fun matchProduction(matchCache: MutableMap, currentEntry: ProductionEntry, productionStack: MutableList) {
matchCache[CacheKey(currentEntry.step.name, currentEntry.astNode.startIndex)] = CacheValue(currentEntry.astNode, currentEntry.astNode.endIndex, true)
productionStack.pop()
productionStack.peek()?.let { parent ->
parent.astNode.children.add(currentEntry.astNode)
parent.astNode.endIndex = currentEntry.astNode.endIndex
parent.activeBranch.node?.value?.activeStep?.next()
}
}
private fun moveToNextBranch(activeBranchNode: LinkedNodes, currentEntry: ProductionEntry): Branch? {
var branch: Branch? = activeBranchNode.next()?.value
currentEntry.astNode.endIndex = currentEntry.astNode.startIndex
currentEntry.astNode.children.clear()
return branch
}
private fun failProduction(matchCache: MutableMap, currentEntry: ProductionEntry, productionStack: MutableList) {
matchCache[CacheKey(currentEntry.step.name, currentEntry.astNode.startIndex)] = CacheValue(currentEntry.astNode, currentEntry.astNode.endIndex, false)
//pop failed production
productionStack.pop()
productionStack.peek()?.let {
it.activeBranch.node?.value?.matched = FAILED
it.astNode.endIndex = it.astNode.startIndex
it.astNode.children.clear()
}
}
fun find(grammar: Grammar<*>, text: String, delimiterPattern: String?): List {
val resultAsts = mutableListOf()
var index = 0
while (index < text.lastIndex) {
index = try {
val result = match(grammar, text, false, index)
resultAsts.add(result)
result.endIndex
} catch (e: ParserException) {
incrementByDelimiterIndex(delimiterPattern, index, text)
}
}
return resultAsts
}
private fun incrementByDelimiterIndex(delimiterPattern: String?, startIndex: Int, text: String): Int {
var index = startIndex
if (delimiterPattern != null) {
val sub = text.substring(index)
val m = "(?s)(?:${delimiterPattern})".genericRegex().find(sub)
if (m != null) {
index += m.range.end
} else {
index++
}
} else {
index++
}
return index
}
}
private fun List.peek(): E? {
return lastOrNull()
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy