commonMain.org.antlr.v4.kotlinruntime.tree.xpath.XPath.kt Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of antlr-kotlin-runtime-jvm Show documentation
Runtime for ANTLR Kotlin
There is a newer version: 1.0.1
Show newest version
// Copyright 2017-present Strumenta and contributors, licensed under Apache 2.0.
// Copyright 2024-present Strumenta and contributors, licensed under BSD 3-Clause.
package org.antlr.v4.kotlinruntime.tree.xpath

import org.antlr.v4.kotlinruntime.*
import org.antlr.v4.kotlinruntime.tree.ParseTree
import org.antlr.v4.kotlinruntime.tree.xpath.XPathLexer.Tokens

/**
 * Represent a subset of XPath XML path syntax for use in identifying nodes in
 * parse trees.
 *
 * Split path into words and separators `/` and `//` via ANTLR
 * itself then walk path elements from left to right. At each separator-word
 * pair, find set of nodes. Next stage uses those as work list.
 *
 * The basic interface is [XPath.findAll]`(tree, xpath, parser)`.
 *
 * But that is just shorthand for:
 *
 * ```
 * val p = XPath(parser, xpath)
 * return p.evaluate(tree)
 * ```
 *
 * See `org.antlr.v4.test.TestXPath` for descriptions.
 *
 * In short, this allows operators:
 *
 * - `/` root
 * - `//` anywhere
 * - `!` invert; this must appear directly after the root (`/`) or anywhere (`//`) operator
 *
 * And path elements:
 *
 * - `ID` token name
 * - `'string'` any string literal token from the grammar
 * - `expr` rule name
 * - `*` wildcard matching any node
 *
 * Whitespace is not allowed.
 */
@Suppress("MemberVisibilityCanBePrivate")
public open class XPath(protected var parser: Parser, protected var xpath: String) {
  public companion object {
    /** Path word that matches any node; it is a word, not an operator or separator. */
    public const val WILDCARD: String = "*"

    /** Path word for the invert operator. */
    public const val NOT: String = "!"

    /**
     * Builds an [XPath] from [xpath] and [parser], then evaluates it against [tree].
     *
     * @return the nodes returned by [evaluate] for [tree]
     */
    public fun findAll(tree: ParseTree, xpath: String, parser: Parser): Collection<ParseTree> =
      XPath(parser, xpath).evaluate(tree)
  }

  /**
   * Path elements parsed from [xpath], in left-to-right order.
   *
   * Initialized by calling the open function [split] during construction,
   * which is why the `LeakingThis` inspection is suppressed.
   */
  @Suppress("LeakingThis")
  protected var elements: Array<XPathElement> = split(xpath)

  /**
   * Tokenizes [xpath] with [XPathLexer] and converts the tokens into path elements.
   *
   * A root (`/`) or anywhere (`//`) separator may be followed by an optional `!`,
   * which marks the resulting element as inverted. A token reference, rule
   * reference or wildcard that is not preceded by a separator is treated as a
   * non-anywhere element.
   *
   * @throws IllegalArgumentException if the lexer cannot tokenize the path,
   *   if a token that cannot start a path element is encountered, or if
   *   [getXPathElement] rejects the word following a separator
   */
  public open fun split(xpath: String): Array<XPathElement> {
    val input = CharStreams.fromString(xpath)
    val lexer = object : XPathLexer(input) {
      // Rethrow instead of recovering, so the failure surfaces from fill() below
      override fun recover(e: LexerNoViableAltException): Unit =
        throw e
    }

    // Replace whatever listeners are installed with the XPath-specific one
    lexer.removeErrorListeners()
    lexer.addErrorListener(XPathLexerErrorListener())

    val tokenStream = CommonTokenStream(lexer)

    try {
      tokenStream.fill()
    } catch (e: LexerNoViableAltException) {
      val pos = lexer.charPositionInLine
      val msg = "Invalid tokens or characters at index $pos in path '$xpath'"
      throw IllegalArgumentException(msg, e)
    }

    val tokens = tokenStream.tokens
    val n = tokens.size
    val elements = ArrayList<XPathElement>(n)
    var i = 0

    loop@ while (i < n) {
      val el = tokens[i]

      when (el.type) {
        Tokens.Root,
        Tokens.Anywhere -> {
          val anywhere = el.type == Tokens.Anywhere

          // Relies on the filled stream ending with an EOF token, so the
          // look-ahead below stays in range; getXPathElement rejects EOF.
          i++
          val invert = tokens[i].type == Tokens.Bang

          if (invert) {
            // Skip the `!` so that tokens[i] is the word it applies to
            i++
          }

          val pathElement = getXPathElement(tokens[i], anywhere)
          pathElement.invert = invert
          elements.add(pathElement)
          i++
        }
        Tokens.TokenRef,
        Tokens.RuleRef,
        Tokens.Wildcard -> {
          elements.add(getXPathElement(el, false))
          i++
        }
        Token.EOF -> break@loop
        else -> throw IllegalArgumentException("Unknown path element $el")
      }
    }

    return elements.toTypedArray()
  }

  /**
   * Convert a word like `*` or `ID` or `expr` to a path element.
   *
   * Wildcard tokens become wildcard elements. Token references and string
   * literals are resolved through [Parser.getTokenType]. Every other token is
   * treated as a rule name and resolved through [Parser.getRuleIndex].
   *
   * @param wordToken the token holding the word to convert
   * @param anywhere `true` if `//` precedes the word
   * @throws IllegalArgumentException if [wordToken] is EOF, or if the word is
   *   not a valid token name or rule name of [parser]
   * @throws IllegalStateException if [wordToken] has no text
   */
  protected open fun getXPathElement(wordToken: Token, anywhere: Boolean): XPathElement {
    require(wordToken.type != Token.EOF) { "Missing path element at end of path" }

    val word = wordToken.text ?: error("Expected wordToken to have text content")
    val ttype = parser.getTokenType(word)
    val ruleIndex = parser.getRuleIndex(word)

    return when (wordToken.type) {
      Tokens.Wildcard ->
        if (anywhere) XPathWildcardAnywhereElement() else XPathWildcardElement()
      Tokens.TokenRef,
      Tokens.String -> {
        require(ttype != Token.INVALID_TYPE) {
          "$word at index ${wordToken.startIndex} isn't a valid token name"
        }

        if (anywhere) XPathTokenAnywhereElement(word, ttype) else XPathTokenElement(word, ttype)
      }
      else -> {
        require(ruleIndex != -1) {
          "$word at index ${wordToken.startIndex} isn't a valid rule name"
        }

        if (anywhere) XPathRuleAnywhereElement(word, ruleIndex) else XPathRuleElement(word, ruleIndex)
      }
    }
  }

  /**
   * Return a collection of all nodes starting at [t] as root that satisfy the path.
   *
   * The root `/` is relative to the node passed to [evaluate]: [t] is attached
   * to a fresh dummy [ParserRuleContext], and each path element is then applied
   * in turn to the nodes matched by the previous one.
   *
   * @throws ClassCastException if [t] is not a [ParserRuleContext]
   */
  public open fun evaluate(t: ParseTree): Collection<ParseTree> {
    val dummyRoot = ParserRuleContext()
    // NOTE(review): this cast rejects any ParseTree that is not a
    // ParserRuleContext (e.g. a terminal node) — confirm that is intended.
    dummyRoot.addChild(t as ParserRuleContext)

    // Explicit type: the work list starts with the dummy root but is replaced
    // by sets of arbitrary parse tree nodes after each step.
    var work: Collection<ParseTree> = setOf(dummyRoot)

    for (element in elements) {
      val next = LinkedHashSet<ParseTree>()

      for (node in work) {
        if (node.childCount > 0) {
          // Only try to match next element if it has children
          // e.g., //func/*/stat might have a token node for which
          // we can't go looking for stat nodes.
          next.addAll(element.evaluate(node))
        }
      }

      work = next
    }

    return work
  }
}