All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.clulab.reach.utils.PathFinder.scala Maven / Gradle / Ivy

The newest version!
package org.clulab.reach.utils

import org.clulab.processors.Sentence
import org.clulab.struct.Interval

/**
  * Builds textual dependency-path patterns and token constraints
  * from the tokens and dependency graph of a single sentence.
  *
  * @param sentence the sentence whose annotations are read
  */
class PathFinder(sentence: Sentence) {

  /**
    * Returns the dependency paths between two tokens, one string per
    * combination of edges found along the shortest (undirected) node path.
    *
    * Each step is rendered as `>dep` when the edge goes from the current node
    * to the next one, or `<dep` when it goes the other way, optionally followed
    * by a token constraint for the node the step arrives at.
    *
    * @param start index of the token where the path begins
    * @param end index of the token where the path ends
    * @param withWords include the word in each token constraint
    * @param withLemmas include the lemma in each token constraint
    * @param withTags include the tag in each token constraint
    * @param withEntities include the entity label in each token constraint
    * @param withChunks include the chunk label in each token constraint
    * @param withLastTokenConstraint if false, no constraint is emitted for `end`
    * @return the rendered paths; `Nil` if the sentence has no dependencies
    * @throws IllegalArgumentException if `start` or `end` is not a valid token index
    */
  def dependencyPaths(
    start: Int, end: Int,
    withWords: Boolean = false,
    withLemmas: Boolean = false,
    withTags: Boolean = false,
    withEntities: Boolean = false,
    withChunks: Boolean = false,
    withLastTokenConstraint: Boolean = true
  ): Seq[String] = {

    require(sentence.words.isDefinedAt(start), "`start` is not a valid token index")
    require(sentence.words.isDefinedAt(end), "`end` is not a valid token index")

    type Edge = (Int, Int, String)

    // Cartesian product of the edge alternatives available at each step.
    // Earlier steps vary slowest, so the order matches a nested loop.
    def mkEdgePaths(edges: Seq[Seq[Edge]]): Seq[Seq[Edge]] =
      edges.foldRight(Seq(Seq.empty[Edge])) { (alternatives, suffixes) =>
        for {
          edge <- alternatives
          suffix <- suffixes
        } yield edge +: suffix
      }

    // Renders a single step that arrives at token `target`.
    def mkStep(arrow: String, dep: String, target: Int): String = {
      val constraint =
        if (target == end && !withLastTokenConstraint) None
        else mkTokenConstraint(target, withWords, withLemmas, withTags, withEntities, withChunks)
      constraint.fold(s"$arrow$dep")(c => s"$arrow$dep $c")
    }

    sentence.dependencies match {
      case None => Nil
      case Some(deps) =>
        // get sequence of nodes in the shortest path
        val nodesPath = deps.shortestPath(start, end, ignoreDirection = true)

        // make pairs of consecutive nodes in the shortest path
        val pairs = for (i <- 1 until nodesPath.size) yield (nodesPath(i - 1), nodesPath(i))

        // get every edge (in either direction) connecting each pair
        val edges = for ((n1, n2) <- pairs) yield deps.getEdges(n1, n2, ignoreDirection = true)

        for (edgePath <- mkEdgePaths(edges)) yield {
          val steps = (pairs zip edgePath) map {
            // edge follows the direction of travel
            case ((n1, n2), (src, dst, dep)) if src == n1 && dst == n2 => mkStep(">", dep, n2)
            // edge points against the direction of travel
            case ((n1, n2), (src, dst, dep)) if src == n2 && dst == n1 => mkStep("<", dep, n2)
            case ((n1, n2), edge) =>
              throw new IllegalStateException(s"edge $edge does not connect tokens $n1 and $n2")
          }
          steps.mkString(" ")
        }
    }
  }

  /**
    * Surrounds the string with single quotes, escaping embedded single quotes
    * with a backslash.
    *
    * Literal backslashes are escaped first; otherwise a backslash already in
    * the input could not be told apart from one added as an escape prefix.
    */
  def quote(s: String): String = {
    val escaped = s.replace("\\", "\\\\").replace("'", "\\'")
    s"'$escaped'"
  }

  /**
    * Returns a token constraint for a single token, e.g. `[word='x' & tag='y']`.
    *
    * @param tok index of the token
    * @return the constraint, or `None` if no field was requested
    * @throws NoSuchElementException if a requested annotation is absent from the sentence
    */
  def mkTokenConstraint(
    tok: Int,
    withWords: Boolean = true,
    withLemmas: Boolean = false,
    withTags: Boolean = false,
    withEntities: Boolean = false,
    withChunks: Boolean = false
  ): Option[String] = {
    // `value` is by-name so annotations that were not requested are never touched
    def field(name: String, enabled: Boolean)(value: => String): Option[String] =
      if (enabled) Some(s"$name=${quote(value)}") else None

    val fields = Seq(
      field("word", withWords)(sentence.words(tok)),
      field("lemma", withLemmas)(required("lemmas", sentence.lemmas).apply(tok)),
      field("tag", withTags)(required("tags", sentence.tags).apply(tok)),
      field("entity", withEntities)(required("entities", sentence.entities).apply(tok)),
      field("chunk", withChunks)(required("chunks", sentence.chunks).apply(tok))
    ).flatten
    if (fields.isEmpty) None else Some(s"[${fields.mkString(" & ")}]")
  }

  /**
    * Returns the token constraints for all tokens in the interval,
    * separated by single spaces.
    *
    * @param interval the token indices to render
    * @return the joined constraints, or `None` if none were produced
    */
  def mkTokenConstraints(
    interval: Interval,
    withWords: Boolean = true,
    withLemmas: Boolean = false,
    withTags: Boolean = false,
    withEntities: Boolean = false,
    withChunks: Boolean = false
  ): Option[String] = {
    val constraints = interval flatMap { i =>
      mkTokenConstraint(i, withWords, withLemmas, withTags, withEntities, withChunks)
    }
    if (constraints.isEmpty) None else Some(constraints.mkString(" "))
  }

  /** Unwraps an optional annotation, failing with a descriptive message when it is absent. */
  private def required[A](name: String, annotation: Option[A]): A =
    annotation.getOrElse(throw new NoSuchElementException(s"sentence has no $name"))
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy