org.clulab.reach.apis.Ruler.scala Maven / Gradle / Ivy
The newest version!
package org.clulab.reach.apis
import org.clulab.odin.impl.{OdinCompileException, OdinNamedCompileException}
import org.clulab.processors.Document
import org.clulab.processors.corenlp.CoreNLPProcessor
import org.clulab.processors.shallownlp.ShallowNLPProcessor
import org.clulab.reach._
import org.clulab.reach.brat.Brat
import org.clulab.reach.apis.open.OpenSystem
import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}
/**
  * Entry points used to run either the open-domain Odin system or the REACH
  * system over a piece of text and package the outcome as a [[RulerResults]].
  *
  * Results are Java-facing: absent values are represented by `null` rather
  * than `Option`, and the rule map is exposed as a `java.util.Map`.
  */
object Ruler {

  // read rules for ReachSystem from resource
  val reachRules = RuleReader.mkRules()
  // pass in reachRules
  val reach = new ReachSystem(Some(reachRules))

  // setup open domain system
  val odProc = new CoreNLPProcessor(withDiscourse = ShallowNLPProcessor.NO_DISCOURSE)
  val od = new OpenSystem(Some(odProc))

  /**
    * Run the Open Domain system with the given text and rules.
    *
    * @param text  the text to annotate
    * @param rules the rules to apply; when blank, no extraction is attempted
    * @return the annotations on success; otherwise a result whose `error` field
    *         holds `(ruleName, message)`, where `ruleName` is `null` when the
    *         failure is not tied to a named rule
    */
  def runOpen(text: String, rules: String): RulerResults = {
    val doc = od.mkDoc(text)
    // Were any rules submitted?
    if (rules.trim.isEmpty) {
      errorResults(text, rules, doc, ruleMap = null, name = null, message = "rulesStr is empty")
    } else {
      // For displaying rules (ruleName -> rule)
      val ruleMap = javaRuleMap(rules)
      val result = od.extractFrom(rules, doc)
      // Reset cachedRules on failure
      if (result.isFailure) od.cachedRules = ""
      result match {
        // either the engine succeeded
        case Success(mentions) =>
          val eventAnnotations = Brat.dumpStandoff(mentions, doc)
          val syntaxAnnotations = Brat.syntaxStandoff(doc)
          new RulerResults(text, rules, eventAnnotations, syntaxAnnotations, tokens(doc),
            synTrees(doc), ruleMap)
        // there may have been a problem compiling the rules (no standoff in this case)
        case Failure(OdinNamedCompileException(e, name)) =>
          errorResults(text, rules, doc, ruleMap, name, e)
        // An error without a name
        case Failure(OdinCompileException(other)) =>
          errorResults(text, rules, doc, ruleMap, null, other)
        // Catch-all for anything else
        case Failure(e) =>
          errorResults(text, rules, doc, ruleMap, null, e.getMessage)
      }
    }
  }

  /**
    * Run the bio nlp system with the given text and REACH rules.
    *
    * @param text the text to annotate
    * @return the annotations produced by `reach`, together with its rules
    */
  def runReach(text: String): RulerResults = {
    val doc = reach.mkDoc(text, "visualizer")
    val mentions = reach.extractFrom(doc)
      // Remove ModificationTriggers from output
      .filterNot(_ matches "ModificationTrigger")
    val rules = reach.allRules
    val eventAnnotations = Brat.dumpStandoff(mentions, doc)
    val syntaxAnnotations = Brat.syntaxStandoff(doc)
    val ruleMap = javaRuleMap(rules)
    new RulerResults(text, rules, eventAnnotations, syntaxAnnotations,
      tokens(doc), synTrees(doc), ruleMap)
  }

  /**
    * Builds the result returned when no mentions could be produced: both
    * annotation fields are `null` and `error` is the pair `(name, message)`.
    */
  private def errorResults(
      text: String,
      rules: String,
      doc: Document,
      ruleMap: java.util.Map[String, String],
      name: String,
      message: String): RulerResults =
    new RulerResults(text, rules, null, null, tokens(doc), synTrees(doc), ruleMap,
      Array(name, message))

  /**
    * Builds the rule-name -> rule map as a Java map, or `null` if it cannot be built.
    *
    * The conversion happens inside the `Try` so that the fallback is a genuine
    * `null`; calling `.asJava` on a `null` Scala map only yields `null` from
    * Scala 2.12 onwards (earlier versions wrap it and fail on first access).
    */
  private def javaRuleMap(rules: String): java.util.Map[String, String] =
    Try(mkRuleMap(rules).asJava).getOrElse(null)

  /**
    * Flattens every sentence of `doc` into one array of [[Token]]s.
    *
    * Lemma, tag and entity are `null` for sentences where that annotation is
    * absent, instead of failing with `None.get`.
    */
  private def tokens(doc: Document): Array[Token] = {
    // value at position i of an optional annotation layer, or null when the layer is missing
    def at(layer: Option[Array[String]], i: Int): String = layer.map(_(i)).orNull

    doc.sentences flatMap { s =>
      (0 until s.size) map { i =>
        new Token(
          s.words(i),
          at(s.lemmas, i),
          at(s.tags, i),
          at(s.entities, i),
          s.startOffsets(i),
          s.endOffsets(i))
      }
    }
  }

  /** One string per sentence: its syntactic tree, or "()" when the sentence has none. */
  private def synTrees(doc: Document): Array[String] =
    doc.sentences map { s => s.syntacticTree.fold("()")(_.toString) }

  /** removes commented lines */
  private def clean(input: String): String = input.replaceAll("""(?m)^\s*#.*$""", "").trim()

  /**
    * Create a Map from rule name -> rule.
    *
    * The (comment-stripped) rules are cut in front of every `- name:`; chunks
    * that do not begin with a rule name are dropped.
    */
  private def mkRuleMap(rules: String): Map[String, String] = {
    // to find the rule name (even if it is quoted)
    val namePattern = """^- name:\s+("[^\\"]*(?:\\.[^\\"]*)*"|[^\s#]+)""".r
    val chunks =
      clean(rules)
        .split("(?=- name:)")
        .map(_.trim)
        .filter(_.nonEmpty) // remove empty chunks from the split
    val named = chunks.flatMap { rule =>
      // find the rule name
      namePattern.findFirstMatchIn(rule).map { m =>
        val name = m.group(1)
        val key =
          // if the string is quoted, remove the quotes
          if (name.startsWith("\"") && name.endsWith("\"")) name.drop(1).dropRight(1)
          else name // it isn't quoted, just return it
        (key, rule)
      }
    }
    named.toMap
  }
}
/**
  * Outcome of running a rule system over a text, as built by `Ruler`.
  *
  * Any of the reference-typed fields may be `null` when the corresponding
  * value is unavailable (e.g. no annotations after a rule compilation error).
  *
  * @param text              the input text
  * @param rules             the rules that were applied
  * @param eventAnnotations  standoff for the extracted mentions
  * @param syntaxAnnotations standoff for the syntax of the document
  * @param syntaxTokens      the tokens of the document
  * @param syntaxTrees       one syntactic tree string per sentence
  * @param ruleMap           rule name -> rule
  * @param error             always of size 2 (name, message) whenever present
  */
class RulerResults(
    val text: String,
    val rules: String,
    val eventAnnotations: String,
    val syntaxAnnotations: String,
    val syntaxTokens: Array[Token],
    val syntaxTrees: Array[String],
    val ruleMap: java.util.Map[String, String],
    val error: Array[String] = null
)
/**
  * A single token of a sentence, as exposed in [[RulerResults]].
  *
  * @param word   the token's word
  * @param lemma  the token's lemma
  * @param tag    the token's tag (presumably part-of-speech; confirm against the processor)
  * @param entity the token's entity label
  * @param start  the token's start offset
  * @param end    the token's end offset
  */
class Token(
    val word: String,
    val lemma: String,
    val tag: String,
    val entity: String,
    val start: Int,
    val end: Int
)
© 2015 - 2025 Weber Informatics LLC | Privacy Policy