All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.arizona.sista.discourse.rstparser.CheckSameSentence.scala Maven / Gradle / Ivy

package edu.arizona.sista.discourse.rstparser

import edu.arizona.sista.processors.Document

/**
 * Verifies if segments within a sentence are only connected to other segments in the same sentence
 * User: mihais
 * Date: 6/24/14
 */
object CheckSameSentence {
  /**
   * Loads the (tree, document) pairs returned by `CacheReader.load(args(0))`, runs
   * [[checkTree]] on each pair and prints the summed inconsistency and attachment counts.
   *
   * @param args command-line arguments; `args(0)` is required and is handed to `CacheReader.load`
   */
  def main(args:Array[String]): Unit = {
    val trees = CacheReader.load(args(0))
    var inc = 0
    var total = 0
    for(td <- trees) {
      // td._1 is the discourse tree, td._2 the document it belongs to
      val (i, t) = checkTree(td._1, td._2)
      inc += i
      total += t
    }
    println(s"Found $inc inconsistencies out of $total attachments.")
  }

  /**
   * Recursively counts attachments and intra-sentence inconsistencies in a discourse tree.
   *
   * Every non-terminal node counts as one attachment between its first two children. A child
   * that is [[withinSentence]] contributes one inconsistency unless its sibling is also
   * within a sentence and both lie in the same sentence. When both children are within
   * (different) sentences, the node therefore contributes two inconsistencies.
   *
   * @param tree    root of the (sub)tree to inspect
   * @param doc     document used to look up sentence sizes
   * @param verbose when true, prints both children for every inconsistency found, at every
   *                depth of the tree
   * @return (number of inconsistencies, number of attachments) for the whole subtree
   */
  def checkTree(tree:DiscourseTree, doc:Document, verbose:Boolean = false):(Int, Int) = {
    if(tree.isTerminal) {
      // a terminal node has no children, hence nothing to check or count
      (0, 0)
    } else {
      val l = tree.children(0)
      val r = tree.children(1)

      val leftWithin = withinSentence(l, doc)
      val rightWithin = withinSentence(r, doc)
      // both flags imply firstSentence == lastSentence for each child, so comparing
      // l.firstSentence with r.lastSentence is enough to establish "same sentence"
      val sameSentence = leftWithin && rightWithin && l.firstSentence == r.lastSentence

      def report(side:String): Unit = {
        if(verbose) {
          println(s"INCONSISTENCY FOR $side:")
          println(s"LEFT:\n$l\nRIGHT:$r")
        }
      }

      var inc = 0
      if(leftWithin && !sameSentence) {
        inc += 1
        report("LEFT")
      }
      if(rightWithin && !sameSentence) {
        inc += 1
        report("RIGHT")
      }

      // this node is one attachment; the children add their own
      var total = 1
      for(c <- tree.children) {
        // propagate verbose so inconsistencies below the root are reported as well
        val (i, t) = checkTree(c, doc, verbose)
        inc += i
        total += t
      }

      (inc, total)
    }
  }

  /**
   * Tells whether a tree lies inside a single sentence without covering all of it.
   *
   * @param tree the discourse (sub)tree to test
   * @param doc  document providing `sentences(i).size` (presumably the token count of
   *             sentence i; confirm against Document)
   * @return true when the tree starts and ends in the same sentence and either its first
   *         token offset is greater than 0 or its last token offset is smaller than the
   *         sentence's last index
   */
  def withinSentence(tree:DiscourseTree, doc:Document):Boolean = {
    val singleSentence = tree.firstSentence == tree.lastSentence
    // short-circuit: the sentence size is only looked up when the tree sits in one sentence
    singleSentence &&
      (tree.firstToken.token > 0 ||
        tree.lastToken.token < doc.sentences(tree.lastSentence).size - 1)
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy