All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.arizona.sista.discourse.rstparser.TreeStats.scala Maven / Gradle / Ivy

package edu.arizona.sista.discourse.rstparser

import java.io.File
import scala.collection.mutable.ListBuffer
import edu.arizona.sista.struct.Counter
import edu.arizona.sista.processors.fastnlp.FastNLPProcessor

/**
 * Reads a single .dis file, or all .dis files in a directory, and prints some useful stats:
 * the distribution of relation labels over non-terminal nodes, and the distribution of
 * labels (with direction) over nodes that have more than two children.
 * User: mihais
 * Date: 4/6/14
 */
object TreeStats {

  /**
   * Entry point.
   *
   * @param args `args(0)` is the path to either one discourse file or a directory;
   *             in a directory only files whose name ends with ".dis" are parsed
   */
  def main(args: Array[String]): Unit = {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.isEmpty) {
      System.err.println("Usage: TreeStats <.dis file | directory containing .dis files>")
      sys.exit(1)
    }

    val reader = new Reader
    // Lazy so the processor is only constructed if at least one file is actually read.
    lazy val proc = new FastNLPProcessor()
    val top = new File(args(0))

    val trees: List[DiscourseTree] =
      if (top.isDirectory) {
        // File.listFiles() returns null on an I/O error; treat that as "no files".
        val candidates = Option(top.listFiles()).getOrElse(Array.empty[File])
        candidates.toList
          .filter(_.getName.endsWith(".dis"))
          .map { f =>
            println(s"Parsing file $f")
            reader.read(f, proc, simplifyRelationLabels = true, verbose = true)._1
          }
      } else {
        // NOTE(review): unlike the directory branch, this call does not pass
        // simplifyRelationLabels, so it relies on Reader.read's default — confirm
        // whether that asymmetry is intended.
        List(reader.read(top, proc, verbose = true)._1)
      }

    labelStats(trees)
    countNodesWithMoreThanTwoChildren(trees)
  }

  /** Prints `header` followed by one tab-indented "key value" line per entry of `stats.sorted`. */
  private def printDistribution(header: String, stats: Counter[String]): Unit = {
    println(header)
    for (entry <- stats.sorted) {
      println(s"\t${entry._1} ${entry._2}")
    }
  }

  /**
   * Prints, over all given trees, how often each "label (direction)" combination
   * appears on a node with more than two children.
   *
   * @param trees the discourse trees to inspect
   */
  def countNodesWithMoreThanTwoChildren(trees: List[DiscourseTree]): Unit = {
    val stats = new Counter[String]()
    trees.foreach(countNodesWithMoreThanTwoChildren(_, stats))
    printDistribution("Distribution of labels with more than two children:", stats)
  }

  /**
   * Recursively walks `t`, incrementing `stats` for every node that has more than
   * two children. Nodes whose `children` is null are skipped.
   *
   * @param t     root of the (sub)tree to walk
   * @param stats counter updated in place; keys have the form "label (direction)"
   */
  def countNodesWithMoreThanTwoChildren(t: DiscourseTree, stats: Counter[String]): Unit = {
    if (t.children != null) {
      if (t.children.length > 2)
        stats.incrementCount(s"${t.relationLabel} (${t.relationDirection})")
      t.children.foreach(countNodesWithMoreThanTwoChildren(_, stats))
    }
  }

  /**
   * Prints the distribution of relation labels over all non-terminal nodes
   * of the given trees.
   *
   * @param trees the discourse trees to inspect
   */
  def labelStats(trees: List[DiscourseTree]): Unit = {
    val stats = new Counter[String]()
    trees.foreach(labelStats(_, stats))
    printDistribution("Distribution of labels:", stats)
  }

  /**
   * Recursively walks `t`, incrementing `stats` by one for the relation label of
   * every non-terminal node.
   *
   * @param t     root of the (sub)tree to walk
   * @param stats counter updated in place; keys are relation labels
   */
  def labelStats(t: DiscourseTree, stats: Counter[String]): Unit = {
    if (!t.isTerminal) {
      stats.incrementCount(t.relationLabel, 1)
      t.children.foreach(labelStats(_, stats))
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy