All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.knowitall.tool.parse.MaltParser.scala Maven / Gradle / Ivy

The newest version!
package edu.knowitall
package tool
package parse

import java.io.File
import java.net.URL
import scala.Array.canBuildFrom
import scala.Option.option2Iterable
import scala.collection.JavaConversions.asScalaSet
import org.maltparser.MaltParserService
import edu.knowitall.collection.immutable.Interval
import edu.knowitall.tool.parse.graph.Dependency
import graph.DependencyGraph
import graph.DependencyNode
import postag.OpenNlpPostagger
import postag.Postagger
import stem.MorphaStemmer
import scala.collection.immutable.SortedSet

/** Concrete [[DependencyParserMain]] whose parser is a [[MaltParser]].
  *
  * MaltParser is much faster than the StanfordParser but has a lower F-score.
  * It includes wrapper code so that it can still use the Stanford postprocessing.
  */
object MaltParserMain extends DependencyParserMain {
  // Model file handed to the parser; a `var`, so it can be reassigned
  // before `dependencyParser` is first accessed.
  var model: File = new File("engmalt.linear-1.7.mco")

  // Created on first access, from whatever `model` holds at that moment.
  lazy val dependencyParser: MaltParser = new MaltParser(model)
}

/** A [[DependencyParser]] backed by a `MaltParserService`.
  *
  * MaltParser is much faster than the StanfordParser but has a lower F-score.
  *
  * @param modelUrl location of the MaltParser model; defaults to the relative
  *                 file `engmalt.linear-1.7.mco`
  * @param tagger   part-of-speech tagger applied to each cleaned sentence to
  *                 obtain the tokens that are handed to MaltParser
  * @param logFile  file passed to MaltParser as its log file; when `None`,
  *                 MaltParser logging is turned off
  */
class MaltParser(modelUrl: URL = new File("engmalt.linear-1.7.mco").toURI.toURL, tagger: Postagger = new OpenNlpPostagger, logFile: Option[File] = None) extends DependencyParser {
  val parser = initializeMaltParserService()
  val stemmer = MorphaStemmer

  /** Convenience constructor taking the model as a local file. */
  def this(modelFile: File) = this(modelUrl = modelFile.toURI.toURL)

  /** Builds the MaltParser command line and initializes a parser service with it.
    *
    * A `ConfigurationException` raised during initialization is reported on
    * stderr and then rethrown unchanged.
    */
  private def initializeMaltParserService() = {
    // hack to make malt parser work with a different manifest
    val field = classOf[org.maltparser.core.helper.SystemInfo].getDeclaredField("version")
    field.setAccessible(true)
    // static field, hence the null receiver
    field.set(null, "1.7")

    val command =
      "-u " + modelUrl +
      " -m parse" +
      // turn logging off if no log file is specified
      (logFile match {
        case Some(file) => " -lfi " + file.getPath
        case None => " -cl off"
      })

    System.err.println("Initializing malt: " + command)
    val service = new MaltParserService()

    try {
      service.initializeParserModel(command)
    }
    catch {
      case e: org.maltparser.core.config.ConfigurationException =>
        // Report on stderr, like the initialization message above, so that
        // diagnostics never get mixed into output written to stdout.
        System.err.println("\n" +
        "There was an error configurating MaltParser.\n" +
        "This is most likely because the model file '" + modelUrl + "' was not found.\n" +
        "Please download the MaltParser model file from http://www.maltparser.org.\n")

        throw e
    }

    service
  }

  /** Trims the sentence and replaces unicode quotation marks with ASCII quotes. */
  private def clean(sentence: String) = {
    sentence.trim.
      // replace unicode double quotes
      replaceAll("[\u201c\u201d\u201e\u201f\u275d\u275e]", "\"").
      // replace unicode single quotes
      replaceAll("[\u2018\u2019\u201a\u201b\u275b\u275c]", "'")
  }

  /** Parses a sentence into its dependencies.
    *
    * The sentence is cleaned, POS-tagged and stemmed, then handed to MaltParser
    * as one tab-separated line per token. Edges touching MaltParser's node 0
    * (the root) are dropped.
    *
    * @param sentence the raw sentence text
    * @return the dependencies found; empty when the cleaned sentence is empty
    * @throws NoSuchElementException if MaltParser returns an edge without a label
    */
  override def dependencies(sentence: String): Iterable[Dependency] = {
    // explicit `.asScala` conversions for the Java collections MaltParser returns
    import scala.collection.JavaConverters._

    val trimmed = clean(sentence)
    if (trimmed.isEmpty) Iterable.empty
    else {
      val tokens = tagger.postag(trimmed).iterator.zipWithIndex.map { case (t, i) =>
        new DependencyNode(t, Interval.singleton(i))
      }.toIndexedSeq

      val lemmatized = tokens.map(stemmer.stemToken)

      // one tab-separated line per token; the first column is a 1-based index
      val maltTokens: Array[String] = lemmatized.iterator.zipWithIndex.map { case (ltok, i) =>
        Iterable(i+1,
            ltok.token.string,
            ltok.lemma,
            ltok.token.postag,
            ltok.token.postag,
            "-").mkString("\t")
      }.toArray[String]
      val structure = parser.parse(maltTokens)

      val deps: SortedSet[Dependency] = structure.getEdges.asScala.flatMap { edge =>
        val sourceIndex = edge.getSource.getIndex
        val targetIndex = edge.getTarget.getIndex

        if (sourceIndex == 0 || targetIndex == 0) {
          // skip the root
          None
        }
        else {
          // MaltParser indices are 1-based (0 is the root); `tokens` is 0-based
          val source = tokens(sourceIndex - 1)
          val dest = tokens(targetIndex - 1)

          val labels = edge.getLabelTypes.asScala.map(edge.getLabelSymbol)
          // same exception type as calling `head` on an empty set, but with a
          // message that identifies the offending edge
          val label = labels.headOption.getOrElse {
            throw new NoSuchElementException(
              "MaltParser edge " + sourceIndex + " -> " + targetIndex + " has no label")
          }

          Some(new Dependency(source, dest, label))
        }
      }(scala.collection.breakOut)

      deps
    }
  }

  /** Parses a sentence into a dependency graph.
    *
    * The graph's nodes are exactly the sources and destinations of the
    * dependencies returned by [[dependencies]].
    *
    * @param sentence the raw sentence text
    */
  override def dependencyGraph(sentence: String): DependencyGraph = {
    val deps = dependencies(sentence)
    val nodes: Set[DependencyNode] = deps.flatMap(dep => Set(dep.source, dep.dest))(scala.collection.breakOut)
    new DependencyGraph(nodes, deps)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy