edu.stanford.nlp.parser.lexparser.MLEDependencyGrammarExtractor Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2

Show newest version

package edu.stanford.nlp.parser.lexparser;

import java.util.*;

import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.Tree;


/**
 * Gathers statistics on tree dependencies and then passes them to an
 * {@link MLEDependencyGrammar} for dependency grammar construction.
 *
 * <p>Each tree handed to {@link #tallyRoot(Tree, double)} is converted to a
 * list of {@link IntDependency} objects whose weighted counts are accumulated
 * in {@link #dependencyCounter}. {@link #formResult()} then turns every
 * counted dependency into a rule of a freshly built grammar.
 *
 * @author Dan Klein
 */
public class MLEDependencyGrammarExtractor extends AbstractTreeExtractor<DependencyGrammar> {

  /** Word index handed to the dependency conversion and to the resulting grammar. */
  protected final Index<String> wordIndex;

  /** Tag index handed to the dependency conversion and to the resulting grammar. */
  protected final Index<String> tagIndex;

  /** This is where all dependencies are stored (using full tag space). */
  protected ClassicCounter<IntDependency> dependencyCounter = new ClassicCounter<>();

  /** Language-specific parameters, taken from {@code op.tlpParams}. */
  protected TreebankLangParserParams tlpParams;

  /** Whether left and right is distinguished. */
  protected boolean directional;

  /** Whether dependent distance from head is distinguished. */
  protected boolean useDistance;

  /** Whether dependent distance is distinguished more coarsely. */
  protected boolean useCoarseDistance;

  /** Whether basic category tags are in the dependency grammar. */
  protected final boolean basicCategoryTagsInDependencyGrammar;

  /**
   * Creates an extractor configured from the given options.
   *
   * @param op        parser options; {@code tlpParams}, {@code directional},
   *                  {@code distance}, {@code coarseDistance} and
   *                  {@code trainOptions.basicCategoryTagsInDependencyGrammar}
   *                  are read from it
   * @param wordIndex index of words; note that {@link #formResult()} adds
   *                  {@code Lexicon.UNKNOWN_WORD} to it
   * @param tagIndex  index of tags
   */
  public MLEDependencyGrammarExtractor(Options op, Index<String> wordIndex, Index<String> tagIndex) {
    super(op);
    this.wordIndex = wordIndex;
    this.tagIndex = tagIndex;
    tlpParams = op.tlpParams;
    directional = op.directional;
    useDistance = op.distance;
    useCoarseDistance = op.coarseDistance;
    basicCategoryTagsInDependencyGrammar = op.trainOptions.basicCategoryTagsInDependencyGrammar;
  }

  /**
   * Adds the dependencies of one tree to the running counts.
   *
   * @param lt     the tree whose dependencies are tallied
   * @param weight amount by which the count of each extracted dependency is incremented
   */
  @Override
  protected void tallyRoot(Tree lt, double weight) {
    // this list is in full (not reduced) tag space
    List<IntDependency> deps = MLEDependencyGrammar.treeToDependencyList(lt, wordIndex, tagIndex);
    for (IntDependency dependency : deps) {
      dependencyCounter.incrementCount(dependency, weight);
    }
  }

  /**
   * Builds the dependency grammar from everything tallied so far.
   *
   * <p>Side effect: {@code Lexicon.UNKNOWN_WORD} is added to {@link #wordIndex}
   * before the grammar is constructed.
   *
   * @return a new {@link MLEDependencyGrammar} holding one rule per counted
   *         dependency, each added with its accumulated count
   */
  @Override
  public DependencyGrammar formResult() {
    wordIndex.addToIndex(Lexicon.UNKNOWN_WORD);
    MLEDependencyGrammar dg = new MLEDependencyGrammar(tlpParams, directional, useDistance, useCoarseDistance, basicCategoryTagsInDependencyGrammar, op, wordIndex, tagIndex);
    for (IntDependency dependency : dependencyCounter.keySet()) {
      dg.addRule(dependency, dependencyCounter.getCount(dependency));
    }
    return dg;
  }

} // end class MLEDependencyGrammarExtractor