edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2

Show newest version

package edu.stanford.nlp.trees;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.LabelFactory;

import java.io.Reader;

/**
 * This class implements a TreeReaderFactory that produces
 * labeled, scored array-based Trees, which have been cleaned up to
 * delete empties, etc.   This seems to be a common case (for English).
 * By default, the labels are of type CategoryWordTag,
 * but a different Label type can be specified by the user.
 *
 * @author Christopher Manning
 */
public class LabeledScoredTreeReaderFactory implements TreeReaderFactory {

  private final LabelFactory lf;
  private final TreeNormalizer tm;

  /**
   * Create a new TreeReaderFactory with CoreLabel labels.
   */
  public LabeledScoredTreeReaderFactory() {
    lf = CoreLabel.factory();
    tm = new BobChrisTreeNormalizer();
  }

  public LabeledScoredTreeReaderFactory(LabelFactory lf) {
    this.lf = lf;
    tm = new BobChrisTreeNormalizer();
  }

  public LabeledScoredTreeReaderFactory(TreeNormalizer tm) {
    lf = CoreLabel.factory();
    this.tm = tm;
  }

  public LabeledScoredTreeReaderFactory(LabelFactory lf, TreeNormalizer tm) {
    this.lf = lf;
    this.tm = tm;
  }

  /**
   * An implementation of the TreeReaderFactory interface.
   * It creates a TreeReader which normalizes trees using
   * the BobChrisTreeNormalizer, and makes
   * LabeledScoredTree objects with
   * CategoryWordTag labels (unless otherwise specified on
   * construction).
   */
  public TreeReader newTreeReader(Reader in) {
    return new PennTreeReader(in, new LabeledScoredTreeFactory(lf), tm);
  }
}