edu.stanford.nlp.trees.treebank.Dataset Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2

Show newest version

package edu.stanford.nlp.trees.treebank;

import java.util.List;
import java.util.Properties;

/**
 * A generic interface for loading, processing, and writing a data set. Classes
 * that implement this interface may be specified in the configuration file
 * using the TYPE parameter. {@link TreebankPreprocessor} will
 * then call {@link #setOptions}, {@link #build} and {@link #getFilenames()}
 * in that order.
 *
 * @author Spence Green
 *
 */
public interface Dataset {

  /**
   * Encoding identifiers. No method of this interface refers to this enum
   * directly; how it is used is left to implementations and their callers.
   */
  enum Encoding {Buckwalter, UTF8}

  /**
   * Sets options for a dataset. Per the calling order described on this
   * interface, this is invoked before {@link #build()}.
   *
   * @param opts A map from parameter types defined in {@link ConfigParser} to
   * values
   * @return {@code true} if {@code opts} contains all required options;
   * {@code false} otherwise.
   */
  boolean setOptions(Properties opts);

  /**
   * Generic method for loading, processing, and writing a dataset.
   */
  void build();

  /**
   * Returns the filenames written by {@link #build()}.
   *
   * <p>The return type is parameterized (rather than a raw {@code List}) so
   * that callers can read the filenames as {@code String}s without casts or
   * unchecked warnings.
   *
   * @return A list of filenames
   */
  List<String> getFilenames();
}