edu.stanford.nlp.trees.TreeTokenizerFactory Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

The newest version!

package edu.stanford.nlp.trees; 
import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.AbstractTokenizer;

import java.io.Reader;
import java.io.IOException;
import java.util.Iterator;

/** Wrapper for TreeReaderFactory that exposes it through the
 *  TokenizerFactory interface: every tokenizer handed out by this
 *  factory pulls its items from a TreeReader created over the supplied
 *  Reader.  Any IOException in the readTree() method
 *  of the TreeReader is logged and will result in a null
 *  tree returned.
 *
 *  NOTE(review): the tokenizer / iterator types are used raw here; if
 *  TokenizerFactory, Tokenizer and AbstractTokenizer are generic they
 *  should presumably be parameterized with Tree -- confirm against their
 *  declarations.
 *
 *  @author Roger Levy
 *  @author javanlp
 */
public class TreeTokenizerFactory implements TokenizerFactory  {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(TreeTokenizerFactory.class);

  /** Factory used to build a fresh TreeReader for every tokenizer. */
  private final TreeReaderFactory trf;

  /** Create a TreeTokenizerFactory from a TreeReaderFactory.
   *
   *  @param trf the factory used to create the underlying TreeReader
   *             for each Reader passed to this class
   */
  public TreeTokenizerFactory(TreeReaderFactory trf) {
    this.trf = trf;
  }

  /** Gets a tokenizer from a reader.
   *
   *  @param r the reader the trees are read from
   *  @return a tokenizer whose getNext() returns whatever the underlying
   *          TreeReader's readTree() returns, or null if readTree()
   *          throws an IOException
   */
  public Tokenizer getTokenizer(final Reader r) {
    return new AbstractTokenizer() {
      // One TreeReader per tokenizer, created eagerly over the caller's reader.
      private final TreeReader tr = trf.newTreeReader(r);

      @Override
      public Tree getNext() {
        try {
          return tr.readTree();
        }
        catch(IOException e) {
          // Deliberate best-effort: a read failure is reported in the log
          // and surfaced to the caller as a null tree, not as an exception.
          log.info("Error in reading tree.");
          return null;
        }
      }
    };
  }

  /** Gets a tokenizer from a reader; the extra options are not used.
   *
   *  @param r the reader the trees are read from
   *  @param extraOptions ignored
   *  @return the same kind of tokenizer as {@link #getTokenizer(Reader)}
   */
  public Tokenizer getTokenizer(final Reader r, String extraOptions) {
    // Silently ignore extra options
    return getTokenizer(r);
  }

  /** Same as getTokenizer().
   *
   *  @param r the reader the trees are read from
   *  @return the tokenizer produced by {@link #getTokenizer(Reader)},
   *          viewed as an Iterator
   */
  public Iterator getIterator(Reader r) {
    // Previously returned null, which contradicted the documented contract
    // and made every caller that iterated over the result fail.
    return getTokenizer(r);
  }

  /** Accepts an option string for interface compatibility; it has no effect.
   *
   *  @param options ignored
   */
  public void setOptions(String options) {
    //Silently ignore
  }
}