All downloads are free. The search and download features use the official Maven repository.

edu.stanford.nlp.process.AbstractTokenizer Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.process;

import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;

// import edu.stanford.nlp.util.logging.Redwood;


/**
 * An abstract tokenizer.  Tokenizers extending AbstractTokenizer need only
 * implement the {@code getNext()} method. This implementation does not
 * allow null tokens, since null is used in the protected {@code nextToken}
 * field to signify that no token is currently buffered, and a null return
 * from {@code getNext()} signifies that no more tokens are available.
 *
 * @param <T> the type of token produced by this tokenizer
 * @author Teg Grenager
 */
public abstract class AbstractTokenizer<T> implements Tokenizer<T> {

  // /** A logger for this class */
  // private static final Redwood.RedwoodChannels log = Redwood.channels(AbstractTokenizer.class);

  /**
   * One-token lookahead buffer. Holds a token that has been fetched via
   * {@code getNext()} but not yet handed out by {@code next()};
   * {@code null} means nothing is buffered.
   */
  protected T nextToken; // = null;

  /**
   * Internally fetches the next token.
   *
   * @return the next token in the token stream, or null if none exists.
   */
  protected abstract T getNext();

  /**
   * Returns the next token from this Tokenizer and consumes it.
   *
   * @return the next token in the token stream.
   * @throws java.util.NoSuchElementException
   *          if the token stream has no more tokens.
   */
  @Override
  public T next() {
    // Fill the lookahead buffer only if hasNext()/peek() has not already done so.
    if (nextToken == null) {
      nextToken = getNext();
    }
    T result = nextToken;
    // Clear the buffer so the following call fetches a fresh token.
    nextToken = null;
    if (result == null) {
      throw new NoSuchElementException();
    }
    return result;
  }

  /**
   * Returns {@code true} if this Tokenizer has more elements. A token fetched
   * to answer this question is kept in {@code nextToken} so that it is not lost.
   *
   * @return {@code true} if a further token is available, {@code false} otherwise.
   */
  @Override
  public boolean hasNext() {
    if (nextToken == null) {
      nextToken = getNext();
    }
    return nextToken != null;
  }

  /**
   * This is an optional operation, by default not supported.
   *
   * @throws UnsupportedOperationException always.
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the next token without consuming it.
   * This is an optional operation, by default supported.
   *
   * @return The next token in the token stream.
   * @throws java.util.NoSuchElementException
   *          if the token stream has no more tokens.
   */
  @Override
  public T peek() {
    if (nextToken == null) {
      nextToken = getNext();
    }
    if (nextToken == null) {
      throw new NoSuchElementException();
    }
    // The token stays buffered, so a later next() returns this same token.
    return nextToken;
  }

  /**
   * Returns all remaining tokens as a List, consuming them from this Tokenizer.
   *
   * @return A list of all tokens remaining in the token stream; empty if
   *         there are none.
   */
  @Override
  public List<T> tokenize() {
    List<T> result = new ArrayList<>();
    while (hasNext()) {
      result.add(next());
    }
    // log.info("tokenize() produced " + result);
    return result;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy