All downloads are free. The search and download features use the official Maven repository.

edu.berkeley.nlp.io.AbstractTokenizer Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
/**
 * 
 */
package edu.berkeley.nlp.io;

import java.io.*;
import java.util.*;

/**
 * Abstract tokenizer built around a one-token lookahead buffer.  Tokenizers
 * extending AbstractTokenizer need only implement the getNext() method;
 * next(), hasNext(), peek() and tokenize() are derived from it here.
 *
 * A null return from getNext() is treated by this class as "no more tokens".
 *
 * @author Teg Grenager
 */

public abstract class AbstractTokenizer implements Tokenizer {

  /** One-token lookahead buffer; null means no token is currently buffered. */
  protected Object nextToken = null;

  /**
   * Produces the next token from the underlying source.
   *
   * @return the next token, or null to signal that there are no more tokens
   */
  protected abstract Object getNext();

  /** Loads a token into the lookahead buffer if the buffer is empty. */
  private void fillLookahead() {
    if (nextToken == null) {
      nextToken = getNext();
    }
  }

  /**
   * Returns the next token from this Tokenizer and advances past it.
   *
   * Unlike the usual iterator convention, this method does not throw when
   * the tokenizer is exhausted; it returns null.  Callers should check
   * hasNext() first.
   *
   * @return the next token, or null if there are no more tokens
   */
  public Object next() {
    fillLookahead();
    Object result = nextToken;
    // Only read ahead when a token was actually produced: once getNext()
    // has reported end of input there is no point asking it again within
    // the same call.
    if (result != null) {
      nextToken = getNext();
    }
    return result;
  }

  /**
   * Returns true if this Tokenizer has more elements.
   *
   * @return true if a token is buffered or getNext() yields a non-null token
   */
  public boolean hasNext() {
    fillLookahead();
    return nextToken != null;
  }

  /**
   * This is an optional operation, by default not supported.
   *
   * @throws UnsupportedOperationException always
   */
  public void remove() {
    throw new UnsupportedOperationException();
  }

  /**
   * This is an optional operation, by default supported.
   * Returns the next token without consuming it.
   *
   * @return the token that next() would return, or null if there are no
   *         more tokens
   */
  public Object peek() {
    fillLookahead();
    return nextToken;
  }

  /**
   * Returns text as a List of tokens.  All remaining tokens are consumed,
   * so hasNext() is false once this method returns.
   *
   * @return a new list holding the remaining tokens in order; empty if there
   *         are none
   */
  public List tokenize() {
    List result = new ArrayList();
    while (hasNext()) {
      result.add(next());
    }
    return result;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy