edu.stanford.nlp.process.Tokenizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
package edu.stanford.nlp.process;
import java.util.Iterator;
import java.util.List;
/**
 * Tokenizers break up text into individual Objects. These objects may be
 * Strings, Words, or other Objects. A Tokenizer extends the {@link Iterator}
 * interface, but provides a lookahead operation {@code peek()}. An
 * implementation of this interface is expected to have a constructor that
 * takes a single argument, a Reader.
 *
 * @param <T> the type of token produced by this Tokenizer
 * @author Teg Grenager
 */
public interface Tokenizer<T> extends Iterator<T> {

    /**
     * Returns the next token from this Tokenizer.
     *
     * @return the next token in the token stream.
     * @throws java.util.NoSuchElementException
     *             if the token stream has no more tokens.
     */
    @Override
    T next();

    /**
     * Returns {@code true} if and only if this Tokenizer has more elements.
     *
     * @return {@code true} if a further call to {@code next()} would return
     *         a token.
     */
    @Override
    boolean hasNext();

    /**
     * Removes from the underlying collection the last element returned by
     * the iterator. This is an optional operation for Iterators - a
     * Tokenizer normally would not support it. This method can be called
     * only once per call to {@code next()}.
     */
    @Override
    void remove();

    /**
     * Returns the next token, without removing it, from the Tokenizer, so
     * that the same token will be again returned on the next call to
     * {@code next()} or {@code peek()}.
     *
     * @return the next token in the token stream.
     * @throws java.util.NoSuchElementException
     *             if the token stream has no more tokens.
     */
    T peek();

    /**
     * Returns all tokens of this Tokenizer as a List for convenience.
     *
     * @return A list of all the tokens
     */
    List<T> tokenize();
}