edu.stanford.nlp.process.AbstractTokenizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
package edu.stanford.nlp.process;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
/**
* An abstract tokenizer. Tokenizers extending AbstractTokenizer need only
* implement the {@code getNext()} method. This implementation does not
* allow null tokens, since {@code null} is used in the protected
* {@code nextToken} field to signify that no more tokens are available.
*
* @author Teg Grenager ([email protected])
*/
public abstract class AbstractTokenizer<T> implements Tokenizer<T> {

  /**
   * One-token lookahead buffer. {@code null} means that no token is
   * currently buffered; it is refilled from {@link #getNext()} on demand.
   */
  protected T nextToken; // = null;

  /**
   * Internally fetches the next token.
   *
   * @return the next token in the token stream, or null if none exists.
   */
  protected abstract T getNext();

  /**
   * Returns the next token from this Tokenizer and consumes it.
   *
   * @return the next token in the token stream.
   * @throws java.util.NoSuchElementException
   *           if the token stream has no more tokens.
   */
  @Override
  public T next() {
    if (nextToken == null) {
      nextToken = getNext();
    }
    T result = nextToken;
    // Clear the lookahead buffer so the following call fetches a fresh token.
    nextToken = null;
    if (result == null) {
      throw new NoSuchElementException();
    }
    return result;
  }

  /**
   * Returns {@code true} if this Tokenizer has more elements.
   * If no token is buffered, one is fetched via {@link #getNext()} and kept
   * in {@link #nextToken} until it is consumed by {@link #next()}.
   *
   * @return {@code true} if a further token is available.
   */
  @Override
  public boolean hasNext() {
    if (nextToken == null) {
      nextToken = getNext();
    }
    return nextToken != null;
  }

  /**
   * This is an optional operation, by default not supported.
   *
   * @throws UnsupportedOperationException always.
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }

  /**
   * This is an optional operation, by default supported.
   * The returned token stays buffered, so it is not consumed.
   *
   * @return The next token in the token stream.
   * @throws java.util.NoSuchElementException
   *           if the token stream has no more tokens.
   */
  @Override
  public T peek() {
    if (nextToken == null) {
      nextToken = getNext();
    }
    if (nextToken == null) {
      throw new NoSuchElementException();
    }
    return nextToken;
  }

  /**
   * Returns text as a List of tokens, consuming every remaining token.
   *
   * @return A list of all tokens remaining in the underlying Reader
   */
  @Override
  public List<T> tokenize() {
    List<T> result = new ArrayList<T>();
    while (hasNext()) {
      result.add(next());
    }
    return result;
  }

}