All downloads are free. The search and download functionality uses the official Maven repository.

justhalf.nlp.tokenizer.WhitespaceTokenizer Maven / Gradle / Ivy

package justhalf.nlp.tokenizer;

import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;

/**
 * A very simple implementation of {@link Tokenizer} that splits the input on whitespace.
 *
 * <p>All work is delegated to a {@link RegexTokenizer} configured with the pattern
 * {@code [ \t\r\n]+}, i.e. runs of spaces, tabs, carriage returns and line feeds.
 *
 * <p>Note that this does not split on special characters which resemble whitespace, such as
 * the non-breaking space (U+00A0); only the four characters listed in the pattern are used.
 */
public class WhitespaceTokenizer implements Tokenizer {

    /**
     * Delegate that performs the actual tokenization. Declared {@code final} so that it can
     * never be reassigned and is safely published to other threads after construction.
     */
    private final RegexTokenizer regexTokenizer = new RegexTokenizer("[ \\t\\r\\n]+");

    /**
     * Tokenizes the given sentence by delegating to the underlying {@link RegexTokenizer}.
     *
     * @param sentence the text to tokenize
     * @return the array produced by the delegate's {@code tokenizeToString}
     */
    @Override
    public String[] tokenizeToString(String sentence) {
        return regexTokenizer.tokenizeToString(sentence);
    }

    /**
     * Tokenizes the given sentence by delegating to the underlying {@link RegexTokenizer}.
     *
     * <p>NOTE(review): the return type is a raw {@code List} here; presumably it should be
     * {@code List<CoreLabel>} to match the {@link Tokenizer} interface - confirm against the
     * interface declaration before tightening it.
     *
     * @param sentence the text to tokenize
     * @return the list produced by the delegate's {@code tokenize}
     */
    @Override
    public List tokenize(String sentence) {
        return regexTokenizer.tokenize(sentence);
    }

    /**
     * Reports this tokenizer as thread-safe.
     *
     * <p>NOTE(review): this relies on the delegate {@link RegexTokenizer} being safe for
     * concurrent use - verify against its implementation.
     *
     * @return always {@code true}
     */
    @Override
    public boolean isThreadSafe() {
        return true;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy