All downloads are free. The search and download functionality uses the official Maven repository.

justhalf.nlp.tokenizer.Tokenizer Maven / Gradle / Ivy

package justhalf.nlp.tokenizer;

import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import justhalf.nlp.NLPInterface;

/**
 * An interface for tokenizers, i.e. components that split a sentence into tokens.
 */
public interface Tokenizer extends NLPInterface {

	/**
	 * Tokenizes the given sentence into an array of strings.
	 *
	 * @param sentence
	 * 		The sentence to be tokenized
	 * @return
	 * 		The tokens of the sentence as an array of String
	 */
	public String[] tokenizeToString(String sentence);

	/**
	 * Tokenizes the given sentence into a list of {@link CoreLabel}, which holds
	 * all information required to get the original string.
	 *
	 * @param sentence
	 * 		The sentence to be tokenized
	 * @return
	 * 		The list of tokens as {@link CoreLabel} objects.
	 * 		Each object holds the spacing information surrounding the token it represents,
	 * 		enabling faithful restoration of the original string.
	 */
	// The element type is spelled out (instead of a raw List) so that callers get
	// compile-time type checking and do not need unchecked casts.
	public List<CoreLabel> tokenize(String sentence);
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy