gov.sandia.cognition.text.token.Tokenizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gov-sandia-cognition-text-core Show documentation
Show all versions of gov-sandia-cognition-text-core Show documentation
Algorithms and components for text analysis and information retrieval.
/*
* File: Tokenizer.java
* Authors: Justin Basilico
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright February 10, 2009, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government. Export
* of this program may require a license from the United States Government.
* See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.text.token;
import gov.sandia.cognition.text.Textual;
import gov.sandia.cognition.util.CloneableSerializable;
import java.io.Reader;
/**
 * Contract for components that turn text into an ordered sequence of tokens.
 * The text may be supplied as a {@code String}, through a {@code Reader}, or
 * as a {@code Textual} object; each source has its own {@code tokenize}
 * overload.
 *
 * NOTE(review): every overload returns a raw {@code Iterable}; the element
 * type is not visible in this declaration. Confirm whether a type argument
 * was intended.
 *
 * @author Justin Basilico
 * @since 3.0
 */
public interface Tokenizer extends CloneableSerializable
{

    /**
     * Tokenizes the given string.
     *
     * @param s
     *      The string to tokenize.
     * @return
     *      The tokens, in order.
     */
    Iterable tokenize(String s);

    /**
     * Tokenizes the text supplied by the given reader.
     *
     * @param reader
     *      The reader to tokenize the data from.
     * @return
     *      The tokens, in order.
     */
    Iterable tokenize(Reader reader);

    /**
     * Tokenizes the given textual object.
     *
     * @param textual
     *      The object to tokenize.
     * @return
     *      The tokens, in order.
     */
    Iterable tokenize(Textual textual);

}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy