All downloads are free. The search and download functionality uses the official Maven repository.

gov.sandia.cognition.text.token.Tokenizer Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 * File:                Tokenizer.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright February 10, 2009, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive 
 * license for use of this work by or on behalf of the U.S. Government. Export 
 * of this program may require a license from the United States Government. 
 * See CopyrightHistory.txt for complete details.
 * 
 */

package gov.sandia.cognition.text.token;

import gov.sandia.cognition.text.Textual;
import gov.sandia.cognition.util.CloneableSerializable;
import java.io.Reader;

/**
 * Contract for components that break text up into an ordered sequence of
 * tokens. The text to process can be handed over as a plain string, as a
 * character stream, or as a {@code Textual} object.
 *
 * NOTE(review): every overload is declared with a raw {@code Iterable}
 * return type, so the element type of the result is not stated in this
 * declaration -- confirm the intended element type against the upstream
 * source before parameterizing it.
 *
 * @author  Justin Basilico
 * @since   3.0
 */
public interface Tokenizer extends CloneableSerializable
{

    /**
     * Tokenizes the supplied string.
     *
     * @param   s
     *      The string to split into tokens.
     * @return
     *      The resulting tokens, in order.
     */
    Iterable tokenize(String s);

    /**
     * Tokenizes the text supplied by a reader.
     *
     * @param   reader
     *      The reader that provides the text to split into tokens.
     * @return
     *      The resulting tokens, in order.
     */
    Iterable tokenize(Reader reader);

    /**
     * Tokenizes the supplied textual object.
     *
     * @param   textual
     *      The textual object to split into tokens.
     * @return
     *      The resulting tokens, in order.
     */
    Iterable tokenize(Textual textual);

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy