gov.sandia.cognition.text.token.AbstractTokenizer Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of gov-sandia-cognition-text-core Show documentation

Algorithms and components for text analysis and information retrieval.

There is a newer version: 4.0.1

Show newest version

/*
 * File:                AbstractTokenizer.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright March 02, 2009, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive 
 * license for use of this work by or on behalf of the U.S. Government. Export 
 * of this program may require a license from the United States Government. 
 * See CopyrightHistory.txt for complete details.
 * 
 */

package gov.sandia.cognition.text.token;

import gov.sandia.cognition.text.Textual;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import java.io.StringReader;

/**
 * Skeleton base class for {@code Tokenizer} implementations. It supplies the
 * convenience overloads of {@code tokenize} by forwarding them to another
 * {@code tokenize} overload that is not defined in this class: a
 * {@code String} is wrapped in a {@code StringReader}, and a {@code Textual}
 * is unwrapped through its {@code readText()} method.
 *
 * NOTE(review): the {@code tokenize} methods return a raw {@code Iterable};
 * presumably the {@code Tokenizer} interface declares an element type --
 * confirm against the interface before tightening the signatures.
 *
 * @author  Justin Basilico
 * @since   3.0
 */
public abstract class AbstractTokenizer
    extends AbstractCloneableSerializable
    implements Tokenizer
{

    /**
     * Default constructor. Performs no work beyond invoking the superclass
     * constructor.
     */
    public AbstractTokenizer()
    {
        super();
    }

    /**
     * Tokenizes the given string by wrapping it in a {@code StringReader}
     * and delegating to the {@code tokenize} overload that accepts it.
     *
     * @param   s
     *      The string to tokenize.
     * @return
     *      Whatever the delegated {@code tokenize} call returns for a reader
     *      over {@code s}.
     */
    public Iterable tokenize(
        final String s)
    {
        // Keep the static type as StringReader so the same overload is
        // selected as when the reader is constructed inline.
        final StringReader stringReader = new StringReader(s);
        return this.tokenize(stringReader);
    }

    /**
     * Tokenizes the given textual object by obtaining its content through
     * {@code readText()} and delegating to the {@code tokenize} overload
     * matching that result.
     *
     * @param   textual
     *      The textual object whose content is to be tokenized.
     * @return
     *      Whatever the delegated {@code tokenize} call returns for the
     *      content read from {@code textual}.
     */
    public Iterable tokenize(
        final Textual textual)
    {
        // Delegation only; the tokenizing itself lives in the overload that
        // subclasses (or the interface contract) provide.
        return this.tokenize(textual.readText());
    }

}