All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.tokenizers.WordTokenizer Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * WordTokenizer.java
 * Copyright (C) 2007-2012 University of Waikato, Hamilton, New Zealand
 */

package weka.core.tokenizers;

import java.util.NoSuchElementException;
import java.util.StringTokenizer;

import weka.core.RevisionUtils;

/**
 * A simple tokenizer that is using the java.util.StringTokenizer class to
 * tokenize the strings.
 * <p>
 * Valid options are:
 *
 * <pre>
 * -delimiters &lt;value&gt;
 *  The delimiters to use
 *  (default ' \r\n\t.,;:'"()?!').
 * </pre>
 *
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 10203 $
 */
public class WordTokenizer extends CharacterDelimitedTokenizer {

  /** for serialization */
  private static final long serialVersionUID = -930893034037880773L;

  /**
   * the actual tokenizer; null until {@link #tokenize(String)} has been
   * called. The field is transient, so it is not written by Java
   * serialization and is null again on a restored instance.
   */
  protected transient StringTokenizer m_Tokenizer;

  /**
   * Returns a string describing the tokenizer
   *
   * @return a description suitable for displaying in the explorer/experimenter
   *         gui
   */
  @Override
  public String globalInfo() {
    return "A simple tokenizer that is using the java.util.StringTokenizer "
      + "class to tokenize the strings.";
  }

  /**
   * Tests if this enumeration contains more elements. If no string has been
   * tokenized yet there is nothing to enumerate, so false is returned
   * instead of failing with a NullPointerException.
   *
   * @return true if and only if this enumeration object contains at least one
   *         more element to provide; false otherwise.
   */
  @Override
  public boolean hasMoreElements() {
    return m_Tokenizer != null && m_Tokenizer.hasMoreElements();
  }

  /**
   * Returns the next element of this enumeration if this enumeration object has
   * at least one more element to provide.
   *
   * @return the next element of this enumeration.
   * @throws NoSuchElementException if no string has been tokenized yet or all
   *           tokens have already been returned
   */
  @Override
  public String nextElement() {
    if (m_Tokenizer == null) {
      // same exception type StringTokenizer.nextToken() raises when exhausted
      throw new NoSuchElementException("No string has been tokenized yet");
    }
    return m_Tokenizer.nextToken();
  }

  /**
   * Sets the string to tokenize. A new java.util.StringTokenizer is created
   * over the string with the delimiters returned by getDelimiters(); any
   * tokens left over from a previous call are discarded.
   *
   * @param s the string to tokenize
   */
  @Override
  public void tokenize(String s) {
    m_Tokenizer = new StringTokenizer(s, getDelimiters());
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 10203 $");
  }

  /**
   * Runs the tokenizer with the given options and strings to tokenize. The
   * tokens are printed to stdout.
   *
   * @param args the commandline options and strings to tokenize
   */
  public static void main(String[] args) {
    runTokenizer(new WordTokenizer(), args);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy