![JAR search and dependency download from the Maven repository](/logo.png)
gov.nih.nlm.nls.lvg.Flows.ToTokenize Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lvg2010dist Show documentation
Show all versions of lvg2010dist Show documentation
LVG tools is used by Apache cTAKES.
The newest version!
package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import gov.nih.nlm.nls.lvg.Lib.*;
/*****************************************************************************
* This class breaks up a term into tokens by delimiters (tokenize).
* Delimiters include space, tab, and all ASCII punctuation characters.
*
* History:
*
*
*
* @author NLM NLS Development Team
*
* @see
* Design Document
*
* @version V-2010
****************************************************************************/
public class ToTokenize extends Transformation implements Cloneable
{
    // public methods
    /**
    * Performs the mutation of this flow component: splits the source term
    * of the input LexItem into tokens and builds one result LexItem per
    * token, in the order the tokens appear in the term.
    *
    * <p>Every result is created through UpdateLexItem with the flow
    * Flow.TOKENIZE, Category.ALL_BIT_VALUE and Inflection.ALL_BIT_VALUE.
    *
    * @param in a LexItem as the input for this flow component
    * @param detailsFlag a boolean flag for processing details information;
    *        when true the details string is "Tokenize", otherwise null
    * @param mutateFlag a boolean flag for processing mutate information;
    *        when true the mutate string is Transformation.NO_MUTATE_INFO,
    *        otherwise null
    *
    * @return Vector&lt;LexItem&gt; - results from this flow component; empty
    *         when the source term contains no token
    */
    public static Vector<LexItem> Mutate(LexItem in, boolean detailsFlag,
        boolean mutateFlag)
    {
        // details & mutate are the same for every token, so they are
        // resolved once instead of on each loop iteration
        String details = detailsFlag ? INFO : null;
        String mutate = mutateFlag ? Transformation.NO_MUTATE_INFO : null;

        // mutate the term, then update one target LexItem per token
        Vector<LexItem> out = new Vector<LexItem>();
        for(String term : GetToken(in.GetSourceTerm()))
        {
            LexItem temp = UpdateLexItem(in, term, Flow.TOKENIZE,
                Category.ALL_BIT_VALUE, Inflection.ALL_BIT_VALUE,
                details, mutate);
            out.addElement(temp);
        }
        return out;
    }

    /**
    * A unit test driver for this flow component.
    * The test string comes from GetTestStr, which is given the command
    * line arguments and "The Club-Foot".
    *
    * @param args command line arguments, passed on to GetTestStr
    */
    public static void main(String[] args)
    {
        String testStr = GetTestStr(args, "The Club-Foot");    // input String

        // Mutate
        LexItem in = new LexItem(testStr);
        Vector<LexItem> outs = ToTokenize.Mutate(in, true, true);

        PrintResults(in, outs);     // print out results
    }

    // private method
    /**
    * Splits a string on the characters of DELIMITERS.
    * The delimiters themselves are not returned, and a run of consecutive
    * delimiters does not produce empty tokens (StringTokenizer behavior).
    *
    * @param inStr the string to be tokenized
    *
    * @return Vector&lt;String&gt; - the tokens in their original order
    */
    private static Vector<String> GetToken(String inStr)
    {
        StringTokenizer buf = new StringTokenizer(inStr, DELIMITERS);
        Vector<String> out = new Vector<String>();
        while(buf.hasMoreTokens())
        {
            out.addElement(buf.nextToken());
        }
        return out;
    }

    // data members
    /** Details information reported for every token. */
    private static final String INFO = "Tokenize";

    /** All delimiters: space, tab, and the ASCII punctuation characters. */
    private static final String DELIMITERS
        = " \t-({[)}]_!@#%&*\\:;\"',.?/~+=|<>$`^";
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy