All downloads are free. Search and download functionality uses the official Maven repository.

gov.nih.nlm.nls.lvg.Flows.ToTokenizeKeepAll Maven / Gradle / Ivy

The newest version!
package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import gov.nih.nlm.nls.lvg.Lib.*;
/*****************************************************************************
* This class breaks up a term into tokens by delimiters (tokenize).  
* Delimiters include space, tab, and all punctuations.
*
* 

History: *

    *
* * @author NLM NLS Development Team * * @see * Design Document * * @version V-2010 ****************************************************************************/ public class ToTokenizeKeepAll extends Transformation implements Cloneable { // public methods /** * Performs the mutation of this flow component. * * @param in a LexItem as the input for this flow component * @param detailsFlag a boolean flag for processing details information * @param mutateFlag a boolean flag for processing mutate information * * @return Vector - results from this flow component * of LexItems */ public static Vector Mutate(LexItem in, boolean detailsFlag, boolean mutateFlag) { // mutate the term Vector termList = GetToken(in.GetSourceTerm()); // update target LexItem Vector out = new Vector(); for(int i = 0; i < termList.size(); i++) { // details & mutate String details = null; String mutate = null; if(detailsFlag == true) { details = INFO; } if(mutateFlag == true) { mutate = Transformation.NO_MUTATE_INFO; } String term = termList.elementAt(i); LexItem temp = UpdateLexItem(in, term, Flow.TOKENIZE_KEEP_ALL, Category.ALL_BIT_VALUE, Inflection.ALL_BIT_VALUE, details, mutate); out.addElement(temp); } return out; } /** * A unit test driver for this flow component. 
*/ public static void main(String[] args) { String testStr = GetTestStr(args, "The Club-Foot"); // input String // Mutate LexItem in = new LexItem(testStr); Vector outs = ToTokenizeKeepAll.Mutate(in, true, true); PrintResults(in, outs); // print out results } // private method private static Vector GetToken(String inStr) { // init all delimiters String delim = " \t-({[)}]_!@#%&*\\:;\"',.?/~+=|<>$`^"; StringTokenizer buf = new StringTokenizer(inStr, delim, true); Vector out = new Vector(); String last = new String(); while(buf.hasMoreTokens() == true) { String cur = buf.nextToken(); // combine " " together if((cur.equals(" ") == true) && (last.equals(" ") == true)) { String temp = out.lastElement() + " "; out.setElementAt(temp, out.size()-1); } else { out.addElement(cur); } last = cur; } return out; } // data members private static final String INFO = "Tokenize keep All"; }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy