gov.nih.nlm.nls.lvg.Flows.ToStripPunctuationEnhanced Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lvg2010dist Show documentation
Show all versions of lvg2010dist Show documentation
LVG tools is used by Apache cTAKES.
The newest version!
package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import gov.nih.nlm.nls.lvg.Lib.*;
import gov.nih.nlm.nls.lvg.Util.*;
/*****************************************************************************
* This class performs an enhanced version of punctuation stripping on a
* specified term. The enhanced function does not strip punctuation in the
* following cases:
*
* - Floating-point number: "1.25", "-23", or "-23.38"
*     => use Float.parseFloat() to find float words: x.xx, -x, or -x.xx
* - Date: "10/12/97" or "10-12-00"
*     => utilize DateFormat, SimpleDateFormat to find "d/M/YY" and "d-M-YY"
* - Telephone: "301-435-3170" or "301.435.3170"
* - Catalog number: such as "007.12.1234.07" or "007-12-1234-07"
*     => NN-NN-NN: if '-' is surrounded by numbers, don't do anything
*     => Hyphenated words or chemicals: XX-XX-XX: if '-' is not surrounded
*        by numbers only, replace it with a space.
* - Genitive: such as "Guy's", "Guys' ", "Guyz' ", "Guyx' " (TBD)
*
*
* History:
*
*
*
* @author NLM NLS Development Team
*
* @see
* Design Document
*
* @version V-2010
****************************************************************************/
public class ToStripPunctuationEnhanced extends Transformation implements Cloneable
{
    // data members

    // Text reported as the details of this flow when details are requested.
    // NOTE(review): "Enchanced" looks like a typo for "Enhanced"; it is left
    // unchanged here because the string is emitted at runtime.
    private static final String INFO = "Strip Punctuation Enchanced";

    // public methods
    /**
    * Applies the enhanced strip-punctuation flow to the source term of the
    * given LexItem and wraps the single result in a Vector.
    *
    * @param in a LexItem whose source term is to be processed
    * @param detailsFlag when true, the result carries this flow's details
    * text; otherwise null is passed as details
    * @param mutateFlag when true, the result carries
    * Transformation.NO_MUTATE_INFO; otherwise null is passed as mutate
    * information
    *
    * @return Vector - holds exactly one LexItem, the one produced by
    * UpdateLexItem for the stripped term
    */
    public static Vector Mutate(LexItem in, boolean detailsFlag,
        boolean mutateFlag)
    {
        // the transformed term is computed before anything else
        String strippedTerm = StripPunctuationEnhanced(in.GetSourceTerm());

        // optional details & mutate information
        String details = detailsFlag ? INFO : null;
        String mutate = mutateFlag ? Transformation.NO_MUTATE_INFO : null;

        // build the target item; Transformation.UPDATE is passed for both
        // update arguments of UpdateLexItem (their exact meaning is defined
        // by the Transformation base class, not visible here)
        LexItem result = UpdateLexItem(in, strippedTerm,
            Flow.STRIP_PUNCTUATION_ENHANCED, Transformation.UPDATE,
            Transformation.UPDATE, details, mutate);

        Vector results = new Vector();
        results.addElement(result);
        return results;
    }

    /**
    * A small command-line driver: obtains a test string through GetTestStr
    * (given the arguments and the sample "Left's 12.34.56"), runs Mutate on
    * it with both flags enabled and prints the outcome via PrintResults.
    *
    * @param args command-line arguments handed to GetTestStr
    */
    public static void main(String[] args)
    {
        LexItem in = new LexItem(GetTestStr(args, "Left's 12.34.56"));
        Vector outs = ToStripPunctuationEnhanced.Mutate(in, true, true);
        PrintResults(in, outs);
    }

    // private methods

    // Splits the term on spaces and tabs, strips punctuation from each word
    // and joins the processed words back together with single spaces. The
    // joined string is trimmed before it is returned.
    private static String StripPunctuationEnhanced(String inStr)
    {
        StringBuilder joined = new StringBuilder();
        StringTokenizer tokens = new StringTokenizer(inStr, " \t");
        while(tokens.hasMoreTokens())
        {
            // every processed word is followed by one space; the trailing
            // one is removed by trim() below
            joined.append(StripPunctuationFromWord(tokens.nextToken()));
            joined.append(" ");
        }
        return joined.toString().trim();
    }

    // Strips punctuation (enhanced) from a single word. A word is returned
    // untouched when Word.HasPunctuation reports none, or when
    // Word.IsCatelogNumber accepts it (per the original note: float, date,
    // catalog). Otherwise every hyphen becomes a space and the result is
    // passed through ToStripPunctuation.StripPunctuation.
    static String StripPunctuationFromWord(String inWord)
    {
        // the catalog check only runs when the word does have punctuation
        if(!Word.HasPunctuation(inWord) || Word.IsCatelogNumber(inWord))
        {
            return inWord;
        }
        String hyphensAsSpaces = inWord.replace('-', ' ');
        return ToStripPunctuation.StripPunctuation(hyphensAsSpaces);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy