gov.nih.nlm.nls.lvg.Flows.ToStripPunctuationEnhanced Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lvg2010dist Show documentation
LVG tools is used by Apache cTAKES.
The newest version!
package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import gov.nih.nlm.nls.lvg.Lib.*;
import gov.nih.nlm.nls.lvg.Util.*;
/*****************************************************************************
* This class performs an enhanced function of stripping punctuation from
* a specified term.  This enhanced function does not strip punctuation in
* the following cases:
*
*  Floating number: "1.25", "-23", or "-23.38"
*    => use Float.parseFloat() to find float words: x.xx, -x, or -x.xx
*
*  Date: "10/12/97" or "10-12-00"
*    => utilize DateFormat, SimpleDateFormat to find "d/M/YY" and "d-M-YY"
*
*  Telephone: "301-435-3170" or "301.435.3170"
*
*  Catalog: such as "007.12.1234.07" or "007-12-1234-07"
*    => NN-NN-NN: if '-' is surrounded by numbers, don't do anything
*    => Hyphenated words or chemicals: XX-XX-XX, if '-' is not surrounded
*       by numbers only, replace it with a space.
*
*  Genitive: such as "Guy's", "Guys' ", "Guyz' ", "Guyx' " (TBD)
*
*
* History:
* 

* 
*
* @author NLM NLS Development Team
*
* @see 
* Design Document 
*
* @version    V-2010
****************************************************************************/
public class ToStripPunctuationEnhanced extends Transformation implements Cloneable
{
    // public methods
    /**
    * Runs this flow component on one LexItem: the source term of the input
    * is stripped of punctuation (enhanced rules) and the result is wrapped
    * into a new LexItem via UpdateLexItem.
    *
    * @param   in   the LexItem whose source term is processed
    * @param   detailsFlag   when true, the details string of this flow is
    *                        passed along; otherwise null is passed
    * @param   mutateFlag   when true, Transformation.NO_MUTATE_INFO is passed
    *                       along as mutate information; otherwise null
    *
    * @return  Vector - holds the single LexItem produced by UpdateLexItem
    */
    public static Vector Mutate(LexItem in, boolean detailsFlag, 
        boolean mutateFlag)
    {
        // the stripped term is computed first, from the source term only
        String strippedTerm = StripPunctuationEnhanced(in.GetSourceTerm());

        // optional details & mutate information (null when not requested)
        String details = detailsFlag ? INFO : null;
        String mutate = mutateFlag ? Transformation.NO_MUTATE_INFO : null;

        // build the target LexItem and return it as the only result
        LexItem result = UpdateLexItem(in, strippedTerm,
            Flow.STRIP_PUNCTUATION_ENHANCED, Transformation.UPDATE,
            Transformation.UPDATE, details, mutate);
        Vector out = new Vector();
        out.addElement(result);
        return out;
    }

    /**
    * A unit test driver for this flow component.  The test string comes
    * from GetTestStr (presumably the command line argument, with the
    * literal below as fallback - confirm against Transformation).
    */
    public static void main(String[] args)
    {
        LexItem in = new LexItem(GetTestStr(args, "Left's 12.34.56"));
        // run the flow with both details and mutate information enabled
        Vector outs = Mutate(in, true, true);
        PrintResults(in, outs);
    }

    // private methods
    // Splits the term on spaces and tabs, strips each word separately and
    // joins the stripped words back with single spaces (trimmed at the ends).
    private static String StripPunctuationEnhanced(String inStr)
    {
        StringTokenizer tokens = new StringTokenizer(inStr, " \t");
        StringBuffer joined = new StringBuffer();
        while(tokens.hasMoreTokens())
        {
            joined.append(StripPunctuationFromWord(tokens.nextToken()));
            joined.append(" ");
        }
        // trim() drops the separator appended after the last word
        return joined.toString().trim();
    }

    // Strips punctuation (enhanced) from a single word.  The word is handed
    // back untouched when it has no punctuation at all, or when
    // Word.IsCatelogNumber accepts it (float, date, catalog per the original
    // note).  Otherwise hyphens become spaces before the plain strip runs.
    static String StripPunctuationFromWord(String inWord)
    {
        // short-circuit keeps the catalog check from running on words
        // without punctuation, exactly as in the two-branch form
        if(!Word.HasPunctuation(inWord) || Word.IsCatelogNumber(inWord))
        {
            return inWord;
        }
        String hyphenFree = inWord.replace('-', ' ');
        return ToStripPunctuation.StripPunctuation(hyphenFree);
    }

    // data members
    // details string of this flow; spelling kept as is because the value is
    // handed to UpdateLexItem at runtime
    private static final String INFO = "Strip Punctuation Enchanced";
}