gov.nih.nlm.nls.lvg.Flows.ToAntiNorm Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lvg2010dist Show documentation
LVG tools is used by Apache cTAKES.
The newest version!
package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import java.sql.*;
import gov.nih.nlm.nls.lvg.Lib.*;
import gov.nih.nlm.nls.lvg.Db.*;
import gov.nih.nlm.nls.lvg.Trie.*;
/*****************************************************************************
* This class returns reversed norm of lexicon terms for the input.
* It finds normalizations for the input term and then uses the normalized
* forms to find the lexicon terms from the antiNorm database.
* This process is similar to the reverse process of norm and thus is called
* antiNorm.
*
* History:
* 

* 
*
* @author NLM NLS Development Team
*
* @see 
* Design Document 
*
* @version    V-2010
****************************************************************************/
public class ToAntiNorm extends Transformation
{
    // public methods
    /**
    * Performs the mutation of this flow component.
    *
    * <p>The input is first normalized through ToNormalize.Mutate(). Each
    * normalized target term is then looked up with 
    * DbAntiNorm.GetAntiNorms(), and every record found is converted to a
    * LexItem through UpdateLexItem() with the flow Flow.ANTINORM. A LexItem
    * is dropped as a duplicate when an already collected one has the same
    * target term, category value and inflection value, and either the mutate
    * information of both is equal or at least one of them has none.
    *
    * @param   in   a LexItem as the input for this flow component
    * @param   maxTerm   the maximum number of permutation term (uninflect)
    * @param   stopWords   Vector - stop words list
    * @param   conn   LVG database connection
    * @param   trie   LVG Ram trie
    * @param   symbolMap   a hash table contains the unicode symbols mapping
    * @param   unicodeMap   a hash table contains the unicode mapping
    * @param   ligatureMap   a hash table contains the mapping of ligatures
    * @param   diacriticMap  a hash table contains the mapping of diacritics
    * @param   nonStripMap   a hash table contains the non-Strip map unicode
    * @param   removeSTree   a reverse trie tree of removeS pattern rules
    * @param   detailsFlag   a boolean flag for processing details information
    * @param   mutateFlag   a boolean flag for processing mutate information
    *
    * @return  the results from this flow component - a collection (Vector)
    * of LexItems; empty (never null) when nothing is found
    *
    * @exception SQLException if errors occur while connecting to the LVG 
    * database.
    *
    * @see DbBase
    */
    public static Vector Mutate(LexItem in, int maxTerm, 
        Vector stopWords, Connection conn, RamTrie trie, 
        Hashtable symbolMap,
        Hashtable unicodeMap,
        Hashtable ligatureMap, 
        Hashtable diacriticMap, 
        Hashtable nonStripMap,
        RTrieTree removeSTree, boolean detailsFlag, boolean mutateFlag) 
        throws SQLException
    {
        // Mutate the term: retrieve the normalized forms.
        // The collections are typed locally so that elements can be read
        // back as LexItem/AntiNormRecord without relying on raw-type access.
        Vector<LexItem> norms = ToNormalize.Mutate(in, maxTerm, stopWords, 
            conn, trie, symbolMap, unicodeMap, ligatureMap, diacriticMap, 
            nonStripMap, removeSTree, detailsFlag, mutateFlag);

        // the detail information is the same for every result: compute once
        String details = detailsFlag ? INFO : null;

        Vector<LexItem> out = new Vector<LexItem>();

        // go through all normalized term list
        for(LexItem norm : norms)
        {
            String normStr = norm.GetTargetTerm();

            // find the lexicon terms from antiNorm table
            Vector<AntiNormRecord> normAntiNorms 
                = DbAntiNorm.GetAntiNorms(normStr, conn);

            for(AntiNormRecord cur : normAntiNorms)
            {
                // mutate information: EUI followed by the field separator
                String mutate = null;
                if(mutateFlag)
                {
                    mutate = cur.GetEui() + GlobalBehavior.GetFieldSeparator();
                }

                // update target LexItems  
                String term = cur.GetInflectedTerm();
                int cat = cur.GetCategory();
                long infl = cur.GetInflection();
                LexItem temp = UpdateLexItem(in, term, Flow.ANTINORM, 
                    cat, infl, details, mutate);

                AddToAntiNormOutput(out, temp);
            }
        }

        return out;
    }

    /**
    * A unit test driver for this flow component.
    *
    * @param   args   command line arguments, handed to GetTestStr() together
    * with the default test term "Rendu-Osler disease"
    */
    public static void main(String[] args)
    {
        // read in configuration file: for data base info
        Configuration conf = new Configuration("data.config.lvg", true);
        String testStr = GetTestStr(args, "Rendu-Osler disease");
        int minTermLen = Integer.parseInt(
            conf.GetConfiguration(Configuration.MIN_TERM_LENGTH));
        String lvgDir = conf.GetConfiguration(Configuration.LVG_DIR);
        int maxTerm = Integer.parseInt(
            conf.GetConfiguration(Configuration.MAX_UNINFLS));
        Vector stopWords = ToStripStopWords.GetStopWordsFromFile(conf);
        Hashtable symbolMap
            = ToMapSymbolToAscii.GetSymbolMapFromFile(conf);
        Hashtable unicodeMap
            = ToMapUnicodeToAscii.GetUnicodeMapFromFile(conf);
        Hashtable ligatureMap 
            = ToSplitLigatures.GetLigatureMapFromFile(conf);
        Hashtable diacriticMap 
            = ToStripDiacritics.GetDiacriticMapFromFile(conf);
        Hashtable nonStripMap
            = ToStripMapUnicode.GetNonStripMapFromFile(conf);
        RTrieTree removeSTree = ToRemoveS.GetRTrieTreeFromFile(conf);

        // Mutate: connect to DB
        LexItem in = new LexItem(testStr);
        Vector<LexItem> outs = new Vector<LexItem>();
        try
        {
            Connection conn = DbBase.OpenConnection(conf);
            try
            {
                boolean isInflection = true;
                RamTrie trie 
                    = new RamTrie(isInflection, minTermLen, lvgDir, 0);
                if(conn != null)
                {
                    outs = ToAntiNorm.Mutate(in, maxTerm, stopWords, conn, 
                        trie, symbolMap, unicodeMap, ligatureMap, 
                        diacriticMap, nonStripMap, removeSTree, true, true);
                }
            }
            finally
            {
                // release the connection even if the trie or the mutation 
                // fails, so that a failing run does not leak it
                DbBase.CloseConnection(conn, conf);
            }
        }
        catch (Exception e)
        {
            System.err.println(e.getMessage());
        }

        PrintResults(in, outs);             // print out results
    }

    // private methods
    /**
    * Appends cur to out unless out already holds an equivalent LexItem.
    *
    * @param   out   the collected results, modified in place
    * @param   cur   the candidate LexItem to be added
    */
    private static void AddToAntiNormOutput(Vector<LexItem> out, 
        LexItem cur)
    {
        for(LexItem existing : out)
        {
            if(IsDuplicate(existing, cur))
            {
                return;
            }
        }

        out.addElement(cur);
    }

    /**
    * Checks if two LexItems count as the same antiNorm result: the target 
    * term, category value and inflection value must match and the mutate 
    * information must either be equal or be missing on at least one side.
    *
    * @param   existing   a LexItem that is already in the results
    * @param   cur   the candidate LexItem
    *
    * @return  true if cur duplicates existing
    */
    private static boolean IsDuplicate(LexItem existing, LexItem cur)
    {
        boolean sameTarget 
            = existing.GetTargetTerm().equals(cur.GetTargetTerm())
            && (existing.GetTargetCategory().GetValue() 
              == cur.GetTargetCategory().GetValue())
            && (existing.GetTargetInflection().GetValue() 
              == cur.GetTargetInflection().GetValue());
        if(!sameTarget)
        {
            return false;
        }

        // missing mutate information on either side matches anything
        if((existing.GetMutateInformation() == null)
        || (cur.GetMutateInformation() == null))
        {
            return true;
        }

        return existing.GetMutateInformation().equals(
            cur.GetMutateInformation());
    }

    // data members
    private static final String INFO = "AntiNorm";
}