All downloads are free. Search and download functionality uses the official Maven repository.

gov.nih.nlm.nls.lvg.Flows.ToAntiNorm Maven / Gradle / Ivy

The newest version!
package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import java.sql.*;
import gov.nih.nlm.nls.lvg.Lib.*;
import gov.nih.nlm.nls.lvg.Db.*;
import gov.nih.nlm.nls.lvg.Trie.*;
/*****************************************************************************
* This class returns reversed norm of lexicon terms for the input.
* It find normalizations for the input term and then use the normalized form 
* to find the lexicon terms from antiNorm database.
* This process is similar to the reverse process of norm and thus is called
* antiNorm.
*
* 

History: *

    *
* * @author NLM NLS Development Team * * @see * Design Document * * @version V-2010 ****************************************************************************/ public class ToAntiNorm extends Transformation { // public methods /** * Performs the mutation of this flow component. * * @param in a LexItem as the input for this flow component * @param maxTerm the maxinum number of permutation term (uninflect) * @param stopWords Vector - stop wrods list * @param conn LVG database connection * @param trie LVG Ram trie * @param symbolMap a hash table contains the unicode symbols mapping * @param unicodeMap a hash table contains the unicode mapping * @param ligatureMap a hash table contains the mapping of ligatures * @param diacriticMap a hash table contains the mapping of diacritics * @param nonStripMap a hash table contains the non-Strip map unicode * @param removeSTree a reverse trie tree of removeS pattern rules * @param detailsFlag a boolean flag for processing details information * @param mutateFlag a boolean flag for processing mutate information * * @return the results from this flow component - a collection (Vector) * of LexItems * * @exception SQLException if errors occurr while connect to LVG database. 
* * @see DbBase */ public static Vector Mutate(LexItem in, int maxTerm, Vector stopWords, Connection conn, RamTrie trie, Hashtable symbolMap, Hashtable unicodeMap, Hashtable ligatureMap, Hashtable diacriticMap, Hashtable nonStripMap, RTrieTree removeSTree, boolean detailsFlag, boolean mutateFlag) throws SQLException { // Mutate the term: retrieve the normalized form Vector norms = ToNormalize.Mutate(in, maxTerm, stopWords, conn, trie, symbolMap, unicodeMap, ligatureMap, diacriticMap, nonStripMap, removeSTree, detailsFlag, mutateFlag); // Get lexicon term from AntiNorm Table Vector antiNorms = new Vector(); Vector out = new Vector(); // go through all normalized term list for(int i = 0; i < norms.size(); i++) { LexItem norm = norms.elementAt(i); String normStr = norm.GetTargetTerm(); // find the lexicon term from antiNorm table Vector normAntiNorms = DbAntiNorm.GetAntiNorms(normStr, conn); // details & mutate for(int j = 0; j < normAntiNorms.size(); j++) { AntiNormRecord cur = normAntiNorms.elementAt(j); String details = null; String mutate = null; if(detailsFlag == true) { details = INFO; // detail mutate info } if(mutateFlag == true) { mutate = cur.GetEui() + GlobalBehavior.GetFieldSeparator(); } // update target LexItems String term = cur.GetInflectedTerm(); int cat = cur.GetCategory(); long infl = cur.GetInflection(); LexItem temp = UpdateLexItem(in, term, Flow.ANTINORM, cat, infl, details, mutate); out = AddToAntiNormOutput(out, temp); } } return out; } /** * A unit test driver for this flow component. 
*/ public static void main(String[] args) { // read in configuration file: for data base info Configuration conf = new Configuration("data.config.lvg", true); String testStr = GetTestStr(args, "Rendu-Osler disease"); int minTermLen = Integer.parseInt( conf.GetConfiguration(Configuration.MIN_TERM_LENGTH)); String lvgDir = conf.GetConfiguration(Configuration.LVG_DIR); int maxTerm = Integer.parseInt( conf.GetConfiguration(Configuration.MAX_UNINFLS)); Vector stopWords = ToStripStopWords.GetStopWordsFromFile(conf); Hashtable symbolMap = ToMapSymbolToAscii.GetSymbolMapFromFile(conf); Hashtable unicodeMap = ToMapUnicodeToAscii.GetUnicodeMapFromFile(conf); Hashtable ligatureMap = ToSplitLigatures.GetLigatureMapFromFile(conf); Hashtable diacriticMap = ToStripDiacritics.GetDiacriticMapFromFile(conf); Hashtable nonStripMap = ToStripMapUnicode.GetNonStripMapFromFile(conf); RTrieTree removeSTree = ToRemoveS.GetRTrieTreeFromFile(conf); // Mutate: connect to DB LexItem in = new LexItem(testStr); Vector outs = new Vector(); try { Connection conn = DbBase.OpenConnection(conf); boolean isInflection = true; RamTrie trie = new RamTrie(isInflection, minTermLen, lvgDir, 0); if(conn != null) { outs = ToAntiNorm.Mutate(in, maxTerm, stopWords, conn, trie, symbolMap, unicodeMap, ligatureMap, diacriticMap, nonStripMap, removeSTree, true, true); } DbBase.CloseConnection(conn, conf); } catch (Exception e) { System.err.println(e.getMessage()); } PrintResults(in, outs); // print out results } // private methods private static Vector AddToAntiNormOutput(Vector in, LexItem cur) { boolean existFlag = false; Vector out = new Vector(in); for(int i = 0; i < in.size(); i++) { LexItem temp = in.elementAt(i); if((temp.GetTargetTerm().equals(cur.GetTargetTerm()) == true) && (temp.GetTargetCategory().GetValue() == cur.GetTargetCategory().GetValue()) && (temp.GetTargetInflection().GetValue() == cur.GetTargetInflection().GetValue())) { if((temp.GetMutateInformation() == null) || (cur.GetMutateInformation() 
== null)) { existFlag = true; break; } else if(temp.GetMutateInformation().equals( cur.GetMutateInformation()) == true) { existFlag = true; break; } } } if(existFlag == false) { out.addElement(cur); } return out; } // data members private static final String INFO = "AntiNorm"; }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy