![JAR search and dependency download from the Maven repository](/logo.png)
gov.nih.nlm.nls.lvg.Flows.ToFruitfulVariants Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lvg2010dist Show documentation
Show all versions of lvg2010dist Show documentation
The LVG tools are used by Apache cTAKES.
The newest version!
package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import java.sql.*;
import gov.nih.nlm.nls.lvg.Lib.*;
import gov.nih.nlm.nls.lvg.Db.*;
import gov.nih.nlm.nls.lvg.Trie.*;
/*****************************************************************************
* This class generates all fruitful variants. The variants are created by
* generating inflectional variants, spelling variants, acronyms and
* abbreviations, expansions, derivational variants (recursively), synonyms
* (recursively), and combinations of these.
*
* History:
*
*
*
* @author NLM NLS Development Team
*
* @see "Design Document"
*
* @version V-2010
****************************************************************************/
public class ToFruitfulVariants extends Transformation implements Cloneable
{
// public methods
/**
 * Generates the fruitful variants of one input item.
 * <p>
 * Two sets are merged with AddToVariantList: the no-operation set of the
 * input, and the spelling/inflectional variants of the set returned by
 * GetVariantSet. The merged list is then passed through GetFinalSet together
 * with the bit name of the FRUITFUL_VARIANTS flow.
 *
 * @param in a LexItem as the input for this flow component
 * @param conn LVG database connection
 * @param trieI LVG Ram trie for inflections
 * @param trieD LVG Ram trie for derivations
 * @param detailsFlag a boolean flag for processing details information
 * @param mutateFlag a boolean flag for processing mutate information
 *
 * @return Vector - the results from this flow component
 * of LexItems
 *
 * @throws SQLException propagated from the helpers that declare it
 *
 * @see DbBase
 */
public static Vector Mutate(LexItem in, Connection conn,
    RamTrie trieI, RamTrie trieD, boolean detailsFlag, boolean mutateFlag)
    throws SQLException
{
    // seed the result with the input's own category/inflection readings
    // (lexiconOnly = false)
    Vector fruitful = GetNoOperationSet(in, conn, trieI, detailsFlag, false);

    // variant set (n, b, A/a, y, d, A/ad, yd, dy, A/ady, ydy, A/ay, yA/a)
    // expanded with its spelling and inflectional variants
    Vector expanded = GetSpellingAndInflections(
        GetVariantSet(in, conn, trieI, trieD, detailsFlag),
        conn, trieI, detailsFlag, OutputFilter.LVG_OR_ALL);

    // merge both sets into one list
    fruitful = AddToVariantList(fruitful, expanded);

    // de-duplicate, write mutate information and relabel the flow history
    return GetFinalSet(fruitful,
        Flow.GetBitName(Flow.FRUITFUL_VARIANTS, 1), mutateFlag);
}
/**
 * A unit test driver for this flow component.
 * <p>
 * Loads the "data.config.lvg" configuration, runs Mutate on the term taken
 * from the command line via GetTestStr (fallback "neurological") with both
 * the details and mutate flags enabled, and prints the results.
 *
 * @param args command line arguments, handed to GetTestStr
 */
public static void main(String[] args)
{
    // load config file
    Configuration conf = new Configuration("data.config.lvg", true);
    String testStr = GetTestStr(args, "neurological");    // get input String
    int minTermLen = Integer.parseInt(
        conf.GetConfiguration(Configuration.MIN_TERM_LENGTH));
    String lvgDir = conf.GetConfiguration(Configuration.LVG_DIR);
    int minTrieStemLength = Integer.parseInt(
        conf.GetConfiguration(Configuration.DIR_TRIE_STEM_LENGTH));

    // Mutate: connect to DB
    LexItem in = new LexItem(testStr);
    Vector<LexItem> outs = new Vector<LexItem>();
    Connection conn = null;
    try
    {
        conn = DbBase.OpenConnection(conf);
        RamTrie trieI = new RamTrie(true, minTermLen, lvgDir, 0);
        RamTrie trieD =
            new RamTrie(false, minTermLen, lvgDir, minTrieStemLength);
        if(conn != null)
        {
            outs = ToFruitfulVariants.Mutate(in, conn, trieI, trieD, true,
                true);
        }
    }
    catch (Exception e)
    {
        System.err.println(e.getMessage());
    }
    finally
    {
        // Close in finally so the connection is released even when the
        // trie construction or Mutate throws; previously the close call was
        // skipped on any exception.
        if(conn != null)
        {
            try
            {
                DbBase.CloseConnection(conn, conf);
            }
            catch (Exception e)
            {
                System.err.println(e.getMessage());
            }
        }
    }
    PrintResults(in, outs);        // print out results
}
// package methods
// Expands a list of items with its spelling variants and then with the
// inflections of that enlarged list. Every merge goes through
// AddToVariantList, so duplicate keys are resolved there.
static Vector GetSpellingAndInflections(Vector ins,
    Connection conn, RamTrie trieI, boolean detailsFlag, int restrictFlag)
{
    // List 7: spelling variants of the input, merged with the input itself
    Vector withSpellings = AddToVariantList(
        GetSpellingVariants(ins, conn, detailsFlag), ins);

    // List 8: inflections of everything collected so far
    Vector inflected = GetInflections(withSpellings, conn, trieI,
        detailsFlag, restrictFlag);

    // result: list 7 merged with list 8
    return AddToVariantList(withSpellings, inflected);
}
// Builds the final output list of the flow:
// 1. keeps one LexItem per (lower-cased target term, first category,
//    target category) key, preferring the smaller CalculateDistance of the
//    flow history (the first one seen wins a tie);
// 2. passes the survivors through OutputFilter.GetEnhancedSimpleInflection
//    (per the original notes: drops inflections greater than 256);
// 3. lets UpdateVariants write the mutate information
//    (cat|flow history|distance|tag) and set the flow history to flowName.
//
// The collections are parameterized with LexItem: with raw types the
// element assignments below do not compile.
static Vector<LexItem> GetFinalSet(Vector<LexItem> ins, String flowName,
    boolean mutateFlag)
{
    Hashtable<TermCatCatKey, LexItem> ht
        = new Hashtable<TermCatCatKey, LexItem>();

    // filter out item with same term, category but longer distance
    for(int i = 0; i < ins.size(); i++)
    {
        LexItem tempRec = ins.elementAt(i);
        TermCatCatKey tempKey = new TermCatCatKey(
            tempRec.GetTargetTerm().toLowerCase(),
            GetFirstCategory(tempRec),
            (int) tempRec.GetTargetCategory().GetValue());

        // Hashtable never stores null values, so a null lookup means the
        // key has not been seen yet
        LexItem existRec = ht.get(tempKey);
        if(existRec == null)
        {
            ht.put(tempKey, tempRec);
        }
        else
        {
            int existDist = CalculateDistance(existRec.GetFlowHistory());
            int tempDist = CalculateDistance(tempRec.GetFlowHistory());
            if(existDist > tempDist)
            {
                // remove + put is kept (instead of a bare put) so entry
                // placement in the table stays as it was before
                ht.remove(tempKey);
                ht.put(tempKey, tempRec);
            }
        }
    }
    Vector<LexItem> uniqueList = new Vector<LexItem>(ht.values());

    // filter out all inflection greater than 256
    Vector<LexItem> legalList
        = OutputFilter.GetEnhancedSimpleInflection(uniqueList);

    // clean up history and calculate distance
    return UpdateVariants(legalList, flowName, mutateFlag);
}
// package methods
// Collects the variant set of the input in the order
// n, b, A/a, y, d, A/ad, yd, dy, A/ady, ydy, A/ay, yA/a
// (A/a: acronyms and expansions, y: recursive synonyms, d: recursive
// derivations). The partial lists are merged with AddToVariantList.
static Vector GetVariantSet(LexItem in, Connection conn,
    RamTrie trieI, RamTrie trieD, boolean detailsFlag) throws SQLException
{
    // base forms (b/n) with category information, then their spelling
    // variants
    Vector bases = GetBaseSet(in, conn, trieI, detailsFlag);
    Vector spelled = GetSpellingVariants(bases, conn, detailsFlag);

    // List 1: spelling variants + A/a + y
    Vector acronyms = GetAcronyms(spelled, conn, detailsFlag);    // A & a
    Vector synonyms = GetRecursiveSynonyms(spelled, conn, detailsFlag);
    Vector firstLevel = new Vector(spelled);
    firstLevel = AddToVariantList(firstLevel, acronyms);
    firstLevel = AddToVariantList(firstLevel, synonyms);

    // List 2: d, A/ad, yd
    Vector derivations
        = GetRecursiveDerivations(firstLevel, conn, trieD, detailsFlag);
    // List 3: dy, A/ady, ydy
    Vector derivationSynonyms
        = GetRecursiveSynonyms(derivations, conn, detailsFlag);
    // List 4: A/ay
    Vector acronymSynonyms
        = GetRecursiveSynonyms(acronyms, conn, detailsFlag);
    // List 5: yA/a
    Vector synonymAcronyms = GetAcronyms(synonyms, conn, detailsFlag);

    // List 6: lists 1 to 5 merged in that order, starting from an empty list
    Vector[] parts = {firstLevel, derivations, derivationSynonyms,
        acronymSynonyms, synonymAcronyms};
    Vector all = new Vector();
    for(int p = 0; p < parts.length; p++)
    {
        all = AddToVariantList(all, parts[p]);
    }
    return all;
}
// Builds the "no operation" readings of the input term: one LexItem per
// distinct (target category, target inflection) pair.
//
// Facts: records from DbInflection.GetCatInfl are used when their inflection
// is not above Inflection.LOWER_INFLECTIONS, or when their category is "aux"
// or "modal". Mutate information is "category<fs>inflection".
// Rules: only when lexiconOnly is false and no fact was accepted, the
// readings from trieI.GetCatInflsByRules are used, again limited to
// inflections not above Inflection.LOWER_INFLECTIONS.
// NOTE(review): on the rule path the mutate information holds the category
// only (no inflection field) - confirm this difference is intended.
//
// The collections are parameterized: with raw types the element assignments
// below do not compile. The write-only list of accepted items is replaced by
// a check on the result table.
static Vector<LexItem> GetNoOperationSet(LexItem in, Connection conn,
    RamTrie trieI, boolean detailsFlag, boolean lexiconOnly)
    throws SQLException
{
    // Retrieve category and inflections
    String term = in.GetSourceTerm();
    String details = null;
    if(detailsFlag == true)
    {
        details = "No Operation (Retrieve Cat/Infl)";
    }

    // unique hash table, keyed by target category and inflection
    Hashtable<CatInflKey, LexItem> ht
        = new Hashtable<CatInflKey, LexItem>();

    // Facts:
    Vector<InflectionRecord> facts = DbInflection.GetCatInfl(term, conn);
    for(int i = 0; i < facts.size(); i++)
    {
        InflectionRecord record = facts.elementAt(i);

        // check inflections: must be a lower inflection, or aux, or modal
        if((record.GetInflection() > Inflection.LOWER_INFLECTIONS)
            && (record.GetCategory() != Category.ToValue("aux"))
            && (record.GetCategory() != Category.ToValue("modal")))
        {
            continue;
        }

        // assign the category and inflection
        String mutate = Long.toString(record.GetCategory())
            + GlobalBehavior.GetFieldSeparator()
            + Long.toString(record.GetInflection());
        LexItem temp = UpdateLexItem(in, term, Flow.NO_OPERATION,
            record.GetCategory(), record.GetInflection(), details, mutate);
        CatInflKey tempKey =
            new CatInflKey((int) temp.GetTargetCategory().GetValue(),
            temp.GetTargetInflection().GetValue());
        ht.put(tempKey, temp);
    }

    // Rules: the table is empty exactly when no fact was accepted
    if((lexiconOnly == false) && (ht.isEmpty() == true))
    {
        Vector<CatInfl> rules = trieI.GetCatInflsByRules(term,
            in.GetSourceCategory().GetValue(),
            in.GetSourceInflection().GetValue());
        for(int i = 0; i < rules.size(); i++)
        {
            CatInfl rec = rules.elementAt(i);

            // check inflections
            if(rec.GetInflection() > Inflection.LOWER_INFLECTIONS)
            {
                continue;
            }
            String mutate = Long.toString(rec.GetCategory());
            LexItem temp = UpdateLexItem(in, term, Flow.NO_OPERATION,
                rec.GetCategory(), rec.GetInflection(), details,
                mutate);
            CatInflKey tempKey =
                new CatInflKey((int) temp.GetTargetCategory().GetValue(),
                temp.GetTargetInflection().GetValue());
            ht.put(tempKey, temp);
        }
    }

    // convert hash table to a vector
    return new Vector<LexItem>(ht.values());
}
// Merges newList into orgList and returns the result as a new Vector.
// Items are keyed by (target term, first category, target category):
// - every item of orgList is stored under its key (a later item of orgList
//   with the same key replaces an earlier one);
// - an item of newList is added when its key is new, and replaces the stored
//   item only when its flow history has a strictly smaller
//   CalculateDistance.
// The order of the returned Vector is the iteration order of the hash table.
//
// The collections are parameterized with LexItem: with raw types the
// element assignments below do not compile.
static Vector<LexItem> AddToVariantList(Vector<LexItem> orgList,
    Vector<LexItem> newList)
{
    Hashtable<TermCatCatKey, LexItem> ht
        = new Hashtable<TermCatCatKey, LexItem>(
        orgList.size() + newList.size());

    // put all items of orgList into hash table
    for(int i = 0; i < orgList.size(); i++)
    {
        LexItem curOrg = orgList.elementAt(i);
        TermCatCatKey curOrgKey = new TermCatCatKey(curOrg.GetTargetTerm(),
            GetFirstCategory(curOrg),
            (int) curOrg.GetTargetCategory().GetValue());
        ht.put(curOrgKey, curOrg);
    }

    // put all items of newList into hash table
    for(int i = 0; i < newList.size(); i++)
    {
        LexItem curNew = newList.elementAt(i);
        TermCatCatKey curNewKey = new TermCatCatKey(curNew.GetTargetTerm(),
            GetFirstCategory(curNew),
            (int) curNew.GetTargetCategory().GetValue());

        // Hashtable never stores null values, so null means "key not seen"
        LexItem existRec = ht.get(curNewKey);
        if(existRec == null)
        {
            ht.put(curNewKey, curNew);
        }
        else
        {
            int existDist = CalculateDistance(existRec.GetFlowHistory());
            int curNewDist = CalculateDistance(curNew.GetFlowHistory());
            if(existDist > curNewDist)
            {
                // remove + put is kept (instead of a bare put) so entry
                // placement in the table stays as it was before
                ht.remove(curNewKey);
                ht.put(curNewKey, curNew);
            }
        }
    }
    return new Vector<LexItem>(ht.values());
}
/**
 * Reads the category stored as the first field of a LexItem's mutate
 * information.
 * <p>
 * The mutate information is split on GlobalBehavior.GetFieldSeparator(),
 * the same separator this class uses when it builds that string, instead of
 * a hard-coded "|". The result is identical whenever the configured
 * separator is "|".
 *
 * @param in the LexItem whose mutate information is inspected
 *
 * @return the first field parsed as an int; 0 when there is no mutate
 * information, it contains no field, or the first field is not an integer
 */
public static int GetFirstCategory(LexItem in)
{
    int category = 0;
    String mutateInfo = in.GetMutateInformation();
    if(mutateInfo != null)
    {
        try
        {
            StringTokenizer buf = new StringTokenizer(mutateInfo,
                GlobalBehavior.GetFieldSeparator());
            category = Integer.parseInt(buf.nextToken());
        }
        catch (NumberFormatException ignored)
        {
            // first field is not an integer: fall back to category 0
        }
        catch (NoSuchElementException ignored)
        {
            // mutate information holds no field at all: category 0
        }
    }
    return category;
}
// private methods
// Returns the base forms of the input: the output of ToUninflectTerm.Mutate
// combined by CombineRecords.Combine(..., BY_CATEGORY). Each returned item
// gets "category<fs>inflection" (target values) as mutate information, and
// items whose target term equals the input's source term get the
// NO_OPERATION flow name as flow history.
//
// The collections are parameterized with LexItem: with raw types the
// element assignment below does not compile. The unused initial Vector
// allocation is removed.
private static Vector<LexItem> GetBaseSet(LexItem in, Connection conn,
    RamTrie trieI, boolean detailsFlag) throws SQLException
{
    String inTerm = in.GetSourceTerm();

    // Get base form by using uninflected a term
    Vector<LexItem> b
        = ToUninflectTerm.Mutate(in, conn, trieI, detailsFlag, false);

    // combine record with same term and category
    Vector<LexItem> fb
        = CombineRecords.Combine(b, CombineRecords.BY_CATEGORY);

    // change flow history to "n" if the term are the same as input
    // add category to mutate information
    String noOperationFlowName = Flow.GetBitName(Flow.NO_OPERATION, 1);
    for(int i = 0; i < fb.size(); i++)
    {
        LexItem temp = fb.elementAt(i);
        String curCategory =
            Long.toString(temp.GetTargetCategory().GetValue());
        String curInflection =
            Long.toString(temp.GetTargetInflection().GetValue());
        String mutate = curCategory + GlobalBehavior.GetFieldSeparator()
            + curInflection;
        temp.SetMutateInformation(mutate);
        if(inTerm.equals(temp.GetTargetTerm()) == true)
        {
            temp.SetFlowHistory(noOperationFlowName);
        }
    }
    return fb;
}
// Sums a weight for every character of the flow history:
// 'i' and 'b' count 1; 'a', 'A' and 'y' count 2; 'd' counts 3;
// every other character counts 0.
private static int CalculateDistance(String history)
{
    int total = 0;
    for(char step : history.toCharArray())
    {
        if((step == 'i') || (step == 'b'))
        {
            total += 1;
        }
        else if((step == 'a') || (step == 'A') || (step == 'y'))
        {
            total += 2;
        }
        else if(step == 'd')
        {
            total += 3;
        }
        // anything else contributes nothing
    }
    return total;
}
// Finalizes every item of outList in place and returns the items in a new
// Vector, in the same order.
// When mutateFlag is set, the mutate information becomes
// <old mutate info><fs><flow history><fs><distance><fs><tag><fs>
// where <fs> is GlobalBehavior.GetFieldSeparator() and <distance> is
// CalculateDistance of the flow history. Afterwards the flow history is
// replaced by flowName.
//
// The collections are parameterized with LexItem: with raw types the
// element assignment below does not compile. The local string is assembled
// with a StringBuilder.
private static Vector<LexItem> UpdateVariants(Vector<LexItem> outList,
    String flowName, boolean mutateFlag)
{
    Vector<LexItem> outs = new Vector<LexItem>();

    // go through all outputs
    for(int i = 0; i < outList.size(); i++)
    {
        LexItem cur = outList.elementAt(i);

        // mutate information
        if(mutateFlag == true)
        {
            String fs = GlobalBehavior.GetFieldSeparator();
            String flowHis = cur.GetFlowHistory();
            String cat = cur.GetMutateInformation();
            StringBuilder buffer = new StringBuilder();
            buffer.append(cat);
            buffer.append(fs);
            buffer.append(flowHis);
            buffer.append(fs);
            buffer.append(CalculateDistance(flowHis));
            buffer.append(fs);
            // add tag information
            buffer.append(cur.GetTag());
            buffer.append(fs);

            // assign new attribute properties
            cur.SetMutateInformation(buffer.toString());
        }

        // flow history: must be set after the mutate information, which
        // still needs the original history
        cur.SetFlowHistory(flowName);
        outs.addElement(cur);
    }
    return outs;
}
// Returns the recursive derivations of all items in ins.
// The items are first converted with LexItem.TargetsToSources so that each
// target becomes the source of the next step; every derivation inherits the
// mutate information of the item it was derived from.
//
// The collections are parameterized with LexItem: with raw types the
// element assignment in the loop does not compile.
private static Vector<LexItem> GetRecursiveDerivations(Vector<LexItem> ins,
    Connection conn, RamTrie trieD, boolean detailsFlag)
{
    Vector<LexItem> outs = new Vector<LexItem>();

    // transfer from out of ins to in of ins
    Vector<LexItem> tempIns = LexItem.TargetsToSources(ins);

    // get derivations
    for(int i = 0; i < tempIns.size(); i++)
    {
        LexItem curIn = tempIns.elementAt(i);
        Vector<LexItem> derivations =
            ToRecursiveDerivations.Mutate(curIn, conn,
            trieD, OutputFilter.LVG_ONLY, detailsFlag, false, true);
        PassCategory(derivations, curIn.GetMutateInformation());
        outs.addAll(derivations);
    }
    return outs;
}
// Returns, for every item in ins, its acronyms (ToAcronyms) followed by its
// expansions (ToExpansions).
// The items are first converted with LexItem.TargetsToSources; every result
// inherits the mutate information of the item it was generated from.
//
// The collections are parameterized with LexItem: with raw types the
// element assignment in the loop does not compile.
private static Vector<LexItem> GetAcronyms(Vector<LexItem> ins,
    Connection conn, boolean detailsFlag)
{
    Vector<LexItem> outs = new Vector<LexItem>();

    // transfer from out of ins to in of ins
    Vector<LexItem> tempIns = LexItem.TargetsToSources(ins);

    // get acronyms
    for(int i = 0; i < tempIns.size(); i++)
    {
        LexItem curIn = tempIns.elementAt(i);
        Vector<LexItem> acronyms = ToAcronyms.Mutate(curIn, conn,
            detailsFlag, false);
        PassCategory(acronyms, curIn.GetMutateInformation());
        outs.addAll(acronyms);

        Vector<LexItem> expansions = ToExpansions.Mutate(curIn, conn,
            detailsFlag, false);
        PassCategory(expansions, curIn.GetMutateInformation());
        outs.addAll(expansions);
    }
    return outs;
}
// Copies mutateInfo onto every item of ins. In this class the mutate
// information starts with the category, so this passes the category of a
// source item on to the variants generated from it.
//
// The parameter is typed as Vector<LexItem>: with a raw Vector the element
// assignment in the loop does not compile.
private static void PassCategory(Vector<LexItem> ins, String mutateInfo)
{
    for(int i = 0; i < ins.size(); i++)
    {
        LexItem cur = ins.elementAt(i);
        cur.SetMutateInformation(mutateInfo);
    }
}
// Returns the recursive synonyms of all items in ins.
// The items are first converted with LexItem.TargetsToSources; every
// synonym inherits the mutate information of the item it was generated
// from.
//
// The collections are parameterized with LexItem: with raw types the
// element assignment in the loop does not compile.
private static Vector<LexItem> GetRecursiveSynonyms(Vector<LexItem> ins,
    Connection conn, boolean detailsFlag)
{
    Vector<LexItem> outs = new Vector<LexItem>();

    // transfer from out of ins to in of ins
    Vector<LexItem> tempIns = LexItem.TargetsToSources(ins);

    // get recursive synonyms
    for(int i = 0; i < tempIns.size(); i++)
    {
        LexItem curIn = tempIns.elementAt(i);
        Vector<LexItem> rSynonyms = ToRecursiveSynonyms.Mutate(curIn, conn,
            detailsFlag, false, true);
        PassCategory(rSynonyms, curIn.GetMutateInformation());
        outs.addAll(rSynonyms);
    }
    return outs;
}
// Returns the inflections of all items in ins.
// The items are first converted with LexItem.TargetsToSources. Before
// inflecting, the source inflection of each item is replaced by the
// inflection of CatInfl.ConvertToBase for its (category, inflection) pair.
// Every result inherits the mutate information of the item it was generated
// from.
//
// The collections are parameterized with LexItem: with raw types the
// element assignment in the loop does not compile.
private static Vector<LexItem> GetInflections(Vector<LexItem> ins,
    Connection conn, RamTrie trieI, boolean detailsFlag, int restrictFlag)
{
    Vector<LexItem> outs = new Vector<LexItem>();

    // transfer from out of ins to in of ins
    Vector<LexItem> tempIns = LexItem.TargetsToSources(ins);

    // get inflections
    for(int i = 0; i < tempIns.size(); i++)
    {
        LexItem curIn = tempIns.elementAt(i);

        // TBD: toxica -> toxicum (singular, not base)
        CatInfl converted = CatInfl.ConvertToBase(new CatInfl(
            curIn.GetSourceCategory().GetValue(),
            curIn.GetSourceInflection().GetValue()));
        curIn.SetSourceInflection(converted.GetInflection());

        // inflect, restricted by restrictFlag
        Vector<LexItem> infls = ToInflection.Mutate(curIn, conn, trieI,
            restrictFlag, detailsFlag, false);
        PassCategory(infls, curIn.GetMutateInformation());
        outs.addAll(infls);
    }
    return outs;
}
// Returns the spelling variants of all items in ins.
// The items are first converted with LexItem.TargetsToSources; every
// variant inherits the mutate information of the item it was generated
// from. For results whose target term equals the source term (ignoring
// case) the last two characters of the flow history are dropped again, so
// that no spelling-variant step is recorded for them.
// NOTE(review): this presumes the two trailing characters are the "+s" step
// added by ToSpellingVariants - confirm.
//
// The collections are parameterized with LexItem: with raw types the
// element assignments in the loops do not compile. The history trimming is
// guarded against a missing or too-short history, which used to raise a
// NullPointerException / StringIndexOutOfBoundsException.
private static Vector<LexItem> GetSpellingVariants(Vector<LexItem> ins,
    Connection conn, boolean detailsFlag)
{
    Vector<LexItem> outs = new Vector<LexItem>();

    // transfer from out of ins to in of ins
    Vector<LexItem> tempIns = LexItem.TargetsToSources(ins);

    // get spelling variants
    for(int i = 0; i < tempIns.size(); i++)
    {
        LexItem curIn = tempIns.elementAt(i);
        Vector<LexItem> sv = ToSpellingVariants.Mutate(curIn, conn,
            detailsFlag, false);
        PassCategory(sv, curIn.GetMutateInformation());
        outs.addAll(sv);
    }

    // remove "+s" if it is not a real spelling variant
    for(int i = 0; i < outs.size(); i++)
    {
        LexItem temp = outs.elementAt(i);
        if(temp.GetSourceTerm().equalsIgnoreCase(temp.GetTargetTerm()))
        {
            String flowHistory = temp.GetFlowHistory();
            if(flowHistory != null)
            {
                // never cut below an empty history
                int endIndex = Math.max(0, flowHistory.length() - 2);
                temp.SetFlowHistory(flowHistory.substring(0, endIndex));
            }
        }
    }
    return outs;
}
// data members
private static final String INFO = "Generate Fruitful Variants";
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy