![JAR search and dependency download from the Maven repository](/logo.png)
gov.nih.nlm.nls.lvg.Flows.ToDerivation Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lvg2010dist Show documentation
Show all versions of lvg2010dist Show documentation
LVG tools is used by Apache cTAKES.
The newest version!
package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import java.sql.*;
import gov.nih.nlm.nls.lvg.Lib.*;
import gov.nih.nlm.nls.lvg.Util.*;
import gov.nih.nlm.nls.lvg.Db.*;
import gov.nih.nlm.nls.lvg.Trie.*;
/*****************************************************************************
* This class generates derivational variants.
* Derivational variants are terms which are related to the original
* term but do not share the same meaning. Often, a derivational variant
* has a different syntactic category from the original term. Derivational
* variants are pre-computed and stored in the Derivation table of the Lvg
* database (facts). Derivations can also be generated by derivation rules
* through the Lvg trie.
*
* History:
*
*
*
* @author NLM NLS Development Team
*
* @see
* Design Document
*
* @version V-2010
****************************************************************************/
public class ToDerivation extends Transformation implements Cloneable
{
// public methods
/**
 * Performs the mutation of this flow component: generates the derivational
 * variants of the input term by delegating to GetDerivations with this
 * flow's INFO header.
 *
 * @param in a LexItem as the input for this flow component
 * @param conn LVG database connection
 * @param trie LVG persistent trie
 * @param restrictFlag a numerical flag to restrict output to LVG_ONLY,
 * LVG_OR_ALL, or ALL (defined in OutputFilter)
 * @param detailsFlag a boolean flag for processing details information
 * @param mutateFlag a boolean flag for processing mutate information
 *
 * @return Vector of LexItem - results from this flow component
 *
 * @see DbBase
 * @see OutputFilter
 */
public static Vector<LexItem> Mutate(LexItem in, Connection conn,
    RamTrie trie, int restrictFlag, boolean detailsFlag, boolean mutateFlag)
{
    // Mutate: facts (database) + rules (trie), restricted and sorted
    return GetDerivations(in, conn, trie, restrictFlag, INFO,
        detailsFlag, mutateFlag);
}
/**
 * A unit test driver for this flow component.
 *
 * Loads the LVG configuration, opens a database connection and a derivation
 * trie, runs Mutate on the test term with the LVG_ONLY restriction and both
 * details and mutate information enabled, then prints the results.
 *
 * @param args command line arguments; the term to test is obtained from
 * them through GetTestStr (with "help" passed as its second argument)
 */
public static void main(String[] args)
{
    // load config file
    Configuration conf = new Configuration("data.config.lvg", true);
    String testStr = GetTestStr(args, "help");    // get input String
    int minTermLen = Integer.parseInt(
        conf.GetConfiguration(Configuration.MIN_TERM_LENGTH));
    String lvgDir = conf.GetConfiguration(Configuration.LVG_DIR);
    int minTrieStemLength = Integer.parseInt(
        conf.GetConfiguration(Configuration.DIR_TRIE_STEM_LENGTH));

    // Mutate: connect to DB
    LexItem in = new LexItem(testStr, Category.ALL_BIT_VALUE,
        Inflection.ALL_BIT_VALUE);
    Vector<LexItem> outs = new Vector<LexItem>();
    Connection conn = null;
    // true once OpenConnection has returned, i.e. there is something to close
    boolean opened = false;
    try
    {
        conn = DbBase.OpenConnection(conf);
        opened = true;
        boolean isInflection = false;
        RamTrie trie = new RamTrie(isInflection, minTermLen, lvgDir,
            minTrieStemLength);
        if(conn != null)
        {
            outs = ToDerivation.Mutate(in, conn, trie,
                OutputFilter.LVG_ONLY, true, true);
        }
    }
    catch (Exception e)
    {
        System.err.println(e.getMessage());
    }
    finally
    {
        // Close in finally so the connection is released even when building
        // the trie or running Mutate throws.
        if(opened)
        {
            try
            {
                DbBase.CloseConnection(conn, conf);
            }
            catch (Exception closeEx)
            {
                System.err.println(closeEx.getMessage());
            }
        }
    }
    PrintResults(in, outs);        // print out results
}
// private methods
/**
 * Get the derivational variants using both facts (database) and rules
 * (trie). The implementation algorithm is:
 * <ul>
 * <li>Facts:
 *   <ul>
 *   <li>Performs a case insensitive search on the input term and term1
 *       in the derivation table.
 *   <li>Performs a case insensitive search on the input term and term2
 *       in the derivation table.
 *   <li>Skips records whose source category is not legal for the input
 *       category (inflection is not stored in the table and is not
 *       checked).
 *   <li>Assigns term and category for both source and target.
 *   </ul>
 * <li>Rules:
 *   <ul>
 *   <li>Uses persistent trie to apply rules (and check exceptions) on
 *       the input term.
 *   <li>Assigns term and category for both source and target.
 *   </ul>
 * <li>Restricts the results according to the restriction filter.
 * <li>Sorts the output with the LVG_RULE ordering of LexItemComparator.
 * </ul>
 * If a SQLException occurs, an error is written to System.err and whatever
 * results were collected up to that point are sorted and returned.
 *
 * @param in a LexItem as the input for this flow component
 * @param conn LVG database connection
 * @param trie LVG persistent trie
 * @param restrictFlag a numerical flag to restrict output to LVG_ONLY,
 * LVG_OR_ALL, or ALL (defined in OutputFilter)
 * @param infoStr the header of detail information, usually the
 * full name of the current flow
 * @param detailsFlag a boolean flag; when true, detail information
 * (infoStr plus a FACT or RULE marker) is attached to each result
 * @param mutateFlag a boolean flag; when true, mutate information
 * (the fact record or rule string) is attached to each result
 *
 * @return Vector of LexItem - results from this flow component
 *
 * @see DbBase
 * @see OutputFilter
 */
protected static Vector<LexItem> GetDerivations(LexItem in, Connection conn,
    RamTrie trie, int restrictFlag, String infoStr, boolean detailsFlag,
    boolean mutateFlag)
{
    // init the input string and output Vector
    String inStr = in.GetSourceTerm();
    Vector<LexItem> outs = new Vector<LexItem>();
    long inCat = in.GetSourceCategory().GetValue();
    long inInfl = in.GetSourceInflection().GetValue();
    try
    {
        // Fact: get derivation from database
        Vector<DerivationRecord> factList
            = DbDerivation.GetDerivations(inStr, conn);
        // update LexItems
        for(DerivationRecord record : factList)
        {
            String term = record.GetTarget();
            long curCat = record.GetSourceCat();
            // input filter for category;
            // inflection is not in the database table, can't be checked
            if(!InputFilter.IsLegal(inCat, curCat))
            {
                continue;
            }
            // details & mutate
            String details = null;
            String mutate = null;
            if(detailsFlag)
            {
                details = infoStr + " (FACT)";
            }
            if(mutateFlag)
            {
                mutate = "FACT" + GlobalBehavior.GetFieldSeparator() +
                    record.GetString(GlobalBehavior.GetFieldSeparator());
            }
            // facts always yield the base inflection
            LexItem temp = UpdateLexItem(in, term, Flow.DERIVATION,
                record.GetTargetCat(),
                Inflection.GetBitValue(Inflection.BASE_BIT),
                details, mutate);
            outs.addElement(temp);
        }

        // Rule: use trie to get the rule generated derivations
        Vector<RuleResult> ruleList =
            trie.GetDerivationsByRules(inStr, inCat, inInfl, true);
        // update LexItems
        for(RuleResult record : ruleList)
        {
            String term = record.GetOutTerm();
            // details & mutate
            String details = null;
            String mutate = null;
            if(detailsFlag)
            {
                details = infoStr + " (RULE|" + record.GetRuleString()
                    + ")";
            }
            if(mutateFlag)
            {
                mutate = "RULE" + GlobalBehavior.GetFieldSeparator()
                    + record.GetRuleString()
                    + GlobalBehavior.GetFieldSeparator();
            }
            LexItem temp = UpdateLexItem(in, term, Flow.DERIVATION,
                Category.ToValue(record.GetOutCategory()),
                Inflection.ToValue(record.GetOutInflection()),
                details, mutate);
            outs.addElement(temp);
        }

        // Restrict the outputs
        outs = RestrictDerivations(outs, conn, restrictFlag);
    }
    catch (SQLException e)
    {
        // Best effort: report the failure (with its cause) and fall through
        // to return whatever was collected so far.
        System.err.println("** Error: Sql Exception in ToDerivation Flow.");
        System.err.println(e.getMessage());
    }

    // Sort with the LVG rule (per the original note: category, length,
    // case insensitive)
    // NOTE(review): if LexItemComparator is declared generic, parameterize
    // it with LexItem here.
    LexItemComparator lc = new LexItemComparator();
    lc.SetRule(LexItemComparator.LVG_RULE);
    Collections.sort(outs, lc);
    return outs;
}
// protected methods
/**
 * Restricts derivation results according to the restriction flag.
 * <ul>
 * <li>OutputFilter.LVG_OR_ALL: the items verified by
 *     VerifyDerivationsFromLvg; if none are verified, all input items.
 * <li>OutputFilter.ALL: all input items.
 * <li>OutputFilter.LVG_ONLY and any other value: only the items verified
 *     by VerifyDerivationsFromLvg.
 * </ul>
 * The returned Vector is always a new Vector; the input Vector is not
 * modified by this method.
 *
 * @param in the derivation results to restrict
 * @param conn LVG database connection
 * @param restrictFlag LVG_ONLY, LVG_OR_ALL, or ALL (defined in OutputFilter)
 *
 * @return Vector of LexItem - the restricted results
 *
 * @throws SQLException if the database lookup fails
 */
protected static Vector<LexItem> RestrictDerivations(Vector<LexItem> in,
    Connection conn, int restrictFlag) throws SQLException
{
    Vector<LexItem> out;
    switch(restrictFlag)
    {
        case OutputFilter.ALL:
            out = new Vector<LexItem>(in);
            break;
        // uninflected terms in Lvg, if no terms, return all
        case OutputFilter.LVG_OR_ALL:
            out = VerifyDerivationsFromLvg(in, conn);
            if(out.isEmpty())
            {
                out.addAll(in);
            }
            break;
        case OutputFilter.LVG_ONLY:
        default:
            out = VerifyDerivationsFromLvg(in, conn);
            break;
    }
    return out;
}
/**
 * Returns only the items whose target term is an uninflected term in the
 * Lvg database, in their original order.
 *
 * @param in the derivation results to check
 * @param conn LVG database connection
 *
 * @return Vector of LexItem - a new Vector holding the items for which
 * DbUninflection.IsExistUninflectedTerm reports true
 *
 * @throws SQLException if the database lookup fails
 */
private static Vector<LexItem> VerifyDerivationsFromLvg(Vector<LexItem> in,
    Connection conn) throws SQLException
{
    Vector<LexItem> out = new Vector<LexItem>();
    for(LexItem cur : in)
    {
        String derivation = cur.GetTargetTerm();
        if(DbUninflection.IsExistUninflectedTerm(derivation, conn))
        {
            out.addElement(cur);
        }
    }
    return out;
}
// data members
// Header text for detail information: Mutate passes it to GetDerivations as
// infoStr, where " (FACT)" or " (RULE|...)" is appended when details are on.
private static final String INFO = "Derivation";
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy