![JAR search and dependency download from the Maven repository](/logo.png)
gov.nih.nlm.nls.lvg.Api.ToAsciiApi Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lvg2010dist Show documentation
Show all versions of lvg2010dist Show documentation
LVG tools is used by Apache cTAKES.
The newest version!
package gov.nih.nlm.nls.lvg.Api;
import java.util.*;
import java.io.*;
import gov.nih.nlm.nls.lvg.Lib.*;
import gov.nih.nlm.nls.lvg.Flows.*;
/*****************************************************************************
* This class provides an API for ToAscii, which converts UTF-8 text to pure ASCII.
*
* History:
*
*
*
* @author NLM NLS Development Team
*
* @version V-2010
****************************************************************************/
public class ToAsciiApi
{
    // public constructors

    /**
     * Creates a ToAsciiApi object with the default configuration.
     * With no configuration file given, {@code Init()} falls back to
     * "data.config.lvg" and asks Configuration to use the class path.
     */
    public ToAsciiApi()
    {
        Init();
    }

    /**
     * Creates a ToAsciiApi object using a specified configuration file.
     *
     * @param configFile the absolute path of the configuration file
     */
    public ToAsciiApi(String configFile)
    {
        configFile_ = configFile;
        Init();
    }

    /**
     * Creates a ToAsciiApi object using an already built configuration
     * object. The object is used as is (no configuration file is read).
     *
     * @param conf lvg configuration object
     */
    public ToAsciiApi(Configuration conf)
    {
        conf_ = conf;
        Init();
    }

    /**
     * Creates a ToAsciiApi object with the default configuration and a set
     * of properties that overwrite the values read from it.
     *
     * @param properties properties to be overwritten in config
     */
    public ToAsciiApi(Hashtable properties)
    {
        properties_ = properties;
        Init();
    }

    /**
     * Creates a ToAsciiApi object using a specified configuration file with
     * properties to be overwritten.
     *
     * @param configFile the absolute path of the configuration file
     * @param properties properties to be overwritten in config
     */
    public ToAsciiApi(String configFile, Hashtable properties)
    {
        configFile_ = configFile;
        properties_ = properties;
        Init();
    }

    // public methods

    /**
     * Gets the ASCII string of an input string.
     *
     * @param inTerm an input term in a string format to be mutated
     *
     * @return String - target term of the LexItem produced by
     *         {@link #Mutate(LexItem)}
     */
    public String Mutate(String inTerm)
    {
        // wrap the input term in a LexItem and delegate
        LexItem result = Mutate(new LexItem(inTerm));
        return result.GetTargetTerm();
    }

    /**
     * Gets the ASCII result of an input LexItem without detail information.
     *
     * @param in an input LexItem to be mutated
     *
     * @return LexItem - ASCII result
     */
    public LexItem Mutate(LexItem in)
    {
        return Mutate(in, false);
    }

    /**
     * Gets the ASCII result of an input LexItem. The input goes through two
     * flows in sequence: ToUnicodeCoreNorm (-f:q7), then ToStripMapUnicode
     * (-f:q8) applied to the first result of the first flow.
     *
     * @param in an input LexItem to be mutated
     * @param showDetails a boolean flag of showing details, passed on to
     *        both flows
     *
     * @return LexItem - the first result of the second flow; a new, empty
     *         LexItem if either flow produces no result
     */
    public LexItem Mutate(LexItem in, boolean showDetails)
    {
        // -f:q7, Unicode Core Norm
        // (the trailing false is passed unchanged from the original call;
        // its meaning is defined by the flow class)
        Vector<LexItem> coreNormOuts = ToUnicodeCoreNorm.Mutate(in,
            symbolMap_, unicodeMap_, ligatureMap_, diacriticMap_,
            showDetails, false);

        // Guard the first stage the same way the second stage is guarded:
        // elementAt(0) on an empty Vector throws
        // ArrayIndexOutOfBoundsException.
        if((coreNormOuts == null) || (coreNormOuts.isEmpty() == true))
        {
            return new LexItem();
        }

        // -f:q8, Strip and Map: the target of stage one becomes the source
        // of stage two
        LexItem stripMapIn = LexItem.TargetToSource(coreNormOuts.elementAt(0));
        Vector<LexItem> stripMapOuts = ToStripMapUnicode.Mutate(stripMapIn,
            nonStripMap_, showDetails, false);

        // should have only 1 output
        LexItem out = new LexItem();
        if(stripMapOuts.size() > 0)
        {
            out = stripMapOuts.elementAt(0);
        }
        return out;
    }

    /**
     * Gets the configuration object used by this API.
     *
     * @return Configuration
     */
    public Configuration GetConfiguration()
    {
        return conf_;
    }

    // private methods

    // init data: resolve the configuration, apply overwrite properties, and
    // load the five mapping tables used by the two flows in Mutate()
    private void Init()
    {
        // no configuration file given: use the default name and let
        // Configuration look it up through the class path
        boolean useClassPath = false;
        if(configFile_ == null)
        {
            useClassPath = true;
            configFile_ = "data.config.lvg";
        }

        // read in configuration file, unless a Configuration was supplied
        if(conf_ == null)
        {
            conf_ = new Configuration(configFile_, useClassPath);
        }

        // overwrite properties on top of the configuration
        if(properties_ != null)
        {
            conf_.OverwriteProperties(properties_);
        }

        // load each table once
        if(symbolMap_ == null)
        {
            symbolMap_ = ToMapSymbolToAscii.GetSymbolMapFromFile(conf_);
        }
        if(unicodeMap_ == null)
        {
            unicodeMap_ = ToMapUnicodeToAscii.GetUnicodeMapFromFile(conf_);
        }
        if(ligatureMap_ == null)
        {
            ligatureMap_ = ToSplitLigatures.GetLigatureMapFromFile(conf_);
        }
        if(diacriticMap_ == null)
        {
            diacriticMap_ = ToStripDiacritics.GetDiacriticMapFromFile(conf_);
        }
        if(nonStripMap_ == null)
        {
            nonStripMap_ = ToStripMapUnicode.GetNonStripMapFromFile(conf_);
        }
    }

    // data members
    // NOTE(review): the key/value types of the Hashtables are not visible
    // in this file, so they are left raw - confirm against the flow classes
    // before parameterizing them.
    private String configFile_ = null;      // configuration file
    private Configuration conf_ = null;     // configuration object
    private Hashtable properties_ = null;   // overwrite properties
    private Hashtable symbolMap_ = null;    // from ToMapSymbolToAscii
    private Hashtable unicodeMap_ = null;   // from ToMapUnicodeToAscii
    private Hashtable ligatureMap_ = null;  // from ToSplitLigatures
    private Hashtable diacriticMap_ = null; // from ToStripDiacritics
    private Hashtable nonStripMap_ = null;  // from ToStripMapUnicode
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy