All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.nih.nlm.nls.lvg.Api.LvgCmdApi Maven / Gradle / Ivy

The newest version!
package gov.nih.nlm.nls.lvg.Api;
import java.util.*;
import java.sql.*;
import java.io.*;
import gov.nih.nlm.nls.lvg.CmdLineSyntax.*;
import gov.nih.nlm.nls.lvg.Lib.*;
import gov.nih.nlm.nls.lvg.Util.*;
import gov.nih.nlm.nls.lvg.Flows.*;
import gov.nih.nlm.nls.lvg.Db.*;
import gov.nih.nlm.nls.lvg.Trie.*;
/*****************************************************************************
* This class provides an LVG API for users to setup flows by defining a 
* Lvg command.  Two methods, ProcessLine( ) and Mutate( ) are used for
* flow mutation for the case of using interface prompt or not.  The input of 
* this API is a term (string).  The output is a Vector of Lvg ouputs .
*
* 

All LVG API consists methods for three phases: *

    *
  • PreProcess: *
    Taking care of preparation work for using Lvg transformations. Such as * initiating configuration data, declaring persistent files for trie, and * establishing database connection. *
  • Process: *
    This is the core program for user's applications. Users define and run * their flow(s) in this process. *
  • PostProcess: *
    Methods in this phases are used to cleanly close database connection, * persistent files, etc.. *
* *

History: *

    *
  • SCR-6, chlu, 08-05-09, update Lexical Tool version to 2010 *
* * @author NLM NLS Development Team * * @see * Design Document * * @version V-2010 ****************************************************************************/ public class LvgCmdApi extends SystemOption { // public constructor /** * Creates an LvgCmdApi object and initiate related data (default). * This constructor is consider as a preprocess method. * *

CleanUp( ) method must be called to close Db connection * after using this object */ public LvgCmdApi() { Init(); } /** * Creates an LvgApi object, initiate related data, using a command string. * This constructor is considered as a preprocess method. * *

CleanUp( ) method must be called to close Db connection * after using this object * * @param optionStr the initial lvg option string */ public LvgCmdApi(String optionStr) { option_ = new Option(optionStr); Init(); } /** * Creates an LvgApi object, initiate related data, using a command string * and the path of configuration file. * This constructor is considered as a preprocess method. * *

CleanUp( ) method must be called to close Db connection * after using this object * * @param optionStr the initial lvg option string * @param configFile the absolute path of the configuration file */ public LvgCmdApi(String optionStr, String configFile) { option_ = new Option(optionStr); configFile_ = configFile; Init(); } /** * Creates an LvgCmdApi object and initiate related data with * properties to be overwritten in configuration. * This constructor is consider as a preprocess method. * *

CleanUp( ) method must be called to close Db connection * after using this object * * @param properties properties to be overwrite in configuration */ public LvgCmdApi(Hashtable properties) { properties_ = properties; Init(); } /** * Creates an LvgApi object, initiate related data, using a command string * with properties to be overwritten in configuration. * This constructor is considered as a preprocess method. * *

CleanUp( ) method must be called to close Db connection * after using this object * * @param optionStr the initial lvg option string * @param properties properties to be overwrite in configuration */ public LvgCmdApi(String optionStr, Hashtable properties) { option_ = new Option(optionStr); properties_ = properties; Init(); } /** * Creates an LvgApi object, initiate related data, using a command string * and the path of configuration file with properties to be overwritten * in configuration. * This constructor is considered as a preprocess method. * *

CleanUp( ) method must be called to close Db connection * after using this object * * @param optionStr the initial lvg option string * @param configFile the absolute path of the configuration file * @param properties properties to be overwrite in configuration */ public LvgCmdApi(String optionStr, String configFile, Hashtable properties) { option_ = new Option(optionStr); configFile_ = configFile; properties_ = properties; Init(); } // public methods /** * Set the prompt string. This method allows users to set their * customerized prompt string for using Lvg prompt interface. * * @param promptStr the customerized prompt string */ public void SetPromptStr(String promptStr) { promptStr_ = promptStr; } /** * Set a list for quiting the program while using Lvg prompt interface. * * @param quitStrList Vector quiting program */ public void SetQuitStrList(Vector quitStrList) { quitStrList_ = new Vector(quitStrList); } /** * Check if the input command is legal. * * @return true or false if the input command is legal or illegal */ public boolean IsLegalOption() { boolean isLegalOption = ((SystemOption.CheckSyntax(option_, GetOption(), false, true)) && (CheckInflectionByCatInfl())); return isLegalOption; } /** * Set the Lvg command for flows. * * @param optionStr Lvg command for flows */ public void SetOption(String optionStr) { option_ = new Option(optionStr); // check input command, and open database connection and tries PreProcess(); // Init Database and Persistant Trie InitDbAndTrie(); } /** * Get Lvg Output Option. * * @return lvg output option object */ public LvgOutputOption GetLvgOutputOption() { return lvgOutputOption_; } /** * A method to get the configuration object * * @return Configuration */ public Configuration GetConfiguration() { return conf_; } /** * Get the Db connection * * @return Connection lvg database connection */ public Connection GetConnection() { return conn_; } /** * Close Lvg database connection and persistent tries. This methods must * be called before exiting LvgCmdApi. It is a method from post process. */ public void CleanUp() { try { Close(); } catch (Exception e) { e.printStackTrace(); } } /** * Performs flow mutation by processing the input line (term). This method * provides Lvg command line interface prompt, read input term from system * input or from a file. The result of this method include output filter. * * @return true or false if the input line is not or is to quit the program */ public boolean ProcessLine() throws SQLException, IOException { // check RunFlag if(runFlag_ == false) { return false; } // Display a prompt to the user if(promptFlag_ == true) { GetPrompt(); } // read line from System.in or a file String line = null; if(inReader_ == null) { inReader_ = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } line = inReader_.readLine(); // check if the input is a command for quiting if((line == null) || (quitStrList_.contains(line))) { return false; } // Process the line Process(line, false); return true; } /** * Print out the Lvg help menu. */ public static void PrintLvgHelp() { LvgHelp.LvgHelp(outWriter_, fileOutput_); } /** * Performs flow mutation by processing the input term. * The result of this method also include go through all output filter. * * @param inTerm the term to be mutated. */ public void Mutate(String inTerm) throws Exception { // Process the mutation on the input term Process(inTerm, false); } /** * Performs flow mutation by processing the input term and return the result * as a string. * The result of this method also include go through all output filter. * * @param inTerm the term to be mutated. * * @return the Lvg output result as a string */ public String MutateToString(String inTerm) throws Exception { Out.ResetOutString(); // Process the mutation on the input term Process(inTerm, true); return Out.GetOutString(); } /** * Get Lvg Flow Specific Options * * @return the object of flow specific options */ public LvgFlowSpecificOption GetFlowSpecificOptions() { return lvgFlowSpecificOption_; } /** * Set minimum Trie Length * * @param minTermLength minimum trie term length */ public void SetMinTermLength(int minTermLength) { if(ramTrieI_ != null) { ramTrieI_.SetMinTermLength(minTermLength); } if(ramTrieD_ != null) { ramTrieD_.SetMinTermLength(minTermLength); } } // protected methods /** * Execute Lvg command for a specified option item in a given system option. * This function needs to be modified if adding a new flow option. * * @param optionItem the option to be executed * @param systemOption the system option that the option item will be * run on */ protected void ExecuteCommand(OptionItem optionItem, Option systemOption) { OptionItem nameItem = OptionUtility.GetItemByName(optionItem, systemOption, false); Vector systemItems = systemOption.GetOptionItems(); if(CheckOption(nameItem, "-C:INT") == true) { lvgOutputOption_.SetCaseFlag( Integer.parseInt(nameItem.GetOptionArgument())); } else if(CheckOption(nameItem, "-cf:INT") == true) { catFieldNum_ = Integer.parseInt(nameItem.GetOptionArgument()); } else if(CheckOption(nameItem, "-if:INT") == true) { inflFieldNum_ = Integer.parseInt(nameItem.GetOptionArgument()); } else if(CheckOption(nameItem, "-ccgi") == true) { lvgOutputOption_.SetMarkEndFlag(true); } else if(CheckOption(nameItem, "-ci") == true) { try { // get config file from environment variable boolean useClassPath = false; String configFile = configFile_; if(configFile == null) { useClassPath = true; configFile = "data.config.lvg"; } Configuration conf = new Configuration(configFile, useClassPath); if(properties_ != null) { conf.OverwriteProperties(properties_); } Out.Println(outWriter_, conf.GetInformation(), fileOutput_, false); } catch (IOException e) { } runFlag_ = false; } else if(CheckOption(nameItem, "-CR:o") == true) { lvgOutputOption_.SetCombineRule(CombineRecords.BY_TERM); } else if(CheckOption(nameItem, "-CR:oc") == true) { lvgOutputOption_.SetCombineRule(CombineRecords.BY_CATEGORY); } else if(CheckOption(nameItem, "-CR:oe") == true) { lvgOutputOption_.SetCombineRule(CombineRecords.BY_EUI); } else if(CheckOption(nameItem, "-CR:oi") == true) { lvgOutputOption_.SetCombineRule(CombineRecords.BY_INFLECTION); } else if(CheckOption(nameItem, "-DC:LONG") == true) { lvgOutputOption_.SetOutCategory( Long.parseLong(nameItem.GetOptionArgument())); } else if(CheckOption(nameItem, "-DI:LONG") == true) { lvgOutputOption_.SetOutInflection( Long.parseLong(nameItem.GetOptionArgument())); } else if(CheckOption(nameItem, "-EC:LONG") == true) { lvgOutputOption_.SetExcludeCategory( Long.parseLong(nameItem.GetOptionArgument())); } else if(CheckOption(nameItem, "-EI:LONG") == true) { lvgOutputOption_.SetExcludeInflection( Long.parseLong(nameItem.GetOptionArgument())); } else if(CheckOption(nameItem, "-d") == true) { detailsFlag_ = true; } else if(CheckOption(nameItem, "-h") == true) { LvgHelp.LvgHelp(outWriter_, fileOutput_); runFlag_ = false; } else if(CheckOption(nameItem, "-hs") == true) { systemOption.PrintOptionHierachy(); //not UTF-8 runFlag_ = false; } else if(CheckOption(nameItem, "-F:INT") == true) { Integer fieldNum = new Integer(nameItem.GetOptionArgument()); Vector outputFieldList = lvgOutputOption_.GetOutputFieldList(); outputFieldList.addElement(fieldNum); lvgOutputOption_.SetOutputFieldList(outputFieldList); } else if(CheckOption(nameItem, "-F:h") == true) { LvgHelp.OutputFieldHelp(outWriter_, fileOutput_); runFlag_ = false; } else if(CheckOption(nameItem, "-f:0") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.STRIP_NEC_NOS)); } else if(CheckOption(nameItem, "-f:A") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.ACRONYMS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:An") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.ANTINORM)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:a") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.EXPANSIONS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:B") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.UNINFLECT_WORDS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:Bn") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.NORM_UNINFLECT_WORDS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:b") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.UNINFLECT_TERM)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:C") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.CANONICALIZE)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:Ct") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.CITATION)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:c") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.TOKENIZE)); } else if(CheckOption(nameItem, "-f:ca") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.TOKENIZE_KEEP_ALL)); } else if(CheckOption(nameItem, "-f:ch") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.TOKENIZE_NO_HYPHENS)); } else if(CheckOption(nameItem, "-f:d") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.DERIVATION)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:dc~LONG") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.DERIVATION_BY_CATEGORY)); dbFlag_ = true; derivationCatList_.addElement(nameItem.GetOptionArgument()); } else if(CheckOption(nameItem, "-f:e") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.BASE_SPELLING_VARIANTS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:f") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.FILTER)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:fa") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.FILTER_ACRONYM)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:fp") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.FILTER_PROPER_NOUN)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:E") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.RETRIEVE_EUI)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:G") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.FRUITFUL_VARIANTS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:Ge") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.FRUITFUL_ENHANCED)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:Gn") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.FRUITFUL_VARIANTS_LEX)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:g") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.REMOVE_GENITIVE)); } else if(CheckOption(nameItem, "-f:h") == true) { LvgHelp.FlowHelp(outWriter_, fileOutput_); runFlag_ = false; } else if(CheckOption(nameItem, "-f:i") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.INFLECTION)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:is") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.INFLECTION_SIMPLE)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:ici~STR+STR") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.INFLECTION_BY_CAT_INFL)); dbFlag_ = true; Vector inList = OptionItem.GetArgumentList(nameItem.GetOptionArgument()); inflectionCatList_.addElement(inList.elementAt(0)); inflectionInflList_.addElement(inList.elementAt(1)); curInflectionByCatInflNum_++; } else if(CheckOption(nameItem, "-f:L") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.RETRIEVE_CAT_INFL)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:Ln") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.RETRIEVE_CAT_INFL_DB)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:Lp") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.RETRIEVE_CAT_INFL_BEGIN)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:l") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.LOWER_CASE)); } else if(CheckOption(nameItem, "-f:m") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.METAPHONE)); } else if(CheckOption(nameItem, "-f:N") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.NORMALIZE)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:N3") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.LUI_NORMALIZE)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:n") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.NO_OPERATION)); } else if(CheckOption(nameItem, "-f:nom") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.NOMINALIZATION)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:o") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.REPLACE_PUNCTUATION_WITH_SPACE)); } else if(CheckOption(nameItem, "-f:P") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.STRIP_PUNCTUATION_ENHANCED)); } else if(CheckOption(nameItem, "-f:p") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.STRIP_PUNCTUATION)); } else if(CheckOption(nameItem, "-f:q") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.STRIP_DIACRITICS)); } else if(CheckOption(nameItem, "-f:q0") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.MAP_SYMBOL_TO_ASCII)); } else if(CheckOption(nameItem, "-f:q1") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.MAP_UNICODE_TO_ASCII)); } else if(CheckOption(nameItem, "-f:q2") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.SPLIT_LIGATURES)); } else if(CheckOption(nameItem, "-f:q3") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.GET_UNICODE_NAME)); } else if(CheckOption(nameItem, "-f:q4") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.GET_UNICODE_SYNONYM)); } else if(CheckOption(nameItem, "-f:q5") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.NORM_UNICODE)); } else if(CheckOption(nameItem, "-f:q6") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.NORM_UNICODE_WITH_SYNONYM)); } else if(CheckOption(nameItem, "-f:q7") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.UNICODE_CORE_NORM)); } else if(CheckOption(nameItem, "-f:q8") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.STRIP_MAP_UNICODE)); } else if(CheckOption(nameItem, "-f:R") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.RECURSIVE_DERIVATIONS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:r") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.RECURSIVE_SYNONYMS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:rs") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.REMOVE_S)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:S") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.SYNTACTIC_UNINVERT)); } else if(CheckOption(nameItem, "-f:Si") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.SIMPLE_INFLECTIONS)); } else if(CheckOption(nameItem, "-f:s") == true) { flowStrs_.addElement(Flow.GetBitName( Flow.GENERATE_SPELLING_VARIANTS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:T") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.STRIP_AMBIGUITY_TAGS)); } else if(CheckOption(nameItem, "-f:t") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.STRIP_STOP_WORDS)); } else if(CheckOption(nameItem, "-f:U") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.CONVERT_OUTPUT)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:u") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.UNINVERT)); } else if(CheckOption(nameItem, "-f:v") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.FRUITFUL_VARIANTS_DB)); dbFlag_ = true; } else if(CheckOption(nameItem, "-f:w") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.SORT_BY_WORD_ORDER)); } else if(CheckOption(nameItem, "-f:ws~INT") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.WORD_SIZE)); wordSize_ = Integer.parseInt(nameItem.GetOptionArgument()); } else if(CheckOption(nameItem, "-f:y") == true) { flowStrs_.addElement(Flow.GetBitName(Flow.SYNONYMS)); dbFlag_ = true; } else if(CheckOption(nameItem, "-i:STR") == true) { String inFile = nameItem.GetOptionArgument(); if(inFile != null) { try { inReader_ = new BufferedReader(new InputStreamReader( new FileInputStream(inFile), "UTF-8")); } catch (IOException e) { runFlag_ = false; System.err.println( "**Error: problem of opening/reading file " + inFile); } } } else if(CheckOption(nameItem, "-kd:INT") == true) { lvgFlowSpecificOption_.SetDerivationFilter( Integer.parseInt(nameItem.GetOptionArgument())); } else if(CheckOption(nameItem, "-ki:INT") == true) { lvgFlowSpecificOption_.SetInflectionFilter( Integer.parseInt(nameItem.GetOptionArgument())); } else if(CheckOption(nameItem, "-m") == true) { mutateFlag_ = true; } else if(CheckOption(nameItem, "-n") == true) { lvgOutputOption_.SetNoOutputFlag(true); } else if(CheckOption(nameItem, "-o:STR") == true) { String outFile = nameItem.GetOptionArgument(); if(outFile != null) { try { outWriter_ = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(outFile), "UTF-8")); fileOutput_ = true; } catch (IOException e) { runFlag_ = false; System.err.println( "**Error: problem of opening/writing file " + outFile); } } } else if(CheckOption(nameItem, "-p") == true) { promptFlag_ = true; } else if(CheckOption(nameItem, "-R:INT") == true) { lvgOutputOption_.SetOutRecordNum( Integer.parseInt(nameItem.GetOptionArgument())); } else if(CheckOption(nameItem, "-SC") == true) { lvgOutputOption_.SetShowCategoryStrFlag(true); } else if(CheckOption(nameItem, "-SI") == true) { lvgOutputOption_.SetShowInflectionStrFlag(true); } else if(CheckOption(nameItem, "-St:o") == true) { lvgOutputOption_.SetSortFlag(LexItemComparator.TERM); } else if(CheckOption(nameItem, "-St:oc") == true) { lvgOutputOption_.SetSortFlag(LexItemComparator.TERM_CAT); } else if(CheckOption(nameItem, "-St:oci") == true) { lvgOutputOption_.SetSortFlag(LexItemComparator.TERM_CAT_INFL); } else if(CheckOption(nameItem, "-s:STR") == true) { String separator = nameItem.GetOptionArgument(); // if sub by tab if(separator.equals("\\t")) { separator = new Character((char)(9)).toString(); // tab } GlobalBehavior.SetFieldSeparator(separator); } else if(CheckOption(nameItem, "-t:INT") == true) { termFieldNum_ = Integer.parseInt(nameItem.GetOptionArgument()); } else if(CheckOption(nameItem, "-ti") == true) { lvgOutputOption_.SetFilterInputFlag(true); } else if(CheckOption(nameItem, "-v") == true) { try { Out.Println(outWriter_, "lvg.2010", fileOutput_, false); } catch (IOException e) { } runFlag_ = false; } else if(CheckOption(nameItem, "-x:STR") == true) { configFile_ = nameItem.GetOptionArgument(); } } /** * Define the Lvg system option by defining a string. * This function needs to be modified if adding a new flow option. */ protected void DefineFlag() { // define all option flags & arguments by giving a option string String flagStr = "-cf:INT -C:INT -ccgi -ci -CR:o:oc:oe:oi -DC:LONG -DI:LONG -d -EC:LONG -EI:LONG -F:INT:h -f:0:A:An:a:B:Bn:b:C:Ct:c:ca:ch:d:dc~LONG:e:E:f:fa:fp:G::Ge:Gn:g:h:i:is:ici~STR+STR:L:Ln:Lp:l:m:N:N3:n:nom:o:P:p:q:q0:q1:q2:q3:q4:q5:q6:q7:q8:R:r:rs:S:Si:s:T:t:U:u:v:w:ws~INT:y -h -hs -i:STR -if:INT -kd:INT -ki:INT -m -n -o:STR -p -R:INT -SC -SI -St:o:oc:oci -s:STR -t:INT -ti -v -x:STR"; // init the system option systemOption_ = new Option(flagStr); // Add the full name for flags systemOption_.SetFlagFullName("-cf:INT", "Input_Category_Field"); systemOption_.SetFlagFullName("-C:INT", "Case_Setting"); systemOption_.SetFlagFullName("-ccgi", "Mark_The_End"); systemOption_.SetFlagFullName("-ci", "Show_Config_Info"); systemOption_.SetFlagFullName("-CR:o", "Combine_By_Output_Term"); systemOption_.SetFlagFullName("-CR:oc", "Combine_By_Category"); systemOption_.SetFlagFullName("-CR:oe", "Combine_By_Eui"); systemOption_.SetFlagFullName("-CR:oi", "Combine_By_Inflection"); systemOption_.SetFlagFullName("-DC:LONG", "Specify_Categories"); systemOption_.SetFlagFullName("-DI:LONG", "Specify_Inflections"); systemOption_.SetFlagFullName("-d", "Detail_Operations"); systemOption_.SetFlagFullName("-EC:LONG", "Exclude_Categories"); systemOption_.SetFlagFullName("-EI:LONG", "Exclude_Inflections"); systemOption_.SetFlagFullName("-F:INT", "Output_Field"); systemOption_.SetFlagFullName("-F:h", "Output_Field_Menu"); systemOption_.SetFlagFullName("-f", "Flow"); systemOption_.SetFlagFullName("-f:0", "Strip_NEC_NOS"); systemOption_.SetFlagFullName("-f:A", "Acronyms"); systemOption_.SetFlagFullName("-f:An", "AntiNorm"); systemOption_.SetFlagFullName("-f:a", "Expansions"); systemOption_.SetFlagFullName("-f:B", "Uninflect_Words"); systemOption_.SetFlagFullName("-f:Bn", "Normalize_Uninflect_Words"); systemOption_.SetFlagFullName("-f:b", "Uninflect_Term"); systemOption_.SetFlagFullName("-f:C", "Canonicalize"); systemOption_.SetFlagFullName("-f:Ct", "Citation"); systemOption_.SetFlagFullName("-f:c", "Tokenize"); systemOption_.SetFlagFullName("-f:ca", "Tokenize_Keep_All"); systemOption_.SetFlagFullName("-f:ch", "Tokenize_No_Hyphens"); systemOption_.SetFlagFullName("-f:d", "Derivation"); systemOption_.SetFlagFullName("-f:dc~LONG", "Derivation_By_Category"); systemOption_.SetFlagFullName("-f:e", "Base_From_Spelling_Variants"); systemOption_.SetFlagFullName("-f:E", "Retrieve_Eui"); systemOption_.SetFlagFullName("-f:f", "Filter_Output"); systemOption_.SetFlagFullName("-f:fa", "Filter_Out_Acronym"); systemOption_.SetFlagFullName("-f:fp", "Filter_Out_ProperNouns"); systemOption_.SetFlagFullName("-f:G", "Fruitful_Variants"); systemOption_.SetFlagFullName("-f:Ge", "Fruitful_Variants_Enhanced"); systemOption_.SetFlagFullName("-f:Gn", "Fruitful_Variants_Lex"); systemOption_.SetFlagFullName("-f:g", "Remove_Genitive"); systemOption_.SetFlagFullName("-f:h", "Flow_Help_Menu"); systemOption_.SetFlagFullName("-f:i", "Inflection"); systemOption_.SetFlagFullName("-f:is", "Inflection_Simple"); systemOption_.SetFlagFullName("-f:ici~STR+STR", "Inflection_By_Cat_Infl"); systemOption_.SetFlagFullName("-f:L", "Retrieve_Cat_Infl"); systemOption_.SetFlagFullName("-f:Ln", "Retrieve_Cat_Infl_Db"); systemOption_.SetFlagFullName("-f:Lp", "Retrieve_Cat_Infl_Begin"); systemOption_.SetFlagFullName("-f:l", "LowerCase"); systemOption_.SetFlagFullName("-f:m", "Metaphone"); systemOption_.SetFlagFullName("-f:N", "Normalize"); systemOption_.SetFlagFullName("-f:N3", "LuiNormalize"); systemOption_.SetFlagFullName("-f:n", "No_Operation"); systemOption_.SetFlagFullName("-f:nom", "Retrieve_Nominalizations"); systemOption_.SetFlagFullName("-f:o", "Replace_Punctuation_With_Space"); systemOption_.SetFlagFullName("-f:P", "Strip_Punctuation_Enhanced"); systemOption_.SetFlagFullName("-f:p", "Strip_Punctuation"); systemOption_.SetFlagFullName("-f:q", "Strip_Diacritics"); systemOption_.SetFlagFullName("-f:q0", "Map_Symbol_To_ASCII"); systemOption_.SetFlagFullName("-f:q1", "Map_Unicode_To_ASCII"); systemOption_.SetFlagFullName("-f:q2", "Split_Ligatures"); systemOption_.SetFlagFullName("-f:q3", "Get_Unicode_Name"); systemOption_.SetFlagFullName("-f:q4", "Get_Unicode_Synonym"); systemOption_.SetFlagFullName("-f:q5", "Norma_Unicode"); systemOption_.SetFlagFullName("-f:q6", "Norm_Unicode_With_Synonym"); systemOption_.SetFlagFullName("-f:q7", "Unicode_Core_Norm"); systemOption_.SetFlagFullName("-f:q8", "Strip_Map_Unicode"); systemOption_.SetFlagFullName("-f:R", "Recursive_Derivations"); systemOption_.SetFlagFullName("-f:r", "Recursive_Synonyms"); systemOption_.SetFlagFullName("-f:rs", "Remove_(s)_(es)_(ies)"); systemOption_.SetFlagFullName("-f:S", "Syntactic_Uninvert"); systemOption_.SetFlagFullName("-f:Si", "Simple_Inflections"); systemOption_.SetFlagFullName("-f:s", "Spelling_Variants"); systemOption_.SetFlagFullName("-f:T", "Strip_Ambiguity_Tags"); systemOption_.SetFlagFullName("-f:t", "Strip_Stop_Words"); systemOption_.SetFlagFullName("-f:U", "Convert_Output"); systemOption_.SetFlagFullName("-f:u", "Uninvert"); systemOption_.SetFlagFullName("-f:v", "Fruitful_Variants_Db"); systemOption_.SetFlagFullName("-f:w", "Sort_By_Word_Order"); systemOption_.SetFlagFullName("-f:ws~INT", "Word_Size_Filter"); systemOption_.SetFlagFullName("-f:y", "Synonyms"); systemOption_.SetFlagFullName("-h", "Help"); systemOption_.SetFlagFullName("-hs", "Hierarchy_Struture"); systemOption_.SetFlagFullName("-i:STR", "Input_File"); systemOption_.SetFlagFullName("-if:INT", "Input_inflection_field"); systemOption_.SetFlagFullName("-kd:INT", "Restrict_Derivations"); systemOption_.SetFlagFullName("-ki:INT", "Restrict_Inflections"); systemOption_.SetFlagFullName("-m", "Mutation_Information"); systemOption_.SetFlagFullName("-n", "No_Output"); systemOption_.SetFlagFullName("-o:STR", "Output_file"); systemOption_.SetFlagFullName("-p", "Show_Prompt"); systemOption_.SetFlagFullName("-R:INT", "Restrict_Out_Number"); systemOption_.SetFlagFullName("-SC", "Show_Category_String"); systemOption_.SetFlagFullName("-SI", "Show_Inflection_String"); systemOption_.SetFlagFullName("-St:o", "Sort_By_Term"); systemOption_.SetFlagFullName("-St:oc", "Sort_By_Term_Cat"); systemOption_.SetFlagFullName("-St:oci", "Sort_By_Term_Cat_Infl"); systemOption_.SetFlagFullName("-s:STR", "Field_Separator"); systemOption_.SetFlagFullName("-t:INT", "Term_Field"); systemOption_.SetFlagFullName("-ti", "Filter_Input_Term"); systemOption_.SetFlagFullName("-v", "Version"); systemOption_.SetFlagFullName("-x:STR", "Load_Configuration_file"); } /** * Get the Lvg interface prompt and print it out to system output. */ protected void GetPrompt() throws IOException { Out.Println(outWriter_, promptStr_, fileOutput_, false); } /** * Execute a specified Lvg flow transformation for a given LexItem. * *

Notes This function needs to be modified if adding a new flow option. * * @param in the LexItem to be transformed * @param flowStr the specified flow in a string format * * @return Vector - output * */ protected Vector ExecuteFlow(LexItem in, String flowStr) throws SQLException { long flowNum = Flow.Enumerate(flowStr); Vector outs = new Vector(); // reset vars curDerivationCatNum_ = 0; curInflectionByCatInflCount_ = 0; curInflectionByCatInflNum_ = 0; if(flowNum == Flow.GetBitValue(Flow.LOWER_CASE)) { outs = ToLowerCase.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.STRIP_STOP_WORDS)) { outs = ToStripStopWords.Mutate(in, stopWords_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.REMOVE_GENITIVE)) { outs = ToRemoveGenitive.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.REPLACE_PUNCTUATION_WITH_SPACE)) { outs = ToReplacePunctuationWithSpace.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.STRIP_PUNCTUATION)) { outs = ToStripPunctuation.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.STRIP_PUNCTUATION_ENHANCED)) { outs = ToStripPunctuationEnhanced.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.SORT_BY_WORD_ORDER)) { outs = ToSortWordsByOrder.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.STRIP_NEC_NOS)) { outs = ToStripNecNos.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.NO_OPERATION)) { outs = ToNoOperation.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.TOKENIZE)) { outs = ToTokenize.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.TOKENIZE_NO_HYPHENS)) { outs = ToTokenizeNoHyphens.Mutate(in, detailsFlag_, mutateFlag_); } // use DB & trie else if(flowNum == Flow.GetBitValue(Flow.UNINFLECT_TERM)) { outs = ToUninflectTerm.Mutate(in, conn_, ramTrieI_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.INFLECTION)) { outs = ToInflection.Mutate(in, conn_, ramTrieI_, lvgFlowSpecificOption_.GetInflectionFilter(), detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.UNINFLECT_WORDS)) { outs = ToUninflectWords.Mutate(in, lvgFlowSpecificOption_.GetMaxPermuteTermNum(), conn_, ramTrieI_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.NORMALIZE)) { outs = ToNormalize.Mutate(in, lvgFlowSpecificOption_.GetMaxPermuteTermNum(), stopWords_, conn_, ramTrieI_, symbolMap_, unicodeMap_, ligatureMap_, diacriticMap_, nonStripMap_, removeSTree_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.CANONICALIZE)) { outs = ToCanonicalize.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.LUI_NORMALIZE)) { outs = ToLuiNormalize.Mutate(in, lvgFlowSpecificOption_.GetMaxPermuteTermNum(), stopWords_, conn_, ramTrieI_, symbolMap_, unicodeMap_, ligatureMap_, diacriticMap_, nonStripMap_, removeSTree_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.GENERATE_SPELLING_VARIANTS)) { outs = ToSpellingVariants.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.ACRONYMS)) { outs = ToAcronyms.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.EXPANSIONS)) { outs = ToExpansions.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.DERIVATION)) { outs = ToDerivation.Mutate(in, conn_, ramTrieD_, lvgFlowSpecificOption_.GetDerivationFilter(), detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.DERIVATION_BY_CATEGORY)) { long category = Long.parseLong( derivationCatList_.elementAt(curDerivationCatNum_)); curDerivationCatNum_++; outs = ToDerivationByCategory.Mutate(in, conn_, ramTrieD_, lvgFlowSpecificOption_.GetDerivationFilter(), category, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.INFLECTION_BY_CAT_INFL)) { long category = Long.parseLong( inflectionCatList_.elementAt(curInflectionByCatInflCount_)); long inflection = Long.parseLong( inflectionInflList_.elementAt(curInflectionByCatInflCount_)); curInflectionByCatInflCount_++; outs = ToInflectionByCatInfl.Mutate(in, conn_, ramTrieI_, lvgFlowSpecificOption_.GetInflectionFilter(), category, inflection, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.BASE_SPELLING_VARIANTS)) { outs = ToBaseSpellingVariants.Mutate(in, conn_, ramTrieI_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.RETRIEVE_EUI)) { outs = ToRetrieveEui.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.RETRIEVE_CAT_INFL)) { outs = ToRetrieveCatInfl.Mutate(in, conn_, ramTrieI_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.RETRIEVE_CAT_INFL_DB)) { outs = ToRetrieveCatInflDb.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.RETRIEVE_CAT_INFL_BEGIN)) { outs = ToRetrieveCatInflBegin.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.SYNONYMS)) { outs = ToSynonyms.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.FILTER)) { outs = ToFilter.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.FILTER_PROPER_NOUN)) { outs = ToFilterProperNoun.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.FILTER_ACRONYM)) { outs = ToFilterAcronym.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.STRIP_AMBIGUITY_TAGS)) { outs = ToStripAmbiguityTags.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.UNINVERT)) { outs = ToUninvert.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.CONVERT_OUTPUT)) { outs = ToConvertOutput.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.RECURSIVE_SYNONYMS)) { outs = ToRecursiveSynonyms.Mutate(in, conn_, detailsFlag_, mutateFlag_, false); } else if(flowNum == Flow.GetBitValue(Flow.RECURSIVE_DERIVATIONS)) { outs = ToRecursiveDerivations.Mutate(in, conn_, ramTrieD_, lvgFlowSpecificOption_.GetDerivationFilter(), detailsFlag_, mutateFlag_, false); } else if(flowNum == Flow.GetBitValue(Flow.CITATION)) { outs = ToCitation.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.NORM_UNINFLECT_WORDS)) { outs = ToNormUninflectWords.Mutate(in, lvgFlowSpecificOption_.GetMaxPermuteTermNum(), conn_, ramTrieI_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.STRIP_DIACRITICS)) { outs = ToStripDiacritics.Mutate(in, diacriticMap_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.METAPHONE)) { outs = ToMetaphone.Mutate(in, lvgFlowSpecificOption_.GetMaxMetaphoneCodeLength(), detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.FRUITFUL_VARIANTS)) { outs = ToFruitfulVariants.Mutate(in, conn_, ramTrieI_, ramTrieD_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.TOKENIZE_KEEP_ALL)) { outs = ToTokenizeKeepAll.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.SYNTACTIC_UNINVERT)) { outs = ToSyntacticUninvert.Mutate(in, nonInfoWords_, conjunctionWords_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.FRUITFUL_VARIANTS_LEX)) { outs = ToFruitfulVariantsLex.Mutate(in, conn_, ramTrieI_, ramTrieD_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.FRUITFUL_VARIANTS_DB)) { outs = ToFruitfulVariantsDb.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.ANTINORM)) { outs = ToAntiNorm.Mutate(in, lvgFlowSpecificOption_.GetMaxPermuteTermNum(), stopWords_, conn_, ramTrieI_, symbolMap_, unicodeMap_, ligatureMap_, diacriticMap_, nonStripMap_, removeSTree_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.WORD_SIZE)) { outs = ToWordSize.Mutate(in, wordSize_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.FRUITFUL_ENHANCED)) { outs = ToFruitfulEnhanced.Mutate(in, conn_, ramTrieI_, ramTrieD_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.SIMPLE_INFLECTIONS)) { outs = ToSimpleInflections.Mutate(in, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.INFLECTION_SIMPLE)) { outs = ToInflectionSimple.Mutate(in, conn_, ramTrieI_, lvgFlowSpecificOption_.GetInflectionFilter(), detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.SPLIT_LIGATURES)) { outs = ToSplitLigatures.Mutate(in, ligatureMap_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.GET_UNICODE_NAME)) { outs = ToGetUnicodeNames.Mutate(in, startTag_, endTag_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.GET_UNICODE_SYNONYM)) { outs = ToGetUnicodeSynonyms.Mutate(in, unicodeSynonymMap_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.NORM_UNICODE)) { outs = ToNormUnicode.Mutate(in, symbolMap_, unicodeMap_, ligatureMap_, diacriticMap_, startTag_, endTag_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.NORM_UNICODE_WITH_SYNONYM)) { outs = ToNormUnicodeWithSynonym.Mutate(in, unicodeSynonymMap_, symbolMap_, unicodeMap_, ligatureMap_, diacriticMap_, startTag_, endTag_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.NOMINALIZATION)) { outs = ToNominalization.Mutate(in, conn_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.REMOVE_S)) { outs = ToRemoveS.Mutate(in, removeSTree_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.MAP_SYMBOL_TO_ASCII)) { outs = ToMapSymbolToAscii.Mutate(in, symbolMap_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.MAP_UNICODE_TO_ASCII)) { outs = ToMapUnicodeToAscii.Mutate(in, unicodeMap_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.UNICODE_CORE_NORM)) { outs = ToUnicodeCoreNorm.Mutate(in, symbolMap_, unicodeMap_, ligatureMap_, diacriticMap_, detailsFlag_, mutateFlag_); } else if(flowNum == Flow.GetBitValue(Flow.STRIP_MAP_UNICODE)) { outs = ToStripMapUnicode.Mutate(in, nonStripMap_, detailsFlag_, mutateFlag_); } return outs; } // private methods private void Process(String line, boolean toStringFlag) throws SQLException, IOException { originalTerm_ = line; // Input filter: get the term from appropriate field String fs = GlobalBehavior.GetFieldSeparator(); String inTerm = InputFilter.GetInputTerm(line, fs, termFieldNum_); long inCat = InputFilter.GetInputCategory(line, fs, catFieldNum_); long inInfl = InputFilter.GetInputInflection(line, fs, inflFieldNum_); // go through all parallel flows for(int flowNum = 0; flowNum < flowStrsList_.size(); flowNum++) { LexItem in = new LexItem(inTerm, inCat, inInfl); flowStrs_ = flowStrsList_.elementAt(flowNum); // temp ins & outs for each flow Vector ins = new Vector(); // input Vector outs = new Vector(); // output results ins.addElement(in); // first flow component // go through all flow compoments in a flow for(int flowComp = 0; flowComp < flowStrs_.size(); flowComp++) { // go through all LexItems generated from flow components Vector cur = new Vector(); for(int j = 0; j < ins.size(); j++) { LexItem tempIn = ins.elementAt(j); tempIn.SetFlowNumber(flowNum+1); // set flow number Vector tempOuts = ExecuteFlow(tempIn, flowStrs_.elementAt(flowComp)); cur.addAll(tempOuts); } // Update outs outs.removeAllElements(); outs.addAll(cur); // convert results from one flow component into ins for next ins = LexItem.TargetsToSources(outs); } // Output Filter Options String outStr = OutputFilter.ExecuteOutputFilter(outs, mutateFlag_, detailsFlag_, fs, originalTerm_, inTerm, lvgOutputOption_); // print results Out.Print(outWriter_, outStr, fileOutput_, toStringFlag); } } // check if category and inflection value are legal in -f:ici private boolean CheckInflectionByCatInfl() { boolean legalFlag = true; for(int i = 0; i < curInflectionByCatInflNum_; i++) { // category String catStr = inflectionCatList_.elementAt(i); try { Long.parseLong(catStr); } catch (Exception e) { if(catStr.equalsIgnoreCase("all") == true) { inflectionCatList_.setElementAt( Long.toString(Category.ALL_BIT_VALUE), i); } else { System.err.println("** Error: Illegal category value (" + catStr + ") for -f:ici."); legalFlag = false; } } // inflection String inflStr = inflectionInflList_.elementAt(i); try { Long.parseLong(inflStr); } catch (Exception e) { if(inflStr.equalsIgnoreCase("all") == true) { inflectionInflList_.setElementAt( Long.toString(Inflection.ALL_BIT_VALUE), i); } else { System.err.println("** Error: Illegal inflection value (" + inflStr + ") for -f:ici."); legalFlag = false; } } } return legalFlag; } private static Vector GetOptions(String inStr) { Vector out = new Vector(); StringTokenizer buf = new StringTokenizer(inStr, " \t"); while(buf.hasMoreTokens() == true) { out.addElement(buf.nextToken()); } return out; } private void Init() { PreProcess(); // Init Quit String List //quitStrList_.addElement("q"); //quitStrList_.addElement("quit"); // Init config vars InitConfigVars(); // Init Database and Persistant Trie if(runFlag_ == true) { InitDbAndTrie(); } } // This method must be call after the optionStr is set and before Mutate private void PreProcess() { // go through all options int oldFlowNum = 0; Vector args = GetOptions(option_.GetOptionStr()); // Reset the pararell flows flowStrsList_.removeAllElements(); // reset fieldList lvgOutputOption_.GetOutputFieldList().removeAllElements(); // go through all options for(int i = 0; i < args.size(); i++) { String temp = args.elementAt(i); Option io = new Option(temp); // check if it is a new flow if(IsNewFlow(temp) == true) { flowNum_++; flowStrs_ = new Vector(); } // Decode input option to form options ExecuteCommands(io, GetOption()); // update flowStrsList_ if((flowNum_ > 0) && (flowNum_ != oldFlowNum)) { // update flow String list flowStrsList_.addElement(flowStrs_); oldFlowNum = flowNum_; } } } // open database connection and persistent tries private void InitDbAndTrie() { int minTermLen = Integer.parseInt( conf_.GetConfiguration(Configuration.MIN_TERM_LENGTH)); String lvgDir = conf_.GetConfiguration(Configuration.LVG_DIR); int minTrieStemLength = Integer.parseInt( conf_.GetConfiguration(Configuration.DIR_TRIE_STEM_LENGTH)); try { if((dbFlag_ == true) && (conn_ == null)) { conn_ = DbBase.OpenConnection(conf_); // connect to DB } if(ramTrieI_ == null) { ramTrieI_ = new RamTrie(true, minTermLen, lvgDir, 0); } if(ramTrieD_ == null) { ramTrieD_ = new RamTrie(false, minTermLen, lvgDir, minTrieStemLength); } } catch (Exception e) { e.printStackTrace(); } } private void InitConfigVars() { // get config file from environment variable boolean useClassPath = false; if(configFile_ == null) { useClassPath = true; configFile_ = "data.config.lvg"; } // read in configuration file conf_ = new Configuration(configFile_, useClassPath); // overwrite properties to configuration if(properties_ != null) { conf_.OverwriteProperties(properties_); } // Init varaibles that defines in configuration file if(lvgOutputOption_.GetOutRecordNum() == -1) { lvgOutputOption_.SetOutRecordNum(Integer.parseInt( conf_.GetConfiguration(Configuration.MAX_RESULT))); } if(lvgOutputOption_.GetNoOutputStr() == null) { lvgOutputOption_.SetNoOutputStr( conf_.GetConfiguration(Configuration.NO_OUTPUT)); } if(lvgOutputOption_.GetMarkEndStr() == null) { lvgOutputOption_.SetMarkEndStr( conf_.GetConfiguration(Configuration.CCGI)); } // set default prompt if(Platform.IsWindow() == true) { promptStr_ = "- Please input a term (type \"Ctl-z\" then \"Enter\" to quit) >"; } else { promptStr_ = "- Please input a term (type \"Ctl-d\" to quit) >"; } // read in the prompt from config file if(conf_.GetConfiguration(Configuration.LVG_PROMPT).equals("DEFAULT") == false) { promptStr_ = conf_.GetConfiguration(Configuration.LVG_PROMPT); } if(lvgFlowSpecificOption_.GetMaxPermuteTermNum() == -1) { lvgFlowSpecificOption_.SetMaxPermuteTermNum(Integer.parseInt( conf_.GetConfiguration(Configuration.MAX_UNINFLS))); } if(lvgFlowSpecificOption_.GetMaxMetaphoneCodeLength() == -1) { lvgFlowSpecificOption_.SetMaxMetaphoneCodeLength(Integer.parseInt( conf_.GetConfiguration(Configuration.MAX_METAPHONE))); } if(stopWords_ == null) { stopWords_ = ToStripStopWords.GetStopWordsFromFile(conf_); } if(nonInfoWords_ == null) { nonInfoWords_ = ToSyntacticUninvert.GetNonInfoWordsFromFile(conf_); } if(conjunctionWords_ == null) { conjunctionWords_ = ToSyntacticUninvert.GetConjunctionWordsFromFile(conf_); } if(diacriticMap_ == null) { diacriticMap_ = ToStripDiacritics.GetDiacriticMapFromFile(conf_); } if(ligatureMap_ == null) { ligatureMap_ = ToSplitLigatures.GetLigatureMapFromFile(conf_); } if(startTag_ == null) { startTag_ = conf_.GetConfiguration(Configuration.START_TAG); } if(endTag_ == null) { endTag_ = conf_.GetConfiguration(Configuration.END_TAG); } if(unicodeSynonymMap_ == null) { unicodeSynonymMap_ = ToGetUnicodeSynonyms.GetUnicodeSynonymMapFromFile(conf_); } if(removeSTree_ == null) { removeSTree_ = ToRemoveS.GetRTrieTreeFromFile(conf_); } if(symbolMap_ == null) { symbolMap_ = ToMapSymbolToAscii.GetSymbolMapFromFile(conf_); } if(unicodeMap_ == null) { unicodeMap_ = ToMapUnicodeToAscii.GetUnicodeMapFromFile(conf_); } if(nonStripMap_ == null) { nonStripMap_ = ToStripMapUnicode.GetNonStripMapFromFile(conf_); } } private void Close() throws IOException, SQLException { if((outWriter_ != null) && (fileOutput_ == true)) { outWriter_.close(); } if(inReader_ != null) { inReader_.close(); } if(conn_ != null) { DbBase.CloseConnection(conn_, conf_); // close db connection } } // check if the option means another new flow (parallel) private static boolean IsNewFlow(String option) { boolean flag = false; if((option.length() >= 3) && (option.substring(0, 3).equals("-f:") == true)) { flag = true; } return flag; } // data member protected Vector quitStrList_ = new Vector(); //quiting str protected boolean runFlag_ = true; // flag for running LVG protected static BufferedReader inReader_ = null; // infile buffer private static BufferedWriter outWriter_ = null; // outfile buffer private static boolean fileOutput_ = false; // flag for file output protected Vector flowStrs_ = new Vector(); protected Vector> flowStrsList_ = new Vector>(); private boolean dbFlag_ = false; // flag for connecting to DB private Option option_ = new Option(""); // input option private String promptStr_ = null; private String originalTerm_ = null; // orignal input // input filter options protected int termFieldNum_ = 1; // field num for input term protected int catFieldNum_ = -1; // field num for input cat protected int inflFieldNum_ = -1; // field num for input infl // output options private LvgOutputOption lvgOutputOption_ = new LvgOutputOption(); // flow specific options private LvgFlowSpecificOption lvgFlowSpecificOption_ = new LvgFlowSpecificOption(); // global behavior options protected boolean promptFlag_ = false; // flag for display prompt private int wordSize_ = 2; // word size private boolean detailsFlag_ = false; // flag for details print private boolean mutateFlag_ = false; // flag for mutate print // for q3 flow component private String startTag_ = null; private String endTag_ = null; // for di flow compoment private Vector derivationCatList_ = new Vector(); private int curDerivationCatNum_ = 0; // for ici flow compoment private Vector inflectionCatList_ = new Vector(); private Vector inflectionInflList_ = new Vector(); private int curInflectionByCatInflCount_ = 0; private int curInflectionByCatInflNum_ = 0; private int flowNum_ = 0; // flow number private Connection conn_ = null; // database connection private RamTrie ramTrieI_ = null; // Ram trie: inflection private RamTrie ramTrieD_ = null; // Ram trie: derivation private String configFile_ = null; // configuration related vars private Configuration conf_ = null; private Hashtable properties_ = null; // overwrite properties private Vector stopWords_ = null; private Vector nonInfoWords_ = null; private Vector conjunctionWords_ = null; private Hashtable diacriticMap_ = null; private Hashtable ligatureMap_ = null; private Hashtable unicodeSynonymMap_ = null; private Hashtable symbolMap_ = null; private Hashtable unicodeMap_ = null; private Hashtable nonStripMap_ = null; private RTrieTree removeSTree_ = null; // ram trie tree: remove S rules static { try { outWriter_ = new BufferedWriter(new OutputStreamWriter( System.out, "UTF-8")); } catch (IOException e) { System.err.println("**Error: problem of opening Std-out."); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy