All downloads are free. Search and download functionality uses the official Maven repository.

gov.nih.nlm.nls.lvg.Flows.ToSyntacticUninvert Maven / Gradle / Ivy

The newest version!
package gov.nih.nlm.nls.lvg.Flows;
import java.util.*;
import java.io.*;
import gov.nih.nlm.nls.lvg.Util.*;
import gov.nih.nlm.nls.lvg.Lib.*;
/*****************************************************************************
* This class syntactic uninverts phrases.
*
* 

History: *

    *
* * @author NLM NLS Development Team * * @see * Design Document * * @version V-2010 ****************************************************************************/ public class ToSyntacticUninvert extends Transformation implements Cloneable { // public methods /** * Performs the mutation of this flow component. * * @param in a LexItem as the input for this flow component * @param nonInfoWords non-information words to be stripped * @param conjunctionWords conjuction words * @param detailsFlag a boolean flag for processing details information * @param mutateFlag a boolean flag for processing mutate information * * @return Vector - the results from this flow component * of LexItems */ public static Vector Mutate(LexItem in, Vector nonInfoWords, Vector conjunctionWords, boolean detailsFlag, boolean mutateFlag) { String inStr = in.GetSourceTerm(); // Strip non-fino words String stripedStr = Strip.StripStrings(inStr, nonInfoWords, false); // mutate the term String term = Uninvert(stripedStr, conjunctionWords); // details & mutate String details = null; String mutate = null; if(detailsFlag == true) { details = INFO; } if(mutateFlag == true) { mutate = Transformation.NO_MUTATE_INFO; } // updatea target Vector out = new Vector(); LexItem temp = UpdateLexItem(in, term, Flow.SYNTACTIC_UNINVERT, Transformation.UPDATE, Transformation.UPDATE, details, mutate); out.addElement(temp); return out; } /** * read in non-information words from configuration file * * @param conf Configuratin object * * @return Vector - non information words */ public static Vector GetNonInfoWordsFromFile(Configuration conf) { String fName = conf.GetConfiguration(Configuration.LVG_DIR) + conf.GetConfiguration(Configuration.NONINFO_WORD_FILE); String line = null; Vector nonInfoWords = new Vector(); try // load non-info words from file { BufferedReader in = new BufferedReader(new FileReader(fName)); // read in line by line from a file while((line = in.readLine()) != null) { // skip the line if it is empty or 
comments (#) if((line.length() > 0) && (line.charAt(0) != '#')) { nonInfoWords.addElement(line); } } in.close(); } catch (Exception e) { System.err.println("Exception: " + e.toString()); System.err.println( "** Error: problem of opening/reading non-Info words file: '" + fName + "'."); } return nonInfoWords; } /** * Read in conjunction words from configuration file * * @param conf Configuratin object * * @return Vector - of conjunction words */ public static Vector GetConjunctionWordsFromFile(Configuration conf) { String fName = conf.GetConfiguration(Configuration.LVG_DIR) + conf.GetConfiguration(Configuration.CONJ_WORD_FILE); Vector conjunctionWords = new Vector(); String line = null; try // load conjunction words from file { BufferedReader in = new BufferedReader(new FileReader(fName)); // read in line by line from a file while((line = in.readLine()) != null) { // skip the line if it is empty or comments (#) if((line.length() > 0) && (line.charAt(0) != '#')) { conjunctionWords.addElement(line); } } in.close(); } catch (Exception e) { System.err.println("Exception: " + e.toString()); System.err.println( "** Error: problem of opening/reading conjunction words file: '" + fName + "'."); } return conjunctionWords; } /** * A unit test driver for this flow component. */ public static void main(String[] args) { // load config file Configuration conf = new Configuration("data.config.lvg", true); String testStr = GetTestStr(args, "Angioplasty, Transluminal, Percutaneous Coronary"); Vector nonInfoWords = GetNonInfoWordsFromFile(conf); Vector conjunctionWords = GetConjunctionWordsFromFile(conf); // mutate LexItem in = new LexItem(testStr); Vector outs = ToSyntacticUninvert.Mutate(in, nonInfoWords, conjunctionWords, true, true); PrintResults(in, outs); // print out results } // private methods // uninvert the input phrase around commas. 
private static String Uninvert(String inStr, Vector conjunctionWords) { Vector tokenList = new Vector(); // Use token class to put tokens into a Vector String delim = ","; StringTokenizer buf = new StringTokenizer(inStr, delim); boolean conjunctionFlag = false; while(buf.hasMoreTokens() == true) { String tempStr = buf.nextToken(); tokenList.addElement(tempStr); String firstWord = GetFirstWord(tempStr); if(conjunctionWords.contains(firstWord)) { conjunctionFlag = true; } } // Combine token together if they start with " " String lastStr = new String(); Vector list = new Vector(); for(int i = 0; i < tokenList.size(); i++) { String tempStr = tokenList.elementAt(i); // rearrange the token list if(tempStr.startsWith(" ") == true) // case of "xxx, xxx" { // if the word after comma is conjunction word if(conjunctionFlag == true) { lastStr += ", " + tempStr.trim(); } else { list.addElement(lastStr); lastStr = new String(tempStr.trim()); } } else // case of beginning or xxx,xxx { if(lastStr.length() == 0) // beginning { lastStr = tempStr.trim(); } else // put "," back { lastStr += "," + tempStr.trim(); } } } list.addElement(lastStr); // reform the out from the Vector StringBuffer buffer = new StringBuffer(); for(int i = list.size()-1; i >= 0; i--) { String tempStr = list.elementAt(i); buffer.append(tempStr); buffer.append(" "); } String outStr = buffer.toString(); return outStr.trim(); } private static String GetFirstWord(String inStr) { String delim = " \t"; StringTokenizer buf = new StringTokenizer(inStr, delim); return buf.nextToken(); } // data members private static final String INFO = "Syntactic Uninvert"; }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy