// semRewrite.datesandnumber.Utilities Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of sigma-nlp Show documentation
// Natural language processing toolbox using Sigma knowledge engineering system.
package semRewrite.datesandnumber;
/*
Copyright 2014-2015 IPsoft
Author: Nagaraj Bhat [email protected]
Rashmi Rao
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307 USA
*/
import semRewrite.datesandnumber.DateInfo;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Constant word/tag lists and regular expressions, plus the mutable working
 * state (term strings, {@link DateInfo} entries and a dependency graph) that
 * the methods of this class read and update.
 *
 * Instances are not synchronized; the fields are package-visible and are
 * presumably filled in by other classes of this package (not visible here).
 */
public class Utilities {

    /** Lower-case month names, january through december. */
    public static final List<String> MONTHS = new ArrayList<String>(Arrays.asList("january",
            "february","march","april","may","june","july","august",
            "september","october","november","december"));

    /** Lower-case weekday names, monday through sunday. */
    public static final List<String> DAYS = new ArrayList<String>(Arrays.asList("monday",
            "tuesday","wednesday","thursday","friday","saturday","sunday"));

    /** Part-of-speech tags treated as verbs by {@link #containsIndexWord(String)}. */
    public static final List<String> VerbTags = new ArrayList<String>(Arrays.asList("VB",
            "VBD","VBG","VBN","VBP","VBZ"));

    /** Noun part-of-speech tags, both bare and with a leading slash. */
    public static final List<String> nounTags = new ArrayList<String>(
            Arrays.asList("NN","NNS","NNP","NNPS","/NN","/NNS","/NNP", "/NNPS"));

    /** Matches a two-argument clause of the form {@code name(arg1,arg2)} with alphanumeric/hyphen arguments. */
    public static final Pattern sumoTermPattern = Pattern.compile("^([a-zA-Z]+)\\(([a-zA-Z\\-0-9]+)(\\s)?,(\\s)?([a-zA-Z(\\-)?0-9]+)\\)");

    /** Looser variant of {@link #sumoTermPattern} that accepts arbitrary text in both arguments. */
    public static final Pattern cnfPattern = Pattern.compile("^([a-zA-Z]+)\\((.*(\\-)?[0-9]*)(\\s)?,(\\s)?(.*(\\-)?[0-9]*)\\)");

    /** Predicate names: "time", "day" and "month". */
    public static final List<String> datesAndNumbersPredicates = new ArrayList<String>(Arrays.asList("time","day","month"));

    /** Tokens "of", "," and "-". */
    public static final List<String> stopWords = new ArrayList<String>(Arrays.asList("of",",","-"));

    /** Term strings; de-duplicated and pruned by {@link #filterSumoTerms()}. */
    List<String> sumoTerms = new LinkedList<String>();

    /** Date/time entries consulted by {@link #filterSumoTerms()}. */
    List<DateInfo> datesList = new LinkedList<DateInfo>();

    /** Dependency graph walked by {@link #populateRootWord(int)}; must be set before that call. */
    SemanticGraph StanfordDependencies;

    // NOTE(review): the element/key/value types of the next two fields are not
    // visible in this file, so they are intentionally left unparameterized.
    List lemmatizedResults = new ArrayList<>();
    HashMap lemmaWordMap = new HashMap<>();

    int timeCount = 1;

    /** ***************************************************************
     * Tells whether the given part-of-speech tag is one of {@link #VerbTags}.
     *
     * The comparison is an exact, case-sensitive match. (A substring test
     * would wrongly accept "" or "V" and would throw on null.)
     *
     * @param word the tag to test; may be null
     * @return true if {@code word} equals one of the verb tags, false
     *         otherwise (including for null)
     */
    public boolean containsIndexWord(String word) {

        return VerbTags.contains(word);
    }

    /** ***************************************************************
     * Walks from the node with the given index towards the first root of
     * {@link #StanfordDependencies} and returns the nearest ancestor whose
     * tag is a verb tag. The starting node itself is never tested, only
     * its ancestors.
     *
     * @param wordIndex index of the starting node in the dependency graph
     * @return the ancestor formatted as {@code word-index}, or null when no
     *         ancestor up to and including the first root has a verb tag
     */
    public String populateRootWord(int wordIndex) {

        IndexedWord current = StanfordDependencies.getNodeByIndex(wordIndex);
        IndexedWord firstRoot = StanfordDependencies.getFirstRoot();
        while (!current.equals(firstRoot)) {
            current = StanfordDependencies.getParent(current);
            if (current == null) {
                // The node has no parent but is not the first root (e.g. it
                // hangs under another root): there is nothing left to climb.
                return null;
            }
            if (containsIndexWord(current.tag())) {
                return current.word() + "-" + current.index();
            }
        }
        return null;
    }

    /** ***************************************************************
     * Removes duplicate entries from {@link #sumoTerms} and then drops every
     * term of the form {@code time(<anything>,time-N)} where N is the time
     * count of a {@link DateInfo} in {@link #datesList} that reports
     * {@code isDuration()}.
     *
     * De-duplication goes through a {@link HashSet}, so the original order
     * of the terms is not preserved.
     */
    public void filterSumoTerms() {

        Set<String> uniqueTerms = new HashSet<String>(sumoTerms);
        sumoTerms.clear();
        sumoTerms.addAll(uniqueTerms);

        Set<String> removableSumoTerms = new HashSet<String>();
        for (DateInfo d : datesList) {
            if (!d.isDuration()) {
                continue;
            }
            // Compile once per duration entry instead of once per term.
            Pattern durationTime =
                    Pattern.compile("^time\\(.*,time-" + d.getTimeCount() + "\\)$");
            for (String sumoTerm : sumoTerms) {
                if (durationTime.matcher(sumoTerm).matches()) {
                    removableSumoTerms.add(sumoTerm);
                }
            }
        }
        sumoTerms.removeAll(removableSumoTerms);
    }
}