semRewrite.datesandnumber.StanfordDateTimeExtractor Maven / Gradle / Ivy
Natural language processing toolbox using Sigma knowledge engineering system.
package semRewrite.datesandnumber;
/*
Copyright 2014-2015 IPsoft
Author: Nagaraj Bhat [email protected]
Rashmi Rao
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program ; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307 USA
*/
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations.*;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
public class StanfordDateTimeExtractor {

    public static List<String> DATE_ENTITIES = new ArrayList<>(Arrays.asList("DATE", "TIME"));
    public static List<String> MEASURE_ENTITIES = new ArrayList<>(Arrays.asList(
            "NUMBER", "PERCENT", "ORDINAL"));

    private List<String> dependencyList = new ArrayList<>();
    private SemanticGraph dependencies;
    private int tokenCount = 0;
    /** ***************************************************************
     */
    public List<String> getDependencyList() {
        return dependencyList;
    }

    /** ***************************************************************
     */
    public int getTokenCount() {
        return tokenCount;
    }

    /** ***************************************************************
     */
    public SemanticGraph getDependencies() {
        return dependencies;
    }

    /** ***************************************************************
     */
    public void setDependencies(SemanticGraph dependencies) {
        this.dependencies = dependencies;
    }
    /** ***************************************************************
     * Calls the Stanford parser, extracts the relevant information about each
     * word in the string, and stores it in a Tokens object for further use.
     * @param inputSentence the natural language string
     * @return a list of Tokens covering the date, time, and numeric entities found
     */
    public List<Tokens> populateParserInfo(String inputSentence) {

        Properties props = new Properties();
        // props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation annotation = new Annotation(inputSentence);
        pipeline.annotate(annotation);

        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        List<Tokens> tokenList = new ArrayList<>();
        for (CoreMap sentence : sentences) {
            tokenCount = 1;
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                String namedEntity = token.get(NamedEntityTagAnnotation.class);
                // Keep only tokens tagged as dates/times, as numeric measures with a cardinal
                // or adjectival POS tag, or as durations expressed by cardinal numbers.
                if (DATE_ENTITIES.contains(namedEntity)
                        || (MEASURE_ENTITIES.contains(namedEntity)
                                && (token.get(PartOfSpeechAnnotation.class).equals("CD")
                                        || token.get(PartOfSpeechAnnotation.class).equals("JJ")))
                        || (namedEntity.equals("DURATION")
                                && token.get(PartOfSpeechAnnotation.class).equals("CD"))) {
                    Tokens tokens = new Tokens();
                    tokens.setId(tokenCount);
                    tokens.setWord(token.get(TextAnnotation.class));
                    tokens.setNer(token.get(NamedEntityTagAnnotation.class));
                    tokens.setNormalizedNer(token.get(NormalizedNamedEntityTagAnnotation.class));
                    tokens.setCharBegin(token.get(BeginIndexAnnotation.class));
                    tokens.setCharEnd(token.get(EndIndexAnnotation.class));
                    tokens.setPos(token.get(PartOfSpeechAnnotation.class));
                    tokens.setLemma(token.get(LemmaAnnotation.class));
                    tokenList.add(tokens);
                }
                tokenCount++;
            }
            // Record the collapsed dependency graph of the (last) sentence for later rule rewriting.
            dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
            dependencyList = StringUtils.split(dependencies.toList(), "\n");
        }
        return tokenList;
    }
}
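
A minimal usage sketch follows, assuming the Stanford CoreNLP models are on the classpath. The demo class and the Tokens getters it uses (getWord(), getNer(), getNormalizedNer()) are assumptions mirroring the setters above, since the Tokens class is not shown in this file.

package semRewrite.datesandnumber;

import java.util.List;

// Hypothetical demo class, not part of sigma-nlp; it only illustrates the call pattern.
public class StanfordDateTimeExtractorDemo {

    public static void main(String[] args) {
        StanfordDateTimeExtractor extractor = new StanfordDateTimeExtractor();
        // Returns only the tokens recognized as dates, times, numeric measures, or cardinal durations.
        List<Tokens> tokens = extractor.populateParserInfo("The meeting is on March 5th 2015 at 10 am.");
        for (Tokens t : tokens) {
            // Getter names are assumed to mirror the setters used in populateParserInfo().
            System.out.println(t.getWord() + " -> " + t.getNer() + " (" + t.getNormalizedNer() + ")");
        }
        // The collapsed dependencies of the last parsed sentence are also available after the call.
        System.out.println(extractor.getDependencyList());
    }
}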