edu.stanford.nlp.naturalli.SentenceFragment Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation
Show all versions of stanford-corenlp Show documentation
Stanford CoreNLP provides a set of natural language analysis tools that take raw English-language text as input and give the base forms of words, their parts of speech, and whether they are names of companies, people, etc.; normalize dates, times, and numeric quantities; mark up the structure of sentences in terms of phrases and word dependencies; and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher-level text understanding applications.
package edu.stanford.nlp.naturalli;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
 * A representation of a sentence fragment.
 *
 * <p>A fragment bundles a dependency parse ({@link #parseTree}), the tokens backing the vertices of
 * that parse ({@link #words}), an assumed truth value, and a mutable score.
 *
 * @author Gabor Angeli
 */
public class SentenceFragment {

  /**
   * The words in this sentence fragment (e.g., for use as the gloss of the fragment).
   * Populated once in the constructor from the sorted vertex list of the parse tree.
   */
  public final List<CoreLabel> words = new ArrayList<>();

  /**
   * The parse tree for this sentence fragment.
   */
  public final SemanticGraph parseTree;

  /**
   * The assumed truth of this fragment; this is relevant for what entailments are supported
   */
  public final boolean assumedTruth;

  /**
   * A score for this fragment. This is 1.0 by default.
   */
  public double score = 1.0;

  /**
   * Creates a fragment from a parse tree.
   *
   * @param tree The parse tree of the fragment.
   * @param assumedTruth The assumed truth of the fragment.
   * @param copy If true, the fragment stores a copy of {@code tree} (made with the
   *             {@link SemanticGraph} copy constructor); if false, it stores {@code tree} itself.
   */
  public SentenceFragment(SemanticGraph tree, boolean assumedTruth, boolean copy) {
    if (copy) {
      this.parseTree = new SemanticGraph(tree);
    } else {
      this.parseTree = tree;
    }
    this.assumedTruth = assumedTruth;
    // The word list is derived from the stored graph (copy or original), in sorted vertex order.
    words.addAll(this.parseTree.vertexListSorted().stream()
        .map(IndexedWord::backingLabel)
        .collect(Collectors.toList()));
  }

  /** The length of this fragment, in words */
  public int length() {
    return words.size();
  }

  /**
   * Changes the score of this fragment in place.
   * @param score The new score of the fragment
   * @return This sentence fragment.
   */
  public SentenceFragment changeScore(double score) {
    this.score = score;
    return this;
  }

  /**
   * Return the tokens in this fragment, but padded with null so that the index in this
   * sentence matches the index of the parse tree. Concretely, each token is stored at list
   * position {@code token.index() - 1}, and the list is as long as the largest vertex index
   * in the parse tree; every other position holds {@code null}.
   *
   * @return A newly allocated list of tokens and null padding; empty if the parse tree has
   *         no vertices.
   */
  public List<CoreLabel> paddedWords() {
    // Start at 0 rather than -1: with no vertices, a negative value would be passed as the
    // ArrayList capacity, which throws IllegalArgumentException.
    int maxIndex = 0;
    for (IndexedWord vertex : parseTree.vertexSet()) {
      maxIndex = Math.max(maxIndex, vertex.index());
    }
    List<CoreLabel> tokens = new ArrayList<>(maxIndex);
    for (int i = 0; i < maxIndex; ++i) {
      tokens.add(null);
    }
    for (CoreLabel token : this.words) {
      tokens.set(token.index() - 1, token);
    }
    return tokens;
  }

  /**
   * Two fragments are equal if the vertex sets of their parse trees are equal. The edges of
   * the trees, the assumed truth, and the score are not compared.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof SentenceFragment)) return false;
    SentenceFragment that = (SentenceFragment) o;
    return this.parseTree.vertexSet().equals((that.parseTree.vertexSet()));
  }

  /** The hash code of the parse tree's vertex set, consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return this.parseTree.vertexSet().hashCode();
  }

  /**
   * Renders the fragment as a space-separated gloss. Each word contributes its surface form;
   * in addition, if an incoming edge's relation name contains an underscore, the text after
   * the first underscore (with remaining underscores turned into spaces) is emitted as a
   * connective positioned at the start of that word's yield span.
   */
  @Override
  public String toString() {
    // Each gloss is (text, 0-based sort position).
    List<Pair<String, Integer>> glosses = new ArrayList<>();
    for (CoreLabel word : words) {
      // Add the word itself
      glosses.add(Pair.makePair(word.word(), word.index() - 1));
      IndexedWord vertex = new IndexedWord(word);
      String addedConnective = null;
      // Find additional connectives; if several edges qualify, the last one iterated wins
      for (SemanticGraphEdge edge : parseTree.incomingEdgeIterable(vertex)) {
        String rel = edge.getRelation().toString();
        if (rel.contains("_")) {  // for Stanford dependencies only
          addedConnective = rel.substring(rel.indexOf('_') + 1);
        }
      }
      if (addedConnective != null) {
        // Found a connective (e.g., a preposition or conjunction)
        Pair<Integer, Integer> span = parseTree.yieldSpan(vertex);
        glosses.add(Pair.makePair(addedConnective.replace('_', ' '), span.first - 1));
      }
    }
    // Sort the sentence. The sort is stable, so entries sharing a position keep insertion order.
    // Integer.compare avoids the overflow risk of comparing by subtraction.
    Collections.sort(glosses, (a, b) -> Integer.compare(a.second, b.second));
    // Return the sentence
    return StringUtils.join(glosses.stream().map(Pair::first), " ");
  }
}