edu.stanford.nlp.ie.util.RelationTriple Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
The newest version!
package edu.stanford.nlp.ie.util;
import java.text.DecimalFormat;
import java.util.*;
import java.util.function.ToIntFunction;
import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.PriorityQueue;
import edu.stanford.nlp.util.*;
import static edu.stanford.nlp.util.logging.Redwood.Util.err;
/**
* A (subject, relation, object) triple; e.g., as used in the KBP challenges or in OpenIE systems.
*
* @author Gabor Angeli
*/
@SuppressWarnings("UnusedDeclaration")
public class RelationTriple implements Comparable, Iterable {
/** The subject (first argument) of this triple */
public final List subject;
/** The subject (first argument) of this triple, in its canonical mention (i.e., coref resolved) */
public final List canonicalSubject;
/**
* The relation (second argument) of this triple.
* Note that this is only the part of the relation that can be grounded in the sentence itself.
* Often, for a standalone readable relation string, you want to attach additional modifiers
* otherwise stored in the dependency arc.
* Therefore, for getting a String form of the relation, we recommend using
* {@link RelationTriple#relationGloss} or {@link RelationTriple#relationLemmaGloss}.
*/
public final List relation;
/** The object (third argument) of this triple */
public final List object;
/** The object (third argument) of this triple, in its canonical mention (i.e., coref resolved). */
public final List canonicalObject;
/** A marker for the relation expressing a tmod not grounded in a word in the sentence. */
private boolean istmod = false;
/** A marker for the relation expressing a prefix "be" not grounded in a word in the sentence. */
private boolean prefixBe = false;
/** A marker for the relation expressing a suffix "be" not grounded in a word in the sentence. */
private boolean suffixBe = false;
/** A marker for the relation expressing a suffix "of" not grounded in a word in the sentence. */
private boolean suffixOf = false;
/** An optional score (confidence) for this triple */
public final double confidence;
/**
 * Create a new triple from a subject, relation, and object, with an explicit confidence.
 * For example, "(cats, play with, yarn)".
 * The canonical subject and canonical object are set to the very same lists as
 * the subject and object.
 *
 * @param subject The subject of this triple; e.g., "cats".
 * @param relation The relation of this triple; e.g., "play with".
 * @param object The object of this triple; e.g., "yarn".
 * @param confidence The score (confidence) stored on this triple.
 */
public RelationTriple(List subject, List relation, List object,
    double confidence) {
  // No coreference information here: each argument doubles as its own canonical mention.
  this(subject, subject, relation, object, object, confidence);
}
/**
 * Create a new triple with a confidence of 1.0, using the subject and object as their
 * own canonical mentions.
 *
 * @see edu.stanford.nlp.ie.util.RelationTriple#RelationTriple(java.util.List, java.util.List, java.util.List, double)
 */
public RelationTriple(List subject, List relation, List object) {
  this(subject, subject, relation, object, object, 1.0);
}
/**
 * Create a new triple with known values for the subject, relation, and object,
 * together with the canonical forms of the subject and object.
 * For example, "(cats, play with, yarn)".
 * The lists are stored as given; no copies are made.
 *
 * @param subject The subject of this triple; e.g., "cats".
 * @param canonicalSubject The canonical mention of the subject.
 * @param relation The relation of this triple; e.g., "play with".
 * @param object The object of this triple; e.g., "yarn".
 * @param canonicalObject The canonical mention of the object.
 * @param confidence The score (confidence) stored on this triple.
 */
public RelationTriple(List subject,
    List canonicalSubject,
    List relation,
    List object,
    List canonicalObject,
    double confidence) {
  // Arguments as they appear in the sentence
  this.subject = subject;
  this.relation = relation;
  this.object = object;
  // Canonical mentions of the two arguments
  this.canonicalSubject = canonicalSubject;
  this.canonicalObject = canonicalObject;
  // Score
  this.confidence = confidence;
}
/**
 * Create a new triple with canonical mentions and a confidence of 1.0.
 *
 * NOTE: the parameter order here is (..., canonicalObject, object), which is the
 * reverse of the six-argument constructor's (..., object, canonicalObject).
 * The arguments are forwarded by name, so each value still lands in the matching field.
 *
 * @param subject The subject of this triple.
 * @param canonicalSubject The canonical mention of the subject.
 * @param relation The relation of this triple.
 * @param canonicalObject The canonical mention of the object.
 * @param object The object of this triple.
 * @see edu.stanford.nlp.ie.util.RelationTriple#RelationTriple(java.util.List, java.util.List, java.util.List, java.util.List, java.util.List, double)
 */
public RelationTriple(List subject,
List canonicalSubject,
List relation,
List canonicalObject,
List object) {
this(subject, canonicalSubject, relation, object, canonicalObject, 1.0);
}
/**
 * Returns all the tokens in the extraction as a freshly allocated list, in the order
 * canonical subject, then relation, then canonical object.
 */
public List allTokens() {
  // Seed the result with the canonical subject, then append the remaining parts.
  List tokens = new ArrayList<>(canonicalSubject);
  tokens.addAll(relation);
  tokens.addAll(canonicalObject);
  return tokens;
}
/** The canonical subject of this relation triple, as a space-separated String of its words. */
public String subjectGloss() {
  return StringUtils.join(canonicalSubject.stream().map(token -> token.word()), " ");
}
/**
 * The head of the subject of this relation triple.
 * This implementation simply takes the last token of the (non-canonical) subject.
 */
public CoreLabel subjectHead() {
  final int lastPosition = subject.size() - 1;
  return subject.get(lastPosition);
}
/**
 * The entity link of the subject.
 * This implementation uses the subject's lemma gloss as the link.
 */
public String subjectLink() {
  final String link = subjectLemmaGloss();
  return link;
}
/**
 * The canonical subject of this relation triple, as a space-separated String of the
 * subject's lemmas. A token without a lemma contributes its word instead.
 * Tokens whose tag is a single punctuation character are stripped out.
 * Tokens with no tag at all are kept, matching the treatment in
 * {@link RelationTriple#relationLemmaGloss}.
 */
public String subjectLemmaGloss() {
  return StringUtils.join(
      canonicalSubject.stream()
          // A null tag means the token was never tagged; keep it rather than throw.
          .filter(x -> x.tag() == null || !x.tag().matches("[.?,:;'\"!]"))
          .map(x -> x.lemma() == null ? x.word() : x.lemma()),
      " ");
}
/** The canonical object of this relation triple, as a space-separated String of its words. */
public String objectGloss() {
  return StringUtils.join(canonicalObject.stream().map(token -> token.word()), " ");
}
/**
 * The head of the object of this relation triple.
 * This implementation simply takes the last token of the (non-canonical) object.
 */
public CoreLabel objectHead() {
  final int lastPosition = object.size() - 1;
  return object.get(lastPosition);
}
/**
 * The entity link of the object.
 * This implementation uses the object's lemma gloss as the link.
 */
public String objectLink() {
  final String link = objectLemmaGloss();
  return link;
}
/**
 * The canonical object of this relation triple, as a space-separated String of the
 * object's lemmas. A token without a lemma contributes its word instead.
 * Tokens whose tag is a single punctuation character are stripped out.
 * Tokens with no tag at all are kept, matching the treatment in
 * {@link RelationTriple#relationLemmaGloss}.
 */
public String objectLemmaGloss() {
  return StringUtils.join(
      canonicalObject.stream()
          // A null tag means the token was never tagged; keep it rather than throw.
          .filter(x -> x.tag() == null || !x.tag().matches("[.?,:;'\"!]"))
          .map(x -> x.lemma() == null ? x.word() : x.lemma()),
      " ");
}
/**
 * The relation of this relation triple, as a String.
 * The words of the relation are joined with spaces, and the ungrounded markers
 * (prefix "is", suffix "is", suffix "of", and "at_time" for a tmod) are added around them.
 */
public String relationGloss() {
  StringBuilder pieces = new StringBuilder();
  if (prefixBe) {
    pieces.append("is ");
  }
  pieces.append(StringUtils.join(relation.stream().map(CoreLabel::word), " "));
  if (suffixBe) {
    pieces.append(" is");
  }
  if (suffixOf) {
    pieces.append(" of");
  }
  if (istmod) {
    pieces.append(" at_time");
  }
  String gloss = pieces.toString().trim();
  // Cosmetic tweak: a bare possessive marker reads better as "has"
  return "'s".equals(gloss) ? "has" : gloss;
}
/**
 * The relation of this relation triple, as a String of the relation's lemmas.
 * A token without a lemma contributes its word instead.
 * Tagged tokens whose tag or lemma is a single punctuation character are stripped out,
 * and the joined lemmas are lower-cased. Lower-casing uses {@link Locale#ROOT} so that
 * the output does not depend on the JVM's default locale.
 * The ungrounded markers (prefix "be", suffix "be", suffix "of", and "at_time" for a
 * tmod) are added around the lemmas.
 */
public String relationLemmaGloss() {
  // Construct a human readable relation string
  String relationGloss = (
      (prefixBe ? "be " : "")
          + StringUtils.join(relation.stream()
              // Untagged tokens are always kept; tagged ones must not be punctuation
              .filter(x -> x.tag() == null || (!x.tag().matches("[.?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[.,;'\"?!]"))))
              .map(x -> x.lemma() == null ? x.word() : x.lemma()),
              " ")
              .toLowerCase(Locale.ROOT)
          + (suffixBe ? " be" : "")
          + (suffixOf ? " of" : "")
          + (istmod ? " at_time" : "")
  ).trim();
  // Some cosmetic tweaks: a bare possessive marker reads better as "have"
  if ("'s".equals(relationGloss)) {
    return "have";
  } else {
    return relationGloss;
  }
}
/**
 * The head of the relation of this relation triple. This is usually the main verb.
 * Concretely: the last relation token whose tag starts with "V", or, when there is no
 * such token, the last token of the relation. Tokens with no tag are never treated as verbs.
 *
 * @throws IndexOutOfBoundsException if the relation is empty.
 */
public CoreLabel relationHead() {
  return relation.stream()
      // Guard against untagged tokens instead of throwing a NullPointerException
      .filter(x -> x.tag() != null && x.tag().startsWith("V"))
      // Keep only the last match
      .reduce((x, y) -> y)
      // Only touch the list for the fallback when no verb was found
      .orElseGet(() -> relation.get(relation.size() - 1));
}
/** A textual representation of the confidence, formatted with the pattern "0.000". */
public String confidenceGloss() {
  DecimalFormat threeDecimals = new DecimalFormat("0.000");
  return threeDecimals.format(confidence);
}
/**
 * Computes a (begin, end) pair over the given tokens: begin is the smallest value of
 * toMin over the tokens, and end is one more than the largest value of toMax.
 * For an empty token list the result is (Integer.MAX_VALUE, Integer.MIN_VALUE).
 */
private static Pair getSpan(List tokens, ToIntFunction toMin, ToIntFunction toMax) {
  int lowest = Integer.MAX_VALUE;
  int highest = Integer.MIN_VALUE;
  for (CoreLabel current : tokens) {
    int begin = toMin.applyAsInt(current);
    if (begin < lowest) {
      lowest = begin;
    }
    // The end of the span is exclusive, hence the +1
    int end = toMax.applyAsInt(current) + 1;
    if (end > highest) {
      highest = end;
    }
  }
  return Pair.makePair(lowest, highest);
}
/**
 * Gets the span of the NON-CANONICAL subject.
 * The begin of the span is the smallest token index minus one; the end is the
 * largest token index.
 */
public Pair subjectTokenSpan() {
  // Shift each token's index() down by one; getSpan adds one back for the end bound.
  ToIntFunction<CoreLabel> shiftedIndex = token -> token.index() - 1;
  return getSpan(subject, shiftedIndex, shiftedIndex);
}
/**
 * Get a representative span for the relation expressed by this triple.
 *
 * This is a bit more complicated than the subject and object spans, as the relation
 * span is occasionally discontinuous.
 * If this is the case, this method returns the longest (nearly) contiguous chunk, where
 * a gap of a single token between two relation tokens does not break a chunk.
 * If the relation span is empty, return the object span.
 */
public Pair relationTokenSpan() {
  if (relation.isEmpty()) {
    return objectTokenSpan();
  }
  if (relation.size() == 1) {
    return Pair.makePair(relation.get(0).index() - 1, relation.get(0).index());
  }

  // Best run seen so far, and the run currently being extended
  int bestLength = 0;
  int bestStart = 0;
  int runLength = 1;
  int runStart = 0;

  CoreLabel previous = relation.get(0);
  for (int i = 1; i < relation.size(); ++i) {
    CoreLabel current = relation.get(i);
    int step = current.index() - previous.index();
    if (step == 1 || step == 2) {
      // Adjacent token, or a skip of one token (_usually_ punctuation):
      // the run grows by the distance covered.
      runLength += step;
    } else {
      // The run is broken; remember it if it is the longest so far, then start anew
      if (runLength > bestLength) {
        bestLength = runLength;
        bestStart = runStart;
      }
      runStart = i;
      runLength = 1;
    }
    previous = current;
  }
  // (subcase: the final run is the longest)
  if (runLength > bestLength) {
    bestLength = runLength;
    bestStart = runStart;
  }

  int spanBegin = relation.get(bestStart).index() - 1;
  return Pair.makePair(spanBegin, spanBegin + bestLength);
}
/**
 * Gets the span of the NON-CANONICAL object.
 * The begin of the span is the smallest token index minus one; the end is the
 * largest token index.
 */
public Pair objectTokenSpan() {
  // Shift each token's index() down by one; getSpan adds one back for the end bound.
  ToIntFunction<CoreLabel> shiftedIndex = token -> token.index() - 1;
  return getSpan(object, shiftedIndex, shiftedIndex);
}
/**
 * Whether this relation carries a prefix "to be" that is not grounded in the sentence.
 *
 * For example, "President Obama" expresses the relation
 * (Obama; be; President).
 *
 * @return The current value of the prefix-be marker.
 */
public boolean isPrefixBe() {
  return prefixBe;
}
/**
 * Set whether this relation triple expresses a (prefix) "to be" relation.
 *
 * @param newValue The new value of the prefix-be marker.
 * @return The value the marker had before this call.
 */
public boolean isPrefixBe(boolean newValue) {
  final boolean previous = prefixBe;
  prefixBe = newValue;
  return previous;
}
/**
 * Whether this relation carries a suffix "to be" (a "be" at the end of the relation)
 * that is not grounded in the sentence.
 *
 * For example, "Tim's father Tom" expresses the relation
 * (Tim; 's father is; Tom).
 *
 * @return The current value of the suffix-be marker.
 */
public boolean isSuffixBe() {
  return suffixBe;
}
/**
 * Set whether this relation triple expresses a (suffix) "to be" relation.
 *
 * @param newValue The new value of the suffix-be marker.
 * @return The value the marker had before this call.
 */
public boolean isSuffixBe(boolean newValue) {
  final boolean previous = suffixBe;
  suffixBe = newValue;
  return previous;
}
/**
 * Whether this relation has an ungrounded "of" at the end of the relation.
 *
 * For example, "United States president Barack Obama" expresses the relation
 * (Obama; is president of; United States).
 *
 * @return The current value of the suffix-of marker.
 */
public boolean isSuffixOf() {
  return suffixOf;
}
/**
 * Set whether this triple has an ungrounded "of" appended to the relation string.
 *
 * @param newValue The new value of the suffix-of marker.
 * @return The value the marker had before this call.
 */
public boolean isSuffixOf(boolean newValue) {
  final boolean previous = suffixOf;
  suffixOf = newValue;
  return previous;
}
/**
 * Whether this relation expresses a tmod (temporal modifier) relation that is not
 * grounded in the sentence.
 *
 * For example, "I went to the store Friday" would otherwise yield a strange triple
 * (I; go to store; Friday).
 *
 * @return The current value of the tmod marker.
 */
public boolean istmod() {
  return istmod;
}
/**
 * Set whether this relation triple expresses a tmod (temporal modifier) relation.
 *
 * @param newValue The new value of the tmod marker.
 * @return The value the marker had before this call.
 */
public boolean istmod(boolean newValue) {
  final boolean previous = istmod;
  istmod = newValue;
  return previous;
}
/**
 * An optional method, returning the dependency tree this triple was extracted from.
 * This base implementation stores no tree and always returns an empty Optional.
 */
public Optional asDependencyTree() {
return Optional.empty();
}
/**
 * Return the given relation triple as a flat sentence: the tokens of the (non-canonical)
 * subject, relation, and object, sorted through a priority queue.
 * A token with a non-negative index gets the negated index as its priority; any other
 * token gets the negation of its running position in the subject/relation/object sequence.
 */
public List asSentence() {
  PriorityQueue queue = new FixedPrioritiesPriorityQueue<>();
  // Running count of tokens seen so far; the fallback rank for tokens without a usable index
  double position = 0.0;
  for (CoreLabel subjectToken : subject) {
    double priority;
    if (subjectToken.index() >= 0) {
      priority = (double) -subjectToken.index();
    } else {
      priority = -position;
    }
    queue.add(subjectToken, priority);
    position += 1.0;
  }
  for (CoreLabel relationToken : relation) {
    double priority;
    if (relationToken.index() >= 0) {
      priority = (double) -relationToken.index();
    } else {
      priority = -position;
    }
    queue.add(relationToken, priority);
    position += 1.0;
  }
  for (CoreLabel objectToken : object) {
    double priority;
    if (objectToken.index() >= 0) {
      priority = (double) -objectToken.index();
    } else {
      priority = -position;
    }
    queue.add(objectToken, priority);
    position += 1.0;
  }
  return queue.toSortedList();
}
/**
 * {@inheritDoc}
 *
 * Two triples are equal when their (non-canonical) object, relation, and subject lists
 * are equal. The confidence, canonical mentions, and marker flags are not compared.
 */
@Override
public boolean equals(Object o) {
  if (o == this) {
    return true;
  }
  if (o instanceof RelationTriple) {
    RelationTriple other = (RelationTriple) o;
    return object.equals(other.object)
        && relation.equals(other.relation)
        && subject.equals(other.subject);
  }
  return false;
}
/**
 * {@inheritDoc}
 *
 * The hash is computed from exactly the fields that {@link #equals(Object)} compares:
 * the subject, relation, and object lists. Hashing {@link #toString()} instead would
 * mix in the confidence, the canonical glosses, and the marker flags, so two triples
 * that are equal could end up with different hash codes and misbehave in hash-based
 * collections.
 */
@Override
public int hashCode() {
  int result = subject.hashCode();
  result = 31 * result + relation.hashCode();
  result = 31 * result + object.hashCode();
  return result;
}
/**
 * Print a human-readable description of this relation triple, as a tab-separated line:
 * confidence, subject gloss, relation gloss, object gloss.
 */
@Override
public String toString() {
  return String.join("\t",
      String.valueOf(this.confidence),
      subjectGloss(),
      relationGloss(),
      objectGloss());
}
/**
 * Print in the format expected by Gabriel Stanovsky and Ido Dagan, Creating a Large Benchmark for Open
 * Information Extraction, EMNLP 2016. https://gabrielstanovsky.github.io/assets/papers/emnlp16a/paper.pdf ,
 * with equivalence classes.
 *
 * The line is tab-separated: an equivalence class built from the indices of the subject,
 * relation, and object heads; the three glosses; the confidence; and the words of the sentence.
 *
 * @param sentence The sentence whose tokens are printed in the last column.
 */
public String toQaSrlString(CoreMap sentence) {
  String equivalenceClass = subjectHead().index() + "." + relationHead().index() + '.' + objectHead().index();
  // Tabs inside a field would break the column layout, so replace them with spaces
  String subjectColumn = subjectGloss().replace('\t', ' ');
  String relationColumn = relationGloss().replace('\t', ' ');
  String objectColumn = objectGloss().replace('\t', ' ');
  String confidenceColumn = String.valueOf(confidence);
  // Each word has its tabs and spaces removed, so the space-joined column stays splittable
  String sentenceColumn =
      StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ");
  return String.join("\t",
      equivalenceClass,
      subjectColumn,
      relationColumn,
      objectColumn,
      confidenceColumn,
      sentenceColumn);
}
/**
 * Print a description of this triple, formatted like the ReVerb outputs.
 *
 * The line is tab-separated and holds, in order: the document id ("no_doc_id" when null),
 * the sentence index, the subject/relation/object glosses, the begin and end token offsets
 * of the subject, relation, and object, the confidence gloss, the words of the sentence,
 * the tags of the sentence, and the subject/relation/object lemma glosses.
 * Any offset whose span is empty is printed as -1.
 * The sentence index is taken from the first non-empty span among relation, subject, and
 * object (in that order), or is -1 if all are empty.
 *
 * @param docid The document id to print, or null.
 * @param sentence The sentence whose tokens and tags are printed.
 */
@SuppressWarnings("Duplicates")
public String toReverbString(String docid, CoreMap sentence) {
  int sentIndex = -1;
  int subjIndex = -1;
  int relationIndex = -1;
  int objIndex = -1;
  int subjIndexEnd = -1;
  int relationIndexEnd = -1;
  int objIndexEnd = -1;
  if (!relation.isEmpty()) {
    sentIndex = relation.get(0).sentIndex();
    relationIndex = relation.get(0).index() - 1;
    relationIndexEnd = relation.get(relation.size() - 1).index();
  }
  if (!subject.isEmpty()) {
    if (sentIndex < 0) { sentIndex = subject.get(0).sentIndex(); }
    subjIndex = subject.get(0).index() - 1;
    subjIndexEnd = subject.get(subject.size() - 1).index();
  }
  if (!object.isEmpty()) {
    // Read the sentence index from the object here: the subject may be empty
    // (or may not carry a sentence index) when this branch is reached.
    if (sentIndex < 0) { sentIndex = object.get(0).sentIndex(); }
    objIndex = object.get(0).index() - 1;
    objIndexEnd = object.get(object.size() - 1).index();
  }
  return (docid == null ? "no_doc_id" : docid) + '\t' +
      sentIndex + '\t' +
      subjectGloss().replace('\t', ' ') + '\t' +
      relationGloss().replace('\t', ' ') + '\t' +
      objectGloss().replace('\t', ' ') + '\t' +
      subjIndex + '\t' +
      subjIndexEnd + '\t' +
      relationIndex + '\t' +
      relationIndexEnd + '\t' +
      objIndex + '\t' +
      objIndexEnd + '\t' +
      confidenceGloss() + '\t' +
      StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ") + '\t' +
      StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(CoreLabel::tag), " ") + '\t' +
      subjectLemmaGloss().replace('\t', ' ') + '\t' +
      relationLemmaGloss().replace('\t', ' ') + '\t' +
      objectLemmaGloss().replace('\t', ' ');
}
/**
 * Orders triples by ascending confidence.
 * Note that this ordering looks only at the confidence, whereas {@link #equals(Object)}
 * looks only at the subject, relation, and object.
 */
@Override
public int compareTo(RelationTriple o) {
  final double mine = this.confidence;
  final double theirs = o.confidence;
  return Double.compare(mine, theirs);
}
/**
 * Iterates over the tokens of the (non-canonical) subject, then the relation, then the
 * (non-canonical) object, by concatenating the three lists' iterators.
 */
@SuppressWarnings("unchecked")
@Override
public Iterator iterator() {
return CollectionUtils.concatIterators(subject.iterator(), relation.iterator(), object.iterator());
}
/**
* A {@link edu.stanford.nlp.ie.util.RelationTriple}, but with the tree saved as well.
*/
public static class WithTree extends RelationTriple {
public final SemanticGraph sourceTree;
/**
* Create a new triple with known values for the subject, relation, and object.
* For example, "(cats, play with, yarn)"
*
* @param subject The subject of this triple; e.g., "cats".
* @param relation The relation of this triple; e.g., "play with".
* @param object The object of this triple; e.g., "yarn".
* @param tree The tree this extraction was created from; we create a deep copy of the tree.
*/
public WithTree(List subject, List relation, List object, SemanticGraph tree,
double confidence) {
super(subject, relation, object, confidence);
this.sourceTree = new SemanticGraph(tree);
}
/**
 * Create a new triple with known values for the subject, relation, and object,
 * along with their canonical spans (i.e., resolving coreference)
 * For example, "(cats, play with, yarn)"
 *
 * Unlike the five-argument constructor of this class, this constructor stores the
 * given tree reference directly and does not copy it.
 *
 * @param subject The subject of this triple.
 * @param canonicalSubject The canonical mention of the subject.
 * @param relation The relation of this triple.
 * @param object The object of this triple.
 * @param canonicalObject The canonical mention of the object.
 * @param confidence The score (confidence) stored on this triple.
 * @param tree The tree this extraction was created from; stored as-is.
 */
public WithTree(List subject,
List canonicalSubject,
List relation,
List object,
List canonicalObject,
double confidence,
SemanticGraph tree) {
super(subject, canonicalSubject, relation, object, canonicalObject, confidence);
this.sourceTree = tree;
}
/**
 * The head of the subject of this relation triple.
 * Scanning the subject from its last token to its first, this returns the first token
 * that has an incoming edge in the source tree whose governor's index lies outside the
 * span built from the first and last subject token indices.
 * If no token qualifies, the last token of the subject is returned.
 */
@Override
public CoreLabel subjectHead() {
  final int size = subject.size();
  if (size == 1) {
    return subject.get(0);
  }
  // NOTE(review): the upper bound is the last token's own index and is tested with >=,
  // so a governor that IS the last subject token counts as outside the span.
  // Confirm against Span.fromValues whether that is intended.
  Span span = Span.fromValues(subject.get(0).index(), subject.get(size - 1).index());
  for (int position = size - 1; position >= 0; --position) {
    CoreLabel candidate = subject.get(position);
    for (SemanticGraphEdge incoming : sourceTree.incomingEdgeIterable(new IndexedWord(candidate))) {
      int governorIndex = incoming.getGovernor().index();
      boolean governedFromInside = governorIndex >= span.start() && governorIndex < span.end();
      if (!governedFromInside) {
        return candidate;
      }
    }
  }
  return subject.get(size - 1);
}
/**
 * The head of the object of this relation triple.
 * Scanning the object from its last token to its first, this returns the first token
 * that has an incoming edge in the source tree whose governor's index lies outside the
 * span built from the first and last object token indices.
 * If no token qualifies, the last token of the object is returned.
 */
@Override
public CoreLabel objectHead() {
  final int size = object.size();
  if (size == 1) {
    return object.get(0);
  }
  // NOTE(review): the upper bound is the last token's own index and is tested with >=,
  // so a governor that IS the last object token counts as outside the span.
  // Confirm against Span.fromValues whether that is intended.
  Span span = Span.fromValues(object.get(0).index(), object.get(size - 1).index());
  for (int position = size - 1; position >= 0; --position) {
    CoreLabel candidate = object.get(position);
    for (SemanticGraphEdge incoming : sourceTree.incomingEdgeIterable(new IndexedWord(candidate))) {
      int governorIndex = incoming.getGovernor().index();
      boolean governedFromInside = governorIndex >= span.start() && governorIndex < span.end();
      if (!governedFromInside) {
        return candidate;
      }
    }
  }
  return object.get(size - 1);
}
/**
 * The head of the relation of this relation triple.
 * Starting from the superclass's guess, this repeatedly moves to a relation token that
 * governs the current guess in the source tree, until the guess stops changing or
 * 100 iterations have been made.
 */
@Override
public CoreLabel relationHead() {
// A single-token relation is trivially its own head
if (relation.size() == 1) { return relation.get(0); }
CoreLabel guess = null;
CoreLabel newGuess = super.relationHead();
int iters = 0; // make sure we don't infinite loop...
// Iterate to a fixed point: stop once a pass leaves the guess unchanged
while (guess != newGuess && iters < 100) {
guess = newGuess;
iters += 1;
for (SemanticGraphEdge edge : sourceTree.incomingEdgeIterable(new IndexedWord(guess))) {
// find a node in the relation list which is a governor of the candidate root
Optional governor = relation.stream().filter(x -> x.index() == edge.getGovernor().index()).findFirst();
// if we found one, this is the new root. The for loop continues,
// so the governor found on the last matching edge wins for this pass
if (governor.isPresent()) {
newGuess = governor.get();
}
}
}
// Report when the iteration cap was hit; the current guess is still returned
if (iters >= 100) {
err("Likely cycle in relation tree");
}
return guess;
}
/**
 * {@inheritDoc}
 *
 * This implementation always returns the stored source tree, wrapped in an Optional.
 */
@Override
public Optional asDependencyTree() {
return Optional.of(sourceTree);
}
}
/**
* A {@link edu.stanford.nlp.ie.util.RelationTriple}, but with both the tree and the entity
* links saved as well.
*/
public static class WithLink extends WithTree {
/** The canonical entity link of the subject */
public final Optional subjectLink;
/** The canonical entity link of the object */
public final Optional objectLink;
/** Create a new relation triple */
public WithLink(List subject, List canonicalSubject, List relation, List object, List canonicalObject, double confidence,
SemanticGraph tree,
String subjectLink,
String objectLink
) {
super(subject, canonicalSubject, relation, object, canonicalObject, confidence, tree);
this.subjectLink = Optional.ofNullable(subjectLink);
this.objectLink = Optional.ofNullable(objectLink);
}
/** {@inheritDoc} */
@Override
public String subjectLink() {
return subjectLink.orElseGet(super::subjectLink);
}
/** {@inheritDoc} */
@Override
public String objectLink() {
return objectLink.orElseGet(super::objectLink);
}
}
}