edu.stanford.nlp.ie.util.RelationTriple Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
The newest version!
package edu.stanford.nlp.ie.util;

import java.text.DecimalFormat;
import java.util.*;
import java.util.function.ToIntFunction;

import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.PriorityQueue;
import edu.stanford.nlp.util.*;

import static edu.stanford.nlp.util.logging.Redwood.Util.err;


/**
 * A (subject, relation, object) triple; e.g., as used in the KBP challenges or in OpenIE systems.
 *
 * <p>Each argument is stored as a list of tokens. The subject and object additionally carry a
 * "canonical" mention, which the gloss methods use in preference to the surface mention.
 *
 * <p>Note on ordering and equality: {@link #equals(Object)} and {@link #hashCode()} look only at the
 * surface subject, relation and object, whereas {@link #compareTo(RelationTriple)} looks only at the
 * confidence. The natural ordering is therefore <em>not</em> consistent with equals.
 *
 * @author Gabor Angeli
 */
@SuppressWarnings("UnusedDeclaration")
public class RelationTriple implements Comparable<RelationTriple>, Iterable<CoreLabel> {

  /** The subject (first argument) of this triple */
  public final List<CoreLabel> subject;

  /** The subject (first argument) of this triple, in its canonical mention (i.e., coref resolved) */
  public final List<CoreLabel> canonicalSubject;

  /**
   * The relation (second argument) of this triple.
   * Note that this is only the part of the relation that can be grounded in the sentence itself.
   * Often, for a standalone readable relation string, you want to attach additional modifiers
   * otherwise stored in the dependency arc.
   * Therefore, for getting a String form of the relation, we recommend using
   * {@link RelationTriple#relationGloss} or {@link RelationTriple#relationLemmaGloss}.
   */
  public final List<CoreLabel> relation;

  /** The object (third argument) of this triple */
  public final List<CoreLabel> object;

  /** The object (third argument) of this triple, in its canonical mention (i.e., coref resolved). */
  public final List<CoreLabel> canonicalObject;

  /** A marker for the relation expressing a tmod not grounded in a word in the sentence. */
  private boolean istmod = false;
  /** A marker for the relation expressing a prefix "be" not grounded in a word in the sentence. */
  private boolean prefixBe = false;
  /** A marker for the relation expressing a suffix "be" not grounded in a word in the sentence. */
  private boolean suffixBe = false;
  /** A marker for the relation expressing a suffix "of" not grounded in a word in the sentence. */
  private boolean suffixOf = false;
  /** An optional score (confidence) for this triple */
  public final double confidence;

  /**
   * Create a new triple with known values for the subject, relation, and object.
   * For example, "(cats, play with, yarn)".
   * The canonical subject and object are set to the given subject and object.
   *
   * @param subject The subject of this triple; e.g., "cats".
   * @param relation The relation of this triple; e.g., "play with".
   * @param object The object of this triple; e.g., "yarn".
   * @param confidence The score (confidence) to attach to this triple.
   */
  public RelationTriple(List<CoreLabel> subject, List<CoreLabel> relation, List<CoreLabel> object,
                        double confidence) {
    this(subject, subject, relation, object, object, confidence);
  }

  /**
   * As {@link #RelationTriple(java.util.List, java.util.List, java.util.List, double)}, with a
   * confidence of 1.0.
   *
   * @see edu.stanford.nlp.ie.util.RelationTriple#RelationTriple(java.util.List, java.util.List, java.util.List, double)
   */
  public RelationTriple(List<CoreLabel> subject, List<CoreLabel> relation, List<CoreLabel> object) {
    this(subject, relation, object, 1.0);
  }

  /**
   * Create a new triple with known values for the subject, relation, and object, together with
   * the canonical mentions of the subject and object.
   * For example, "(cats, play with, yarn)".
   *
   * @param subject The subject of this triple; e.g., "cats".
   * @param canonicalSubject The canonical mention of the subject.
   * @param relation The relation of this triple; e.g., "play with".
   * @param object The object of this triple; e.g., "yarn".
   * @param canonicalObject The canonical mention of the object.
   * @param confidence The score (confidence) to attach to this triple.
   */
  public RelationTriple(List<CoreLabel> subject,
                        List<CoreLabel> canonicalSubject,
                        List<CoreLabel> relation,
                        List<CoreLabel> object,
                        List<CoreLabel> canonicalObject,
                        double confidence) {
    this.subject = subject;
    this.canonicalSubject = canonicalSubject;
    this.relation = relation;
    this.object = object;
    this.canonicalObject = canonicalObject;
    this.confidence = confidence;
  }

  /**
   * Create a new triple with canonical mentions and a confidence of 1.0.
   *
   * <p><b>Beware of the parameter order:</b> unlike the six-argument constructor, this overload
   * takes {@code canonicalObject} <em>before</em> {@code object}. The order is kept as-is for
   * backwards compatibility; the arguments are forwarded by name, so each ends up in the
   * field of the same name.
   *
   * @param subject The subject of this triple.
   * @param canonicalSubject The canonical mention of the subject.
   * @param relation The relation of this triple.
   * @param canonicalObject The canonical mention of the object.
   * @param object The object of this triple.
   * @see edu.stanford.nlp.ie.util.RelationTriple#RelationTriple(java.util.List, java.util.List, java.util.List, double)
   */
  public RelationTriple(List<CoreLabel> subject,
                        List<CoreLabel> canonicalSubject,
                        List<CoreLabel> relation,
                        List<CoreLabel> canonicalObject,
                        List<CoreLabel> object) {
    this(subject, canonicalSubject, relation, object, canonicalObject, 1.0);
  }

  /**
   * Returns all the tokens in the extraction, in the order subject then relation then object.
   * The canonical mentions of the subject and object are used. A fresh list is returned on
   * every call.
   */
  public List<CoreLabel> allTokens() {
    List<CoreLabel> allTokens =
        new ArrayList<>(canonicalSubject.size() + relation.size() + canonicalObject.size());
    allTokens.addAll(canonicalSubject);
    allTokens.addAll(relation);
    allTokens.addAll(canonicalObject);
    return allTokens;
  }

  /**
   * Joins the lemmas of the given tokens with single spaces, falling back to the word when a
   * token has no lemma, and dropping tokens whose tag is a single punctuation character.
   * Tokens without a tag are kept, mirroring {@link #relationLemmaGloss()}.
   */
  private static String lemmaGlossOf(List<CoreLabel> tokens) {
    return StringUtils.join(
        tokens.stream()
            .filter(x -> x.tag() == null || !x.tag().matches("[.?,:;'\"!]"))
            .map(x -> x.lemma() == null ? x.word() : x.lemma()),
        " ");
  }

  /** Joins the words of the sentence's tokens with single spaces, removing tabs and spaces inside a word. */
  private static String sentenceGlossOf(CoreMap sentence) {
    return StringUtils.join(
        sentence.get(CoreAnnotations.TokensAnnotation.class).stream()
            .map(x -> x.word().replace('\t', ' ').replace(" ", "")),
        " ");
  }

  /** The (canonical) subject of this relation triple, as a String */
  public String subjectGloss() {
    return StringUtils.join(canonicalSubject.stream().map(CoreLabel::word), " ");
  }

  /** The head of the subject of this relation triple; here, simply the last token of the subject. */
  public CoreLabel subjectHead() {
    return subject.get(subject.size() - 1);
  }

  /** The entity link of the subject; by default, the subject's lemma gloss. */
  public String subjectLink() {
    return subjectLemmaGloss();
  }

  /**
   * The subject of this relation triple, as a String of the (canonical) subject's lemmas.
   * This method will additionally strip out punctuation as well.
   */
  public String subjectLemmaGloss() {
    return lemmaGlossOf(canonicalSubject);
  }

  /** The (canonical) object of this relation triple, as a String */
  public String objectGloss() {
    return StringUtils.join(canonicalObject.stream().map(CoreLabel::word), " ");
  }

  /** The head of the object of this relation triple; here, simply the last token of the object. */
  public CoreLabel objectHead() {
    return object.get(object.size() - 1);
  }

  /** The entity link of the object; by default, the object's lemma gloss. */
  public String objectLink() {
    return objectLemmaGloss();
  }

  /**
   * The object of this relation triple, as a String of the (canonical) object's lemmas.
   * This method will additionally strip out punctuation as well.
   */
  public String objectLemmaGloss() {
    return lemmaGlossOf(canonicalObject);
  }

  /**
   * The relation of this relation triple, as a String.
   * Ungrounded markers (prefix "is", suffix "is", suffix "of", "at_time") are attached as needed.
   */
  public String relationGloss() {
    String relationGloss = (
        (prefixBe ? "is " : "")
        + StringUtils.join(relation.stream().map(CoreLabel::word), " ")
        + (suffixBe ? " is" : "")
        + (suffixOf ? " of" : "")
        + (istmod ? " at_time" : "")
    ).trim();
    // Some cosmetic tweaks: a bare possessive marker reads better as a verb
    return "'s".equals(relationGloss) ? "has" : relationGloss;
  }

  /**
   * The relation of this relation triple, as a String of the relation's lemmas.
   * This method will additionally strip out punctuation as well, and lower-cases the relation.
   */
  public String relationLemmaGloss() {
    // Construct a human readable relation string
    String relationGloss = (
        (prefixBe ? "be " : "")
        + StringUtils.join(relation.stream()
            .filter(x -> x.tag() == null || (!x.tag().matches("[.?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[.,;'\"?!]"))))
            .map(x -> x.lemma() == null ? x.word() : x.lemma()),
          " ")
          .toLowerCase()
        + (suffixBe ? " be" : "")
        + (suffixOf ? " of" : "")
        + (istmod ? " at_time" : "")
    ).trim();
    // Some cosmetic tweaks: a bare possessive marker reads better as a verb
    return "'s".equals(relationGloss) ? "have" : relationGloss;
  }

  /**
   * The head of the relation of this relation triple. This is usually the main verb:
   * the last token whose tag starts with "V", or the last token of the relation if there is
   * no such token. Tokens without a tag are never treated as verbs.
   */
  public CoreLabel relationHead() {
    CoreLabel head = relation.get(relation.size() - 1);  // fallback: last token
    for (CoreLabel token : relation) {
      String tag = token.tag();
      if (tag != null && tag.startsWith("V")) {
        head = token;  // keep overwriting, so that the last verb wins
      }
    }
    return head;
  }

  /** A textual representation of the confidence, with three decimal places. */
  public String confidenceGloss() {
    return new DecimalFormat("0.000").format(confidence);
  }

  /**
   * Computes the (begin, end) pair covering the given tokens, where begin is the smallest value
   * of {@code toMin} and end is one more than the largest value of {@code toMax}.
   * For an empty token list the result is (Integer.MAX_VALUE, Integer.MIN_VALUE).
   */
  private static Pair<Integer, Integer> getSpan(List<CoreLabel> tokens,
                                                ToIntFunction<CoreLabel> toMin,
                                                ToIntFunction<CoreLabel> toMax) {
    int min = Integer.MAX_VALUE;
    int max = Integer.MIN_VALUE;
    for (CoreLabel token : tokens) {
      min = Math.min(min, toMin.applyAsInt(token));
      max = Math.max(max, toMax.applyAsInt(token) + 1);
    }
    return Pair.makePair(min, max);
  }

  /**
   * Gets the span of the NON-CANONICAL subject.
   */
  public Pair<Integer, Integer> subjectTokenSpan() {
    return getSpan(subject, x -> x.index() - 1, x -> x.index() - 1);
  }

  /**
   *   Get a representative span for the relation expressed by this triple.
   *
   *   This is a bit more complicated than the subject and object spans, as the relation
   *   span is occasionally discontinuous.
   *   If this is the case, this method returns the largest contiguous chunk.
   *   If the relation span is empty, return the object span.
   */
  public Pair<Integer, Integer> relationTokenSpan() {
    if (relation.isEmpty()) {
      return objectTokenSpan();
    } else if (relation.size() == 1) {
      return Pair.makePair(relation.get(0).index() - 1, relation.get(0).index());
    } else {
      // Variables to keep track of the longest chunk
      int longestChunk = 0;
      int longestChunkStart = 0;
      int thisChunk = 1;
      int thisChunkStart = 0;
      // Find the longest chunk
      for (int i = 1; i < relation.size(); ++i) {
        CoreLabel token = relation.get(i);
        CoreLabel lastToken = relation.get(i - 1);
        if (lastToken.index() + 1 == token.index()) {
          thisChunk += 1;
        } else if (lastToken.index() + 2 == token.index()) {
          // a skip of one token is _usually_ punctuation; count the skipped token as part of the chunk
          thisChunk += 2;
        } else {
          if (thisChunk > longestChunk) {
            longestChunk = thisChunk;
            longestChunkStart = thisChunkStart;
          }
          thisChunkStart = i;
          thisChunk = 1;
        }
      }
      // (subcase: the last chunk is the longest)
      if (thisChunk > longestChunk) {
        longestChunk = thisChunk;
        longestChunkStart = thisChunkStart;
      }
      // Return the longest chunk
      int begin = relation.get(longestChunkStart).index() - 1;
      return Pair.makePair(begin, begin + longestChunk);
    }
  }

  /**
   * Gets the span of the NON-CANONICAL object.
   */
  public Pair<Integer, Integer> objectTokenSpan() {
    return getSpan(object, x -> x.index() - 1, x -> x.index() - 1);
  }

  /**
   * If true, this relation expresses a "to be" relation.
   *
   * For example, "President Obama" expresses the relation
   * (Obama; be; President).
   */
  public boolean isPrefixBe() {
    return this.prefixBe;
  }

  /**
   * Set the value of this relation triple expressing a "to be" relation.
   *
   * @param newValue The new value of this relation being a "to be" relation.
   * @return The old value of whether this relation expressed a "to be" relation.
   */
  public boolean isPrefixBe(boolean newValue) {
    boolean oldValue = this.prefixBe;
    this.prefixBe = newValue;
    return oldValue;
  }

  /**
   * If true, this relation expresses a "to be" relation (with the be at the end of the sentence).
   *
   * For example, "Tim's father Tom" expresses the relation
   * (Tim; 's father is; Tom).
   */
  public boolean isSuffixBe() {
    return this.suffixBe;
  }

  /**
   * Set the value of this relation triple expressing a "to be" relation (suffix).
   *
   * @param newValue The new value of this relation being a "to be" relation.
   * @return The old value of whether this relation expressed a "to be" relation.
   */
  public boolean isSuffixBe(boolean newValue) {
    boolean oldValue = this.suffixBe;
    this.suffixBe = newValue;
    return oldValue;
  }

  /**
   * If true, this relation has an ungrounded "of" at the end of the relation.
   *
   * For example, "United States president Barack Obama" expresses the relation
   * (Obama; is president of; United States).
   */
  public boolean isSuffixOf() {
    return this.suffixOf;
  }

  /**
   * Set the value of this triple missing an ungrounded "of" in the relation string.
   *
   * @param newValue The new value of this relation missing an "of".
   * @return The old value of whether this relation was missing an "of".
   */
  public boolean isSuffixOf(boolean newValue) {
    boolean oldValue = this.suffixOf;
    this.suffixOf = newValue;
    return oldValue;
  }

  /**
   * If true, this relation expresses a tmod (temporal modifier) relation that is not grounded in
   * the sentence.
   *
   * For example, "I went to the store Friday" would otherwise yield a strange triple
   * (I; go to store; Friday).
   */
  public boolean istmod() {
    return this.istmod;
  }

  /**
   * Set the value of this relation triple expressing a tmod (temporal modifier) relation.
   *
   * @param newValue The new value of this relation being a tmod relation.
   * @return The old value of whether this relation expressed a tmod relation.
   */
  public boolean istmod(boolean newValue) {
    boolean oldValue = this.istmod;
    this.istmod = newValue;
    return oldValue;
  }

  /**
   * An optional method, returning the dependency tree this triple was extracted from.
   * This base implementation has no tree and always returns an empty Optional.
   */
  public Optional<SemanticGraph> asDependencyTree() {
    return Optional.empty();
  }

  /**
   * Return the given relation triple as a flat sentence.
   * The (non-canonical) subject, relation and object tokens are queued with priority
   * {@code -index()}; tokens with a negative index instead use the negation of their position
   * in the subject-relation-object enumeration.
   */
  public List<CoreLabel> asSentence() {
    PriorityQueue<CoreLabel> orderedSentence = new FixedPrioritiesPriorityQueue<>();
    double defaultIndex = 0.0;
    for (List<CoreLabel> span : Arrays.asList(subject, relation, object)) {
      for (CoreLabel token : span) {
        orderedSentence.add(token, token.index() >= 0 ? (double) -token.index() : -defaultIndex);
        defaultIndex += 1.0;
      }
    }
    return orderedSentence.toSortedList();
  }


  /**
   * Two triples are equal if their (non-canonical) subject, relation and object token lists are
   * equal. The confidence, the canonical mentions and the ungrounded markers are ignored.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof RelationTriple)) return false;
    RelationTriple that = (RelationTriple) o;
    return object.equals(that.object) && relation.equals(that.relation) && subject.equals(that.subject);
  }

  /**
   * Hashes the words of the (non-canonical) subject, relation and object.
   *
   * <p>The hash must depend only on what {@link #equals(Object)} compares. Hashing
   * {@link #toString()} would fold in the confidence and the canonical mentions, so two equal
   * triples could land in different hash buckets. Hashing only the words keeps this cheaper
   * than hashing whole CoreLabels (this relies on equal CoreLabels reporting equal words).
   */
  @Override
  public int hashCode() {
    int result = wordHash(subject);
    result = 31 * result + wordHash(relation);
    result = 31 * result + wordHash(object);
    return result;
  }

  /** An order-sensitive hash over the words of the given tokens; null words hash to 0. */
  private static int wordHash(List<CoreLabel> tokens) {
    int hash = 1;
    for (CoreLabel token : tokens) {
      hash = 31 * hash + Objects.hashCode(token.word());
    }
    return hash;
  }

  /** Print a human-readable description of this relation triple, as a tab-separated line. */
  @Override
  public String toString() {
    return String.valueOf(this.confidence) + '\t' + subjectGloss() + '\t' + relationGloss() + '\t' + objectGloss();
  }


  /** Print in the format expected by Gabriel Stanovsky and Ido Dagan, Creating a Large Benchmark for Open
   *  Information Extraction, EMNLP 2016. https://gabrielstanovsky.github.io/assets/papers/emnlp16a/paper.pdf ,
   *  with equivalence classes.
   *
   *  @param sentence The sentence this triple was extracted from; its tokens are appended to the line.
   */
  public String toQaSrlString(CoreMap sentence) {
    String equivalenceClass = subjectHead().index() + "." + relationHead().index() + '.' + objectHead().index();
    return equivalenceClass + '\t' +
        subjectGloss().replace('\t', ' ') + '\t' +
        relationGloss().replace('\t', ' ') + '\t' +
        objectGloss().replace('\t', ' ') + '\t' +
        confidence + '\t' +
        sentenceGlossOf(sentence);
  }

  /**
   * Print a description of this triple, formatted like the ReVerb outputs.
   *
   * @param docid The document id to print; "no_doc_id" is printed if this is null.
   * @param sentence The sentence this triple was extracted from.
   */
  @SuppressWarnings("Duplicates")
  public String toReverbString(String docid, CoreMap sentence) {
    // Every index defaults to -1, which is what gets printed for an empty span
    int sentIndex = -1;
    int subjIndex = -1;
    int relationIndex = -1;
    int objIndex = -1;
    int subjIndexEnd = -1;
    int relationIndexEnd = -1;
    int objIndexEnd = -1;
    // The sentence index is taken from the first non-empty span, trying relation, subject, object
    if (!relation.isEmpty()) {
      sentIndex = relation.get(0).sentIndex();
      relationIndex = relation.get(0).index() - 1;
      relationIndexEnd = relation.get(relation.size() - 1).index();
    }
    if (!subject.isEmpty()) {
      if (sentIndex < 0) { sentIndex = subject.get(0).sentIndex(); }
      subjIndex = subject.get(0).index() - 1;
      subjIndexEnd = subject.get(subject.size() - 1).index();
    }
    if (!object.isEmpty()) {
      // Read from the object here: the subject may be empty on this path
      if (sentIndex < 0) { sentIndex = object.get(0).sentIndex(); }
      objIndex = object.get(0).index() - 1;
      objIndexEnd = object.get(object.size() - 1).index();
    }
    return (docid == null ? "no_doc_id" : docid) + '\t' +
        sentIndex + '\t' +
        subjectGloss().replace('\t', ' ') + '\t' +
        relationGloss().replace('\t', ' ') + '\t' +
        objectGloss().replace('\t', ' ') + '\t' +
        subjIndex + '\t' +
        subjIndexEnd + '\t' +
        relationIndex + '\t' +
        relationIndexEnd + '\t' +
        objIndex + '\t' +
        objIndexEnd + '\t' +
        confidenceGloss() + '\t' +
        sentenceGlossOf(sentence) + '\t' +
        StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(CoreLabel::tag), " ") + '\t' +
        subjectLemmaGloss().replace('\t', ' ') + '\t' +
        relationLemmaGloss().replace('\t', ' ') + '\t' +
        objectLemmaGloss().replace('\t', ' ');
  }

  /** Orders triples by ascending confidence. Note that this ordering is not consistent with equals. */
  @Override
  public int compareTo(RelationTriple o) {
    return Double.compare(this.confidence, o.confidence);
  }

  /** Iterates over the (non-canonical) subject tokens, then the relation tokens, then the object tokens. */
  @SuppressWarnings("unchecked")  // generic varargs array of Iterator<CoreLabel>
  @Override
  public Iterator<CoreLabel> iterator() {
    return CollectionUtils.concatIterators(subject.iterator(), relation.iterator(), object.iterator());
  }


  /**
   * A {@link edu.stanford.nlp.ie.util.RelationTriple}, but with the tree saved as well.
   */
  public static class WithTree extends RelationTriple {
    /** The dependency tree this triple was extracted from. */
    public final SemanticGraph sourceTree;

    /**
     * Create a new triple with known values for the subject, relation, and object.
     * For example, "(cats, play with, yarn)"
     *
     * @param subject  The subject of this triple; e.g., "cats".
     * @param relation The relation of this triple; e.g., "play with".
     * @param object   The object of this triple; e.g., "yarn".
     * @param tree     The tree this extraction was created from; a copy of the tree is stored.
     * @param confidence The score (confidence) to attach to this triple.
     */
    public WithTree(List<CoreLabel> subject, List<CoreLabel> relation, List<CoreLabel> object, SemanticGraph tree,
                    double confidence) {
      super(subject, relation, object, confidence);
      this.sourceTree = new SemanticGraph(tree);
    }

    /**
     * Create a new triple with known values for the subject, relation, and object,
     * along with their canonical spans (i.e., resolving coreference)
     * For example, "(cats, play with, yarn)"
     *
     * <p>Unlike the other constructor, the tree is stored as given, without copying it.
     *
     * @param subject The subject of this triple.
     * @param canonicalSubject The canonical mention of the subject.
     * @param relation The relation of this triple.
     * @param object The object of this triple.
     * @param canonicalObject The canonical mention of the object.
     * @param confidence The score (confidence) to attach to this triple.
     * @param tree The tree this extraction was created from.
     */
    public WithTree(List<CoreLabel> subject,
                    List<CoreLabel> canonicalSubject,
                    List<CoreLabel> relation,
                    List<CoreLabel> object,
                    List<CoreLabel> canonicalObject,
                    double confidence,
                    SemanticGraph tree) {
      super(subject, canonicalSubject, relation, object, canonicalObject, confidence);
      this.sourceTree = tree;
    }

    /**
     * Finds the head of a token span: scanning from the last token backwards, the first token
     * with an incoming edge whose governor lies outside the span. Falls back to the last token.
     * A single-token span is its own head.
     */
    private CoreLabel headOfSpan(List<CoreLabel> tokens) {
      if (tokens.size() == 1) { return tokens.get(0); }
      CoreLabel lastToken = tokens.get(tokens.size() - 1);
      // NOTE(review): the span end is the index of the last token itself; if Span ends are
      // exclusive, a governor sitting on the last token counts as "outside" -- confirm intended.
      Span span = Span.fromValues(tokens.get(0).index(), lastToken.index());
      for (int i = tokens.size() - 1; i >= 0; --i) {
        CoreLabel candidate = tokens.get(i);
        for (SemanticGraphEdge edge : sourceTree.incomingEdgeIterable(new IndexedWord(candidate))) {
          int governorIndex = edge.getGovernor().index();
          if (governorIndex < span.start() || governorIndex >= span.end()) {
            return candidate;
          }
        }
      }
      return lastToken;
    }

    /** The head of the subject of this relation triple. */
    @Override
    public CoreLabel subjectHead() {
      return headOfSpan(subject);
    }

    /** The head of the object of this relation triple. */
    @Override
    public CoreLabel objectHead() {
      return headOfSpan(object);
    }


    /**
     * The head of the relation of this relation triple.
     * Starts from the head guessed by the superclass and repeatedly climbs to a governor that
     * is itself part of the relation, until no such governor is found (or 100 iterations pass).
     */
    @Override
    public CoreLabel relationHead() {
      if (relation.size() == 1) { return relation.get(0); }
      CoreLabel guess = null;
      CoreLabel newGuess = super.relationHead();
      int iters = 0;  // make sure we don't infinite loop...
      while (guess != newGuess && iters < 100) {
        guess = newGuess;
        iters += 1;
        for (SemanticGraphEdge edge : sourceTree.incomingEdgeIterable(new IndexedWord(guess))) {
          // find a node in the relation list which is a governor of the candidate root
          Optional<CoreLabel> governor = relation.stream().filter(x -> x.index() == edge.getGovernor().index()).findFirst();
          // if we found one, this is the new root. The for loop continues
          if (governor.isPresent()) {
            newGuess = governor.get();
          }
        }
      }
      // Return
      if (iters >= 100) {
        err("Likely cycle in relation tree");
      }
      return guess;
    }

    /** Returns the tree this triple was extracted from; always present for this class. */
    @Override
    public Optional<SemanticGraph> asDependencyTree() {
      return Optional.of(sourceTree);
    }
  }


  /**
   * A {@link edu.stanford.nlp.ie.util.RelationTriple}, but with both the tree and the entity
   * links saved as well.
   */
  public static class WithLink extends WithTree {
    /** The canonical entity link of the subject */
    public final Optional<String> subjectLink;
    /** The canonical entity link of the object */
    public final Optional<String> objectLink;

    /**
     * Create a new relation triple.
     *
     * @param subject The subject of this triple.
     * @param canonicalSubject The canonical mention of the subject.
     * @param relation The relation of this triple.
     * @param object The object of this triple.
     * @param canonicalObject The canonical mention of the object.
     * @param confidence The score (confidence) to attach to this triple.
     * @param tree The tree this extraction was created from; stored as given.
     * @param subjectLink The entity link of the subject; may be null if there is none.
     * @param objectLink The entity link of the object; may be null if there is none.
     */
    public WithLink(List<CoreLabel> subject, List<CoreLabel> canonicalSubject, List<CoreLabel> relation,
                    List<CoreLabel> object, List<CoreLabel> canonicalObject, double confidence,
                    SemanticGraph tree,
                    String subjectLink,
                    String objectLink
                    ) {
      super(subject, canonicalSubject, relation, object, canonicalObject, confidence, tree);
      this.subjectLink = Optional.ofNullable(subjectLink);
      this.objectLink = Optional.ofNullable(objectLink);
    }

    /** The saved entity link of the subject, or the superclass's link if none was given. */
    @Override
    public String subjectLink() {
      return subjectLink.orElseGet(super::subjectLink);
    }

    /** The saved entity link of the object, or the superclass's link if none was given. */
    @Override
    public String objectLink() {
      return objectLink.orElseGet(super::objectLink);
    }
  }

}