All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.ie.machinereading.structure.EntityMention Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher-level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.ie.machinereading.structure;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

/**
 * Each entity mention is described by a type (possibly subtype) and a span of text
 *
 * @author Andrey Gusev
 * @author Mihai
 */
public class EntityMention extends ExtractionObject {

  private static final long serialVersionUID = -2745903102654191527L;

  /** Mention type, if available, e.g., nominal. Interned by the constructor when non-null. */
  private final String mentionType;

  /** Coreference id of this mention; stays "-1" until {@link #setCorefID(String)} is called. */
  private String corefID = "-1";

  /**
   * Offsets of the head span, e.g., "George Bush" in the extent "the president George Bush".
   * The offsets are relative to the sentence containing this mention.
   */
  private Span headTokenSpan;

  /**
   * Position of the syntactic head word of this mention, e.g., "Bush" for the head span "George Bush".
   * The offset is relative to the sentence containing this mention; -1 means "not set".
   * Note: use headTokenSpan when sequence tagging entity mentions, not this.
   *       This is meant to be used only for event/relation feature extraction!
   */
  private int syntacticHeadTokenPosition;

  /** Normalized name of the mention; null until {@link #setNormalizedName(String)} is called. */
  private String normalizedName;

  /**
   * Creates a mention with no syntactic head position (-1) and no normalized name.
   *
   * @param objectId identifier passed through to the superclass
   * @param sentence sentence containing this mention
   * @param extentSpan token span of the full extent, passed through to the superclass
   * @param headSpan token span of the head; may be replaced later via {@link #setHeadTokenSpan(Span)}
   * @param type entity type
   * @param subtype entity subtype
   * @param mentionType mention type, or null if not available; interned when non-null
   */
  public EntityMention(String objectId,
      CoreMap sentence,
      Span extentSpan,
      Span headSpan,
      String type,
      String subtype,
      String mentionType) {
    super(objectId, sentence, extentSpan, type, subtype);
    this.mentionType = (mentionType != null ? mentionType.intern() : null);
    this.headTokenSpan = headSpan;
    this.syntacticHeadTokenPosition = -1;
    this.normalizedName = null;
  }

  public String getCorefID() {
    return corefID;
  }

  public void setCorefID(String id) {
    this.corefID = id;
  }

  public String getMentionType() {
    return mentionType;
  }

  public Span getHead() {
    return headTokenSpan;
  }

  /** @return start offset of the head span; throws NullPointerException if the head span is null */
  public int getHeadTokenStart() {
    return headTokenSpan.start();
  }

  /** @return end offset of the head span; throws NullPointerException if the head span is null */
  public int getHeadTokenEnd() {
    return headTokenSpan.end();
  }

  public void setHeadTokenSpan(Span s) {
    headTokenSpan = s;
  }

  /** Sets the position of the syntactic head token (see {@link #getSyntacticHeadTokenPosition()}). */
  public void setHeadTokenPosition(int i) {
    this.syntacticHeadTokenPosition = i;
  }

  public int getSyntacticHeadTokenPosition() {
    return this.syntacticHeadTokenPosition;
  }

  /**
   * Returns the sentence token at the syntactic head position.
   * The position must have been set to a valid index first; the default of -1 is not a valid index.
   */
  public CoreLabel getSyntacticHeadToken() {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    return tokens.get(syntacticHeadTokenPosition);
  }

  /**
   * Returns the leaf of the sentence's tree annotation at the syntactic head position.
   * The position must have been set to a valid index first; the default of -1 is not a valid index.
   */
  public Tree getSyntacticHeadTree() {
    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    return tree.getLeaves().get(syntacticHeadTokenPosition);
  }

  public String getNormalizedName() {
    return normalizedName;
  }

  public void setNormalizedName(String n) {
    normalizedName = n;
  }

  /**
   * Two mentions are equal when {@link #equals(EntityMention, boolean)} holds with subtypes compared.
   */
  @Override
  public boolean equals(Object other) {
    if (!(other instanceof EntityMention)) {
      return false;
    }
    return equals((EntityMention) other, true);
  }

  /**
   * Checks whether the syntactic head of {@code otherEnt} falls inside this mention's head span
   * (start inclusive, end exclusive) and the two mentions carry the same labels.
   *
   * @param otherEnt the mention whose syntactic head position is tested
   * @param useSubType if true, subtypes must match as well as types
   */
  public boolean headIncludes(EntityMention otherEnt, boolean useSubType) {
    return otherEnt.getSyntacticHeadTokenPosition() >= getHeadTokenStart()
        && otherEnt.getSyntacticHeadTokenPosition() < getHeadTokenEnd()
        && labelEquals(otherEnt, useSubType);
  }

  /**
   * Two mentions are equal if they are over sentences with the same text, their text spans
   * match according to {@link #textEquals(EntityMention)}, and they have the same type
   * (and subtype, when requested).
   * We need this for scoring NER, and in various places in KBP.
   *
   * @param otherEnt the mention to compare against
   * @param useSubType if true, subtypes must match as well as types
   */
  public boolean equals(EntityMention otherEnt, boolean useSubType) {
    String thisText = sentence.get(CoreAnnotations.TextAnnotation.class);
    String otherText = otherEnt.sentence.get(CoreAnnotations.TextAnnotation.class);
    return thisText.equals(otherText)
        && textEquals(otherEnt)
        && labelEquals(otherEnt, useSubType);
  }

  /**
   * Compares the labels of the two mentions. A null label only matches another null label.
   *
   * @param otherEnt the mention to compare against
   * @param useSubType if true, subtypes must match as well as types
   * @return true if the types (and, when requested, the subtypes) are the same
   */
  public boolean labelEquals(EntityMention otherEnt, boolean useSubType) {
    if (!Objects.equals(type, otherEnt.type)) {
      return false;
    }
    return !useSubType || Objects.equals(subType, otherEnt.subType);
  }

  /**
   * Compares the text spans of the two entity mentions.
   * The first of the following comparisons that is defined on both mentions decides the result:
   * <ol>
   * <li>if syntactic heads are defined, the texts are similar iff they have the same syntactic head
   *     (this is necessary because in NFL we compare entities with different spans but same heads,
   *     e.g. "49ers" vs "San Francisco 49ers");</li>
   * <li>if head spans are defined, the texts are similar iff they have the same head span;</li>
   * <li>if extent spans are defined, the texts are similar iff they have the same extent span.</li>
   * </ol>
   *
   * @param otherEnt the mention to compare against
   * @return the result of the first applicable comparison, or false if none applies
   */
  public boolean textEquals(EntityMention otherEnt) {
    if (syntacticHeadTokenPosition != -1 && otherEnt.syntacticHeadTokenPosition != -1) {
      return syntacticHeadTokenPosition == otherEnt.syntacticHeadTokenPosition;
    }

    if (headTokenSpan != null && otherEnt.headTokenSpan != null) {
      return headTokenSpan.equals(otherEnt.headTokenSpan);
    }

    if (extentTokenSpan != null && otherEnt.extentTokenSpan != null) {
      return extentTokenSpan.equals(otherEnt.extentTokenSpan);
    }

    // NOTE(review): the previous code compared getExtentString() of both mentions here but
    // returned false whatever the outcome, so the comparison could not affect the returned value
    // and has been dropped. Confirm that "not comparable" should indeed mean "not equal".
    return false;
  }

  /**
   * Get the text value of this entity: the words of the head span tokens joined by single spaces.
   * The headTokenSpan MUST be set before calling this method!
   */
  public String getValue() {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    StringBuilder sb = new StringBuilder();
    for (int i = headTokenSpan.start(); i < headTokenSpan.end(); i++) {
      // we are not guaranteed to have CharacterOffsets, so the original spacing cannot be
      // reconstructed; separate tokens with a single space instead
      if (i > headTokenSpan.start()) {
        sb.append(" ");
      }
      sb.append(tokens.get(i).word());
    }
    return sb.toString();
  }

  @Override
  public String toString() {
    return "EntityMention [type=" + type
        + (subType != null ? ", subType=" + subType : "")
        + (mentionType != null ? ", mentionType=" + mentionType : "")
        + (objectId != null ? ", objectId=" + objectId : "")
        + (headTokenSpan != null ? ", hstart=" + headTokenSpan.start() + ", hend=" + headTokenSpan.end() : "")
        + (extentTokenSpan != null ? ", estart=" + extentTokenSpan.start() + ", eend=" + extentTokenSpan.end() : "")
        + (syntacticHeadTokenPosition >= 0 ? ", headPosition=" + syntacticHeadTokenPosition : "")
        + (headTokenSpan != null ? ", value=\"" + getValue() + "\"" : "")
        + (normalizedName != null ? ", normalizedName=\"" + normalizedName + "\"" : "")
        + ", corefID=" + corefID
        + (typeProbabilities != null ? ", probs=" + probsToString() : "")
        + "]";
  }

  /**
   * Hashes only what {@link #equals(Object)} always requires to match: the sentence text, the type
   * and the subtype. Spans, mention type and normalized name are deliberately left out, because
   * two mentions can be equal (e.g. same syntactic head) while differing in any of those, and
   * equal objects must produce equal hash codes.
   */
  @Override
  public int hashCode() {
    String sentenceText =
        (sentence != null ? sentence.get(CoreAnnotations.TextAnnotation.class) : null);
    return Objects.hash(sentenceText, type, subType);
  }

  /** Orders mentions by head span start, breaking ties by head span end. */
  static class CompByHead implements Comparator<EntityMention> {
    @Override
    public int compare(EntityMention o1, EntityMention o2) {
      int byStart = Integer.compare(o1.getHeadTokenStart(), o2.getHeadTokenStart());
      if (byStart != 0) {
        return byStart;
      }
      return Integer.compare(o1.getHeadTokenEnd(), o2.getHeadTokenEnd());
    }
  }

  /**
   * Sorts the given list in place by head span (start first, then end).
   *
   * @param mentions the mentions to sort; every element must have a non-null head span
   */
  public static void sortByHeadSpan(List<EntityMention> mentions) {
    Collections.sort(mentions, new CompByHead());
  }

  /** Last number handed out by {@link #makeUniqueId()}; guarded by the class lock. */
  private static int MENTION_COUNTER = 0;

  /**
   * Creates a new unique id for an entity mention
   * @return the new id, of the form "EntityMention-N" with N starting at 1
   */
  public static synchronized String makeUniqueId() {
    MENTION_COUNTER++;
    return "EntityMention-" + MENTION_COUNTER;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy