// edu.stanford.nlp.dcoref.CorefChain Maven / Gradle / Ivy
//
// Show more of this group Show more artifacts with this name
// Show all versions of stanford-corenlp Show documentation
// Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher-level text understanding applications.
// There is a newer version: 4.5.7
// Show newest version
//
// StanfordCoreNLP -- a suite of NLP tools
// Copyright (c) 2009-2010 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
//    Christopher Manning
//    Dept of Computer Science, Gates 1A
//    Stanford CA 94305-9010
//    USA
//

package edu.stanford.nlp.dcoref;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import edu.stanford.nlp.dcoref.Dictionaries.Animacy;
import edu.stanford.nlp.dcoref.Dictionaries.Gender;
import edu.stanford.nlp.dcoref.Dictionaries.MentionType;
import edu.stanford.nlp.dcoref.Dictionaries.Number;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.IntTuple;

/**
 * Output of (deterministic) coref system.  Each CorefChain represents a set
 * of mentions in the text which should all correspond to the same actual
 * entity.  There is a representative mention, which stores the best
 * mention of an entity, and then there is a List of all mentions
 * that are coreferent with that mention. The mentionMap maps from pairs of
 * a sentence number and a head word index to a CorefMention. The chainID is
 * an arbitrary integer for the chain number.
 *
 * @author Heeyoung Lee
 */
public class CorefChain implements Serializable {

  private final int chainID;
  private final List mentions;
  private final Map> mentionMap;

  /** The most representative mention in this cluster */
  private final CorefMention representative;

  @Override
  public boolean equals(Object aThat) {
    if (this == aThat)
      return true;
    if (!(aThat instanceof CorefChain))
      return false;
    CorefChain that = (CorefChain) aThat;
    if (chainID != that.chainID)
      return false;
    if (!mentions.equals(that.mentions))
      return false;
    if (representative == null && that.representative == null) {
      return true;
    }
    if (representative == null || that.representative == null ||
        ! representative.equals(that.representative)) {
      return false;
    }
    // mentionMap is another view of mentions, so no need to compare
    // that once we've compared mentions
    return true;
  }

  @Override
  public int hashCode() {
    return mentions.hashCode();
  }

  /** get List of CorefMentions */
  public List getMentionsInTextualOrder() { return mentions; }

  /** get CorefMentions by position (sentence number, headIndex) Can be multiple mentions sharing headword */
  public Set getMentionsWithSameHead(IntPair position) { return mentionMap.get(position); }

  /** get CorefMention by position */
  public Set getMentionsWithSameHead(int sentenceNumber, int headIndex) {
    return getMentionsWithSameHead(new IntPair(sentenceNumber, headIndex));
  }

  public Map> getMentionMap() { return mentionMap; }

  /** Return the most representative mention in the chain.
   *  Proper mention and a mention with more pre-modifiers are preferred.
   */
  public CorefMention getRepresentativeMention() { return representative; }
  public int getChainID() { return chainID; }

  /** Mention for coref output.  This is one instance of the entity
   * referred to by a given CorefChain.
   */
  public static class CorefMention implements Serializable {
    public final MentionType mentionType;
    public final Number number;
    public final Gender gender;
    public final Animacy animacy;

    /**
     * Starting word number, indexed from 1
     */
    public final int startIndex;
    /**
     * One past the end word number, indexed from 1
     */
    public final int endIndex;
    /**
     * Head word of the mention
     */
    public final int headIndex;
    public final int corefClusterID;
    public final int mentionID;
    /**
     * Sentence number in the document containing this mention,
     * indexed from 1.
     */
    public final int sentNum;
    /**
     * Position is a binary tuple of (sentence number, mention number
     * in that sentence).  This is used for indexing by mention.
     */
    public final IntTuple position;
    public final String mentionSpan;

    /** This constructor is used to recreate a CorefMention following serialization. */
    public CorefMention(MentionType mentionType,
            Number number,
            Gender gender,
            Animacy animacy,
            int startIndex,
            int endIndex,
            int headIndex,
            int corefClusterID,
            int mentionID,
            int sentNum,
            IntTuple position,
            String mentionSpan) {
      this.mentionType = mentionType;
      this.number = number;
      this.gender = gender;
      this.animacy = animacy;
      this.startIndex = startIndex;
      this.endIndex = endIndex;
      this.headIndex = headIndex;
      this.corefClusterID = corefClusterID;
      this.mentionID = mentionID;
      this.sentNum = sentNum;
      this.position = position;
      this.mentionSpan = mentionSpan;
    }

    /** This constructor builds the external CorefMention class from the internal Mention. */
    public CorefMention(Mention m, IntTuple pos){
      mentionType = m.mentionType;
      number = m.number;
      gender = m.gender;
      animacy = m.animacy;
      startIndex = m.startIndex + 1;
      endIndex = m.endIndex + 1;
      headIndex = m.headIndex + 1;
      corefClusterID = m.corefClusterID;
      sentNum = m.sentNum + 1;
      mentionID = m.mentionID;
      mentionSpan = m.spanToString();

      // index starts from 1
      position = new IntTuple(2);
      position.set(0, pos.get(0)+1);
      position.set(1, pos.get(1)+1);

      m.headWord.set(CorefCoreAnnotations.CorefClusterIdAnnotation.class, corefClusterID);
    }

    @Override
    public boolean equals(Object aThat) {
      if (this == aThat)
        return true;
      if (!(aThat instanceof CorefMention))
        return false;
      CorefMention that = (CorefMention) aThat;
      if (mentionType != that.mentionType)
        return false;
      if (number != that.number)
        return false;
      if (gender != that.gender)
        return false;
      if (animacy != that.animacy)
        return false;
      if (startIndex != that.startIndex)
        return false;
      if (endIndex != that.endIndex)
        return false;
      if (headIndex != that.headIndex)
        return false;
      if (corefClusterID != that.corefClusterID)
        return false;
      if (mentionID != that.mentionID)
        return false;
      if (sentNum != that.sentNum)
        return false;
      if (!position.equals(that.position))
        return false;
      // we ignore MentionSpan as it is constructed from the tokens
      // the mention is a span of, so if we know those spans are the
      // same, we should be able to ignore the actual text
      return true;
    }

    @Override
    public int hashCode() {
      return position.hashCode();
    }

    @Override
    public String toString() {
      return '"' + mentionSpan + "\" in sentence " + sentNum;
      //      return "(sentence:" + sentNum + ", startIndex:" + startIndex + "-endIndex:" + endIndex + ")";
    }

    private boolean moreRepresentativeThan(CorefMention m) {
      if (m==null) return true;
      if (mentionType != m.mentionType) {
        return (mentionType == MentionType.PROPER)
            || (mentionType == MentionType.NOMINAL && m.mentionType == MentionType.PRONOMINAL);
      } else {
        // First, check length
        if (headIndex - startIndex > m.headIndex - m.startIndex) return true;
        if (headIndex - startIndex < m.headIndex - m.startIndex) return false;
        if (endIndex - startIndex > m.endIndex - m.startIndex) return true;
        if (endIndex - startIndex < m.endIndex - m.startIndex) return false;
        // Now check relative position
        if (sentNum < m.sentNum) return true;
        if (sentNum > m.sentNum) return false;
        if (headIndex < m.headIndex) return true;
        if (headIndex > m.headIndex) return false;
        if (startIndex < m.startIndex) return true;
        if (startIndex > m.startIndex) return false;
        // At this point they're equal...
        return false;
      }
    }

    private static final long serialVersionUID = 3657691243504173L;

  } // end static class CorefMention


  protected static class CorefMentionComparator implements Comparator {
    @Override
    public int compare(CorefMention m1, CorefMention m2) {
      if(m1.sentNum < m2.sentNum) return -1;
      else if(m1.sentNum > m2.sentNum) return 1;
      else{
        if(m1.startIndex < m2.startIndex) return -1;
        else if(m1.startIndex > m2.startIndex) return 1;
        else {
          if(m1.endIndex > m2.endIndex) return -1;
          else if(m1.endIndex < m2.endIndex) return 1;
          else return 0;
        }
      }
    }
  }

  protected static class MentionComparator implements Comparator {
    @Override
    public int compare(Mention m1, Mention m2) {
      if(m1.sentNum < m2.sentNum) return -1;
      else if(m1.sentNum > m2.sentNum) return 1;
      else{
        if(m1.startIndex < m2.startIndex) return -1;
        else if(m1.startIndex > m2.startIndex) return 1;
        else {
          if(m1.endIndex > m2.endIndex) return -1;
          else if(m1.endIndex < m2.endIndex) return 1;
          else return 0;
        }
      }
    }
  }

  /**
   * Delete a mention from this coreference chain.
   * @param m The mention to delete.
   */
  public void deleteMention(CorefMention m) {
    this.mentions.remove(m);
    IntPair position = new IntPair(m.sentNum, m.headIndex);
    this.mentionMap.remove(position);
  }

  public CorefChain(CorefCluster c, Map positions){
    chainID = c.clusterID;
    // Collect mentions
    mentions = new ArrayList<>();
    mentionMap = Generics.newHashMap();
    CorefMention represents = null;
    for (Mention m : c.getCorefMentions()) {
      CorefMention men = new CorefMention(m, positions.get(m));
      mentions.add(men);
    }
    Collections.sort(mentions, new CorefMentionComparator());
    // Find representative mention
    for (CorefMention men : mentions) {
      IntPair position = new IntPair(men.sentNum, men.headIndex);
      if (!mentionMap.containsKey(position)) mentionMap.put(position, Generics.newHashSet());
      mentionMap.get(position).add(men);
      if (men.moreRepresentativeThan(represents)) {
        represents = men;
      }
    }
    representative = represents;
  }

  /** Constructor required by CustomAnnotationSerializer */
  public CorefChain(int cid,
                    Map> mentionMap,
                    CorefMention representative) {
    this.chainID = cid;
    this.representative = representative;
    this.mentionMap = mentionMap;
    this.mentions = new ArrayList<>();
    for (Set ms: mentionMap.values()) {
      for (CorefMention m: ms) {
        this.mentions.add(m);
      }
    }
    Collections.sort(mentions, new CorefMentionComparator());
  }

  public String toString(){
    return "CHAIN" + this.chainID + '-' + mentions;
  }

  private static final long serialVersionUID = 3657691243506528L;

}