opennlp.tools.coref.mention.MentionContext Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of opennlp-tools Show documentation
There is a newer version: 2.5.3
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreemnets.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.coref.mention;

import java.util.List;

import opennlp.tools.coref.sim.Context;
import opennlp.tools.coref.sim.GenderEnum;
import opennlp.tools.coref.sim.NumberEnum;
import opennlp.tools.util.Span;

/** 
 * Data structure representation of a mention with additional contextual information. 
 * The contextual information is used in performing coreference resolution.
 */
public class MentionContext extends Context {
 
  /** 
   * The index of first token which is not part of a descriptor.  This is 0 if no descriptor is present. 
   */
  private int nonDescriptorStart;
  
  /** 
   * The Parse of the head constituent of this mention.
   */
  private Parse head;
  
  /** 
   * Sentence-token-based span whose end is the last token of the mention.
   */
  private Span indexSpan;
  
  /** 
   * Position of the NP in the sentence.
   */
  private int nounLocation;

  /** 
   * Position of the NP in the document.
   */
  private  int nounNumber;
  
  /** 
   * Number of noun phrases in the sentence which contains this mention.
   */
  private int maxNounLocation;
  
  /** 
   * Index of the sentence in the document which contains this mention. 
   */
  private int sentenceNumber;
  
  /** 
   * The token preceding this mention's maximal noun phrase.
   */
  private Parse prevToken;
  
  /** 
   * The token following this mention's maximal noun phrase.
   */
  private Parse nextToken;
  
  /** 
   * The token following this mention's basal noun phrase.
   */
  private Parse basalNextToken;

  /** 
   * The parse of the mention's head word. 
   */
  private Parse headToken;
  
  /** 
   * The parse of the first word in the mention. 
   */
  private Parse firstToken;
  
  /** 
   * The text of the first word in the mention.
   */
  private String firstTokenText;
  
  /** 
   * The pos-tag of the first word in the mention. 
   */
  private String firstTokenTag;
  
  /** 
   * The gender assigned to this mention. 
   */
  private GenderEnum gender;
  
  /** 
   * The probability associated with the gender assignment. 
   */
  private double genderProb;
  
  /** 
   * The number assigned to this mention.
   */
  private NumberEnum number;
  
  /** 
   * The probability associated with the number assignment. 
   */
  private double numberProb;

  public MentionContext(Span span, Span headSpan, int entityId, Parse parse, String extentType, String nameType, int mentionIndex, int mentionsInSentence, int mentionIndexInDocument, int sentenceIndex, HeadFinder headFinder) {
    super(span,headSpan,entityId,parse,extentType,nameType,headFinder);
    nounLocation = mentionIndex;
    maxNounLocation = mentionsInSentence;
    nounNumber = mentionIndexInDocument;
    sentenceNumber = sentenceIndex;
    indexSpan = parse.getSpan();
    prevToken = parse.getPreviousToken();
    nextToken = parse.getNextToken();
    head = headFinder.getLastHead(parse);
    List headTokens = head.getTokens();
    tokens = headTokens.toArray(new Parse[headTokens.size()]);
    basalNextToken = head.getNextToken();
    //System.err.println("MentionContext.init: "+ent+" "+ent.getEntityId()+" head="+head);
    nonDescriptorStart = 0;
    initHeads(headFinder.getHeadIndex(head));
    gender = GenderEnum.UNKNOWN;
    this.genderProb = 0d;
    number = NumberEnum.UNKNOWN;
    this.numberProb = 0d;
  }
  /**
   * Constructs context information for the specified mention.
   * 
   * @param mention The mention object on which this object is based.
   * @param mentionIndexInSentence The mention's position in the sentence.
   * @param mentionsInSentence The number of mentions in the sentence.
   * @param mentionIndexInDocument The index of this mention with respect to the document.
   * @param sentenceIndex The index of the sentence which contains this mention.
   * @param headFinder An object which provides head information.
   */
  public MentionContext(Mention mention, int mentionIndexInSentence, int mentionsInSentence, int mentionIndexInDocument, int sentenceIndex, HeadFinder headFinder) {
    this(mention.getSpan(),mention.getHeadSpan(),mention.getId(),mention.getParse(),mention.type,mention.nameType, mentionIndexInSentence,mentionsInSentence,mentionIndexInDocument,sentenceIndex,headFinder);
  }


  /**
   * Constructs context information for the specified mention.
   * 
   * @param mentionParse Mention parse structure for which context is to be constructed.
   * @param mentionIndex mention position in sentence.
   * @param mentionsInSentence Number of mentions in the sentence.
   * @param mentionsInDocument Number of mentions in the document.
   * @param sentenceIndex Sentence number for this mention.
   * @param nameType The named-entity type for this mention.
   * @param headFinder Object which provides head information.
   */
  /*
  public MentionContext(Parse mentionParse, int mentionIndex, int mentionsInSentence, int mentionsInDocument, int sentenceIndex, String nameType, HeadFinder headFinder) {
    nounLocation = mentionIndex;
    maxNounLocation = mentionsInDocument;
    sentenceNumber = sentenceIndex;
    parse = mentionParse;
    indexSpan = mentionParse.getSpan();
    prevToken = mentionParse.getPreviousToken();
    nextToken = mentionParse.getNextToken();
    head = headFinder.getLastHead(mentionParse);
    List headTokens = head.getTokens();
    tokens = (Parse[]) headTokens.toArray(new Parse[headTokens.size()]);
    basalNextToken = head.getNextToken();
    //System.err.println("MentionContext.init: "+ent+" "+ent.getEntityId()+" head="+head);
    indexHeadSpan = head.getSpan();
    nonDescriptorStart = 0;
    initHeads(headFinder.getHeadIndex(head));
    this.neType= nameType;
    if (getHeadTokenTag().startsWith("NN") && !getHeadTokenTag().startsWith("NNP")) {
      //if (headTokenTag.startsWith("NNP") && neType != null) {
      this.synsets = getSynsetSet(this);
    }
    else {
      this.synsets=Collections.EMPTY_SET;
    }
    gender = GenderEnum.UNKNOWN;
    this.genderProb = 0d;
    number = NumberEnum.UNKNOWN;
    this.numberProb = 0d;
  }
  */

  private void initHeads(int headIndex) {
    this.headTokenIndex=headIndex;
    this.headToken = (Parse) tokens[getHeadTokenIndex()];
    this.headTokenText = headToken.toString();
    this.headTokenTag=headToken.getSyntacticType();
    this.firstToken = (Parse) tokens[0];
    this.firstTokenTag = firstToken.getSyntacticType();
    this.firstTokenText=firstToken.toString();
  }

  /**
   * Returns the parse of the head token for this mention.
   * 
   * @return the parse of the head token for this mention.
   */
  public Parse getHeadTokenParse() {
    return headToken;
  }

  public String getHeadText() {
    StringBuffer headText = new StringBuffer();
    for (int hsi = 0; hsi < tokens.length; hsi++) {
      headText.append(" ").append(tokens[hsi].toString());
    }
    return headText.toString().substring(1);
  }

  public Parse getHead() {
    return head;
  }

  public int getNonDescriptorStart() {
    return this.nonDescriptorStart;
  }

  /**
   * Returns a sentence-based token span for this mention.  If this mention consist
   * of the third, fourth, and fifth token, then this span will be 2..4.
   * 
   * @return a sentence-based token span for this mention.
   */
  public Span getIndexSpan() {
    return indexSpan;
  }

  /**
   * Returns the index of the noun phrase for this mention in a sentence.
   * 
   * @return the index of the noun phrase for this mention in a sentence.
   */
  public int getNounPhraseSentenceIndex() {
    return nounLocation;
  }

  /**
   * Returns the index of the noun phrase for this mention in a document.
   * 
   * @return the index of the noun phrase for this mention in a document.
   */
  public int getNounPhraseDocumentIndex() {
    return nounNumber;
  }

  /**
   * Returns the index of the last noun phrase in the sentence containing this mention.
   * This is one less than the number of noun phrases in the sentence which contains this mention.
   * 
   * @return the index of the last noun phrase in the sentence containing this mention.
   */
  public int getMaxNounPhraseSentenceIndex() {
    return maxNounLocation;
  }

  public Parse getNextTokenBasal() {
    return basalNextToken;
  }

  public Parse getPreviousToken() {
    return prevToken;
  }

  public Parse getNextToken() {
    return nextToken;
  }

  /**
   * Returns the index of the sentence which contains this mention.
   * 
   * @return the index of the sentence which contains this mention.
   */
  public int getSentenceNumber() {
    return sentenceNumber;
  }

  /** 
   * Returns the parse for the first token in this mention.
   * 
   * @return The parse for the first token in this mention.
   */
  public Parse getFirstToken() {
    return firstToken;
  }

  /** 
   * Returns the text for the first token of the mention.
   * 
   * @return The text for the first token of the mention.
   */
  public String getFirstTokenText() {
    return firstTokenText;
  }

  /**
   * Returns the pos-tag of the first token of this mention.
   * 
   * @return the pos-tag of the first token of this mention.
   */
  public String getFirstTokenTag() {
    return firstTokenTag;
  }

  /**
   * Returns the parses for the tokens which are contained in this mention.
   * 
   * @return An array of parses, in order, for each token contained in this mention.
   */
  public Parse[] getTokenParses() {
    return (Parse[]) tokens;
  }

  /**
   * Returns the text of this mention.
   * 
   * @return A space-delimited string of the tokens of this mention.
   */
  public String toText() {
    return parse.toString();
  }

  /*
  private static String[] getLemmas(MentionContext xec) {
    //TODO: Try multi-word lemmas first.
    String word = xec.getHeadTokenText();
    return DictionaryFactory.getDictionary().getLemmas(word,"NN");
  }

  private static Set getSynsetSet(MentionContext xec) {
    //System.err.println("getting synsets for mention:"+xec.toText());
    Set synsetSet = new HashSet();
    String[] lemmas = getLemmas(xec);
    for (int li = 0; li < lemmas.length; li++) {
      String[] synsets = DictionaryFactory.getDictionary().getParentSenseKeys(lemmas[li],"NN",0);
      for (int si=0,sn=synsets.length;si