edu.stanford.nlp.ling.IndexedWord Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.ling;

import java.util.Set;

import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.TypesafeMap;

/**
 * This class is mainly for use with RTE in terms of the methods it provides,
 * but on a more general level, it provides a {@link CoreLabel} that uses its
 * DocIDAnnotation, SentenceIndexAnnotation, and IndexAnnotation to implement
 * Comparable/compareTo, hashCode, and equals.  This means no other annotations,
 * including the identity of the word, are taken into account when using these
 * methods.
 * 

 * The actual implementation is to wrap a CoreLabel.
 * This avoids breaking the equals() and
 * hashCode() contract and also avoids expensive copying
 * when used to represent the same data as the original
 * CoreLabel.
 *
 * @author rafferty
 *
 */
public class IndexedWord implements AbstractCoreLabel, Comparable {

  private static final long serialVersionUID = 3739633991145239829L;

  /**
   * The identifier that points to no word.
   */
  public static final IndexedWord NO_WORD = new IndexedWord(null, -1, -1);

  private final CoreLabel label;

  /**
   * Default constructor; uses {@link CoreLabel} default constructor
   */
  public IndexedWord() {
    label = new CoreLabel();
  }


  /**
   * Copy Constructor - relies on {@link CoreLabel} copy constructor
   * It will set the value, and if the word is not set otherwise, set
   * the word to the value.
   *
   * @param w A Label to initialize this IndexedWord from
   */
  public IndexedWord(Label w) {
    if (w instanceof CoreLabel) {
      this.label = (CoreLabel) w;
    } else {
      label = new CoreLabel(w);
      if (label.word() == null) {
        label.setWord(label.value());
      }
    }
  }

  /**
   * Construct an IndexedWord from a CoreLabel just as for a CoreMap.
   * Implementation note: this is a the same as the constructor
   * that takes a CoreMap, but is needed to ensure unique most specific
   * type inference for selecting a constructor at compile-time.
   *
   * @param w A Label to initialize this IndexedWord from
   */
  public IndexedWord(CoreLabel w) {
    label = w;
  }

  /**
   * Constructor for setting docID, sentenceIndex, and
   * index without any other annotations.
   *
   * @param docID The document ID (arbitrary string)
   * @param sentenceIndex The sentence number in the document (normally 0-based)
   * @param index The index of the word in the sentence (normally 0-based)
   */
  public IndexedWord(String docID, int sentenceIndex, int index) {
    label = new CoreLabel();
    label.set(CoreAnnotations.DocIDAnnotation.class, docID);
    label.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex);
    label.set(CoreAnnotations.IndexAnnotation.class, index);
  }

  public IndexedWord makeCopy(int count) {
    CoreLabel labelCopy = new CoreLabel(label);
    IndexedWord copy = new IndexedWord(labelCopy);
    copy.setCopyCount(count);
    return copy;
  }

  /**
   * TODO: would be nice to get rid of this.  Only used in two places in RTE.  
   */
  public CoreLabel backingLabel() { return label; }

  public  VALUE get(Class> key) {
    return label.get(key);
  }

  public  boolean has(Class> key) {
    return label.has(key);
  }

  public  boolean containsKey(Class> key) {
    return label.containsKey(key);
  }

  public  VALUE set(Class> key, VALUE value) {
    return label.set(key, value);
  }

  public > String getString(Class key) {
    return label.getString(key);
  }

  public  VALUE remove(Class> key) {
    return label.remove(key);
  }

  public Set> keySet() {
    return label.keySet();
  }

  public int size() {
    return label.size();
  }

  @Override
  public String value() {
    return label.value();
  }

  @Override
  public void setValue(String value) {
    label.setValue(value);
  }

  @Override
  public String tag() {
    return label.tag();
  }

  @Override
  public void setTag(String tag) {
    label.setTag(tag);
  }

  @Override
  public String word() {
    return label.word();
  }

  @Override
  public void setWord(String word) {
    label.setWord(word);
  }

  @Override
  public String lemma() {
    return label.lemma();
  }

  @Override
  public void setLemma(String lemma) {
    label.setLemma(lemma);
  }

  @Override
  public String ner() {
    return label.ner();
  }

  @Override
  public void setNER(String ner) {
    label.setNER(ner);
  }

  @Override
  public String docID() {
    return label.docID();
  }

  @Override
  public void setDocID(String docID) {
    label.setDocID(docID);
  }

  @Override
  public int index() {
    return label.index();
  }

  @Override
  public void setIndex(int index) {
    label.setIndex(index);
  }

  @Override
  public int sentIndex() {
    return label.sentIndex();
  }

  @Override
  public void setSentIndex(int sentIndex) {
    label.setSentIndex(sentIndex);
  }

  @Override
  public String originalText() {
    return label.originalText();
  }

  @Override
  public void setOriginalText(String originalText) {
    label.setOriginalText(originalText);
  }

  @Override
  public int beginPosition() {
    return label.beginPosition();
  }

  @Override
  public int endPosition() {
    return label.endPosition();
  }

  @Override
  public void setBeginPosition(int beginPos) {
    label.setBeginPosition(beginPos);
  }

  @Override
  public void setEndPosition(int endPos) {
    label.setEndPosition(endPos);
  }

  public int copyCount() {
    return label.copyCount();
  }

  public void setCopyCount(int count) {
    label.setCopyCount(count);
  }

  public String toPrimes() {
    int copy = label.copyCount();
    return StringUtils.repeat('\'', copy);    
  }

  /**
   * This .equals is dependent only on docID, sentenceIndex, and index.
   * It doesn't consider the actual word value, but assumes that it is
   * validly represented by token position.
   * All IndexedWords that lack these fields will be regarded as equal.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof IndexedWord)) return false;

    //now compare on appropriate keys
    final IndexedWord otherWord = (IndexedWord) o;
    Integer myInd = get(CoreAnnotations.IndexAnnotation.class);
    Integer otherInd = otherWord.get(CoreAnnotations.IndexAnnotation.class);
    if (myInd == null) {
      if (otherInd != null)
      return false;
    } else if ( ! myInd.equals(otherInd)) {
      return false;
    }
    Integer mySentInd = get(CoreAnnotations.SentenceIndexAnnotation.class);
    Integer otherSentInd = otherWord.get(CoreAnnotations.SentenceIndexAnnotation.class);
    if (mySentInd == null) {
      if (otherSentInd != null)
      return false;
    } else if ( ! mySentInd.equals(otherSentInd)) {
      return false;
    }
    String myDocID = getString(CoreAnnotations.DocIDAnnotation.class);
    String otherDocID = otherWord.getString(CoreAnnotations.DocIDAnnotation.class);
    if (myDocID == null) {
      if (otherDocID != null)
      return false;
    } else if ( ! myDocID.equals(otherDocID)) {
      return false;
    }
    if (copyCount() != otherWord.copyCount()) {
      return false;
    }
    return true;
  }


  /**
   * This hashCode uses only the docID, sentenceIndex, and index.
   * See compareTo for more info.
   */
  @Override
  public int hashCode() {
    boolean sensible = false;
    int result = 0;
    if (get(CoreAnnotations.DocIDAnnotation.class) != null) {
      result = get(CoreAnnotations.DocIDAnnotation.class).hashCode();
      sensible = true;
    }
    if (has(CoreAnnotations.SentenceIndexAnnotation.class)) {
      result = 29 * result + get(CoreAnnotations.SentenceIndexAnnotation.class).hashCode();
      sensible = true;
    }
    if (has(CoreAnnotations.IndexAnnotation.class)) {
      result = 29 * result + get(CoreAnnotations.IndexAnnotation.class).hashCode();
      sensible = true;
    }
    if ( ! sensible) {
      System.err.println("WARNING!!!  You have hashed an IndexedWord with no docID, sentIndex or wordIndex. You will almost certainly lose");
    }
    return result;
  }

  /**
   * NOTE: This compareTo is based on and made to be compatible with the one
   * from IndexedFeatureLabel.  You must have a DocIDAnnotation,
   * SentenceIndexAnnotation, and IndexAnnotation for this to make sense and
   * be guaranteed to work properly. Currently, it won't error out and will
   * try to return something sensible if these are not defined, but that really
   * isn't proper usage!
   *
   * This compareTo method is based not by value elements like the word(),
   *  but on passage position. It puts NO_WORD elements first, and then orders
   *  by document, sentence, and word index.  If these do not differ, it
   *  returns equal.
   *
   *  @param w The IndexedWord to compare with
   *  @return Whether this is less than w or not in the ordering
   */
  public int compareTo(IndexedWord w) {
    if (this.equals(IndexedWord.NO_WORD)) {
      if (w.equals(IndexedWord.NO_WORD)) {
        return 0;
      } else {
        return -1;
      }
    }
    if (w.equals(IndexedWord.NO_WORD)) {
      return 1;
    }

    String docID = this.getString(CoreAnnotations.DocIDAnnotation.class);
    int docComp = docID.compareTo(w.getString(CoreAnnotations.DocIDAnnotation.class));
    if (docComp != 0) return docComp;

    int sentComp = sentIndex() - w.sentIndex();
    if (sentComp != 0) return sentComp;

    int indexComp = index() - w.index();
    if (indexComp != 0) return indexComp;

    return copyCount() - w.copyCount();
  }

  /**
   * Returns the value-tag of this label.
   */
  @Override
  public String toString() {
    return label.toString(CoreLabel.OutputFormat.VALUE_TAG);
  }

  public String toString(CoreLabel.OutputFormat format) {
    return label.toString(format);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setFromString(String labelStr) {
    throw new UnsupportedOperationException("Cannot set from string");
  }


  public static LabelFactory factory() {
    return new LabelFactory() {

      public Label newLabel(String labelStr) {
        CoreLabel label = new CoreLabel();
        label.setValue(labelStr);
        return new IndexedWord(label);
      }

      public Label newLabel(String labelStr, int options) {
        return newLabel(labelStr);
      }

      public Label newLabel(Label oldLabel) {
        return new IndexedWord(oldLabel);
      }

      public Label newLabelFromString(String encodedLabelStr) {
        throw new UnsupportedOperationException("This code branch left blank" +
        " because we do not understand what this method should do.");
      }
    };
  }
  /**
   * {@inheritDoc}
   */
  @Override
  public LabelFactory labelFactory() {
    return IndexedWord.factory();
  }
}