All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.ling.IndexedWord Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.ling;

import java.util.Set;

import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.TypesafeMap;

/**
 * This class is mainly for use with RTE in terms of the methods it provides,
 * but on a more general level, it provides a {@link CoreLabel} that uses its
 * DocIDAnnotation, SentenceIndexAnnotation, and IndexAnnotation to implement
 * Comparable/compareTo, hashCode, and equals.  This means no other annotations,
 * including the identity of the word, are taken into account when using these
 * methods.
 * 
* The actual implementation is to wrap a CoreLabel. * This avoids breaking the equals() and * hashCode() contract and also avoids expensive copying * when used to represent the same data as the original * CoreLabel. * * @author rafferty * */ public class IndexedWord implements AbstractCoreLabel, Comparable { private static final long serialVersionUID = 3739633991145239829L; /** * The identifier that points to no word. */ public static final IndexedWord NO_WORD = new IndexedWord(null, -1, -1); private final CoreLabel label; /** * Default constructor; uses {@link CoreLabel} default constructor */ public IndexedWord() { label = new CoreLabel(); } /** * Copy Constructor - relies on {@link CoreLabel} copy constructor * It will set the value, and if the word is not set otherwise, set * the word to the value. * * @param w A Label to initialize this IndexedWord from */ public IndexedWord(Label w) { if (w instanceof CoreLabel) { this.label = (CoreLabel) w; } else { label = new CoreLabel(w); if (label.word() == null) { label.setWord(label.value()); } } } /** * Construct an IndexedWord from a CoreLabel just as for a CoreMap. * Implementation note: this is a the same as the constructor * that takes a CoreMap, but is needed to ensure unique most specific * type inference for selecting a constructor at compile-time. * * @param w A Label to initialize this IndexedWord from */ public IndexedWord(CoreLabel w) { label = w; } /** * Constructor for setting docID, sentenceIndex, and * index without any other annotations. 
* * @param docID The document ID (arbitrary string) * @param sentenceIndex The sentence number in the document (normally 0-based) * @param index The index of the word in the sentence (normally 0-based) */ public IndexedWord(String docID, int sentenceIndex, int index) { label = new CoreLabel(); label.set(CoreAnnotations.DocIDAnnotation.class, docID); label.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex); label.set(CoreAnnotations.IndexAnnotation.class, index); } public IndexedWord makeCopy(int count) { CoreLabel labelCopy = new CoreLabel(label); IndexedWord copy = new IndexedWord(labelCopy); copy.setCopyCount(count); return copy; } /** * TODO: would be nice to get rid of this. Only used in two places in RTE. */ public CoreLabel backingLabel() { return label; } public VALUE get(Class> key) { return label.get(key); } public boolean has(Class> key) { return label.has(key); } public boolean containsKey(Class> key) { return label.containsKey(key); } public VALUE set(Class> key, VALUE value) { return label.set(key, value); } public > String getString(Class key) { return label.getString(key); } public VALUE remove(Class> key) { return label.remove(key); } public Set> keySet() { return label.keySet(); } public int size() { return label.size(); } @Override public String value() { return label.value(); } @Override public void setValue(String value) { label.setValue(value); } @Override public String tag() { return label.tag(); } @Override public void setTag(String tag) { label.setTag(tag); } @Override public String word() { return label.word(); } @Override public void setWord(String word) { label.setWord(word); } @Override public String lemma() { return label.lemma(); } @Override public void setLemma(String lemma) { label.setLemma(lemma); } @Override public String ner() { return label.ner(); } @Override public void setNER(String ner) { label.setNER(ner); } @Override public String docID() { return label.docID(); } @Override public void setDocID(String docID) { 
label.setDocID(docID); } @Override public int index() { return label.index(); } @Override public void setIndex(int index) { label.setIndex(index); } @Override public int sentIndex() { return label.sentIndex(); } @Override public void setSentIndex(int sentIndex) { label.setSentIndex(sentIndex); } @Override public String originalText() { return label.originalText(); } @Override public void setOriginalText(String originalText) { label.setOriginalText(originalText); } @Override public int beginPosition() { return label.beginPosition(); } @Override public int endPosition() { return label.endPosition(); } @Override public void setBeginPosition(int beginPos) { label.setBeginPosition(beginPos); } @Override public void setEndPosition(int endPos) { label.setEndPosition(endPos); } public int copyCount() { return label.copyCount(); } public void setCopyCount(int count) { label.setCopyCount(count); } public String toPrimes() { int copy = label.copyCount(); return StringUtils.repeat('\'', copy); } /** * This .equals is dependent only on docID, sentenceIndex, and index. * It doesn't consider the actual word value, but assumes that it is * validly represented by token position. * All IndexedWords that lack these fields will be regarded as equal. */ @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof IndexedWord)) return false; //now compare on appropriate keys final IndexedWord otherWord = (IndexedWord) o; Integer myInd = get(CoreAnnotations.IndexAnnotation.class); Integer otherInd = otherWord.get(CoreAnnotations.IndexAnnotation.class); if (myInd == null) { if (otherInd != null) return false; } else if ( ! myInd.equals(otherInd)) { return false; } Integer mySentInd = get(CoreAnnotations.SentenceIndexAnnotation.class); Integer otherSentInd = otherWord.get(CoreAnnotations.SentenceIndexAnnotation.class); if (mySentInd == null) { if (otherSentInd != null) return false; } else if ( ! 
mySentInd.equals(otherSentInd)) { return false; } String myDocID = getString(CoreAnnotations.DocIDAnnotation.class); String otherDocID = otherWord.getString(CoreAnnotations.DocIDAnnotation.class); if (myDocID == null) { if (otherDocID != null) return false; } else if ( ! myDocID.equals(otherDocID)) { return false; } if (copyCount() != otherWord.copyCount()) { return false; } return true; } /** * This hashCode uses only the docID, sentenceIndex, and index. * See compareTo for more info. */ @Override public int hashCode() { boolean sensible = false; int result = 0; if (get(CoreAnnotations.DocIDAnnotation.class) != null) { result = get(CoreAnnotations.DocIDAnnotation.class).hashCode(); sensible = true; } if (has(CoreAnnotations.SentenceIndexAnnotation.class)) { result = 29 * result + get(CoreAnnotations.SentenceIndexAnnotation.class).hashCode(); sensible = true; } if (has(CoreAnnotations.IndexAnnotation.class)) { result = 29 * result + get(CoreAnnotations.IndexAnnotation.class).hashCode(); sensible = true; } if ( ! sensible) { System.err.println("WARNING!!! You have hashed an IndexedWord with no docID, sentIndex or wordIndex. You will almost certainly lose"); } return result; } /** * NOTE: This compareTo is based on and made to be compatible with the one * from IndexedFeatureLabel. You must have a DocIDAnnotation, * SentenceIndexAnnotation, and IndexAnnotation for this to make sense and * be guaranteed to work properly. Currently, it won't error out and will * try to return something sensible if these are not defined, but that really * isn't proper usage! * * This compareTo method is based not by value elements like the word(), * but on passage position. It puts NO_WORD elements first, and then orders * by document, sentence, and word index. If these do not differ, it * returns equal. 
* * @param w The IndexedWord to compare with * @return Whether this is less than w or not in the ordering */ public int compareTo(IndexedWord w) { if (this.equals(IndexedWord.NO_WORD)) { if (w.equals(IndexedWord.NO_WORD)) { return 0; } else { return -1; } } if (w.equals(IndexedWord.NO_WORD)) { return 1; } String docID = this.getString(CoreAnnotations.DocIDAnnotation.class); int docComp = docID.compareTo(w.getString(CoreAnnotations.DocIDAnnotation.class)); if (docComp != 0) return docComp; int sentComp = sentIndex() - w.sentIndex(); if (sentComp != 0) return sentComp; int indexComp = index() - w.index(); if (indexComp != 0) return indexComp; return copyCount() - w.copyCount(); } /** * Returns the value-tag of this label. */ @Override public String toString() { return label.toString(CoreLabel.OutputFormat.VALUE_TAG); } public String toString(CoreLabel.OutputFormat format) { return label.toString(format); } /** * {@inheritDoc} */ @Override public void setFromString(String labelStr) { throw new UnsupportedOperationException("Cannot set from string"); } public static LabelFactory factory() { return new LabelFactory() { public Label newLabel(String labelStr) { CoreLabel label = new CoreLabel(); label.setValue(labelStr); return new IndexedWord(label); } public Label newLabel(String labelStr, int options) { return newLabel(labelStr); } public Label newLabel(Label oldLabel) { return new IndexedWord(oldLabel); } public Label newLabelFromString(String encodedLabelStr) { throw new UnsupportedOperationException("This code branch left blank" + " because we do not understand what this method should do."); } }; } /** * {@inheritDoc} */ @Override public LabelFactory labelFactory() { return IndexedWord.factory(); } }