All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.ling.WordLemmaTag Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.ling;

import java.util.Objects;

import edu.stanford.nlp.process.Morphology;

/**
 * A WordLemmaTag corresponds to a pair of a tagged (e.g., for part of speech)
 * word and its lemma. WordLemmaTag is implemented with String-valued word,
 * lemma and tag.
 * It implements the Label interface; the {@code value()} method for that
 * interface corresponds to the word of the WordLemmaTag.
 * 

* The equality relation for WordLemmaTag is defined as identity of * word, lemma and tag. * * @author Marie-Catherine de Marneffe */ public class WordLemmaTag implements Label, Comparable, HasWord, HasTag { private String word; private String lemma; private String tag; private static final String DIVIDER = "/"; public WordLemmaTag(String word) { this.word = word; this.lemma = null; setTag(null); } public WordLemmaTag(Label word) { this(word.value()); } public WordLemmaTag() { } /** * Create a new {@code WordLemmaTag}. * * @param word This word is set as the word of this Label * @param tag The {@code value()} of this Label is set as the * tag of this Label */ public WordLemmaTag(String word, String tag) { WordTag wT = new WordTag(word, tag); this.word = word; this.lemma = Morphology.stemStatic(wT).word(); setTag(tag); } /** * Create a new {@code WordLemmaTag}. * * @param word This word is passed to the supertype constructor * @param lemma The lemma is set as the lemma of this Label * @param tag The {@code value()} of this Label is set as the * tag of this Label */ public WordLemmaTag(String word, String lemma, String tag) { this(word); this.lemma = lemma; setTag(tag); } /** * Create a new {@code WordLemmaTag} from a Label. The value of * the Label corresponds to the word of the WordLemmaTag. * * @param word This word is passed to the supertype constructor * @param tag The {@code value()} of this Label is set as the * tag of this Label */ public WordLemmaTag(Label word, Label tag) { this(word); WordTag wT = new WordTag(word, tag); this.lemma = Morphology.stemStatic(wT).word(); setTag(tag.value()); } /** * Return a String representation of just the "main" value of this Label. * * @return the "value" of the Label */ @Override public String value() { return word; } @Override public String word() { return value(); } /** * Set the value for the Label. 
* * @param value the value for the Label */ @Override public void setValue(String value) { word = value; } @Override public void setWord(String word) { setValue(word); } public void setLemma(String lemma) { this.lemma = lemma; } /** * Set the tag for the Label. * * @param tag the value for the Label */ @Override public final void setTag(String tag) { this.tag = tag; } @Override public String tag() { return tag; } public String lemma() { return lemma; } /** * Return a String representation of the Label. For a multipart Label, * this will return all parts. * * @return a text representation of the full label contents: word/lemma/tag */ @Override public String toString() { return toString(DIVIDER); } public String toString(String divider) { return word() + divider + lemma + divider + tag; } /** * The String is divided according to the divider character (usually, "/"). * We assume that we can always just divide on the rightmost divider character, * rather than trying to parse up escape sequences. If the divider character isn't found * in the word, then the whole string becomes the word, and lemma and tag * are {@code null}. * We assume that if only one divider character is found, word and tag are present in * the String, and lemma will be computed. 
* * @param labelStr The word that will go into the {@code WordLemmaTag} */ @Override public void setFromString(String labelStr) { setFromString(labelStr, DIVIDER); } public void setFromString(String labelStr, String divider) { int first = labelStr.indexOf(divider); int second = labelStr.lastIndexOf(divider); if (first == second) { setWord(labelStr.substring(0, first)); setTag(labelStr.substring(first + 1)); setLemma(Morphology.lemmaStatic(labelStr.substring(0, first), labelStr.substring(first + 1))); } else if (first >= 0) { setWord(labelStr.substring(0, first)); setLemma(labelStr.substring(first + 1, second)); setTag(labelStr.substring(second + 1)); } else { setWord(labelStr); setLemma(null); setTag(null); } } /** * Equality is satisfied only if the compared object is a WordLemmaTag * and has String-equal word, lemma and tag fields. */ @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof WordLemmaTag)) return false; final WordLemmaTag other = (WordLemmaTag) o; return word().equals(other.word()) && lemma().equals(other.lemma()) && tag().equals(other.tag()); } @Override public int hashCode() { int result; result = (word != null ? word.hashCode() : 3); result = 29 * result + (tag != null ? tag.hashCode() : 0); result = 29 * result + (lemma != null ? lemma.hashCode() : 0); return result; } /** * Orders first by word, then by lemma, then by tag. * * @param wordLemmaTag object to compare to * @return result (positive if {@code this} is greater than * {@code obj}, 0 if equal, negative otherwise) */ @Override public int compareTo(WordLemmaTag wordLemmaTag) { int first = word().compareTo(wordLemmaTag.word()); if (first != 0) return first; int second = lemma().compareTo(wordLemmaTag.lemma()); if (second != 0) return second; else return tag().compareTo(wordLemmaTag.tag()); } /** * Return a factory for this kind of label * (i.e., {@code TaggedWord}). * The factory returned is always the same one (a singleton). 
* * @return The label factory */ @Override public LabelFactory labelFactory() { return new WordLemmaTagFactory(); } /*for debugging only*/ public static void main(String[] args) { WordLemmaTag wLT = new WordLemmaTag(); wLT.setFromString("hunter/NN"); System.out.println(wLT.word()); System.out.println(wLT.lemma()); System.out.println(wLT.tag()); WordLemmaTag wLT2 = new WordLemmaTag(); wLT2.setFromString("bought/buy/V"); System.out.println(wLT2.word()); System.out.println(wLT2.lemma()); System.out.println(wLT2.tag()); WordLemmaTag wLT3 = new WordLemmaTag(); wLT2.setFromString("life"); System.out.println(wLT3.word()); System.out.println(wLT3.lemma()); System.out.println(wLT3.tag()); } private static final long serialVersionUID = -5993410244163988138L; }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy