All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.ling.WordTag Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.ling;

import java.io.DataInputStream;
import java.io.DataOutputStream;

/**
 * A WordTag corresponds to a tagged (e.g., for part of speech) word
 * and is implemented with String-valued word and tag.  It implements
 * the Label interface; the value() method for that
 * interface corresponds to the word of the WordTag.
 * 

* The equality relation for WordTag is defined as identity of both * word and tag. Note that this is different from * TaggedWord, for which equality derives from * ValueLabel and requires only identity of value. * * @author Roger Levy */ public class WordTag implements Label, HasWord, HasTag, Comparable { private String word; private String tag; private static final String DIVIDER = "/"; /** * Create a new WordTag. * * @param word This word is passed to the supertype constructor * @param tag The value() of this label is set as the * tag of this Label */ public WordTag(String word, String tag) { setWord(word); setTag(tag); } public WordTag(String word) { this(word, null); } public WordTag(E word) { this(word.value(), word.tag()); } private WordTag() { } // only used internally for doing setFromString() /** * Create a new WordTag from a Label. The value of * the Label corresponds to the word of the WordTag. * * @param word The value() of this label is set as the * word of the WordTag * @param tag The value() of this label is set as the * tag of the WordTag */ public WordTag(Label word, Label tag) { this(word.value(), tag.value()); } public static WordTag valueOf(String s) { WordTag result = new WordTag(); result.setFromString(s); return result; } public static WordTag valueOf(String s, String tagDivider) { WordTag result = new WordTag(); result.setFromString(s, tagDivider); return result; } /** * Return a String representation of just the "main" value of this label. * * @return the "value" of the label */ public String value() { return word; } public String word() { return value(); } /** * Set the value for the label (if one is stored). * * @param value - the value for the label */ public void setValue(String value) { word = value; } public String tag() { return tag; } public void setWord(String word) { setValue(word); } public void setTag(String tag) { this.tag = tag; } /** * Return a String representation of the label. 
For a multipart label, * this will return all parts. The toString() method * causes a label to spill its guts. It should always return an * empty string rather than null if there is no value. * * @return a text representation of the full label contents */ @Override public String toString() { return toString(DIVIDER); } public String toString(String divider) { String tag = tag(); if (tag == null) { return word(); } else { return word() + divider + tag; } } /** * Sets a WordTag from decoding * the String passed in. The String is divided according * to the divider character (usually, "/"). We assume that we can * always just * divide on the rightmost divider character, rather than trying to * parse up escape sequences. If the divider character isn't found * in the word, then the whole string becomes the word, and the tag * is null. * * @param wordTagString The word that will go into the Word */ @Override public void setFromString(String wordTagString) { setFromString(wordTagString, DIVIDER); } public void setFromString(String wordTagString, String divider) { int where = wordTagString.lastIndexOf(divider); if (where >= 0) { setWord(wordTagString.substring(0, where).intern()); setTag(wordTagString.substring(where + 1).intern()); } else { setWord(wordTagString.intern()); setTag(null); } } /** A WordTag is equal only to another WordTag with the same word and tag values. */ @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof WordTag)) return false; final WordTag wordTag = (WordTag) o; if (tag != null ? !tag.equals(wordTag.tag) : wordTag.tag != null) return false; if (word != null ? !word.equals(wordTag.word) : wordTag.word != null) return false; return true; } @Override public int hashCode() { int result; result = (word != null ? word.hashCode() : 0); result = 29 * result + (tag != null ? tag.hashCode() : 0); return result; } /** * Orders first by word, then by tag. 
* * @param wordTag object to compare to * @return result (positive if this is greater than * obj, 0 if equal, negative otherwise) */ public int compareTo(WordTag wordTag) { int first = (word != null ? word().compareTo(wordTag.word()) : 0); if(first != 0) return first; else { if (tag() == null) { if (wordTag.tag() == null) return 0; else return -1; } return tag().compareTo(wordTag.tag()); } } // extra class guarantees correct lazy loading (Bloch p.194) private static class LabelFactoryHolder { private static final LabelFactory lf = new WordTagFactory(); } /** * Return a factory for this kind of label * (i.e., TaggedWord). * The factory returned is always the same one (a singleton). * * @return The label factory */ public LabelFactory labelFactory() { return LabelFactoryHolder.lf; } /** * Return a factory for this kind of label. * * @return The label factory */ public static LabelFactory factory() { return LabelFactoryHolder.lf; } public void read(DataInputStream in) { try { word = in.readUTF(); tag = in.readUTF(); } catch (Exception e) { e.printStackTrace(); } } public void save(DataOutputStream out) { try { out.writeUTF(word); out.writeUTF(tag); } catch (Exception e) { e.printStackTrace(); } } private static final long serialVersionUID = -1859527239216813742L; }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy