All downloads are FREE. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.ling.WordTag Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.ling;

import java.io.DataInputStream;
import java.io.DataOutputStream;

/**
 * A WordTag corresponds to a tagged (e.g., for part of speech) word
 * and is implemented with String-valued word and tag.  It implements
 * the Label interface; the value() method for that
 * interface corresponds to the word of the WordTag.
 * 

* The equality relation for WordTag is defined as identity of both * word and tag. Note that this is different from * TaggedWord, for which equality derives from * ValueLabel and requires only identity of value. * * @author Roger Levy */ public class WordTag implements Label, HasWord, HasTag, Comparable { private String word; private String tag; private static final String DIVIDER = "/"; /** * Create a new WordTag. * * @param word This word is passed to the supertype constructor * @param tag The value() of this label is set as the * tag of this Label */ public WordTag(String word, String tag) { setWord(word); setTag(tag); } public WordTag(String word) { this(word, null); } public WordTag(E word) { this(word.value(), word.tag()); } private WordTag() { } // only used internally for doing setFromString() /** * Create a new WordTag from a Label. The value of * the Label corresponds to the word of the WordTag. * * @param word The value() of this label is set as the * word of the WordTag * @param tag The value() of this label is set as the * tag of the WordTag */ public WordTag(Label word, Label tag) { this(word.value(), tag.value()); } public static WordTag valueOf(String s) { WordTag result = new WordTag(); result.setFromString(s); return result; } public static WordTag valueOf(String s, String tagDivider) { WordTag result = new WordTag(); result.setFromString(s, tagDivider); return result; } /** * Return a String representation of just the "main" value of this label. * * @return the "value" of the label */ public String value() { return word; } public String word() { return value(); } /** * Set the value for the label (if one is stored). * * @param value - the value for the label */ public void setValue(String value) { word = value; } public String tag() { return tag; } public void setWord(String word) { setValue(word); } public void setTag(String tag) { this.tag = tag; } /** * Return a String representation of the label. 
For a multipart label, * this will return all parts. The toString() method * causes a label to spill its guts. It should always return an * empty string rather than null if there is no value. * * @return a text representation of the full label contents */ @Override public String toString() { return toString(DIVIDER); } public String toString(String divider) { String tag = tag(); if (tag == null) { return word(); } else { return word() + divider + tag; } } /** * Sets a WordTag from decoding * the String passed in. The String is divided according * to the divider character (usually, "/"). We assume that we can * always just * divide on the rightmost divider character, rather than trying to * parse up escape sequences. If the divider character isn't found * in the word, then the whole string becomes the word, and the tag * is null. * * @param wordTagString The word that will go into the Word */ @Override public void setFromString(String wordTagString) { setFromString(wordTagString, DIVIDER); } public void setFromString(String wordTagString, String divider) { int where = wordTagString.lastIndexOf(divider); if (where >= 0) { setWord(wordTagString.substring(0, where).intern()); setTag(wordTagString.substring(where + 1).intern()); } else { setWord(wordTagString.intern()); setTag(null); } } /** A WordTag is equal only to another WordTag with the same word and tag values. */ @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof WordTag)) return false; final WordTag wordTag = (WordTag) o; if (tag != null ? !tag.equals(wordTag.tag) : wordTag.tag != null) return false; if (word != null ? !word.equals(wordTag.word) : wordTag.word != null) return false; return true; } @Override public int hashCode() { int result; result = (word != null ? word.hashCode() : 0); result = 29 * result + (tag != null ? tag.hashCode() : 0); return result; } /** * Orders first by word, then by tag. 
* * @param wordTag object to compare to * @return result (positive if this is greater than * obj, 0 if equal, negative otherwise) */ public int compareTo(WordTag wordTag) { int first = (word != null ? word().compareTo(wordTag.word()) : 0); if(first != 0) return first; else { if (tag() == null) { if (wordTag.tag() == null) return 0; else return -1; } return tag().compareTo(wordTag.tag()); } } // extra class guarantees correct lazy loading (Bloch p.194) private static class LabelFactoryHolder { private static final LabelFactory lf = new WordTagFactory(); } /** * Return a factory for this kind of label * (i.e., TaggedWord). * The factory returned is always the same one (a singleton). * * @return The label factory */ public LabelFactory labelFactory() { return LabelFactoryHolder.lf; } /** * Return a factory for this kind of label. * * @return The label factory */ public static LabelFactory factory() { return LabelFactoryHolder.lf; } public void read(DataInputStream in) { try { word = in.readUTF(); tag = in.readUTF(); } catch (Exception e) { e.printStackTrace(); } } public void save(DataOutputStream out) { try { out.writeUTF(word); out.writeUTF(tag); } catch (Exception e) { e.printStackTrace(); } } private static final long serialVersionUID = -1859527239216813742L; }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy