/*
 * Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
 * You can buy this project and download or modify it as often as you want.
 * Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher-level text understanding applications.
 */
package edu.stanford.nlp.parser.lexparser;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.Map;
import java.util.Set;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Tag;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
/**
* An unknown word model for a generic language. This was originally designed for
* German and has been changed only to remove German-specific numeric features. It
* models unknown words based on their prefixes and suffixes, as well as capital letters.
*
* @author Roger Levy
* @author Greg Donaker (corrections and modeling improvements)
* @author Christopher Manning (generalized and improved what Greg did)
* @author Anna Rafferty
*
*/
public class BaseUnknownWordModel implements UnknownWordModel {
/** Redwood logging channels for this class. */
private static Redwood.RedwoodChannels log = Redwood.channels(BaseUnknownWordModel.class);
/** Serialization version identifier for this class. */
private static final long serialVersionUID = 6355171148751673822L;
/**
* Compile-time verbosity flag, currently off. Presumably gates extra diagnostic
* output elsewhere in the class -- the code that reads it is not visible here.
*/
protected static final boolean VERBOSE = false;
// Feature switches. NOTE(review): going by the class description (prefixes, suffixes,
// capital letters) these presumably select which word features are used when
// modeling an unknown word -- confirm against the code that reads them.
protected boolean useFirst; //= true; (presumably: use the start of the word -- confirm)
private final boolean useEnd; // when true, endLength below is consulted
protected boolean useGT; // NOTE(review): presumably "Good-Turing" -- confirm
private final boolean useFirstCap; // Only care if first is capitalized
private int endLength = 2; // only used if useEnd==true
/** What type of equivalence classing is done in getSignature */
protected final int unknownLevel;
/** The literal "UNK"; presumably the label for the generic unknown-word class -- confirm usage. */
protected static final String unknown = "UNK";
/** Sentinel word id (-1) used as the word component of NULL_ITW. */
protected static final int nullWord = -1;
/** Sentinel tag id (-1) used as the tag component of NULL_ITW. */
protected static final short nullTag = -1;
/** IntTaggedWord built from the two sentinel ids above, i.e. with neither word nor tag set. */
protected static final IntTaggedWord NULL_ITW = new IntTaggedWord(nullWord, nullTag);
/** Training options for this model; final, so assigned once at construction (constructor not visible here). */
protected final TrainOptions trainOptions;
// NOTE(review): both indices are declared with the raw Index type here; the element
// type is not visible in this chunk. Presumably they map words / tags to integer ids.
protected final Index wordIndex;
protected final Index tagIndex;
/**
* Has counts for taggings in terms of unseen signatures. The IntTagWords are
* for (tag,sig), (tag,null), (null,sig), (null,null). (None for basic UNK if
* there are signatures.)
*/
protected final ClassicCounter unSeenCounter;
/** This maps from a tag (as a label) to a Counter from word signatures to
* their P(sig|tag), as estimated in the model. For Chinese, the word
* signature is just the first character or its unicode type for things
* that aren't Chinese characters.
*/
protected final Map