Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download or modify it as often as you want.
Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.
/**
* Set of common annotations for {@link CoreMap}s. The classes
* defined here are typesafe keys for getting and setting annotation
* values. These classes need not be instantiated outside of this
* class. E.g., {@link TextAnnotation}.class serves as the key and a
* {@code String} serves as the value containing the
* corresponding word.
*
*
*
* New types of {@link CoreAnnotation} can be defined anywhere that is
* convenient in the source tree - they are just classes. This file exists to
* hold widely used "core" annotations and others inherited from the
* {@link Label} family. In general, most keys should be placed in this file as
* they may often be reused throughout the code. This architecture allows for
* flexibility, but in many ways it should be considered as equivalent to an
* enum in which everything should be defined.
*
*
*
* The getType method required by CoreAnnotation must return the same class type
* as its value type parameter. It feels like one should be able to get away
* without that method, but because Java erases the generic type signature, that
* info disappears at runtime. See {@link ValueAnnotation} for an example.
*
*
* @author dramage
* @author rafferty
* @author bethard
*/
public class CoreAnnotations {
/** Not instantiable: this class only holds static members. */
private CoreAnnotations() {
  // only static members
}
/**
* The CoreMap key identifying the annotation's text.
*
* Note that this key is intended to be used with many different kinds of
* annotations - documents, sentences and tokens all have their own text.
*/
public static class TextAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the value stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* The CoreMap key for getting the lemma (morphological stem) of a token.
*
* This key is typically set on token annotations.
*
* TODO: merge with StemAnnotation?
*/
public static class LemmaAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the lemma value stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* The CoreMap key for getting the Penn part of speech of a token.
*
* This key is typically set on token annotations.
*/
public static class PartOfSpeechAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the part-of-speech value stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* The CoreMap key for getting the token-level named entity tag (e.g., DATE,
* PERSON, etc.)
*
* This key is typically set on token annotations.
*/
public static class NamedEntityTagAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the named entity tag stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* The CoreMap key for getting the token-level named entity tag (e.g., DATE,
* PERSON, etc.) from a previous NER tagger. NERFeatureFactory is sensitive to
* this tag and will turn the annotations from the previous NER tagger into
* new features. This is currently used to implement one level of stacking --
* we may later change it to take a list as needed.
*
* This key is typically set on token annotations.
*/
public static class StackedNamedEntityTagAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the stacked named entity tag stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* The CoreMap key for getting the token-level true case annotation (e.g.,
* INIT_UPPER)
*
* This key is typically set on token annotations.
*/
public static class TrueCaseAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the true case label stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* The CoreMap key identifying the annotation's true-cased text.
*
* Note that this key is intended to be used with many different kinds of
* annotations - documents, sentences and tokens all have their own text.
*/
public static class TrueCaseTextAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the true-cased text stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* The CoreMap key for getting the tokens contained by an annotation.
*
* This key should be set for any annotation that contains tokens. It can be
* done without much memory overhead using List.subList.
*/
public static class TokensAnnotation implements CoreAnnotation> {
@Override
public Class> getType() {
return ErasureUtils.uncheckedCast(List.class);
}
}
/**
* The CoreMap key for getting the tokens (can be words, phrases or anything that are of type CoreMap) contained by an annotation.
*
* This key should be set for any annotation that contains tokens (words, phrases etc). It can be
* done without much memory overhead using List.subList.
*/
public static class GenericTokensAnnotation implements CoreAnnotation> {
@Override
public Class> getType() {
return ErasureUtils.uncheckedCast(List.class);
}
}
/**
* The CoreMap key for getting the sentences contained in an annotation.
* The sentences are represented as a {@code List}.
* Each sentence might typically have annotations such as {@code TextAnnotation},
* {@code TokensAnnotation}, {@code SentenceIndexAnnotation}, and {@code BasicDependenciesAnnotation}.
*
* This key is typically set only on document annotations.
*/
public static class SentencesAnnotation implements CoreAnnotation> {
@Override
public Class> getType() {
return ErasureUtils.uncheckedCast(List.class);
}
}
/**
* The CoreMap key for getting the quotations contained by an annotation.
*
* This key is typically set only on document annotations.
*/
public static class QuotationsAnnotation implements CoreAnnotation> {
@Override
public Class> getType() {
return ErasureUtils.uncheckedCast(List.class);
}
}
/**
* Unique identifier within a document for a given quotation.
*/
public static class QuotationIndexAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the quotation identifier stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* The index of the sentence that this annotation begins in.
*/
public static class SentenceBeginAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the sentence index stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* The index of the sentence that this annotation ends in.
*/
public static class SentenceEndAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the sentence index stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* The CoreMap key for getting the paragraphs contained by an annotation.
*
* This key is typically set only on document annotations.
*/
public static class ParagraphsAnnotation implements CoreAnnotation> {
@Override
public Class> getType() {
return ErasureUtils.uncheckedCast(List.class);
}
}
/**
* The CoreMap key identifying the first token included in an annotation. The
* token with index 0 is the first token in the document.
*
* This key should be set for any annotation that contains tokens.
*/
public static class TokenBeginAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the token index stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* The CoreMap key identifying the last token after the end of an annotation.
* The token with index 0 is the first token in the document.
*
* This key should be set for any annotation that contains tokens.
*/
public static class TokenEndAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the token index stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* The CoreMap key identifying the date and time associated with an
* annotation.
*
* This key is typically set on document annotations.
*/
public static class CalendarAnnotation implements CoreAnnotation<Calendar> {
  /**
   * Reports the runtime class of the date/time value stored under this key.
   *
   * @return {@code Calendar.class}
   */
  @Override
  public Class<Calendar> getType() {
    return Calendar.class;
  }
}
/*
* These are the keys hashed on by IndexedWord
*/
/**
* This refers to the unique identifier for a "document", where document may
* vary based on your application.
*/
public static class DocIDAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the document identifier stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* This indexes a token number inside a sentence. Standardly, tokens are
* indexed within a sentence starting at 1 (not 0: we follow common parlance
* whereby we speak of the first word of a sentence).
* This is generally an individual word or feature index - it is local, and
* may not be uniquely identifying without other identifiers such as sentence
* and doc. However, if these are the same, the index annotation should be a
* unique identifier for differentiating objects.
*/
public static class IndexAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the token index stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* This indexes the beginning of a span of words, e.g., a constituent in a
* tree. See {@link edu.stanford.nlp.trees.Tree#indexSpans(int)}.
* This annotation counts tokens.
* It standardly indexes from 1 (like IndexAnnotation). The reasons for
* this are: (i) Talking about the first word of a sentence is kind of
* natural, and (ii) We use index 0 to refer to an imaginary root in
* dependency output.
*/
public static class BeginIndexAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the span-begin index stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* This indexes the end of a span of words, e.g., a constituent in a
* tree. See {@link edu.stanford.nlp.trees.Tree#indexSpans(int)}. This annotation
* counts tokens. It standardly indexes from 1 (like IndexAnnotation).
* The end index is not a fencepost: its value is equal to the
* IndexAnnotation of the last word in the span.
*/
public static class EndIndexAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the span-end index stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* This indicates that starting at this token, the sentence should not be ended until
* we see a ForcedSentenceEndAnnotation. Used to force the ssplit annotator
* (e.g., the WordToSentenceProcessor) to keep tokens in the same sentence
* until ForcedSentenceEndAnnotation is seen.
*/
public static class ForcedSentenceUntilEndAnnotation implements CoreAnnotation<Boolean> {
  /**
   * Reports the runtime class of the flag stored under this key.
   *
   * @return {@code Boolean.class}
   */
  @Override
  public Class<Boolean> getType() {
    return Boolean.class;
  }
}
/**
* This indicates the sentence should end at this token. Used to
* force the ssplit annotator (e.g., the WordToSentenceProcessor) to
* start a new sentence at the next token.
*/
public static class ForcedSentenceEndAnnotation implements CoreAnnotation<Boolean> {
  /**
   * Reports the runtime class of the flag stored under this key.
   *
   * @return {@code Boolean.class}
   */
  @Override
  public Class<Boolean> getType() {
    return Boolean.class;
  }
}
/**
* Unique identifier within a document for a given sentence.
*/
public static class SentenceIndexAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the sentence identifier stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* Line number for a sentence in a document delimited by newlines
* instead of punctuation. May skip numbers if there are blank
* lines not represented as sentences. Indexed from 1 rather than 0.
*/
public static class LineNumberAnnotation implements CoreAnnotation<Integer> {
  /**
   * Reports the runtime class of the line number stored under this key.
   *
   * @return {@code Integer.class}
   */
  @Override
  public Class<Integer> getType() {
    return Integer.class;
  }
}
/**
* Contains the "value" - an ill-defined string used widely in MapLabel.
*/
public static class ValueAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the value stored under this key. The type
   * argument on {@code CoreAnnotation} and the class returned here must agree,
   * because the generic signature is erased at runtime.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
 * The CoreMap key for a {@code String}-valued category.
 *
 * NOTE(review): this key had no documentation; presumably it holds a category
 * label that accompanies {@link ValueAnnotation} -- confirm against callers.
 */
public static class CategoryAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the value stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* The exact original surface form of a token. This is created in the
* invertible PTBTokenizer. The tokenizer may normalize the token form to
* match what appears in the PTB, but this key will hold the original characters.
*/
public static class OriginalTextAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the original surface form stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* Annotation for the whitespace characters appearing before this word. This
* can be filled in by the tokenizer so that the original text string can be
* reconstructed.
*/
public static class BeforeAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the leading-whitespace string stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* Annotation for the whitespace characters appearing after this word. This can
* be filled in by the tokenizer so that the original text string can be
* reconstructed.
*/
public static class AfterAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the trailing-whitespace string stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* CoNLL dep parsing - coarser POS tags.
*/
public static class CoarseTagAnnotation implements CoreAnnotation<String> {
  /**
   * Reports the runtime class of the coarse POS tag stored under this key.
   *
   * @return {@code String.class}
   */
  @Override
  public Class<String> getType() {
    return String.class;
  }
}
/**
* CoNLL dep parsing - the dependency type
*/
public static class CoNLLDepAnnotation implements CoreAnnotation<CoreMap> {
  /**
   * Reports the runtime class of the value stored under this key.
   *
   * @return {@code CoreMap.class}
   */
  @Override
  public Class<CoreMap> getType() {
    return CoreMap.class;
  }
}
/**
* CoNLL SRL/dep parsing - whether the word is a predicate
*/
public static class CoNLLPredicateAnnotation implements CoreAnnotation<Boolean> {
  /**
   * Reports the runtime class of the predicate flag stored under this key.
   *
   * @return {@code Boolean.class}
   */
  @Override
  public Class<Boolean> getType() {
    return Boolean.class;
  }
}
/**
* CoNLL SRL/dep parsing - map which, for the current word, specifies its
* specific role for each predicate
*/
public static class CoNLLSRLAnnotation implements CoreAnnotation