/*
 * [Web-page residue from the JAR browser this file was copied from; not part of the Java source.]
 * JAR search and dependency download from the Maven repository
 * edu.stanford.nlp.pipeline.Annotator Maven / Gradle / Ivy
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of stanford-parser Show documentation
 * Show all versions of stanford-parser Show documentation
 * Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
 */
package edu.stanford.nlp.pipeline;
import edu.stanford.nlp.util.ArraySet;
import java.util.*;
/**
* This is an interface for adding annotations to a partially annotated
* Annotation. In some ways, it is just a glorified function, except
* that it explicitly operates in-place on Annotation objects. Annotators
* should be given to an AnnotationPipeline in order to make
* annotation pipelines (the whole motivation of this package), and
* therefore implementers of this interface should be designed to play
* well with other Annotators and in their javadocs they should
* explicitly state what annotations they are assuming already exist
* in the annotation (like parse, POS tag, etc), what keys they are
* expecting them under (see, for instance, the ones in CoreAnnotations),
* and what annotations they will add (or modify) and the keys
* for them as well. If you would like to look at the code for a
* relatively simple Annotator, I recommend NERAnnotator. For a lot
* of code you could just add the implements directly, but I recommend
* wrapping instead because I believe that it will help to keep the
* pipeline code more manageable.
*
* An Annotator can also provide a description of what it produces and
* a description of what it requires to have been produced by using
* the Requirement objects. Predefined Requirement objects are
* provided for most of the core annotators, such as tokenize, ssplit,
* etc. The StanfordCoreNLP version of the AnnotationPipeline can
* enforce requirements, throwing an exception if an annotator does
* not have all of its prerequisites met. An Annotator which does not
* participate in this system can simply return Collections.emptySet()
* for both requires() and requirementsSatisfied().
*
* @author Jenny Finkel
*/
public interface Annotator {
/**
* Given an Annotation, perform a task on this Annotation.
*/
void annotate(Annotation annotation);
/**
* The Requirement is a general way of describing the pre and post
* conditions of an Annotator running. Typical use is to have
* constants for the different requirement types, such as the
* TOKENIZE_REQUIREMENT below, and to reuse those constants instead
* of creating new objects. It is also possible to subclass
* Requirement if an Annotator has a more general output. For
* example, one could imagine a TsurgeonAnnotator which has a wide
* range of possible effects; this would probably subclass
* Requirement to indicate which particular surgery it provided.
*
* We do nothing to override the equals or hashCode methods. This
* means that two Requirements are equal iff they are the same
* object. We do not want to use {@code name} to decide
* equality because a subclass that uses more information, such as
* the particular kind of tsurgeon used in a hypothetical
* TsurgeonAnnotator, cannot use a stricter equals() than the
* superclass. It is hard to get stricter than ==.
*/
class Requirement {
public final String name;
public Requirement(String name) {
this.name = name;
}
@Override
public String toString() {
return name;
}
}
/**
* Returns a set of requirements for which tasks this annotator can
* provide. For example, the POS annotator will return "pos".
*/
Set requirementsSatisfied();
/**
* Returns the set of tasks which this annotator requires in order
* to perform. For example, the POS annotator will return
* "tokenize", "ssplit".
*/
Set requires();
/**
* These are annotators which StanfordCoreNLP knows how to create.
* Add new annotators and/or annotators from other groups here!
*/
String STANFORD_TOKENIZE = "tokenize";
String STANFORD_CLEAN_XML = "cleanxml";
String STANFORD_SSPLIT = "ssplit";
String STANFORD_POS = "pos";
String STANFORD_LEMMA = "lemma";
String STANFORD_NER = "ner";
String STANFORD_REGEXNER = "regexner";
String STANFORD_ENTITY_MENTIONS = "entitymentions";
String STANFORD_GENDER = "gender";
String STANFORD_TRUECASE = "truecase";
String STANFORD_PARSE = "parse";
String STANFORD_DETERMINISTIC_COREF = "dcoref";
String STANFORD_COREF = "coref";
String STANFORD_MENTION = "mention"; // TODO(jebolton) Merge with entitymention
String STANFORD_RELATION = "relation";
String STANFORD_SENTIMENT = "sentiment";
String STANFORD_COLUMN_DATA_CLASSIFIER = "cdc";
String STANFORD_DEPENDENCIES = "depparse";
String STANFORD_NATLOG = "natlog";
String STANFORD_OPENIE = "openie";
String STANFORD_QUOTE = "quote";
String STANFORD_UD_FEATURES = "udfeats";
Requirement TOKENIZE_REQUIREMENT = new Requirement(STANFORD_TOKENIZE);
Requirement CLEAN_XML_REQUIREMENT = new Requirement(STANFORD_CLEAN_XML);
Requirement SSPLIT_REQUIREMENT = new Requirement(STANFORD_SSPLIT);
Requirement POS_REQUIREMENT = new Requirement(STANFORD_POS);
Requirement LEMMA_REQUIREMENT = new Requirement(STANFORD_LEMMA);
Requirement NER_REQUIREMENT = new Requirement(STANFORD_NER);
Requirement GENDER_REQUIREMENT = new Requirement(STANFORD_GENDER);
Requirement TRUECASE_REQUIREMENT = new Requirement(STANFORD_TRUECASE);
Requirement PARSE_REQUIREMENT = new Requirement(STANFORD_PARSE);
Requirement DEPENDENCY_REQUIREMENT = new Requirement(STANFORD_DEPENDENCIES);
Requirement MENTION_REQUIREMENT = new Requirement(STANFORD_MENTION);
Requirement ENTITY_MENTIONS_REQUIREMENT = new Requirement(STANFORD_ENTITY_MENTIONS);
Requirement DETERMINISTIC_COREF_REQUIREMENT = new Requirement(STANFORD_DETERMINISTIC_COREF);
Requirement COREF_REQUIREMENT = new Requirement(STANFORD_COREF);
Requirement RELATION_EXTRACTOR_REQUIREMENT = new Requirement(STANFORD_RELATION);
Requirement NATLOG_REQUIREMENT = new Requirement(STANFORD_NATLOG);
Requirement OPENIE_REQUIREMENT = new Requirement(STANFORD_OPENIE);
Requirement QUOTE_REQUIREMENT = new Requirement(STANFORD_QUOTE);
Requirement UD_FEATURES_REQUIREMENT = new Requirement(STANFORD_UD_FEATURES);
/**
* A map from annotator name to a set of requirements for that annotator.
* This is useful to have here for the purpose of static analysis on an
* annotators list.
*/
@SuppressWarnings("unchecked")
Map> REQUIREMENTS = Collections.unmodifiableMap(new HashMap>() {
{
put(STANFORD_TOKENIZE, Collections.EMPTY_SET);
put(STANFORD_CLEAN_XML, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT); // A requirement for STANFORD_CLEAN_XML
}}));
put(STANFORD_SSPLIT, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
}}));
put(STANFORD_POS, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
}}));
put(STANFORD_LEMMA, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
}}));
put(STANFORD_NER, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT);
}}));
put(STANFORD_REGEXNER, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
}}));
put(STANFORD_GENDER, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
}}));
put(STANFORD_TRUECASE, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT);
}}));
put(STANFORD_PARSE, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
}}));
put(STANFORD_DEPENDENCIES, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
}}));
put(STANFORD_MENTION, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(NER_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT);
}}));
put(STANFORD_ENTITY_MENTIONS, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT);
}}));
put(STANFORD_DETERMINISTIC_COREF, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT);
add(NER_REQUIREMENT);
add(PARSE_REQUIREMENT);
}}));
put(STANFORD_COREF, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT);
add(NER_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT);
add(MENTION_REQUIREMENT);
}}));
put(STANFORD_RELATION, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT);
add(NER_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT);
}}));
put(STANFORD_NATLOG, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT); // TODO(gabor) can also use 'parse' annotator, technically
}}));
put(STANFORD_OPENIE, Collections.unmodifiableSet(new HashSet() {{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT); // TODO(gabor) can also use 'parse' annotator, technically
add(NATLOG_REQUIREMENT);
}}));
put(STANFORD_QUOTE, Collections.unmodifiableSet(new HashSet() {{
// No requirements
}}));
put(STANFORD_UD_FEATURES, Collections.unmodifiableSet(new HashSet(){{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT);
}}));
}});
/**
* These are annotators which StanfordCoreNLP does not know how to
* create by itself, meaning you would need to use the custom
* annotator mechanism to create them. Note that some of them are
* already included in other parts of the system, such as sutime,
* which is already included in ner.
*/
Requirement GUTIME_REQUIREMENT = new Requirement("gutime");
Requirement SUTIME_REQUIREMENT = new Requirement("sutime");
Requirement HEIDELTIME_REQUIREMENT = new Requirement("heideltime");
Requirement STEM_REQUIREMENT = new Requirement("stem");
Requirement NUMBER_REQUIREMENT = new Requirement("number");
Requirement TIME_WORDS_REQUIREMENT = new Requirement("timewords");
Requirement QUANTIFIABLE_ENTITY_NORMALIZATION_REQUIREMENT = new Requirement("quantifiable_entity_normalization");
Requirement COLUMN_DATA_CLASSIFIER = new Requirement("column_data_classifer");
/**
* The Stanford Parser can produce this if it is specifically requested.
*/
Requirement BINARIZED_TREES_REQUIREMENT = new Requirement("binarized_trees");
/**
* These are typical combinations of annotators which may be used as
* requirements by other annotators.
*/
Set TOKENIZE_AND_SSPLIT = Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT));
Set TOKENIZE_SSPLIT_POS = Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, POS_REQUIREMENT));
Set TOKENIZE_SSPLIT_NER = Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, NER_REQUIREMENT));
Set TOKENIZE_SSPLIT_PARSE = Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, PARSE_REQUIREMENT));
Set TOKENIZE_SSPLIT_PARSE_NER = Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, PARSE_REQUIREMENT, NER_REQUIREMENT));
Set TOKENIZE_SSPLIT_POS_LEMMA = Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, POS_REQUIREMENT, LEMMA_REQUIREMENT));
Set TOKENIZE_SSPLIT_POS_DEPPARSE = Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, POS_REQUIREMENT, DEPENDENCY_REQUIREMENT));
Set PARSE_AND_TAG = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT));
Set PARSE_TAG_BINARIZED_TREES = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT, BINARIZED_TREES_REQUIREMENT));
Set PARSE_TAG_DEPPARSE_BINARIZED_TREES = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT, DEPENDENCY_REQUIREMENT, BINARIZED_TREES_REQUIREMENT));
Set PARSE_TAG_DEPPARSE = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT, DEPENDENCY_REQUIREMENT));
}
// [Web-page footer, not part of the Java source] © 2015 - 2025 Weber Informatics LLC | Privacy Policy