edu.stanford.nlp.pipeline.Annotator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
package edu.stanford.nlp.pipeline;
import edu.stanford.nlp.util.ArraySet;
import java.util.Collections;
import java.util.Set;
/**
* This is an interface for adding annotations to a partially annotated
* Annotation. In some ways, it is just a glorified Function, except
* that it explicitly operates on Annotation objects. Annotators
* should be given to an AnnotationPipeline in order to make
* annotation pipelines (the whole motivation of this package), and
* therefore implementers of this interface should be designed to play
* well with other Annotators and in their javadocs they should
* explicitly state what annotations they are assuming already exist
* in the annotation (like parse, POS tag, etc), what field they are
* expecting them under (Annotation.WORDS_KEY, Annotation.PARSE_KEY,
* etc) and what annotations they will add (or modify) and the keys
* for them as well. If you would like to look at the code for a
* relatively simple Annotator, I recommend NERAnnotator. For a lot
* of code you could just add the implements directly, but I recommend
* wrapping instead because I believe that it will help to keep the
* pipeline code more manageable.
*
* An Annotator can also provide a description of what it produces and
* a description of what it requires to have been produced by using
* the Requirement objects. Predefined Requirement objects are
* provided for most of the core annotators, such as tokenize, ssplit,
* etc. The StanfordCoreNLP version of the AnnotationPipeline can
* enforce requirements, throwing an exception if an annotator does
* not have all of its prerequisites met. An Annotator which does not
* participate in this system can simply return Collections.emptySet()
* for both requires() and requirementsSatisfied().
*
* @author Jenny Finkel
*/
public interface Annotator {

  /**
   * Given an Annotation, perform a task on this Annotation.
   *
   * @param annotation the annotation object to operate on
   */
  public void annotate(Annotation annotation);

  /**
   * The Requirement is a general way of describing the pre- and
   * post-conditions of an Annotator running. Typical use is to have
   * constants for the different requirement types, such as the
   * TOKENIZE_REQUIREMENT below, and to reuse those constants instead
   * of creating new objects. It is also possible to subclass
   * Requirement if an Annotator has a more general output. For
   * example, one could imagine a TsurgeonAnnotator which has a wide
   * range of possible effects; this would probably subclass
   * Requirement to indicate which particular surgery it provided.
   *
   * We do nothing to override the equals or hashCode methods. This
   * means that two Requirements are equal iff they are the same
   * object. We do not want to use {@code name} to decide
   * equality because a subclass that uses more information, such as
   * the particular kind of tsurgeon used in a hypothetical
   * TsurgeonAnnotator, cannot use a stricter equals() than the
   * superclass. It is hard to get stricter than ==.
   */
  public class Requirement {

    /** Human-readable name of this requirement; also its string form. */
    public final String name;

    /**
     * Creates a requirement with the given name.
     *
     * @param name the name reported by {@link #toString()}
     */
    public Requirement(String name) {
      this.name = name;
    }

    /** Returns the name this requirement was constructed with. */
    @Override
    public String toString() {
      return name;
    }
  }

  /**
   * Returns a set of requirements for which tasks this annotator can
   * provide. For example, the POS annotator will return "pos".
   *
   * @return the requirements this annotator satisfies
   */
  public Set<Requirement> requirementsSatisfied();

  /**
   * Returns the set of tasks which this annotator requires in order
   * to perform. For example, the POS annotator will return
   * "tokenize", "ssplit".
   *
   * @return the requirements this annotator depends on
   */
  public Set<Requirement> requires();

  /**
   * These are annotators which StanfordCoreNLP knows how to create.
   * Add new annotators and/or annotators from other groups here!
   */
  public static final String STANFORD_TOKENIZE = "tokenize";
  public static final String STANFORD_CLEAN_XML = "cleanxml";
  public static final String STANFORD_SSPLIT = "ssplit";
  public static final String STANFORD_POS = "pos";
  public static final String STANFORD_LEMMA = "lemma";
  public static final String STANFORD_NER = "ner";
  public static final String STANFORD_REGEXNER = "regexner";
  public static final String STANFORD_ENTITY_MENTIONS = "entitymentions";
  public static final String STANFORD_GENDER = "gender";
  public static final String STANFORD_TRUECASE = "truecase";
  public static final String STANFORD_PARSE = "parse";
  public static final String STANFORD_DETERMINISTIC_COREF = "dcoref";
  public static final String STANFORD_RELATION = "relation";
  public static final String STANFORD_SENTIMENT = "sentiment";
  public static final String STANFORD_COLUMN_DATA_CLASSIFIER = "cdc";
  public static final String STANFORD_DEPENDENCIES = "depparse";
  public static final String STANFORD_NATLOG = "natlog";
  public static final String STANFORD_QUOTE = "quote";

  /*
   * Shared Requirement instances for the annotator names above.
   * Requirement uses identity equality, so these constants should be
   * reused rather than constructing new Requirement objects with the
   * same name.
   */
  public static final Requirement TOKENIZE_REQUIREMENT = new Requirement(STANFORD_TOKENIZE);
  public static final Requirement CLEAN_XML_REQUIREMENT = new Requirement(STANFORD_CLEAN_XML);
  public static final Requirement SSPLIT_REQUIREMENT = new Requirement(STANFORD_SSPLIT);
  public static final Requirement POS_REQUIREMENT = new Requirement(STANFORD_POS);
  public static final Requirement LEMMA_REQUIREMENT = new Requirement(STANFORD_LEMMA);
  public static final Requirement NER_REQUIREMENT = new Requirement(STANFORD_NER);
  public static final Requirement GENDER_REQUIREMENT = new Requirement(STANFORD_GENDER);
  public static final Requirement TRUECASE_REQUIREMENT = new Requirement(STANFORD_TRUECASE);
  public static final Requirement PARSE_REQUIREMENT = new Requirement(STANFORD_PARSE);
  public static final Requirement DETERMINISTIC_COREF_REQUIREMENT = new Requirement(STANFORD_DETERMINISTIC_COREF);
  public static final Requirement RELATION_EXTRACTOR_REQUIREMENT = new Requirement(STANFORD_RELATION);
  public static final Requirement NATLOG_REQUIREMENT = new Requirement(STANFORD_NATLOG);
  public static final Requirement QUOTE_REQUIREMENT = new Requirement(STANFORD_QUOTE);

  /**
   * These are annotators which StanfordCoreNLP does not know how to
   * create by itself, meaning you would need to use the custom
   * annotator mechanism to create them. Note that some of them are
   * already included in other parts of the system, such as sutime,
   * which is already included in ner.
   */
  public static final Requirement GUTIME_REQUIREMENT = new Requirement("gutime");
  public static final Requirement SUTIME_REQUIREMENT = new Requirement("sutime");
  public static final Requirement HEIDELTIME_REQUIREMENT = new Requirement("heideltime");
  public static final Requirement STEM_REQUIREMENT = new Requirement("stem");
  public static final Requirement NUMBER_REQUIREMENT = new Requirement("number");
  public static final Requirement TIME_WORDS_REQUIREMENT = new Requirement("timewords");
  public static final Requirement QUANTIFIABLE_ENTITY_NORMALIZATION_REQUIREMENT = new Requirement("quantifiable_entity_normalization");
  // NOTE(review): the name string below is spelled "classifer" (sic). It is
  // left unchanged because it is a runtime string returned by toString().
  public static final Requirement COLUMN_DATA_CLASSIFIER = new Requirement("column_data_classifer");

  /**
   * The Stanford Parser can produce this if it is specifically requested
   */
  public static final Requirement BINARIZED_TREES_REQUIREMENT = new Requirement("binarized_trees");

  /**
   * These are typical combinations of annotators which may be used as
   * requirements by other annotators. Each set is wrapped with
   * Collections.unmodifiableSet so that the shared constants cannot be
   * modified through these references.
   */
  public static final Set<Requirement> TOKENIZE_AND_SSPLIT = Collections.unmodifiableSet(new ArraySet<Requirement>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_POS = Collections.unmodifiableSet(new ArraySet<Requirement>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, POS_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_NER = Collections.unmodifiableSet(new ArraySet<Requirement>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, NER_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_PARSE = Collections.unmodifiableSet(new ArraySet<Requirement>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, PARSE_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_PARSE_NER = Collections.unmodifiableSet(new ArraySet<Requirement>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, PARSE_REQUIREMENT, NER_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_POS_LEMMA = Collections.unmodifiableSet(new ArraySet<Requirement>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, POS_REQUIREMENT, LEMMA_REQUIREMENT));
  public static final Set<Requirement> PARSE_AND_TAG = Collections.unmodifiableSet(new ArraySet<Requirement>(POS_REQUIREMENT, PARSE_REQUIREMENT));
  public static final Set<Requirement> PARSE_TAG_BINARIZED_TREES = Collections.unmodifiableSet(new ArraySet<Requirement>(POS_REQUIREMENT, PARSE_REQUIREMENT, BINARIZED_TREES_REQUIREMENT));
}