All downloads are FREE. The search and download features use the official Maven repository.

edu.stanford.nlp.pipeline.Annotator Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.util.ArraySet;

import java.util.Collections;
import java.util.Set;

/**
 * This is an interface for adding annotations to a fully annotated
 * Annotation.  In some ways, it is just a glorified Function, except
 * that it explicitly operates on Annotation objects.  Annotators
 * should be given to an AnnotationPipeline in order to make
 * annotation pipelines (the whole motivation of this package), and
 * therefore implementers of this interface should be designed to play
 * well with other Annotators and in their javadocs they should
 * explicitly state what annotations they are assuming already exist
 * in the annotation (like parse, POS tag, etc), what field they are
 * expecting them under (Annotation.WORDS_KEY, Annotation.PARSE_KEY,
 * etc) and what annotations they will add (or modify) and the keys
 * for them as well.  If you would like to look at the code for a
 * relatively simple Annotator, I recommend NERAnnotator.  For a lot
 * of code you could just add the implements directly, but I recommend
 * wrapping instead because I believe that it will help to keep the
 * pipeline code more manageable.
 * <p>
 * An Annotator can also provide a description of what it produces and
 * a description of what it requires to have been produced by using
 * the Requirement objects.  Predefined Requirement objects are
 * provided for most of the core annotators, such as tokenize, ssplit,
 * etc.  The StanfordCoreNLP version of the AnnotationPipeline can
 * enforce requirements, throwing an exception if an annotator does
 * not have all of its prerequisites met.  An Annotator which does not
 * participate in this system can simply return
 * {@code Collections.emptySet()} for both {@link #requires()} and
 * {@link #requirementsSatisfied()}.
 *
 * @author Jenny Finkel
 */
public interface Annotator {

  /**
   * Given an Annotation, perform a task on this Annotation.
   *
   * @param annotation the Annotation to operate on
   */
  public void annotate(Annotation annotation);

  /**
   * The Requirement is a general way of describing the pre and post
   * conditions of an Annotator running.  Typical use is to have
   * constants for the different requirement types, such as the
   * TOKENIZE_REQUIREMENT below, and to reuse those constants instead
   * of creating new objects.  It is also possible to subclass
   * Requirement if an Annotator has a more general output.  For
   * example, one could imagine a TsurgeonAnnotator which has a wide
   * range of possible effects; this would probably subclass
   * Requirement to indicate which particular surgery it provided.
   * <p>
   * We do nothing to override the equals or hashCode methods.  This
   * means that two Requirements are equal iff they are the same
   * object.  We do not want to use {@code name} to decide
   * equality because a subclass that uses more information, such as
   * the particular kind of tsurgeon used in a hypothetical
   * TsurgeonAnnotator, cannot use a stricter equals() than the
   * superclass.  It is hard to get stricter than ==.
   */
  public class Requirement {

    /** Human-readable name of this requirement; also its string form. */
    public final String name;

    /**
     * Creates a requirement with the given name.
     *
     * @param name the name returned by {@link #toString()}
     */
    public Requirement(String name) {
      this.name = name;
    }

    /** Returns the name this requirement was created with. */
    @Override
    public String toString() {
      return name;
    }
  }

  /**
   * Returns a set of requirements for which tasks this annotator can
   * provide.  For example, the POS annotator will return "pos".
   *
   * @return the requirements this annotator satisfies
   */
  public Set<Requirement> requirementsSatisfied();

  /**
   * Returns the set of tasks which this annotator requires in order
   * to perform.  For example, the POS annotator will return
   * "tokenize", "ssplit".
   *
   * @return the requirements this annotator depends on
   */
  public Set<Requirement> requires();

  /**
   * These are annotators which StanfordCoreNLP knows how to create.
   * Add new annotators and/or annotators from other groups here!
   */
  public static final String STANFORD_TOKENIZE = "tokenize";
  public static final String STANFORD_CLEAN_XML = "cleanxml";
  public static final String STANFORD_SSPLIT = "ssplit";
  public static final String STANFORD_POS = "pos";
  public static final String STANFORD_LEMMA = "lemma";
  public static final String STANFORD_NER = "ner";
  public static final String STANFORD_REGEXNER = "regexner";
  public static final String STANFORD_ENTITY_MENTIONS = "entitymentions";
  public static final String STANFORD_GENDER = "gender";
  public static final String STANFORD_TRUECASE = "truecase";
  public static final String STANFORD_PARSE = "parse";
  public static final String STANFORD_DETERMINISTIC_COREF = "dcoref";
  public static final String STANFORD_RELATION = "relation";
  public static final String STANFORD_SENTIMENT = "sentiment";
  public static final String STANFORD_COLUMN_DATA_CLASSIFIER = "cdc";
  public static final String STANFORD_DEPENDENCIES = "depparse";
  public static final String STANFORD_NATLOG = "natlog";
  public static final String STANFORD_QUOTE = "quote";

  /**
   * Shared Requirement constants for the annotators named above.
   * Requirements compare by identity, so reuse these instances rather
   * than constructing new ones with the same name.
   */
  public static final Requirement TOKENIZE_REQUIREMENT = new Requirement(STANFORD_TOKENIZE);
  public static final Requirement CLEAN_XML_REQUIREMENT = new Requirement(STANFORD_CLEAN_XML);
  public static final Requirement SSPLIT_REQUIREMENT = new Requirement(STANFORD_SSPLIT);
  public static final Requirement POS_REQUIREMENT = new Requirement(STANFORD_POS);
  public static final Requirement LEMMA_REQUIREMENT = new Requirement(STANFORD_LEMMA);
  public static final Requirement NER_REQUIREMENT = new Requirement(STANFORD_NER);
  public static final Requirement GENDER_REQUIREMENT = new Requirement(STANFORD_GENDER);
  public static final Requirement TRUECASE_REQUIREMENT = new Requirement(STANFORD_TRUECASE);
  public static final Requirement PARSE_REQUIREMENT = new Requirement(STANFORD_PARSE);
  public static final Requirement DETERMINISTIC_COREF_REQUIREMENT = new Requirement(STANFORD_DETERMINISTIC_COREF);
  public static final Requirement RELATION_EXTRACTOR_REQUIREMENT = new Requirement(STANFORD_RELATION);
  public static final Requirement NATLOG_REQUIREMENT = new Requirement(STANFORD_NATLOG);
  public static final Requirement QUOTE_REQUIREMENT = new Requirement(STANFORD_QUOTE);

  /**
   * These are annotators which StanfordCoreNLP does not know how to
   * create by itself, meaning you would need to use the custom
   * annotator mechanism to create them.  Note that some of them are
   * already included in other parts of the system, such as sutime,
   * which is already included in ner.
   */
  public static final Requirement GUTIME_REQUIREMENT = new Requirement("gutime");
  public static final Requirement SUTIME_REQUIREMENT = new Requirement("sutime");
  public static final Requirement HEIDELTIME_REQUIREMENT = new Requirement("heideltime");
  public static final Requirement STEM_REQUIREMENT = new Requirement("stem");
  public static final Requirement NUMBER_REQUIREMENT = new Requirement("number");
  public static final Requirement TIME_WORDS_REQUIREMENT = new Requirement("timewords");
  public static final Requirement QUANTIFIABLE_ENTITY_NORMALIZATION_REQUIREMENT = new Requirement("quantifiable_entity_normalization");
  // NOTE(review): the name below is spelled "classifer" and differs from
  // STANFORD_COLUMN_DATA_CLASSIFIER ("cdc"). It is kept byte-for-byte because
  // it is a runtime string; confirm whether the spelling is intentional.
  public static final Requirement COLUMN_DATA_CLASSIFIER = new Requirement("column_data_classifer");

  /**
   * The Stanford Parser can produce this if it is specifically requested
   */
  public static final Requirement BINARIZED_TREES_REQUIREMENT = new Requirement("binarized_trees");

  /**
   * These are typical combinations of annotators which may be used as
   * requirements by other annotators.  Each set is wrapped with
   * {@code Collections.unmodifiableSet}, so callers cannot modify it.
   */
  public static final Set<Requirement> TOKENIZE_AND_SSPLIT =
      Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_POS =
      Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, POS_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_NER =
      Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, NER_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_PARSE =
      Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, PARSE_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_PARSE_NER =
      Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, PARSE_REQUIREMENT, NER_REQUIREMENT));
  public static final Set<Requirement> TOKENIZE_SSPLIT_POS_LEMMA =
      Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, POS_REQUIREMENT, LEMMA_REQUIREMENT));
  public static final Set<Requirement> PARSE_AND_TAG =
      Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT));
  public static final Set<Requirement> PARSE_TAG_BINARIZED_TREES =
      Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT, BINARIZED_TREES_REQUIREMENT));

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy