gate.creole.coref.NominalCoref Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of annie Show documentation
Show all versions of annie Show documentation
ANNIE is a general purpose information extraction system that
provides the building blocks of many other GATE applications.
/*
* NominalCoref.java
*
* Copyright (c) 1995-2012, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
*
* $Id: NominalCoref.java 19742 2016-11-16 17:58:23Z markagreenwood $
*/
package gate.creole.coref;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.FeatureMap;
import gate.Resource;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.Err;
import gate.util.OffsetComparator;
import gate.util.SimpleFeatureMapImpl;
@CreoleResource(name="ANNIE Nominal Coreferencer", comment="Nominal Coreference resolution component", helpURL="http://gate.ac.uk/userguide/sec:annie:pronom-coref", icon="nominal-coreferencer")
public class NominalCoref extends AbstractCoreferencer {
/** Version id used by Java serialization. */
private static final long serialVersionUID = 1497388811557744017L;
/** Name of the "document" parameter. */
public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
/** Name of the "annotationSetName" parameter. */
public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";
// Annotation type names and feature values that execute() uses to select
// the nominals it works on.
/** Annotation type requested from the input set for people. */
private static final String PERSON_CATEGORY = "Person";
/** Annotation type requested from the input set for job titles. */
private static final String JOBTITLE_CATEGORY = "JobTitle";
/** Annotation type requested from the input set for organizations. */
private static final String ORGANIZATION_CATEGORY = "Organization";
/** Annotation type queried, together with a feature constraint, for organization nouns. */
private static final String LOOKUP_CATEGORY = "Lookup";
/** Value the LOOKUP_MAJOR_TYPE_FEATURE_NAME feature must have for a Lookup to count as an organization noun. */
private static final String ORGANIZATION_NOUN_CATEGORY = "organization_noun";
// Per-run state.
// Disabled: a shared offset comparator (see the commented-out static block below).
//private static AnnotationOffsetComparator ANNOTATION_OFFSET_COMPARATOR;
/**
 * Name of the annotation set to work on. preprocess() reads annotations from
 * this set and falls back to the document's default set when the name is
 * null or empty.
 */
private String annotationSetName;
/** The annotation set selected by preprocess() for the current run. */
private AnnotationSet defaultAnnotations;
/**
 * Map returned by getResolvedAnaphora(); created in the constructor and in
 * reInit(), and emptied at the start of every preprocess() call.
 * NOTE(review): declared as a raw HashMap in this copy; the type arguments
 * were presumably lost - confirm against the upstream source.
 */
private HashMap anaphor2antecedent;
// Disabled: initialiser for the commented-out comparator above.
/* static {
ANNOTATION_OFFSET_COMPARATOR = new AnnotationOffsetComparator();
}*/
/**
 * Creates the coreferencer, handing the label "NOMINAL" to the superclass
 * constructor and starting out with an empty anaphor-to-antecedent map.
 */
public NominalCoref() {
  super("NOMINAL");
  anaphor2antecedent = new HashMap();
}
/**
 * Initialises this resource by delegating entirely to the superclass.
 *
 * @return the resource returned by {@code super.init()}
 * @throws ResourceInstantiationException if the superclass initialisation fails
 */
@Override
public Resource init() throws ResourceInstantiationException {
  final Resource initialised = super.init();
  return initialised;
}
/**
 * Reinitialises the processing resource so that it is back in the state it
 * has right after {@link #init()}.
 *
 * <p>The anaphor-to-antecedent map is replaced with a fresh, empty one (not
 * merely cleared) and {@link #init()} is run again; the value it returns is
 * ignored.
 *
 * @throws ResourceInstantiationException if {@link #init()} fails
 */
@Override
public void reInit() throws ResourceInstantiationException {
  anaphor2antecedent = new HashMap();
  init();
}
/**
 * Sets the document to run on by forwarding it to the superclass.
 * No check is made here that the document is non-null.
 *
 * @param newDocument the document to process
 */
@Override
public void setDocument(Document newDocument) {
  super.setDocument(newDocument);
}
/**
 * Stores the name of the annotation set this resource works on.
 * The parameter is optional and can be changed at run time.
 *
 * @param annotationSetName the annotation set name; may be null or empty
 */
@Override
@RunTime
@Optional
@CreoleParameter(comment="The annotation set to be used for the generated annotations")
public void setAnnotationSetName(String annotationSetName) {
  this.annotationSetName = annotationSetName;
}
/**
 * Returns the annotation set name last given to
 * {@link #setAnnotationSetName(String)}.
 *
 * @return the configured annotation set name, possibly null
 */
@Override
public String getAnnotationSetName() {
  return this.annotationSetName;
}
/**
 * This method runs the coreferencer. It requires the document to be set and
 * throws an ExecutionException otherwise.
 *
 * The part of the process that is visible in this copy of the file:
 * - Run preprocess() to select the input annotation set.
 * - Build an array of Token annotations sorted with OffsetComparator.
 * - Collect Person, JobTitle and Organization annotations, plus Lookup
 *   annotations whose major type is "organization_noun", into one set.
 * - Sort that set with OffsetComparator into nominalArray.
 *
 * The per-nominal loop that follows is damaged in this copy (see the
 * NOTE(review) near the end of the method). The original description of it
 * was:
 * If it is a Person, we add it to the top of a stack.
 * If it is a job title, we subject it to a series of tests. If it
 * passes, we associate it with the Person annotation at the top
 * of the stack
 * NOTE(review): that description cannot be checked against the code shown
 * here - confirm against the upstream source.
 */
@Override
public void execute() throws ExecutionException{
Annotation[] nominalArray;
//0. preconditions
if (null == this.document) {
throw new ExecutionException("[coreference] Document is not set!");
}
//1. preprocess
// Selects defaultAnnotations. When the set is empty preprocess() only prints
// a warning and returns normally, so execution continues below.
preprocess();
// Out.println("Total annotations: " + defaultAnnotations.size());
// Get a sorted array of Tokens.
// The tests for job titles often require getting previous and subsequent
// tokens, so to save work, we create a single, sorted list of
// tokens.
Annotation[] tokens = defaultAnnotations.get(TOKEN_ANNOTATION_TYPE).
toArray(new Annotation[0]);
java.util.Arrays.sort(tokens, new OffsetComparator());
// The current token is the token at the start of the current annotation.
int currentToken = 0;
// get Person entities (selected by annotation type)
//FeatureMap personConstraint = new SimpleFeatureMapImpl();
//personConstraint.put(LOOKUP_MAJOR_TYPE_FEATURE_NAME,
// PERSON_CATEGORY);
Set personConstraint = new HashSet();
personConstraint.add(PERSON_CATEGORY);
AnnotationSet people =
this.defaultAnnotations.get(personConstraint);
// get all JobTitle entities (selected by annotation type)
//FeatureMap constraintJobTitle = new SimpleFeatureMapImpl();
//constraintJobTitle.put(LOOKUP_MAJOR_TYPE_FEATURE_NAME, JOBTITLE_CATEGORY);
Set jobTitleConstraint = new HashSet();
jobTitleConstraint.add(JOBTITLE_CATEGORY);
AnnotationSet jobTitles =
this.defaultAnnotations.get(jobTitleConstraint);
// get organization nouns: Lookup annotations constrained by major type
FeatureMap orgNounConstraint = new SimpleFeatureMapImpl();
orgNounConstraint.put(LOOKUP_MAJOR_TYPE_FEATURE_NAME,
ORGANIZATION_NOUN_CATEGORY);
AnnotationSet orgNouns =
this.defaultAnnotations.get(LOOKUP_CATEGORY, orgNounConstraint);
// get Organization entities (selected by annotation type)
Set orgConstraint = new HashSet();
orgConstraint.add(ORGANIZATION_CATEGORY);
AnnotationSet organizations =
this.defaultAnnotations.get(orgConstraint);
// combine them into a list of nominals; each query result may be null
Set nominals = new HashSet();
if (people != null) {
nominals.addAll(people);
}
if (jobTitles != null) {
nominals.addAll(jobTitles);
}
if (orgNouns != null) {
nominals.addAll(orgNouns);
}
if (organizations != null) {
nominals.addAll(organizations);
}
// Out.println("total nominals: " + nominals.size());
// sort them according to offset
// NOTE(review): with the raw Set declared above, toArray(...) is typed as
// Object[] and this assignment should not compile; the generic type arguments
// appear to have been stripped from this copy - confirm against upstream.
nominalArray = nominals.toArray(new Annotation[0]);
java.util.Arrays.sort(nominalArray, new OffsetComparator());
ArrayList previousPeople = new ArrayList();
ArrayList previousOrgs = new ArrayList();
// process all nominals
// NOTE(review): the next line is damaged. The loop header breaks off after
// "i" and the text resumes inside a different method whose signature is not
// visible here; that method walks "annotations" and returns true as soon as
// "a" overlaps one of them, false otherwise. Everything in between (the
// rest of execute() and the start of that method) is missing and must be
// restored from the upstream source before this file can compile.
for (int i=0; i iter = annotations.iterator();
while (iter.hasNext()) {
Annotation current = iter.next();
if (a.overlaps(current)) {
return true;
}
}
return false;
}
/**
 * Keeps the current token pointer lined up with the start of an annotation.
 *
 * <p>Starting at {@code currentPosition}, the pointer is stepped forwards while
 * the target starts after the current token, or backwards while the target
 * starts before it. A forward walk stops at the first token that does not
 * start before the target; a backward walk stops at the first token that
 * does not start after it.
 *
 * <p>The walk is confined to the array: a start index outside the array is
 * clamped to the nearest valid index, and the walk stops at the first or last
 * token instead of running off either end. An empty token array leaves the
 * position unchanged.
 *
 * @param target annotation whose start offset is being looked for
 * @param currentPosition index in {@code tokens} to start walking from
 * @param tokens token annotations, assumed to be ordered by start offset
 * @return the index reached by the walk, or {@code currentPosition} unchanged
 *         when {@code tokens} is empty
 */
private int advanceTokenPosition(Annotation target, int currentPosition,
    Object[] tokens) {
  if (tokens.length == 0) {
    // nothing to line up with
    return currentPosition;
  }
  final int lastIndex = tokens.length - 1;
  int position = Math.max(0, Math.min(currentPosition, lastIndex));
  final long targetOffset = target.getStartNode().getOffset().longValue();
  long currentOffset = ((Annotation) tokens[position])
      .getStartNode().getOffset().longValue();
  if (targetOffset > currentOffset) {
    // target is further on: walk forwards, but never past the last token
    while (targetOffset > currentOffset && position < lastIndex) {
      position++;
      currentOffset = ((Annotation) tokens[position])
          .getStartNode().getOffset().longValue();
    }
  }
  else {
    // target is at or before this token: walk backwards, never before index 0
    while (targetOffset < currentOffset && position > 0) {
      position--;
      currentOffset = ((Annotation) tokens[position])
          .getStartNode().getOffset().longValue();
    }
  }
  return position;
}
/**
 * Counts the tokens that start between the end of {@code first} and the
 * start of {@code second}.
 *
 * <p>Tokens are examined from {@code currentPosition} onwards. A token is
 * counted when its start offset is at or after the end of {@code first} and
 * before the start of {@code second}. Scanning stops at the first token that
 * starts at or after {@code second}, or at the end of the token array,
 * whichever comes first. If the two annotations are not in order (the first
 * ends after the second starts) no token can qualify and the result is 0.
 *
 * @param first the earlier annotation
 * @param second the later annotation
 * @param currentPosition index in {@code tokens} at which to start scanning;
 *        a negative value is treated as 0
 * @param tokens token annotations, assumed to be ordered by start offset
 * @return the number of qualifying tokens
 */
private int countInterveningTokens(Annotation first, Annotation second,
    int currentPosition, Object[] tokens) {
  final long startOffset = first.getEndNode().getOffset().longValue();
  final long endOffset = second.getStartNode().getOffset().longValue();
  int interveningTokens = 0;
  // bounded scan: stop at the array end rather than indexing past it
  for (int i = Math.max(0, currentPosition); i < tokens.length; i++) {
    final long tokenOffset = ((Annotation) tokens[i])
        .getStartNode().getOffset().longValue();
    if (tokenOffset >= endOffset) {
      break;
    }
    if (tokenOffset >= startOffset) {
      interveningTokens++;
    }
  }
  return interveningTokens;
}
/**
 * Finds the first token, scanning from {@code currentPosition}, that does not
 * start before the end of the given annotation.
 *
 * <p>The scan is not bounds-checked: if no token from {@code currentPosition}
 * onwards starts at or after the annotation's end offset, an
 * {@link ArrayIndexOutOfBoundsException} is thrown.
 *
 * @param current the annotation to look past
 * @param currentPosition index in {@code tokens} at which to start scanning
 * @param tokens token annotations, assumed to be ordered by start offset
 * @return the first token whose start offset is not less than the end offset
 *         of {@code current}
 */
private Annotation getFollowingToken(Annotation current, int currentPosition,
    Object[] tokens) {
  final long endOffset = current.getEndNode().getOffset().longValue();
  int index = currentPosition;
  Annotation candidate = (Annotation) tokens[index];
  while (candidate.getStartNode().getOffset().longValue() < endOffset) {
    index++;
    candidate = (Annotation) tokens[index];
  }
  return candidate;
}
/**
 * Get the text of an annotation.
 * Starts by fetching the annotation's tokens via getSortedTokens(ann) and
 * preparing a StringBuffer for the output.
 */
@SuppressWarnings("unused")
private String stringValue(Annotation ann) {
Object[] tokens = getSortedTokens(ann);
StringBuffer output = new StringBuffer();
// NOTE(review): the next line is damaged. The loop header breaks off after
// "i" and the text resumes with the tail of the signature of
// getResolvedAnaphora(), which returns the anaphor2antecedent map. The rest
// of stringValue() and the start of getResolvedAnaphora() (its modifiers and
// return type) are missing and must be restored from the upstream source.
for (int i=0;i getResolvedAnaphora() {
return this.anaphor2antecedent;
}
/**
 * Prepares a run: empties the anaphor-to-antecedent map and selects the
 * annotation set to work on.
 *
 * <p>The document's default annotation set is used when no set name is
 * configured (null or empty); otherwise the named set is used. If the
 * selected set is null or empty a warning is printed via {@code Err} and the
 * method returns normally; no exception is raised for that case.
 *
 * @throws ExecutionException declared for callers; nothing in this method
 *         throws it directly
 */
private void preprocess() throws ExecutionException {
  // forget whatever was resolved on a previous run
  anaphor2antecedent.clear();

  // choose the input set: default set unless a name has been configured
  final boolean useDefaultSet =
      annotationSetName == null || annotationSetName.equals("");
  defaultAnnotations = useDefaultSet
      ? document.getAnnotations()
      : document.getAnnotations(annotationSetName);

  // nothing to work on: warn only
  if (defaultAnnotations == null || defaultAnnotations.isEmpty()) {
    Err.prln("Coref Warning: No annotations found for processing!");
  }
}
}