gate.creole.coref.AbstractCoreferencer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of annie Show documentation
Show all versions of annie Show documentation
ANNIE is a general purpose information extraction system that
provides the building blocks of many other GATE applications.
The newest version!
/*
* AbstractCoreferencer.java
*
* Copyright (c) 1995-2012, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
*
* $Id: AbstractCoreferencer.java 19738 2016-11-15 15:15:06Z markagreenwood $
*/
package gate.creole.coref;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.FeatureMap;
import gate.Resource;
import gate.creole.ANNIEConstants;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ResourceInstantiationException;
import gate.util.GateRuntimeException;
import gate.util.SimpleFeatureMapImpl;
public abstract class AbstractCoreferencer extends AbstractLanguageAnalyser implements ANNIEConstants {
private static final long serialVersionUID = 7077378848676784207L;
public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";
public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE";
public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset";
/** --- */
private static final boolean DEBUG = false;
public String coreferenceType;
/** --- */
public AbstractCoreferencer(String type) {
this.coreferenceType = type;
}
/** Initialise this resource, and return it. */
@Override
public Resource init() throws ResourceInstantiationException {
Resource result = super.init();
return result;
} // init()
/**
* Reinitialises the processing resource. After calling this method the
* resource should be in the state it is after calling init.
* If the resource depends on external resources (such as rules files) then
* the resource will re-read those resources. If the data used to create
* the resource has changed since the resource has been created then the
* resource will change too after calling reInit().
*/
@Override
public void reInit() throws ResourceInstantiationException {
init();
} // reInit()
/** Set the document to run on. */
@Override
public void setDocument(Document newDocument) {
super.setDocument(newDocument);
}
/** --- */
public abstract void setAnnotationSetName(String annotationSetName);
/** --- */
public abstract String getAnnotationSetName();
/** --- */
protected void generateCorefChains(Map ana2ant)
throws GateRuntimeException{
String asName = getAnnotationSetName();
AnnotationSet outputSet = null;
if (null == asName || asName.equals("")) {
outputSet = getDocument().getAnnotations();
}
else {
outputSet = getDocument().getAnnotations(asName);
}
//3. generate new annotations
Iterator> it = ana2ant.entrySet().iterator();
while (it.hasNext()) {
Map.Entry currLink = it.next();
Annotation anaphor = currLink.getKey();
Annotation antecedent = currLink.getValue();
if (DEBUG) {
AnnotationSet corefSet = getDocument().getAnnotations("COREF");
Long antOffset = new Long(0);
if (null != antecedent) {
antOffset = antecedent.getStartNode().getOffset();
}
FeatureMap features = new SimpleFeatureMapImpl();
features.put("antecedent",antOffset);
corefSet.add(anaphor.getStartNode(),anaphor.getEndNode(),"COREF",features);
}
//do we have antecedent?
if (null == antecedent) {
continue;
}
//get the ortho-matches of the antecedent
@SuppressWarnings("unchecked")
List matches = (List)antecedent.getFeatures().
get(ANNOTATION_COREF_FEATURE_NAME);
if (null == matches) {
matches = new ArrayList();
matches.add(antecedent.getId());
antecedent.getFeatures().
put(ANNOTATION_COREF_FEATURE_NAME,matches);
//check if the document has a list of matches
//if yes, simply add the new list to it
//if not, create it and add the list of matches to it
if (document.getFeatures().containsKey(
DOCUMENT_COREF_FEATURE_NAME)) {
@SuppressWarnings("unchecked")
Map>> matchesMap = (Map>>)document.getFeatures().get(
DOCUMENT_COREF_FEATURE_NAME);
List> matchesList = matchesMap.get(getAnnotationSetName());
if (matchesList == null) {
matchesList = new ArrayList>();
matchesMap.put(getAnnotationSetName(), matchesList);
}
matchesList.add(matches);
} else {
Map>> matchesMap = new HashMap>>();
List> matchesList = new ArrayList>();
matchesMap.put(getAnnotationSetName(), matchesList);
matchesList.add(matches);
}//if else
}//if matches == null
FeatureMap features = new SimpleFeatureMapImpl();
features.put(COREF_TYPE_FEATURE_NAME, coreferenceType);
features.put(ANNOTATION_COREF_FEATURE_NAME, matches);
features.put(COREF_ANTECEDENT_FEATURE_NAME,
antecedent.getStartNode().getOffset());
Integer annID = outputSet.add(anaphor.getStartNode(),
anaphor.getEndNode(),
antecedent.getType(),
features);
matches.add(annID);
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy