gate.creole.annotdelete.AnnotationDeletePR Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of annie Show documentation
Show all versions of annie Show documentation
ANNIE is a general purpose information extraction system that
provides the building blocks of many other GATE applications.
The newest version!
/*
* AnnotationDeletePR.java
*
* Copyright (c) 1995-2012, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
*
* Kalina Bontcheva, 19/10/2001
*
* $Id: AnnotationDeletePR.java 19738 2016-11-15 15:15:06Z markagreenwood $
*/
package gate.creole.annotdelete;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import gate.Annotation;
import gate.AnnotationSet;
import gate.GateConstants;
import gate.Resource;
import gate.creole.ANNIEConstants;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.GateRuntimeException;
/**
* This class is the implementation of a processing resource which
* deletes all annotations and sets other than 'original markups'.
* If put at the start of an application, it'll ensure that the
* document is restored to its clean state before being processed.
*/
@CreoleResource(name = "Document Reset PR", icon = "document-reset",
comment = "Remove named annotation sets or reset the default annotation set",
helpURL = "http://gate.ac.uk/userguide/sec:misc-creole:reset")
public class AnnotationDeletePR extends AbstractLanguageAnalyser {
private static final long serialVersionUID = 4738446480871610387L;
public static final String
TRANSD_DOCUMENT_PARAMETER_NAME = "document";
public static final String
TRANSD_ANNOT_TYPES_PARAMETER_NAME = "annotationTypes";
public static final String
TRANSD_SETS_KEEP_PARAMETER_NAME = "setsToKeep";
public static final String
TRANSD_SETS_KEEP_ORIGIANL_MARKUPS_ANNOT_SET = "keppOriginalMarkupsAS";
protected String markupSetName = GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME;
protected List annotationTypes;
protected List setsToKeep;
protected List setsToRemove = null;
protected Boolean keepOriginalMarkupsAS;
/**
* This parameter specifies the names of sets to remove or reset. If this
* list is empty or null, it will be ignored. If this list is not empty,
* all the other parameters of this PR are ignored. In order to include
* the default annotation set in this list, add a list entry that is either
* null or an empty String.
* @param setsToRemove a List of String that contains the names of
* annotation sets to remove.
*/
@RunTime
@Optional
@CreoleParameter(
comment = "A list of annotation set names to reset/remove. If non-empty, ignore the parameters which specify what to keep"
)
public void setSetsToRemove(List setsToRemove) {
this.setsToRemove = setsToRemove;
}
public List getSetsToRemove() {
return this.setsToRemove;
}
/** Initialise this resource, and return it. */
@Override
public Resource init() throws ResourceInstantiationException
{
return super.init();
} // init()
/**
* Reinitialises the processing resource. After calling this method the
* resource should be in the state it is after calling init.
* If the resource depends on external resources (such as rules files) then
* the resource will re-read those resources. If the data used to create
* the resource has changed since the resource has been created then the
* resource will change too after calling reInit().
*/
@Override
public void reInit() throws ResourceInstantiationException
{
init();
} // reInit()
/** Run the resource. */
@Override
public void execute() throws ExecutionException {
if(document == null)
throw new GateRuntimeException("No document to process!");
Object matchesMapObject = document.getFeatures().get(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME);
@SuppressWarnings("unchecked")
Map>> matchesMap =
matchesMapObject instanceof Map
? (Map>>)matchesMapObject
: null;
if(setsToRemove != null && !setsToRemove.isEmpty()) {
// just remove or empty the sets in this list and ignore
// everything else
for(String setName : setsToRemove) {
if(setName == null || setName.equals("")) {
// clear the default annotation set
if (annotationTypes == null || annotationTypes.isEmpty()) {
document.getAnnotations().clear();
removeFromDocumentCorefData( (String)null, matchesMap);
} else {
removeSubSet(document.getAnnotations(), matchesMap);
}
//empty the relation set associated with the annotation set
document.getAnnotations().getRelations().clear();
} else {
// remove this named set
if (annotationTypes == null || annotationTypes.isEmpty()) {
document.removeAnnotationSet(setName);
removeFromDocumentCorefData( setName, matchesMap);
} else {
removeSubSet(document.getAnnotations(setName), matchesMap);
}
}
}
if(matchesMap != null) {
document.getFeatures().put(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME,
matchesMap);
}
} else {
// ignore the setsToRemove parameter and process according to
// the other parameters
// determine which sets to keep
List keepSets = new ArrayList();
if(setsToKeep != null) keepSets.addAll(setsToKeep);
if(keepOriginalMarkupsAS.booleanValue() &&
!keepSets.contains(markupSetName)) {
keepSets.add(markupSetName);
}
//Unless we've been asked to keep it, first clear the default set,
//which cannot be removed
if(!keepSets.contains(null) && !keepSets.contains("")) {
if (annotationTypes == null || annotationTypes.isEmpty()) {
document.getAnnotations().clear();
removeFromDocumentCorefData( (String)null, matchesMap);
} else {
removeSubSet(document.getAnnotations(), matchesMap);
}
//empty the relation set associated with the annotation set
document.getAnnotations().getRelations().clear();
}
//get the names of all sets
Map namedSets = document.getNamedAnnotationSets();
//nothing left to do if there are no named sets
if (namedSets != null && !namedSets.isEmpty()) {
//loop through the sets and delete them all unless
//we've been asked to keep them
List setNames = new ArrayList(namedSets.keySet());
Iterator iter = setNames.iterator();
String setName;
while (iter.hasNext()) {
setName = iter.next();
//check first whether this is the original markups or one of the sets
//that we want to keep
if (setName != null) {
// skip named sets from setsToKeep
if(keepSets.contains(setName)) continue;
if (annotationTypes == null || annotationTypes.isEmpty()) {
document.removeAnnotationSet(setName);
removeFromDocumentCorefData( setName, matchesMap);
} else {
removeSubSet(document.getAnnotations(setName), matchesMap);
}
}//if
}
}
// and finally we add it to the document
if(matchesMap != null) {
document.getFeatures().put(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME,
matchesMap);
}
} // if(setsToRemove != null && !setsToRemove.isEmpty())
} // execute()
// method to update the Document-Coref-data
private void removeFromDocumentCorefData(String currentSet, Map>> matchesMap) {
if(matchesMap == null)
return;
// if this is defaultAnnotationSet, we cannot remove this
if(currentSet == null) {
List> matches = matchesMap.get(currentSet);
if (matches == null || matches.size() == 0) {
// do nothing
return;
}
else {
matchesMap.put(currentSet, new ArrayList>());
}
} else {
// we remove this set from the Coref Data
matchesMap.remove(currentSet);
}
}
// method to update the Document-Coref-data
private void removeAnnotationsFromCorefData(AnnotationSet annotations, String setName, Map>> matchesMap) {
if(matchesMap == null) {
return;
}
List> matches = matchesMap.get(setName);
if(matches == null)
return;
// each element in the matches is a group of annotation IDs
// so for each annotation we will have to traverse through all the lists and
// find out the annotation and remove it
List annots = new ArrayList(annotations);
for(int i=0; i> idIters = matches.iterator();
List ids = new ArrayList();
while(idIters.hasNext()) {
ids = idIters.next();
if(ids.remove(toRemove.getId())) {
// yes removed
break;
}
}
if(ids.size()==0) {
matches.remove(ids);
}
}
// and finally see if there is any group available
if(matches.size()==0) {
matchesMap.remove(setName);
}
}
/* End */
private void removeSubSet(AnnotationSet theSet, Map>> matchMap) {
AnnotationSet toRemove = theSet.get(new HashSet(annotationTypes));
if (toRemove == null || toRemove.isEmpty())
return;
theSet.removeAll(toRemove);
removeAnnotationsFromCorefData(toRemove, theSet.getName(), matchMap);
}//removeSubSet
public void setMarkupASName(String newMarkupASName) {
markupSetName = newMarkupASName;
}
public String getMarkupASName() {
return markupSetName;
}
public List getAnnotationTypes() {
return this.annotationTypes;
}
@RunTime
@Optional
@CreoleParameter(comment="The annotation types to delete otherwise delete all")
public void setAnnotationTypes(List newTypes) {
annotationTypes = newTypes;
}
public List getSetsToKeep() {
return this.setsToKeep;
}
@RunTime
@Optional
@CreoleParameter(comment="The annotation sets to keep otherwise delete all", defaultValue="Key")
public void setSetsToKeep(List newSetNames) {
//we need to modify this list sometimes, so to make sure it's not some
//unmodifiable version, we'll create our own
setsToKeep = newSetNames != null ?
new ArrayList(newSetNames):
new ArrayList();
}
public Boolean getKeepOriginalMarkupsAS() {
return keepOriginalMarkupsAS;
}
@RunTime
@Optional
@CreoleParameter(comment="Should we keep the 'Original markups' annotation set?", defaultValue="true")
public void setKeepOriginalMarkupsAS(Boolean emptyDefaultAnnotationSet) {
this.keepOriginalMarkupsAS = emptyDefaultAnnotationSet;
}
} // class AnnotationSetTransfer
© 2015 - 2024 Weber Informatics LLC | Privacy Policy