All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gate.annotation.AnnotationSetImpl Maven / Gradle / Ivy

Go to download

GATE - general achitecture for text engineering - is open source software capable of solving almost any text processing problem. This artifact enables you to embed the core GATE Embedded with its essential dependencies. You will able to use the GATE Embedded API and load and store GATE XML documents. This artifact is the perfect dependency for CREOLE plugins or for applications that need to customize the GATE dependencies due to confict with their own dependencies or for lower footprint.

The newest version!
/*
 *  AnnotationSetImpl.java
 *
 *  Copyright (c) 1995-2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Hamish Cunningham, 7/Feb/2000
 *
 *  Developer notes:
 *  ---
 *
 *  the addToIndex... and indexBy... methods could be refactored as I'm
 *  sure they can be made simpler
 *
 *  every set to which annotation will be added has to have positional
 *  indexing, so that we can find or create the nodes on the new annotations
 *
 *  note that annotations added anywhere other than sets that are
 *  stored on the document will not get stored anywhere...
 *
 *  nodes aren't doing anything useful now. needs some interface that allows
 *  their creation, defaulting to no coterminous duplicates, but allowing such
 *  if required
 *
 *  $Id: AnnotationSetImpl.java 19658 2016-10-10 06:46:13Z markagreenwood $
 */
package gate.annotation;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.commons.lang3.StringUtils;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.DocumentContent;
import gate.FeatureMap;
import gate.Gate;
import gate.GateConstants;
import gate.Node;
import gate.corpora.DocumentImpl;
import gate.event.AnnotationSetEvent;
import gate.event.AnnotationSetListener;
import gate.event.GateEvent;
import gate.event.GateListener;
import gate.relations.RelationSet;
import gate.util.InvalidOffsetException;
import gate.util.RBTreeMap;

/**
 * Implementation of AnnotationSet. Has a number of indices, all bar one of
 * which are null by default and are only constructed when asked for. Has lots
 * of get methods with various selection criteria; these return views into the
 * set, which are nonetheless valid sets in their own right (but will not
 * necesarily be fully indexed). Has a name, which is null by default; clients
 * of Document can request named AnnotationSets if they so desire. Has a
 * reference to the Document it is attached to. Contrary to Collections
 * convention, there is no no-arg constructor, as this would leave the set in an
 * inconsistent state.
 * 

* There are four indices: annotation by id, annotations by type, annotations by * start node and nodes by offset. The last two jointly provide positional * indexing; construction of these is triggered by indexByStart(), or by calling * a get method that selects on offset. The type index is triggered by * indexByType(), or calling a get method that selects on type. The id index is * always present. *

* NOTE: equality and hashCode of this implementation is exclusively based on the annotations * which appear in the set (if any). The document the set comes from, the name of the set or * the relations stored in that set are not taken into account for equality or hashSet!! * * */ public class AnnotationSetImpl extends AbstractSet implements AnnotationSet { /** Freeze the serialization UID. */ static final long serialVersionUID = 1479426765310434166L; /** The name of this set */ String name = null; /** The document this set belongs to */ DocumentImpl doc; /** Maps annotation ids (Integers) to Annotations */ transient protected HashMap annotsById; /** Maps offsets (Longs) to nodes */ transient RBTreeMap nodesByOffset = null; /** * This field is used temporarily during serialisation to store all the * annotations that need to be saved. At all other times, this will be null; */ private Annotation[] annotations; /** Maps annotation types (Strings) to AnnotationSets */ transient Map annotsByType = null; /** * Maps node ids (Integers) to Annotations or a Collection of Annotations that * start from that node */ transient Map annotsByStartNode; protected transient Vector annotationSetListeners; private transient Vector gateListeners; /** * A caching value that greatly improves the performance of get * methods that have a defined beginning and end. By tracking the * maximum length that an annotation can be, we know the maximum * amount of nodes outside of a specified range that must be checked * to see if an annotation starting at one of those nodes crosses into * the range. This mechanism is not perfect because we do not check if * we have to decrease it if an annotation is removed from the set. * However, usually annotations are removed because they are about to * be replaced with another one that is >= to the length of the one * being replaced, so this isn't a big deal. At worst, it means that * the get methods simply checks a few more start positions than it * needs to. */ protected transient Long longestAnnot = 0l; protected RelationSet relations = null; // Empty AnnotationSet to be returned instead of null //public final static AnnotationSet emptyAS; //static { //emptyAnnotationSet = new ImmutableAnnotationSetImpl(null,null); //} /** Construction from Document. */ public AnnotationSetImpl(Document doc) { annotsById = new HashMap(); this.doc = (DocumentImpl)doc; } // construction from document /** Construction from Document and name. */ public AnnotationSetImpl(Document doc, String name) { this(doc); this.name = name; } // construction from document and name /** Construction from an existing AnnotationSet */ @SuppressWarnings("unchecked") public AnnotationSetImpl(AnnotationSet c) throws ClassCastException { this(c.getDocument(), c.getName()); // the original annotationset is of the same implementation if(c instanceof AnnotationSetImpl) { AnnotationSetImpl theC = (AnnotationSetImpl)c; annotsById.putAll(theC.annotsById); if(theC.annotsByStartNode != null) { annotsByStartNode = new HashMap(Gate.HASH_STH_SIZE); annotsByStartNode.putAll(theC.annotsByStartNode); } if(theC.annotsByType != null) { annotsByType = new HashMap(Gate.HASH_STH_SIZE); annotsByType.putAll(theC.annotsByType); } if(theC.nodesByOffset != null) { nodesByOffset = (RBTreeMap)theC.nodesByOffset.clone(); } } // the implementation is not the default one // let's add the annotations one by one else { Iterator iterannots = c.iterator(); while(iterannots.hasNext()) { add(iterannots.next()); } } } @Override public void clear() { // while nullifying the indexes does clear the set it doesn't fire the // appropriate events so use the Iterator based clear implementation in // AbstractSet.clear() first and then reset the indexes super.clear(); //reset all the indexes to be sure everything has been cleared correctly annotsById = new HashMap(); nodesByOffset = null; annotsByStartNode = null; annotsByType = null; longestAnnot = 0l; } /** * This inner class serves as the return value from the iterator() method. */ class AnnotationSetIterator implements Iterator { private Iterator iter; protected Annotation lastNext = null; AnnotationSetIterator() { iter = annotsById.values().iterator(); } @Override public boolean hasNext() { return iter.hasNext(); } @Override public Annotation next() { return (lastNext = iter.next()); } @Override public void remove() { // this takes care of the ID index iter.remove(); // what if lastNext is null if(lastNext == null) return; // remove from type index removeFromTypeIndex(lastNext); // remove from offset indices removeFromOffsetIndex(lastNext); // that's the second way of removing annotations from a set // apart from calling remove() on the set itself fireAnnotationRemoved(new AnnotationSetEvent(AnnotationSetImpl.this, AnnotationSetEvent.ANNOTATION_REMOVED, getDocument(), lastNext)); } // remove() }; // AnnotationSetIterator /** Get an iterator for this set */ @Override public Iterator iterator() { return new AnnotationSetIterator(); } /** Remove an element from this set. */ @Override public boolean remove(Object o) throws ClassCastException { Annotation a = (Annotation)o; boolean wasPresent = removeFromIdIndex(a); if(wasPresent) { removeFromTypeIndex(a); removeFromOffsetIndex(a); } // fire the event fireAnnotationRemoved(new AnnotationSetEvent(AnnotationSetImpl.this, AnnotationSetEvent.ANNOTATION_REMOVED, getDocument(), a)); return wasPresent; } // remove(o) /** Remove from the ID index. */ protected boolean removeFromIdIndex(Annotation a) { if(annotsById.remove(a.getId()) == null) return false; return true; } // removeFromIdIndex(a) /** Remove from the type index. */ protected void removeFromTypeIndex(Annotation a) { if(annotsByType != null) { AnnotationSet sameType = annotsByType.get(a.getType()); if(sameType != null) sameType.remove(a); if(sameType != null && sameType.isEmpty()) // none left of this type annotsByType.remove(a.getType()); } } // removeFromTypeIndex(a) /** Remove from the offset indices. */ protected void removeFromOffsetIndex(Annotation a) { if (nodesByOffset != null) { // if there is a nodesByOffset map then we need to make sure it is // correctly updated and redundant nodes removed, otherwise methods that // use it can report incorrect information // get all the annotations (ignoring the one we are removing that start at the same node Set tmp = new HashSet(); tmp.addAll(gate.Utils.getAnnotationsAtOffset(this,a.getStartNode().getOffset())); tmp.remove(a); if (tmp.size() == 0) { // if there aren't any then we may need to remove the node, but let's // double check there aren't any annotations that end where this one // starts tmp.addAll(gate.Utils.getAnnotationsEndingAtOffset(this, a.getStartNode().getOffset())); tmp.remove(a); if (tmp.size() == 0) { // nope, there are no annotations that start or end where the one we // are removing starts so remove the node from the map nodesByOffset.remove(a.getStartNode().getOffset()); } } // repeat the logic above but for the node at the end of the annotation we // want to remove from the set tmp = new HashSet(); tmp.addAll(gate.Utils.getAnnotationsAtOffset(this,a.getEndNode().getOffset())); tmp.remove(a); if (tmp.size() == 0) { tmp.addAll(gate.Utils.getAnnotationsEndingAtOffset(this, a.getEndNode().getOffset())); tmp.remove(a); if (tmp.size() == 0) { nodesByOffset.remove(a.getEndNode().getOffset()); } } } if(annotsByStartNode != null) { Integer id = a.getStartNode().getId(); // might be an annotation or an annotationset Object objectAtNode = annotsByStartNode.get(id); if(objectAtNode instanceof Annotation) { annotsByStartNode.remove(id); // no annotations start here any // more return; } // otherwise it is a Collection @SuppressWarnings("unchecked") Collection starterAnnots = (Collection)objectAtNode; starterAnnots.remove(a); // if there is only one annotation left // we discard the set and put directly the annotation if(starterAnnots.size() == 1) annotsByStartNode.put(id, starterAnnots.iterator().next()); } } // removeFromOffsetIndex(a) /** The size of this set */ @Override public int size() { return annotsById.size(); } /** Find annotations by id */ @Override public Annotation get(Integer id) { return annotsById.get(id); } // get(id) /** * Get all annotations. * * @return an ImmutableAnnotationSet, empty or not */ @Override public AnnotationSet get() { if (annotsById.isEmpty()) return emptyAS(); return new ImmutableAnnotationSetImpl(doc, annotsById.values()); } // get() /** * Select annotations by type * * @return an ImmutableAnnotationSet */ @Override public AnnotationSet get(String type) { if(annotsByType == null) indexByType(); AnnotationSet byType = annotsByType.get(type); if (byType==null)return emptyAS(); // convert the mutable AS into an immutable one return byType.get(); } // get(type) /** * Select annotations by a set of types. Expects a Set of String. * * @return an ImmutableAnnotationSet */ @Override public AnnotationSet get(Set types) throws ClassCastException { if(annotsByType == null) indexByType(); Iterator iter = types.iterator(); List annotations = new ArrayList(); while(iter.hasNext()) { String type = iter.next(); AnnotationSet as = annotsByType.get(type); if(as != null) { Iterator iterAnnot = as.iterator(); while(iterAnnot.hasNext()) { annotations.add(iterAnnot.next()); } } } // while if(annotations.isEmpty()) return emptyAS(); return new ImmutableAnnotationSetImpl(doc, annotations); } // get(types) /** * Select annotations by type and features * * This will return an annotation set containing just those annotations of a * particular type (i.e. with a particular name) and which have features with * specific names and values. (It will also return annotations that have * features besides those specified, but it will not return any annotations * that do not have all the specified feature-value pairs.) * * However, if constraints contains a feature whose value is equal to * gate.creole.ANNIEConstants.LOOKUP_CLASS_FEATURE_NAME (which is normally * "class"), then GATE will attempt to match that feature using an ontology * which it will try to retreive from a feature on the both the annotation and * in constraints. If these do not return identical ontologies, or if either * the annotation or constraints does not contain an ontology, then matching * will fail, and the annotation will not be added. In summary, this method * will not work normally for features with the name "class". * * @param type * The name of the annotations to return. * @param constraints * A feature map containing all of the feature value pairs that the * annotation must have in order for them to be returned. * @return An annotation set containing only those annotations with the given * name and which have the specified set of feature-value pairs. */ @Override public AnnotationSet get(String type, FeatureMap constraints) { if(annotsByType == null) indexByType(); AnnotationSet typeSet = get(type); if(typeSet == null) return null; Iterator iter = typeSet.iterator(); List annotationsToAdd = new ArrayList(); while(iter.hasNext()) { Annotation a = iter.next(); // we check for matching constraints by simple equality. a // feature map satisfies the constraints if it contains all the // key/value pairs from the constraints map // if // (a.getFeatures().entrySet().containsAll(constraints.entrySet())) if(a.getFeatures().subsumes(constraints)) annotationsToAdd.add(a); } // while if(annotationsToAdd.isEmpty()) return emptyAS(); return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(type, constraints) /** Select annotations by type and feature names */ @Override public AnnotationSet get(String type, Set featureNames) { if(annotsByType == null) indexByType(); AnnotationSet typeSet = null; if(type != null) { // if a type is provided, try finding annotations of this type typeSet = get(type); // if none exist, then return coz nothing left to do if(typeSet == null) return null; } List annotationsToAdd = new ArrayList(); Iterator iter = null; if(type != null) iter = typeSet.iterator(); else iter = annotsById.values().iterator(); while(iter.hasNext()) { Annotation a = iter.next(); // we check for matching constraints by simple equality. a // feature map satisfies the constraints if it contains all the // key/value pairs from the constraints map if(a.getFeatures().keySet().containsAll(featureNames)) annotationsToAdd.add(a); } // while if(annotationsToAdd.isEmpty()) return emptyAS(); return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(type, featureNames) /** * Select annotations by offset. This returns the set of annotations whose * start node is the least such that it is less than or equal to offset. If a * positional index doesn't exist it is created. If there are no nodes at or * beyond the offset param then it will return an empty annotationset. */ @Override public AnnotationSet get(Long offset) { if(annotsByStartNode == null) indexByStartOffset(); // find the next node at or after offset; get the annots starting // there Node nextNode = nodesByOffset.getNextOf(offset); if(nextNode == null) // no nodes at or beyond this offset return emptyAS(); Collection annotationsToAdd = getAnnotsByStartNode(nextNode .getId()); // skip all the nodes that have no starting annotations while(annotationsToAdd == null) { nextNode = nodesByOffset.getNextOf(nextNode.getOffset() .longValue() + 1); if (nextNode==null) return emptyAS(); annotationsToAdd = getAnnotsByStartNode(nextNode.getId()); } return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } /** * Select annotations by offset. This returns the set of annotations that * start exactly at the given offset. If a * positional index doesn't exist it is created. If there are no annotations * at the given offset then an empty annotation set is returned. * * @param offset The starting offset for which to return annotations * @return a ImmutableAnnotationSetImpl containing all annotations starting at the given * offset (possibly empty). */ public AnnotationSet getStartingAt(long offset) { if(annotsByStartNode == null) indexByStartOffset(); Node node = nodesByOffset.get(offset); if(node == null) { // no nodes at or beyond this offset return emptyAS(); } return new ImmutableAnnotationSetImpl(doc, getAnnotsByStartNode(node.getId())); } /** * Return a list of annotations sorted by increasing start offset, i.e. in the order * they appear in the document. If more than one annotation starts at a specific offset * the order of these annotations is unspecified. * * @return a list of annotations ordered by increasing start offset. If a positional * index does not exist, it is created. */ @Override public List inDocumentOrder() { if(annotsByStartNode == null) indexByStartOffset(); Collection values = nodesByOffset.values(); List result = new ArrayList(); for(Node nodeObj : values) { Collection anns = getAnnotsByStartNode(nodeObj.getId()); if(anns != null) { result.addAll(anns); } } return result; } /** * Select annotations by offset. This returns the set of annotations that * overlap totaly or partially with the interval defined by the two provided * offsets.The result will include all the annotations that either: *

    *
  • start before the start offset and end strictly after it
  • *
  • OR
  • *
  • start at a position between the start and the end offsets
  • *
* * @return an ImmutableAnnotationSet */ @Override public AnnotationSet get(Long startOffset, Long endOffset) { return get(null, startOffset, endOffset); } // get(startOfset, endOffset) /** * Select annotations by offset. This returns the set of annotations that * overlap strictly with the interval defined by the two provided offsets.The * result will include all the annotations that start at the start offset and * end strictly at the end offset */ public AnnotationSet getStrict(Long startOffset, Long endOffset) { // the result will include all the annotations that // start at the start offset and end strictly at the end offset if(annotsByStartNode == null) indexByStartOffset(); List annotationsToAdd = null; Iterator annotsIter; Node currentNode; Annotation currentAnnot; // find all the annots that start at the start offset currentNode = nodesByOffset.get(startOffset); if(currentNode != null) { Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint != null) { annotsIter = objFromPoint.iterator(); while(annotsIter.hasNext()) { currentAnnot = annotsIter.next(); if(currentAnnot.getEndNode().getOffset().compareTo(endOffset) == 0) { if(annotationsToAdd == null) annotationsToAdd = new ArrayList(); annotationsToAdd.add(currentAnnot); } // if } // while } // if } // if return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // getStrict(startOfset, endOffset) /** * Select annotations by offset. This returns the set of annotations of the * given type that overlap totaly or partially with the interval defined by * the two provided offsets.The result will include all the annotations that * either: *
    *
  • start before the start offset and end strictly after it
  • *
  • OR
  • *
  • start at a position between the start and the end offsets
  • *
*/ @Override public AnnotationSet get(String neededType, Long startOffset, Long endOffset) { if(annotsByStartNode == null) indexByStartOffset(); List annotationsToAdd = new ArrayList(); Iterator nodesIter; Iterator annotsIter; Node currentNode; Annotation currentAnnot; boolean checkType = StringUtils.isNotBlank(neededType); // find all the annots that start strictly before the start offset // and end // strictly after it Long searchStart = (startOffset - longestAnnot); if (searchStart < 0) searchStart = 0l; //nodesIter = nodesByOffset.headMap(startOffset).values().iterator(); nodesIter = nodesByOffset.subMap(searchStart, startOffset).values().iterator(); while(nodesIter.hasNext()) { currentNode = nodesIter.next(); Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint == null) continue; annotsIter = objFromPoint.iterator(); while(annotsIter.hasNext()) { currentAnnot = annotsIter.next(); //if neededType is set, make sure this is the right type if (checkType && !currentAnnot.getType().equals(neededType)) continue; if(currentAnnot.getEndNode().getOffset().compareTo(startOffset) > 0) { annotationsToAdd.add(currentAnnot); } // if } // while } // find all the annots that start at or after the start offset but // before the end offset nodesIter = nodesByOffset.subMap(startOffset, endOffset).values() .iterator(); while(nodesIter.hasNext()) { currentNode = nodesIter.next(); Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint == null) continue; //if no specific type requested, add all of the annots if (!checkType) annotationsToAdd.addAll(objFromPoint); else { //check the type of each annot annotsIter = objFromPoint.iterator(); while(annotsIter.hasNext()) { currentAnnot = annotsIter.next(); if (currentAnnot.getType().equals(neededType)) annotationsToAdd.add(currentAnnot); } // while } } return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(type, startOfset, endOffset) /** * Select annotations of the given type that completely span the range. * Formally, for any annotation a, a will be included in the return * set if: *
    *
  • a.getStartNode().getOffset() <= startOffset
  • *
  • and
  • *
  • a.getEndNode().getOffset() >= endOffset
  • *
* * @param neededType Type of annotation to return. If empty, all * annotation types will be returned. * @return annotations of the given type that completely span the range. */ @Override public AnnotationSet getCovering(String neededType, Long startOffset, Long endOffset) { //check the range if(endOffset < startOffset) return emptyAS(); //ensure index if(annotsByStartNode == null) indexByStartOffset(); //if the requested range is longer than the longest annotation in this set, //then there can be no annotations covering the range // so we return an empty set. if(endOffset - startOffset > longestAnnot) return emptyAS(); List annotationsToAdd = new ArrayList(); Iterator nodesIter; Iterator annotsIter; Node currentNode; Annotation currentAnnot; boolean checkType = StringUtils.isNotBlank(neededType); // find all the annots with startNode <= startOffset. Need the + 1 because // headMap returns strictly less than. // the length of the longest annot from the endOffset since we know that nothing // that starts earlier will be long enough to cover the entire span. Long searchStart = ((endOffset - 1) - longestAnnot); if (searchStart < 0) searchStart = 0l; //nodesIter = nodesByOffset.headMap(startOffset + 1).values().iterator(); nodesIter = nodesByOffset.subMap(searchStart, startOffset + 1).values().iterator(); while(nodesIter.hasNext()) { currentNode = nodesIter.next(); Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint == null) continue; annotsIter = objFromPoint.iterator(); while(annotsIter.hasNext()) { currentAnnot = annotsIter.next(); //if neededType is set, make sure this is the right type if (checkType && !currentAnnot.getType().equals(neededType)) continue; //check that the annot ends at or after the endOffset if(currentAnnot.getEndNode().getOffset().compareTo(endOffset) >= 0) annotationsToAdd.add(currentAnnot); } // while } return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(type, startOfset, endOffset) /** Select annotations by type, features and offset */ @Override public AnnotationSet get(String type, FeatureMap constraints, Long offset) { // select by offset AnnotationSet nextAnnots = get(offset); // select by type and constraints from the next annots return nextAnnots.get(type, constraints); } // get(type, constraints, offset) /** * Select annotations contained within an interval, i.e. * those annotations whose start position is * >= startOffset and whose end position is <= * endOffset. */ @Override public AnnotationSet getContained(Long startOffset, Long endOffset) { // the result will include all the annotations that either: // start at a position between the start and end before the end // offsets //check the range if(endOffset < startOffset) return emptyAS(); //ensure index if(annotsByStartNode == null) indexByStartOffset(); List annotationsToAdd = null; Iterator nodesIter; Node currentNode; Iterator annotIter; // find all the annots that start at or after the start offset but // strictly // before the end offset nodesIter = nodesByOffset.subMap(startOffset, endOffset).values() .iterator(); while(nodesIter.hasNext()) { currentNode = nodesIter.next(); Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint == null) continue; // loop through the annotations and find only those that // also end before endOffset annotIter = objFromPoint.iterator(); while(annotIter.hasNext()) { Annotation annot = annotIter.next(); if(annot.getEndNode().getOffset().compareTo(endOffset) <= 0) { if(annotationsToAdd == null) annotationsToAdd = new ArrayList(); annotationsToAdd.add(annot); } } } return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(startOfset, endOffset) /** Get the node with the smallest offset */ @Override public Node firstNode() { indexByStartOffset(); if(nodesByOffset.isEmpty()) return null; else return nodesByOffset.get(nodesByOffset.firstKey()); } // firstNode /** Get the node with the largest offset */ @Override public Node lastNode() { indexByStartOffset(); if(nodesByOffset.isEmpty()) return null; else return nodesByOffset.get(nodesByOffset.lastKey()); } // lastNode /** * Get the first node that is relevant for this annotation set and which has * the offset larger than the one of the node provided. */ @Override public Node nextNode(Node node) { indexByStartOffset(); return nodesByOffset.getNextOf(node.getOffset().longValue() + 1); } protected static AnnotationFactory annFactory; /** * Set the annotation factory used to create annotation objects. The default * factory is {@link DefaultAnnotationFactory}. */ public static void setAnnotationFactory(AnnotationFactory newFactory) { annFactory = newFactory; } static { // set the default factory to always create AnnotationImpl objects setAnnotationFactory(new DefaultAnnotationFactory()); } /** * Create and add an annotation with pre-existing nodes, and return its id. * Note that only Nodes retrieved from the same annotation set should be used * to create a new annotation using this method. Using Nodes from other annotation * sets may lead to undefined behaviour. If in any doubt use the Long based add * method instead of this one. */ @Override public Integer add(Node start, Node end, String type, FeatureMap features) { // the id of the new annotation Integer id = doc.getNextAnnotationId(); // construct an annotation annFactory.createAnnotationInSet(this, id, start, end, type, features); return id; } // add(Node, Node, String, FeatureMap) /** Add an existing annotation. Returns true when the set is modified. */ @Override public boolean add(Annotation a) throws ClassCastException { Annotation oldValue = annotsById.put(a.getId(), a); if (oldValue != null) { if (annotsByType != null) removeFromTypeIndex(oldValue); if (annotsByStartNode != null) removeFromOffsetIndex(oldValue); } if(annotsByType != null) addToTypeIndex(a); if(annotsByStartNode != null) addToStartOffsetIndex(a); AnnotationSetEvent evt = new AnnotationSetEvent(this, AnnotationSetEvent.ANNOTATION_ADDED, doc, a); fireAnnotationAdded(evt); fireGateEvent(evt); return oldValue != a; } // add(o) /** * Adds multiple annotations to this set in one go. All the objects in the * provided collection should be of {@link gate.Annotation} type, otherwise a * ClassCastException will be thrown. The provided annotations will be used to * create new annotations using the appropriate add() methods from this set. * The new annotations will have different IDs from the old ones (which is * required in order to preserve the uniqueness of IDs inside an annotation * set). * * @param c * a collection of annotations * @return true if the set has been modified as a result of this * call. */ @Override public boolean addAll(Collection c) { Iterator annIter = c.iterator(); boolean changed = false; while(annIter.hasNext()) { Annotation a = annIter.next(); try { add(a.getStartNode().getOffset(), a.getEndNode().getOffset(), a .getType(), a.getFeatures()); changed = true; } catch(InvalidOffsetException ioe) { throw new IllegalArgumentException(ioe.toString()); } } return changed; } /** * Adds multiple annotations to this set in one go. All the objects in the * provided collection should be of {@link gate.Annotation} type, otherwise a * ClassCastException will be thrown. This method does not create copies of * the annotations like addAll() does but simply adds the new annotations to * the set. It is intended to be used solely by annotation sets in order to * construct the results for various get(...) methods. * * @param c * a collection of annotations * @return true if the set has been modified as a result of this * call. */ protected boolean addAllKeepIDs(Collection c) { Iterator annIter = c.iterator(); boolean changed = false; while(annIter.hasNext()) { Annotation a = annIter.next(); changed |= add(a); } return changed; } /** Returns the nodes corresponding to the Longs. The Nodes are created if * they don't exist. **/ private final Node[] getNodes(Long start, Long end) throws InvalidOffsetException { // are the offsets valid? if(!doc.isValidOffsetRange(start, end)) { throw new InvalidOffsetException("Offsets [" + start + ":" + end + "] not valid for this document of size " + doc.getContent().size()); } // the set has to be indexed by position in order to add, as we need // to find out if nodes need creating or if they exist already if(nodesByOffset == null) { indexByStartOffset(); } // find existing nodes if appropriate nodes don't already exist, // create them Node startNode = nodesByOffset.get(start); if(startNode == null) startNode = new NodeImpl(doc.getNextNodeId(), start); Node endNode = null; if(start.equals(end)){ endNode = startNode; return new Node[]{startNode,endNode}; } endNode = nodesByOffset.get(end); if(endNode == null) endNode = new NodeImpl(doc.getNextNodeId(), end); return new Node[]{startNode,endNode}; } /** Create and add an annotation and return its id */ @Override public Integer add(Long start, Long end, String type, FeatureMap features) throws InvalidOffsetException { Node[] nodes = getNodes(start,end); // delegate to the method that adds annotations with existing nodes return add(nodes[0], nodes[1], type, features); } // add(start, end, type, features) /** * Create and add an annotation from database read data In this case the id is * already known being previously fetched from the database */ @Override public void add(Integer id, Long start, Long end, String type, FeatureMap features) throws InvalidOffsetException { Node[] nodes = getNodes(start,end); // construct an annotation annFactory.createAnnotationInSet(this, id, nodes[0], nodes[1], type, features); //try to ensure that if someone adds an annotation directly by ID //the other methods don't trample all over it later if (id >= doc.peakAtNextAnnotationId()) { doc.setNextAnnotationId(id+1); } } // add(id, start, end, type, features) /** Construct the positional index. */ protected void indexByType() { if(annotsByType != null) return; annotsByType = new HashMap(Gate.HASH_STH_SIZE); Iterator annotIter = annotsById.values().iterator(); while(annotIter.hasNext()) addToTypeIndex(annotIter.next()); } // indexByType() /** Construct the positional indices for annotation start */ protected void indexByStartOffset() { if(annotsByStartNode != null) return; if(nodesByOffset == null) nodesByOffset = new RBTreeMap(); annotsByStartNode = new HashMap(annotsById.size()); Iterator annotIter = annotsById.values().iterator(); while(annotIter.hasNext()) addToStartOffsetIndex(annotIter.next()); } // indexByStartOffset() /** * Add an annotation to the type index. Does nothing if the index doesn't * exist. */ void addToTypeIndex(Annotation a) { if(annotsByType == null) return; String type = a.getType(); AnnotationSet sameType = annotsByType.get(type); if(sameType == null) { sameType = new AnnotationSetImpl(doc); annotsByType.put(type, sameType); } sameType.add(a); } // addToTypeIndex(a) /** * Add an annotation to the start offset index. Does nothing if the index * doesn't exist. */ @SuppressWarnings("unchecked") void addToStartOffsetIndex(Annotation a) { Node startNode = a.getStartNode(); Node endNode = a.getEndNode(); Long start = startNode.getOffset(); Long end = endNode.getOffset(); // add a's nodes to the offset index if(nodesByOffset != null) { nodesByOffset.put(start, startNode); nodesByOffset.put(end, endNode); } //add marking for longest annot long annotLength = end - start; if (annotLength > longestAnnot) longestAnnot = annotLength; // if there's no appropriate index give up if(annotsByStartNode == null) return; // get the annotations that start at the same node, or create new // set Object thisNodeObject = annotsByStartNode.get(startNode.getId()); if(thisNodeObject == null) { // put directly the annotation annotsByStartNode.put(startNode.getId(), a); } else { // already something there : a single Annotation or a // Collection Set newCollection = null; if(thisNodeObject instanceof Annotation) { // we need to create a set - we have more than one annotation // starting // at this Node if(thisNodeObject.equals(a)) return; newCollection = new HashSet(3); newCollection.add((Annotation)thisNodeObject); annotsByStartNode.put(startNode.getId(), newCollection); } else newCollection = (Set)thisNodeObject; // get the existing set // add the new node annotation newCollection.add(a); } } // addToStartOffsetIndex(a) /** * Propagate document content changes to this AnnotationSet. * * This method is called for all annotation sets of a document from * DocumentImpl.edit to adapt the annotations to the text changes made through * the edit. The behaviour of this method is influenced by the configuration * setting {@link gate.GateConstants#DOCEDIT_INSERT_PREPEND GateConstants.DOCEDIT_INSERT_PREPEND }: * annotations immediately * ending before or starting after the point of insertion will either become * part of the inserted text or not. Currently it works like this: *
    *
  • PREPEND=true: annotation before will become part, annotation after not *
  • PREPEND=false: annotation before will not become part, annotation after * will become part *
* NOTE 1 (JP): There is another setting * {@link gate.GateConstants#DOCEDIT_INSERT_APPEND GateConstants.DOCEDIT_INSERT_APPEND } * but * this setting does currently not influence the behaviour of this method. * The behaviour of this method may change in the future so that * DOCEDIT_INSERT_APPEND is considered separately and in addition to * DOCEDIT_INSERT_PREPEND so that it can be controlled independently if * the annotation before and/or after an insertion point gets expanded or not. *

* NOTE 2: This method has, unfortunately, to be * public, to allow DocumentImpls to get at it. Oh for a "friend" declaration. * Doesn't throw InvalidOffsetException as DocumentImpl is the only client, * and that checks the offsets before calling this method. */ public void edit(Long start, Long end, DocumentContent replacement) { // make sure we have the indices computed indexByStartOffset(); if(end.compareTo(start) > 0) { // get the nodes that need to be processed (the nodes internal to // the // removed section plus the marginal ones List affectedNodes = new ArrayList(nodesByOffset.subMap(start, end.longValue() + 1).values()); // if we have more than 1 node we need to delete all apart from // the first // and move the annotations so that they refer to the one we keep // (the // first) NodeImpl firstNode = null; if(!affectedNodes.isEmpty()) { firstNode = (NodeImpl)affectedNodes.get(0); List startingAnnotations = new ArrayList(); List endingAnnotations = new ArrayList(); // now we need to find all the annotations // ending in the zone List beforeNodes = new ArrayList(nodesByOffset.subMap(0L, end.longValue() + 1).values()); Iterator beforeNodesIter = beforeNodes.iterator(); while(beforeNodesIter.hasNext()) { Node currentNode = beforeNodesIter.next(); Collection annotations = getAnnotsByStartNode(currentNode.getId()); if(annotations == null) continue; // iterates on the annotations in this set Iterator localIterator = annotations.iterator(); while(localIterator.hasNext()) { Annotation annotation = localIterator.next(); long offsetEndAnnotation = annotation.getEndNode().getOffset() .longValue(); // we are interested only in the annotations ending // inside the zone if(offsetEndAnnotation >= start.longValue() && offsetEndAnnotation <= end.longValue()) endingAnnotations.add(annotation); } } for(int i = 1; i < affectedNodes.size(); i++) { Node aNode = affectedNodes.get(i); Collection annSet = getAnnotsByStartNode(aNode.getId()); if(annSet != null) { startingAnnotations.addAll(annSet); } // remove the node // nodesByOffset.remove(aNode.getOffset()); // annotsByStartNode.remove(aNode); } // modify the annotations so they point to the saved node Iterator annIter = startingAnnotations.iterator(); while(annIter.hasNext()) { AnnotationImpl anAnnot = (AnnotationImpl)annIter.next(); anAnnot.start = firstNode; // remove the modified annotation if it has just become // zero-length if(anAnnot.start == anAnnot.end) { remove(anAnnot); } else { addToStartOffsetIndex(anAnnot); } } annIter = endingAnnotations.iterator(); while(annIter.hasNext()) { AnnotationImpl anAnnot = (AnnotationImpl)annIter.next(); anAnnot.end = firstNode; // remove the modified annotation if it has just become // zero-length if(anAnnot.start == anAnnot.end) { remove(anAnnot); } } // remove the unused nodes inside the area for(int i = 1; i < affectedNodes.size(); i++) { Node aNode = affectedNodes.get(i); nodesByOffset.remove(aNode.getOffset()); annotsByStartNode.remove(aNode.getId()); } // repair the first node // remove from offset index nodesByOffset.remove(firstNode.getOffset()); // change the offset for the saved node firstNode.setOffset(start); // add back to the offset index nodesByOffset.put(firstNode.getOffset(), firstNode); } } // now handle the insert and/or update the rest of the nodes' // position // get the user selected behaviour (defaults to append) boolean shouldPrepend = Gate.getUserConfig().getBoolean( GateConstants.DOCEDIT_INSERT_PREPEND).booleanValue(); long s = start.longValue(), e = end.longValue(); long rlen = // length of the replacement value ((replacement == null) ? 0 : replacement.size().longValue()); // update the offsets and the index by offset for the rest of the // nodes List nodesAfterReplacement = new ArrayList(nodesByOffset.tailMap(start) .values()); // remove from the index by offset Iterator nodesAfterReplacementIter = nodesAfterReplacement.iterator(); while(nodesAfterReplacementIter.hasNext()) { NodeImpl n = (NodeImpl)nodesAfterReplacementIter.next(); nodesByOffset.remove(n.getOffset()); } // change the offsets nodesAfterReplacementIter = nodesAfterReplacement.iterator(); while(nodesAfterReplacementIter.hasNext()) { NodeImpl n = (NodeImpl)nodesAfterReplacementIter.next(); long oldOffset = n.getOffset().longValue(); // by default we move all nodes back long newOffset = oldOffset - (e - s) + rlen; // for the first node we need behave differently if(oldOffset == s) { // the first offset never moves back if(newOffset < s) newOffset = s; // if we're prepending we don't move forward if(shouldPrepend) newOffset = s; } n.setOffset(newOffset); } // add back to the index by offset with the new offsets nodesAfterReplacementIter = nodesAfterReplacement.iterator(); while(nodesAfterReplacementIter.hasNext()) { NodeImpl n = (NodeImpl)nodesAfterReplacementIter.next(); nodesByOffset.put(n.getOffset(), n); } // //rebuild the indices with the new offsets // nodesByOffset = null; // annotsByStartNode = null; // annotsByEndNode = null; // indexByStartOffset(); // indexByEndOffset(); } // edit(start,end,replacement) /** Get the name of this set. */ @Override public String getName() { return name; } /** Get the document this set is attached to. */ @Override public Document getDocument() { return doc; } /** * Get a set of java.lang.String objects representing all the annotation types * present in this annotation set. */ @Override public Set getAllTypes() { indexByType(); return Collections.unmodifiableSet(annotsByType.keySet()); } /** * Returns a set of annotations starting at that position This intermediate * method is used to simplify the code as the values of the annotsByStartNode * hashmap can be Annotations or a Collection of Annotations. Returns null if * there are no Annotations at that position */ @SuppressWarnings("unchecked") private final Collection getAnnotsByStartNode(Integer id) { Object objFromPoint = annotsByStartNode.get(id); if(objFromPoint == null) return null; if(objFromPoint instanceof Annotation) { List al = new ArrayList(2); al.add((Annotation)objFromPoint); return al; } // it is already a collection // return it return (Collection)objFromPoint; } /** * * @return a clone of this set. * @throws CloneNotSupportedException */ @Override public Object clone() throws CloneNotSupportedException { return super.clone(); } @Override public synchronized void removeAnnotationSetListener(AnnotationSetListener l) { if(annotationSetListeners != null && annotationSetListeners.contains(l)) { @SuppressWarnings("unchecked") Vector v = (Vector)annotationSetListeners.clone(); v.removeElement(l); annotationSetListeners = v; } } @Override public synchronized void addAnnotationSetListener(AnnotationSetListener l) { @SuppressWarnings("unchecked") Vector v = annotationSetListeners == null ? new Vector(2) : (Vector)annotationSetListeners.clone(); if(!v.contains(l)) { v.addElement(l); annotationSetListeners = v; } } protected void fireAnnotationAdded(AnnotationSetEvent e) { if(annotationSetListeners != null) { Vector listeners = annotationSetListeners; int count = listeners.size(); for(int i = 0; i < count; i++) { listeners.elementAt(i).annotationAdded(e); } } } protected void fireAnnotationRemoved(AnnotationSetEvent e) { if(annotationSetListeners != null) { Vector listeners = annotationSetListeners; int count = listeners.size(); for(int i = 0; i < count; i++) { listeners.elementAt(i).annotationRemoved(e); } } } @Override public synchronized void removeGateListener(GateListener l) { if(gateListeners != null && gateListeners.contains(l)) { @SuppressWarnings("unchecked") Vector v = (Vector)gateListeners.clone(); v.removeElement(l); gateListeners = v; } } @Override public synchronized void addGateListener(GateListener l) { @SuppressWarnings("unchecked") Vector v = gateListeners == null ? new Vector(2) : (Vector)gateListeners .clone(); if(!v.contains(l)) { v.addElement(l); gateListeners = v; } } protected void fireGateEvent(GateEvent e) { if(gateListeners != null) { Vector listeners = gateListeners; int count = listeners.size(); for(int i = 0; i < count; i++) { listeners.elementAt(i).processGateEvent(e); } } } // how to serialize this object? // there is no need to serialize the indices // so it's probably as fast to just recreate them // if required private void writeObject(java.io.ObjectOutputStream out) throws IOException { ObjectOutputStream.PutField pf = out.putFields(); pf.put("name", this.name); pf.put("doc", this.doc); // // out.writeObject(this.name); // out.writeObject(this.doc); // save only the annotations // in an array that will prevent the need for casting // when deserializing annotations = new Annotation[this.annotsById.size()]; annotations = this.annotsById.values().toArray(annotations); // out.writeObject(annotations); pf.put("annotations", this.annotations); pf.put("relations", this.relations); out.writeFields(); annotations = null; boolean isIndexedByType = (this.annotsByType != null); boolean isIndexedByStartNode = (this.annotsByStartNode != null); out.writeBoolean(isIndexedByType); out.writeBoolean(isIndexedByStartNode); } private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { this.longestAnnot = 0l; ObjectInputStream.GetField gf = in.readFields(); this.name = (String)gf.get("name", null); this.doc = (DocumentImpl)gf.get("doc", null); boolean isIndexedByType = false; boolean isIndexedByStartNode = false; this.annotations = (Annotation[])gf.get("annotations", null); if(this.annotations == null) { // old style serialised version @SuppressWarnings("unchecked") Map annotsByIdMap = (Map)gf .get("annotsById", null); if(annotsByIdMap == null) throw new IOException( "Invalid serialised data: neither annotations array or map by id" + " are present."); annotations = annotsByIdMap.values().toArray(new Annotation[]{}); } else { // new style serialised version isIndexedByType = in.readBoolean(); isIndexedByStartNode = in.readBoolean(); } // this.name = (String)in.readObject(); // this.doc = (DocumentImpl)in.readObject(); // Annotation[] annotations = (Annotation[])in.readObject(); // do we need to create the indices? // boolean isIndexedByType = in.readBoolean(); // boolean isIndexedByStartNode = in.readBoolean(); this.annotsById = new HashMap(annotations.length); // rebuilds the indices if required if(isIndexedByType) { annotsByType = new HashMap(Gate.HASH_STH_SIZE); } if(isIndexedByStartNode) { nodesByOffset = new RBTreeMap(); annotsByStartNode = new HashMap(annotations.length); } // add all the annotations one by one for(int i = 0; i < annotations.length; i++) { add(annotations[i]); } this.relations = (RelationSet)gf.get("relations", null); annotations = null; } @Override public RelationSet getRelations() { if (relations == null) { relations = new RelationSet(this); } return relations; } // utility method that replaces the former static singleton member ImmutableAnnotationSet(null,null). // We should not give back annotation sets which have a null document, so instead we return // as an empty annotation set one that does not have annotations, but points to the same document // as the one it was created from. protected AnnotationSet emptyAS() { return new ImmutableAnnotationSetImpl(doc, null); } } // AnnotationSetImpl





© 2015 - 2024 Weber Informatics LLC | Privacy Policy