All downloads are free. The search and download functionality uses the official Maven repository.

gate.annotation.AnnotationSetImpl Maven / Gradle / Ivy

/*
 *  AnnotationSetImpl.java
 *
 *  Copyright (c) 1995-2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Hamish Cunningham, 7/Feb/2000
 *
 *  Developer notes:
 *  ---
 *
 *  the addToIndex... and indexBy... methods could be refactored as I'm
 *  sure they can be made simpler
 *
 *  every set to which annotation will be added has to have positional
 *  indexing, so that we can find or create the nodes on the new annotations
 *
 *  note that annotations added anywhere other than sets that are
 *  stored on the document will not get stored anywhere...
 *
 *  nodes aren't doing anything useful now. needs some interface that allows
 *  their creation, defaulting to no coterminous duplicates, but allowing such
 *  if required
 *
 *  $Id: AnnotationSetImpl.java 19658 2016-10-10 06:46:13Z markagreenwood $
 */
package gate.annotation;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.DocumentContent;
import gate.FeatureMap;
import gate.Gate;
import gate.GateConstants;
import gate.Node;
import gate.corpora.DocumentImpl;
import gate.event.AnnotationSetEvent;
import gate.event.AnnotationSetListener;
import gate.event.GateEvent;
import gate.event.GateListener;
import gate.relations.RelationSet;
import gate.util.InvalidOffsetException;
import gate.util.RBTreeMap;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.commons.lang.StringUtils;

/**
 * Implementation of AnnotationSet. Has a number of indices, all bar one of
 * which are null by default and are only constructed when asked for. Has lots
 * of get methods with various selection criteria; these return views into the
 * set, which are nonetheless valid sets in their own right (but will not
 * necessarily be fully indexed). Has a name, which is null by default; clients
 * of Document can request named AnnotationSets if they so desire. Has a
 * reference to the Document it is attached to. Contrary to Collections
 * convention, there is no no-arg constructor, as this would leave the set in an
 * inconsistent state.
 * 

 * <p>
 * There are four indices: annotation by id, annotations by type, annotations by
 * start node and nodes by offset. The last two jointly provide positional
 * indexing; construction of these is triggered by indexByStart(), or by calling
 * a get method that selects on offset. The type index is triggered by
 * indexByType(), or calling a get method that selects on type. The id index is
 * always present.
 * <p>

* NOTE: equality and hashCode of this implementation are exclusively based on
 * the annotations which appear in the set (if any). The document the set comes
 * from, the name of the set or the relations stored in that set are not taken
 * into account for equality or hashCode!
 */
public class AnnotationSetImpl extends AbstractSet<Annotation> implements
    AnnotationSet {
  /** Freeze the serialization UID. */
  static final long serialVersionUID = 1479426765310434166L;

  /** The name of this set */
  String name = null;

  /** The document this set belongs to */
  DocumentImpl doc;

  /** Maps annotation ids (Integers) to Annotations */
  transient protected HashMap<Integer, Annotation> annotsById;

  /** Maps offsets (Longs) to nodes */
  transient RBTreeMap<Long, Node> nodesByOffset = null;

  /**
   * This field is used temporarily during serialisation to store all the
   * annotations that need to be saved. At all other times, this will be null;
   */
  private Annotation[] annotations;

  /** Maps annotation types (Strings) to AnnotationSets */
  transient Map<String, AnnotationSet> annotsByType = null;

  /**
   * Maps node ids (Integers) to Annotations or a Collection of Annotations that
   * start from that node. The value is deliberately typed as Object because a
   * single Annotation is stored directly and only replaced by a Collection
   * once a second annotation starts at the same node.
   */
  transient Map<Integer, Object> annotsByStartNode;

  protected transient Vector<AnnotationSetListener> annotationSetListeners;

  private transient Vector<GateListener> gateListeners;

  /**
   * A caching value that greatly improves the performance of get
   * methods that have a defined beginning and end. By tracking the
   * maximum length that an annotation can be, we know the maximum
   * amount of nodes outside of a specified range that must be checked
   * to see if an annotation starting at one of those nodes crosses into
   * the range. This mechanism is not perfect because we do not check if
   * we have to decrease it if an annotation is removed from the set.
   * However, usually annotations are removed because they are about to
   * be replaced with another one that is >= to the length of the one
   * being replaced, so this isn't a big deal. At worst, it means that
   * the get methods simply checks a few more start positions than it
   * needs to.
*/ protected transient Long longestAnnot = 0l; protected RelationSet relations = null; // Empty AnnotationSet to be returned instead of null //public final static AnnotationSet emptyAS; //static { //emptyAnnotationSet = new ImmutableAnnotationSetImpl(null,null); //} /** Construction from Document. */ public AnnotationSetImpl(Document doc) { annotsById = new HashMap(); this.doc = (DocumentImpl)doc; } // construction from document /** Construction from Document and name. */ public AnnotationSetImpl(Document doc, String name) { this(doc); this.name = name; } // construction from document and name /** Construction from an existing AnnotationSet */ @SuppressWarnings("unchecked") public AnnotationSetImpl(AnnotationSet c) throws ClassCastException { this(c.getDocument(), c.getName()); // the original annotationset is of the same implementation if(c instanceof AnnotationSetImpl) { AnnotationSetImpl theC = (AnnotationSetImpl)c; annotsById.putAll(theC.annotsById); if(theC.annotsByStartNode != null) { annotsByStartNode = new HashMap(Gate.HASH_STH_SIZE); annotsByStartNode.putAll(theC.annotsByStartNode); } if(theC.annotsByType != null) { annotsByType = new HashMap(Gate.HASH_STH_SIZE); annotsByType.putAll(theC.annotsByType); } if(theC.nodesByOffset != null) { nodesByOffset = (RBTreeMap)theC.nodesByOffset.clone(); } } // the implementation is not the default one // let's add the annotations one by one else { Iterator iterannots = c.iterator(); while(iterannots.hasNext()) { add(iterannots.next()); } } } @Override public void clear() { // while nullifying the indexes does clear the set it doesn't fire the // appropriate events so use the Iterator based clear implementation in // AbstractSet.clear() first and then reset the indexes super.clear(); //reset all the indexes to be sure everything has been cleared correctly annotsById = new HashMap(); nodesByOffset = null; annotsByStartNode = null; annotsByType = null; longestAnnot = 0l; } /** * This inner class serves as the return value 
from the iterator() method. */ class AnnotationSetIterator implements Iterator { private Iterator iter; protected Annotation lastNext = null; AnnotationSetIterator() { iter = annotsById.values().iterator(); } @Override public boolean hasNext() { return iter.hasNext(); } @Override public Annotation next() { return (lastNext = iter.next()); } @Override public void remove() { // this takes care of the ID index iter.remove(); // what if lastNext is null if(lastNext == null) return; // remove from type index removeFromTypeIndex(lastNext); // remove from offset indices removeFromOffsetIndex(lastNext); // that's the second way of removing annotations from a set // apart from calling remove() on the set itself fireAnnotationRemoved(new AnnotationSetEvent(AnnotationSetImpl.this, AnnotationSetEvent.ANNOTATION_REMOVED, getDocument(), lastNext)); } // remove() }; // AnnotationSetIterator /** Get an iterator for this set */ @Override public Iterator iterator() { return new AnnotationSetIterator(); } /** Remove an element from this set. */ @Override public boolean remove(Object o) throws ClassCastException { Annotation a = (Annotation)o; boolean wasPresent = removeFromIdIndex(a); if(wasPresent) { removeFromTypeIndex(a); removeFromOffsetIndex(a); } // fire the event fireAnnotationRemoved(new AnnotationSetEvent(AnnotationSetImpl.this, AnnotationSetEvent.ANNOTATION_REMOVED, getDocument(), a)); return wasPresent; } // remove(o) /** Remove from the ID index. */ protected boolean removeFromIdIndex(Annotation a) { if(annotsById.remove(a.getId()) == null) return false; return true; } // removeFromIdIndex(a) /** Remove from the type index. */ protected void removeFromTypeIndex(Annotation a) { if(annotsByType != null) { AnnotationSet sameType = annotsByType.get(a.getType()); if(sameType != null) sameType.remove(a); if(sameType != null && sameType.isEmpty()) // none left of this type annotsByType.remove(a.getType()); } } // removeFromTypeIndex(a) /** Remove from the offset indices. 
*/ protected void removeFromOffsetIndex(Annotation a) { /*if(nodesByOffset != null) { // knowing when a node is no longer needed would require keeping a // reference // count on annotations, or using a weak reference to the nodes in // nodesByOffset }*/ if(annotsByStartNode != null) { Integer id = a.getStartNode().getId(); // might be an annotation or an annotationset Object objectAtNode = annotsByStartNode.get(id); if(objectAtNode instanceof Annotation) { annotsByStartNode.remove(id); // no annotations start here any // more return; } // otherwise it is a Collection @SuppressWarnings("unchecked") Collection starterAnnots = (Collection)objectAtNode; starterAnnots.remove(a); // if there is only one annotation left // we discard the set and put directly the annotation if(starterAnnots.size() == 1) annotsByStartNode.put(id, starterAnnots.iterator().next()); } } // removeFromOffsetIndex(a) /** The size of this set */ @Override public int size() { return annotsById.size(); } /** Find annotations by id */ @Override public Annotation get(Integer id) { return annotsById.get(id); } // get(id) /** * Get all annotations. * * @return an ImmutableAnnotationSet, empty or not */ @Override public AnnotationSet get() { if (annotsById.isEmpty()) return emptyAS(); return new ImmutableAnnotationSetImpl(doc, annotsById.values()); } // get() /** * Select annotations by type * * @return an ImmutableAnnotationSet */ @Override public AnnotationSet get(String type) { if(annotsByType == null) indexByType(); AnnotationSet byType = annotsByType.get(type); if (byType==null)return emptyAS(); // convert the mutable AS into an immutable one return byType.get(); } // get(type) /** * Select annotations by a set of types. Expects a Set of String. 
* * @return an ImmutableAnnotationSet */ @Override public AnnotationSet get(Set types) throws ClassCastException { if(annotsByType == null) indexByType(); Iterator iter = types.iterator(); List annotations = new ArrayList(); while(iter.hasNext()) { String type = iter.next(); AnnotationSet as = annotsByType.get(type); if(as != null) { Iterator iterAnnot = as.iterator(); while(iterAnnot.hasNext()) { annotations.add(iterAnnot.next()); } } } // while if(annotations.isEmpty()) return emptyAS(); return new ImmutableAnnotationSetImpl(doc, annotations); } // get(types) /** * Select annotations by type and features * * This will return an annotation set containing just those annotations of a * particular type (i.e. with a particular name) and which have features with * specific names and values. (It will also return annotations that have * features besides those specified, but it will not return any annotations * that do not have all the specified feature-value pairs.) * * However, if constraints contains a feature whose value is equal to * gate.creole.ANNIEConstants.LOOKUP_CLASS_FEATURE_NAME (which is normally * "class"), then GATE will attempt to match that feature using an ontology * which it will try to retreive from a feature on the both the annotation and * in constraints. If these do not return identical ontologies, or if either * the annotation or constraints does not contain an ontology, then matching * will fail, and the annotation will not be added. In summary, this method * will not work normally for features with the name "class". * * @param type * The name of the annotations to return. * @param constraints * A feature map containing all of the feature value pairs that the * annotation must have in order for them to be returned. * @return An annotation set containing only those annotations with the given * name and which have the specified set of feature-value pairs. 
*/ @Override public AnnotationSet get(String type, FeatureMap constraints) { if(annotsByType == null) indexByType(); AnnotationSet typeSet = get(type); if(typeSet == null) return null; Iterator iter = typeSet.iterator(); List annotationsToAdd = new ArrayList(); while(iter.hasNext()) { Annotation a = iter.next(); // we check for matching constraints by simple equality. a // feature map satisfies the constraints if it contains all the // key/value pairs from the constraints map // if // (a.getFeatures().entrySet().containsAll(constraints.entrySet())) if(a.getFeatures().subsumes(constraints)) annotationsToAdd.add(a); } // while if(annotationsToAdd.isEmpty()) return emptyAS(); return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(type, constraints) /** Select annotations by type and feature names */ @Override public AnnotationSet get(String type, Set featureNames) { if(annotsByType == null) indexByType(); AnnotationSet typeSet = null; if(type != null) { // if a type is provided, try finding annotations of this type typeSet = get(type); // if none exist, then return coz nothing left to do if(typeSet == null) return null; } List annotationsToAdd = new ArrayList(); Iterator iter = null; if(type != null) iter = typeSet.iterator(); else iter = annotsById.values().iterator(); while(iter.hasNext()) { Annotation a = iter.next(); // we check for matching constraints by simple equality. a // feature map satisfies the constraints if it contains all the // key/value pairs from the constraints map if(a.getFeatures().keySet().containsAll(featureNames)) annotationsToAdd.add(a); } // while if(annotationsToAdd.isEmpty()) return emptyAS(); return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(type, featureNames) /** * Select annotations by offset. This returns the set of annotations whose * start node is the least such that it is less than or equal to offset. If a * positional index doesn't exist it is created. 
If there are no nodes at or * beyond the offset param then it will return an empty annotationset. */ @Override public AnnotationSet get(Long offset) { if(annotsByStartNode == null) indexByStartOffset(); // find the next node at or after offset; get the annots starting // there Node nextNode = nodesByOffset.getNextOf(offset); if(nextNode == null) // no nodes at or beyond this offset return emptyAS(); Collection annotationsToAdd = getAnnotsByStartNode(nextNode .getId()); // skip all the nodes that have no starting annotations while(annotationsToAdd == null) { nextNode = nodesByOffset.getNextOf(nextNode.getOffset() .longValue() + 1); if (nextNode==null) return emptyAS(); annotationsToAdd = getAnnotsByStartNode(nextNode.getId()); } return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } /** * Select annotations by offset. This returns the set of annotations that * start exactly at the given offset. If a * positional index doesn't exist it is created. If there are no annotations * at the given offset then an empty annotation set is returned. * * @param offset The starting offset for which to return annotations * @return a ImmutableAnnotationSetImpl containing all annotations starting at the given * offset (possibly empty). */ public AnnotationSet getStartingAt(long offset) { if(annotsByStartNode == null) indexByStartOffset(); Node node = nodesByOffset.get(offset); if(node == null) { // no nodes at or beyond this offset return emptyAS(); } return new ImmutableAnnotationSetImpl(doc, getAnnotsByStartNode(node.getId())); } /** * Return a list of annotations sorted by increasing start offset, i.e. in the order * they appear in the document. If more than one annotation starts at a specific offset * the order of these annotations is unspecified. * * @return a list of annotations ordered by increasing start offset. If a positional * index does not exist, it is created. 
*/
  @Override
  public List<Annotation> inDocumentOrder() {
    if(annotsByStartNode == null) indexByStartOffset();
    // the node map is walked in the order of its values() view; each node
    // contributes the annotations that start at it
    Collection<Node> nodes = nodesByOffset.values();
    List<Annotation> result = new ArrayList<Annotation>();
    for(Node node : nodes) {
      Collection<Annotation> anns = getAnnotsByStartNode(node.getId());
      if(anns != null) {
        result.addAll(anns);
      }
    }
    return result;
  }

  /**
   * Select annotations by offset. This returns the set of annotations that
   * overlap totally or partially with the interval defined by the two provided
   * offsets. The result will include all the annotations that either:
   * <ul>
   * <li>start before the start offset and end strictly after it</li>
   * <li>OR</li>
   * <li>start at a position between the start and the end offsets</li>
   * </ul>
  • * * @return an ImmutableAnnotationSet */ @Override public AnnotationSet get(Long startOffset, Long endOffset) { return get(null, startOffset, endOffset); } // get(startOfset, endOffset) /** * Select annotations by offset. This returns the set of annotations that * overlap strictly with the interval defined by the two provided offsets.The * result will include all the annotations that start at the start offset and * end strictly at the end offset */ public AnnotationSet getStrict(Long startOffset, Long endOffset) { // the result will include all the annotations that // start at the start offset and end strictly at the end offset if(annotsByStartNode == null) indexByStartOffset(); List annotationsToAdd = null; Iterator annotsIter; Node currentNode; Annotation currentAnnot; // find all the annots that start at the start offset currentNode = nodesByOffset.get(startOffset); if(currentNode != null) { Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint != null) { annotsIter = objFromPoint.iterator(); while(annotsIter.hasNext()) { currentAnnot = annotsIter.next(); if(currentAnnot.getEndNode().getOffset().compareTo(endOffset) == 0) { if(annotationsToAdd == null) annotationsToAdd = new ArrayList(); annotationsToAdd.add(currentAnnot); } // if } // while } // if } // if return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // getStrict(startOfset, endOffset) /** * Select annotations by offset. This returns the set of annotations of the * given type that overlap totaly or partially with the interval defined by * the two provided offsets.The result will include all the annotations that * either: *
   * <ul>
   * <li>start before the start offset and end strictly after it</li>
   * <li>OR</li>
   * <li>start at a position between the start and the end offsets</li>
   * </ul>
    • */ @Override public AnnotationSet get(String neededType, Long startOffset, Long endOffset) { if(annotsByStartNode == null) indexByStartOffset(); List annotationsToAdd = new ArrayList(); Iterator nodesIter; Iterator annotsIter; Node currentNode; Annotation currentAnnot; boolean checkType = StringUtils.isNotBlank(neededType); // find all the annots that start strictly before the start offset // and end // strictly after it Long searchStart = (startOffset - longestAnnot); if (searchStart < 0) searchStart = 0l; //nodesIter = nodesByOffset.headMap(startOffset).values().iterator(); nodesIter = nodesByOffset.subMap(searchStart, startOffset).values().iterator(); while(nodesIter.hasNext()) { currentNode = nodesIter.next(); Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint == null) continue; annotsIter = objFromPoint.iterator(); while(annotsIter.hasNext()) { currentAnnot = annotsIter.next(); //if neededType is set, make sure this is the right type if (checkType && !currentAnnot.getType().equals(neededType)) continue; if(currentAnnot.getEndNode().getOffset().compareTo(startOffset) > 0) { annotationsToAdd.add(currentAnnot); } // if } // while } // find all the annots that start at or after the start offset but // before the end offset nodesIter = nodesByOffset.subMap(startOffset, endOffset).values() .iterator(); while(nodesIter.hasNext()) { currentNode = nodesIter.next(); Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint == null) continue; //if no specific type requested, add all of the annots if (!checkType) annotationsToAdd.addAll(objFromPoint); else { //check the type of each annot annotsIter = objFromPoint.iterator(); while(annotsIter.hasNext()) { currentAnnot = annotsIter.next(); if (currentAnnot.getType().equals(neededType)) annotationsToAdd.add(currentAnnot); } // while } } return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(type, startOfset, endOffset) /** * Select 
   * annotations of the given type that completely span the range.
   * Formally, for any annotation a, a will be included in the return
   * set if:
   * <ul>
   * <li>a.getStartNode().getOffset() &lt;= startOffset</li>
   * <li>and</li>
   * <li>a.getEndNode().getOffset() &gt;= endOffset</li>
   * </ul>
      • * * @param neededType Type of annotation to return. If empty, all * annotation types will be returned. * @return annotations of the given type that completely span the range. */ @Override public AnnotationSet getCovering(String neededType, Long startOffset, Long endOffset) { //check the range if(endOffset < startOffset) return emptyAS(); //ensure index if(annotsByStartNode == null) indexByStartOffset(); //if the requested range is longer than the longest annotation in this set, //then there can be no annotations covering the range // so we return an empty set. if(endOffset - startOffset > longestAnnot) return emptyAS(); List annotationsToAdd = new ArrayList(); Iterator nodesIter; Iterator annotsIter; Node currentNode; Annotation currentAnnot; boolean checkType = StringUtils.isNotBlank(neededType); // find all the annots with startNode <= startOffset. Need the + 1 because // headMap returns strictly less than. // the length of the longest annot from the endOffset since we know that nothing // that starts earlier will be long enough to cover the entire span. 
Long searchStart = ((endOffset - 1) - longestAnnot); if (searchStart < 0) searchStart = 0l; //nodesIter = nodesByOffset.headMap(startOffset + 1).values().iterator(); nodesIter = nodesByOffset.subMap(searchStart, startOffset + 1).values().iterator(); while(nodesIter.hasNext()) { currentNode = nodesIter.next(); Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint == null) continue; annotsIter = objFromPoint.iterator(); while(annotsIter.hasNext()) { currentAnnot = annotsIter.next(); //if neededType is set, make sure this is the right type if (checkType && !currentAnnot.getType().equals(neededType)) continue; //check that the annot ends at or after the endOffset if(currentAnnot.getEndNode().getOffset().compareTo(endOffset) >= 0) annotationsToAdd.add(currentAnnot); } // while } return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(type, startOfset, endOffset) /** Select annotations by type, features and offset */ @Override public AnnotationSet get(String type, FeatureMap constraints, Long offset) { // select by offset AnnotationSet nextAnnots = get(offset); // select by type and constraints from the next annots return nextAnnots.get(type, constraints); } // get(type, constraints, offset) /** * Select annotations contained within an interval, i.e. * those annotations whose start position is * >= startOffset and whose end position is <= * endOffset. 
*/ @Override public AnnotationSet getContained(Long startOffset, Long endOffset) { // the result will include all the annotations that either: // start at a position between the start and end before the end // offsets //check the range if(endOffset < startOffset) return emptyAS(); //ensure index if(annotsByStartNode == null) indexByStartOffset(); List annotationsToAdd = null; Iterator nodesIter; Node currentNode; Iterator annotIter; // find all the annots that start at or after the start offset but // strictly // before the end offset nodesIter = nodesByOffset.subMap(startOffset, endOffset).values() .iterator(); while(nodesIter.hasNext()) { currentNode = nodesIter.next(); Collection objFromPoint = getAnnotsByStartNode(currentNode .getId()); if(objFromPoint == null) continue; // loop through the annotations and find only those that // also end before endOffset annotIter = objFromPoint.iterator(); while(annotIter.hasNext()) { Annotation annot = annotIter.next(); if(annot.getEndNode().getOffset().compareTo(endOffset) <= 0) { if(annotationsToAdd == null) annotationsToAdd = new ArrayList(); annotationsToAdd.add(annot); } } } return new ImmutableAnnotationSetImpl(doc, annotationsToAdd); } // get(startOfset, endOffset) /** Get the node with the smallest offset */ @Override public Node firstNode() { indexByStartOffset(); if(nodesByOffset.isEmpty()) return null; else return nodesByOffset.get(nodesByOffset.firstKey()); } // firstNode /** Get the node with the largest offset */ @Override public Node lastNode() { indexByStartOffset(); if(nodesByOffset.isEmpty()) return null; else return nodesByOffset.get(nodesByOffset.lastKey()); } // lastNode /** * Get the first node that is relevant for this annotation set and which has * the offset larger than the one of the node provided. 
*/ @Override public Node nextNode(Node node) { indexByStartOffset(); return nodesByOffset.getNextOf(node.getOffset().longValue() + 1); } protected static AnnotationFactory annFactory; /** * Set the annotation factory used to create annotation objects. The default * factory is {@link DefaultAnnotationFactory}. */ public static void setAnnotationFactory(AnnotationFactory newFactory) { annFactory = newFactory; } static { // set the default factory to always create AnnotationImpl objects setAnnotationFactory(new DefaultAnnotationFactory()); } /** * Create and add an annotation with pre-existing nodes, and return its id. * Note that only Nodes retrieved from the same annotation set should be used * to create a new annotation using this method. Using Nodes from other annotation * sets may lead to undefined behaviour. If in any doubt use the Long based add * method instead of this one. */ @Override public Integer add(Node start, Node end, String type, FeatureMap features) { // the id of the new annotation Integer id = doc.getNextAnnotationId(); // construct an annotation annFactory.createAnnotationInSet(this, id, start, end, type, features); return id; } // add(Node, Node, String, FeatureMap) /** Add an existing annotation. Returns true when the set is modified. */ @Override public boolean add(Annotation a) throws ClassCastException { Annotation oldValue = annotsById.put(a.getId(), a); if (oldValue != null) { if (annotsByType != null) removeFromTypeIndex(oldValue); if (annotsByStartNode != null) removeFromOffsetIndex(oldValue); } if(annotsByType != null) addToTypeIndex(a); if(annotsByStartNode != null) addToStartOffsetIndex(a); AnnotationSetEvent evt = new AnnotationSetEvent(this, AnnotationSetEvent.ANNOTATION_ADDED, doc, a); fireAnnotationAdded(evt); fireGateEvent(evt); return oldValue != a; } // add(o) /** * Adds multiple annotations to this set in one go. 
All the objects in the * provided collection should be of {@link gate.Annotation} type, otherwise a * ClassCastException will be thrown. The provided annotations will be used to * create new annotations using the appropriate add() methods from this set. * The new annotations will have different IDs from the old ones (which is * required in order to preserve the uniqueness of IDs inside an annotation * set). * * @param c * a collection of annotations * @return true if the set has been modified as a result of this * call. */ @Override public boolean addAll(Collection c) { Iterator annIter = c.iterator(); boolean changed = false; while(annIter.hasNext()) { Annotation a = annIter.next(); try { add(a.getStartNode().getOffset(), a.getEndNode().getOffset(), a .getType(), a.getFeatures()); changed = true; } catch(InvalidOffsetException ioe) { throw new IllegalArgumentException(ioe.toString()); } } return changed; } /** * Adds multiple annotations to this set in one go. All the objects in the * provided collection should be of {@link gate.Annotation} type, otherwise a * ClassCastException will be thrown. This method does not create copies of * the annotations like addAll() does but simply adds the new annotations to * the set. It is intended to be used solely by annotation sets in order to * construct the results for various get(...) methods. * * @param c * a collection of annotations * @return true if the set has been modified as a result of this * call. */ protected boolean addAllKeepIDs(Collection c) { Iterator annIter = c.iterator(); boolean changed = false; while(annIter.hasNext()) { Annotation a = annIter.next(); changed |= add(a); } return changed; } /** Returns the nodes corresponding to the Longs. The Nodes are created if * they don't exist. **/ private final Node[] getNodes(Long start, Long end) throws InvalidOffsetException { // are the offsets valid? 
if(!doc.isValidOffsetRange(start, end)) { throw new InvalidOffsetException("Offsets [" + start + ":" + end + "] not valid for this document of size " + doc.getContent().size()); } // the set has to be indexed by position in order to add, as we need // to find out if nodes need creating or if they exist already if(nodesByOffset == null) { indexByStartOffset(); } // find existing nodes if appropriate nodes don't already exist, // create them Node startNode = nodesByOffset.get(start); if(startNode == null) startNode = new NodeImpl(doc.getNextNodeId(), start); Node endNode = null; if(start.equals(end)){ endNode = startNode; return new Node[]{startNode,endNode}; } endNode = nodesByOffset.get(end); if(endNode == null) endNode = new NodeImpl(doc.getNextNodeId(), end); return new Node[]{startNode,endNode}; } /** Create and add an annotation and return its id */ @Override public Integer add(Long start, Long end, String type, FeatureMap features) throws InvalidOffsetException { Node[] nodes = getNodes(start,end); // delegate to the method that adds annotations with existing nodes return add(nodes[0], nodes[1], type, features); } // add(start, end, type, features) /** * Create and add an annotation from database read data In this case the id is * already known being previously fetched from the database */ @Override public void add(Integer id, Long start, Long end, String type, FeatureMap features) throws InvalidOffsetException { Node[] nodes = getNodes(start,end); // construct an annotation annFactory.createAnnotationInSet(this, id, nodes[0], nodes[1], type, features); //try to ensure that if someone adds an annotation directly by ID //the other methods don't trample all over it later if (id >= doc.peakAtNextAnnotationId()) { doc.setNextAnnotationId(id+1); } } // add(id, start, end, type, features) /** Construct the positional index. 
*/ protected void indexByType() { if(annotsByType != null) return; annotsByType = new HashMap(Gate.HASH_STH_SIZE); Iterator annotIter = annotsById.values().iterator(); while(annotIter.hasNext()) addToTypeIndex(annotIter.next()); } // indexByType() /** Construct the positional indices for annotation start */ protected void indexByStartOffset() { if(annotsByStartNode != null) return; if(nodesByOffset == null) nodesByOffset = new RBTreeMap(); annotsByStartNode = new HashMap(annotsById.size()); Iterator annotIter = annotsById.values().iterator(); while(annotIter.hasNext()) addToStartOffsetIndex(annotIter.next()); } // indexByStartOffset() /** * Add an annotation to the type index. Does nothing if the index doesn't * exist. */ void addToTypeIndex(Annotation a) { if(annotsByType == null) return; String type = a.getType(); AnnotationSet sameType = annotsByType.get(type); if(sameType == null) { sameType = new AnnotationSetImpl(doc); annotsByType.put(type, sameType); } sameType.add(a); } // addToTypeIndex(a) /** * Add an annotation to the start offset index. Does nothing if the index * doesn't exist. 
*/ @SuppressWarnings("unchecked") void addToStartOffsetIndex(Annotation a) { Node startNode = a.getStartNode(); Node endNode = a.getEndNode(); Long start = startNode.getOffset(); Long end = endNode.getOffset(); // add a's nodes to the offset index if(nodesByOffset != null) { nodesByOffset.put(start, startNode); nodesByOffset.put(end, endNode); } //add marking for longest annot long annotLength = end - start; if (annotLength > longestAnnot) longestAnnot = annotLength; // if there's no appropriate index give up if(annotsByStartNode == null) return; // get the annotations that start at the same node, or create new // set Object thisNodeObject = annotsByStartNode.get(startNode.getId()); if(thisNodeObject == null) { // put directly the annotation annotsByStartNode.put(startNode.getId(), a); } else { // already something there : a single Annotation or a // Collection Set newCollection = null; if(thisNodeObject instanceof Annotation) { // we need to create a set - we have more than one annotation // starting // at this Node if(thisNodeObject.equals(a)) return; newCollection = new HashSet(3); newCollection.add((Annotation)thisNodeObject); annotsByStartNode.put(startNode.getId(), newCollection); } else newCollection = (Set)thisNodeObject; // get the existing set // add the new node annotation newCollection.add(a); } } // addToStartOffsetIndex(a) /** * Propagate document content changes to this AnnotationSet. * * This method is called for all annotation sets of a document from * DocumentImpl.edit to adapt the annotations to the text changes made through * the edit. The behaviour of this method is influenced by the configuration * setting {@link gate.GateConstants#DOCEDIT_INSERT_PREPEND GateConstants.DOCEDIT_INSERT_PREPEND }: * annotations immediately * ending before or starting after the point of insertion will either become * part of the inserted text or not. Currently it works like this: *
   * <ul>
   * <li>PREPEND=true: annotation before will become part, annotation after not</li>
   * <li>PREPEND=false: annotation before will not become part, annotation after
   * will become part</li>
   * </ul>
   * NOTE 1 (JP): There is another setting
   * {@link gate.GateConstants#DOCEDIT_INSERT_APPEND GateConstants.DOCEDIT_INSERT_APPEND }
   * but this setting currently does not influence the behaviour of this method.
   * The behaviour of this method may change in the future so that
   * DOCEDIT_INSERT_APPEND is considered separately and in addition to
   * DOCEDIT_INSERT_PREPEND so that it can be controlled independently if
   * the annotation before and/or after an insertion point gets expanded or not.
   * <p>
        * NOTE 2: This method has, unfortunately, to be * public, to allow DocumentImpls to get at it. Oh for a "friend" declaration. * Doesn't throw InvalidOffsetException as DocumentImpl is the only client, * and that checks the offsets before calling this method. */ public void edit(Long start, Long end, DocumentContent replacement) { // make sure we have the indices computed indexByStartOffset(); if(end.compareTo(start) > 0) { // get the nodes that need to be processed (the nodes internal to // the // removed section plus the marginal ones List affectedNodes = new ArrayList(nodesByOffset.subMap(start, end.longValue() + 1).values()); // if we have more than 1 node we need to delete all apart from // the first // and move the annotations so that they refer to the one we keep // (the // first) NodeImpl firstNode = null; if(!affectedNodes.isEmpty()) { firstNode = (NodeImpl)affectedNodes.get(0); List startingAnnotations = new ArrayList(); List endingAnnotations = new ArrayList(); // now we need to find all the annotations // ending in the zone List beforeNodes = new ArrayList(nodesByOffset.subMap(0L, end.longValue() + 1).values()); Iterator beforeNodesIter = beforeNodes.iterator(); while(beforeNodesIter.hasNext()) { Node currentNode = beforeNodesIter.next(); Collection annotations = getAnnotsByStartNode(currentNode.getId()); if(annotations == null) continue; // iterates on the annotations in this set Iterator localIterator = annotations.iterator(); while(localIterator.hasNext()) { Annotation annotation = localIterator.next(); long offsetEndAnnotation = annotation.getEndNode().getOffset() .longValue(); // we are interested only in the annotations ending // inside the zone if(offsetEndAnnotation >= start.longValue() && offsetEndAnnotation <= end.longValue()) endingAnnotations.add(annotation); } } for(int i = 1; i < affectedNodes.size(); i++) { Node aNode = affectedNodes.get(i); Collection annSet = getAnnotsByStartNode(aNode.getId()); if(annSet != null) { 
startingAnnotations.addAll(annSet); } // remove the node // nodesByOffset.remove(aNode.getOffset()); // annotsByStartNode.remove(aNode); } // modify the annotations so they point to the saved node Iterator annIter = startingAnnotations.iterator(); while(annIter.hasNext()) { AnnotationImpl anAnnot = (AnnotationImpl)annIter.next(); anAnnot.start = firstNode; // remove the modified annotation if it has just become // zero-length if(anAnnot.start == anAnnot.end) { remove(anAnnot); } else { addToStartOffsetIndex(anAnnot); } } annIter = endingAnnotations.iterator(); while(annIter.hasNext()) { AnnotationImpl anAnnot = (AnnotationImpl)annIter.next(); anAnnot.end = firstNode; // remove the modified annotation if it has just become // zero-length if(anAnnot.start == anAnnot.end) { remove(anAnnot); } } // remove the unused nodes inside the area for(int i = 1; i < affectedNodes.size(); i++) { Node aNode = affectedNodes.get(i); nodesByOffset.remove(aNode.getOffset()); annotsByStartNode.remove(aNode.getId()); } // repair the first node // remove from offset index nodesByOffset.remove(firstNode.getOffset()); // change the offset for the saved node firstNode.setOffset(start); // add back to the offset index nodesByOffset.put(firstNode.getOffset(), firstNode); } } // now handle the insert and/or update the rest of the nodes' // position // get the user selected behaviour (defaults to append) boolean shouldPrepend = Gate.getUserConfig().getBoolean( GateConstants.DOCEDIT_INSERT_PREPEND).booleanValue(); long s = start.longValue(), e = end.longValue(); long rlen = // length of the replacement value ((replacement == null) ? 
0 : replacement.size().longValue()); // update the offsets and the index by offset for the rest of the // nodes List nodesAfterReplacement = new ArrayList(nodesByOffset.tailMap(start) .values()); // remove from the index by offset Iterator nodesAfterReplacementIter = nodesAfterReplacement.iterator(); while(nodesAfterReplacementIter.hasNext()) { NodeImpl n = (NodeImpl)nodesAfterReplacementIter.next(); nodesByOffset.remove(n.getOffset()); } // change the offsets nodesAfterReplacementIter = nodesAfterReplacement.iterator(); while(nodesAfterReplacementIter.hasNext()) { NodeImpl n = (NodeImpl)nodesAfterReplacementIter.next(); long oldOffset = n.getOffset().longValue(); // by default we move all nodes back long newOffset = oldOffset - (e - s) + rlen; // for the first node we need behave differently if(oldOffset == s) { // the first offset never moves back if(newOffset < s) newOffset = s; // if we're prepending we don't move forward if(shouldPrepend) newOffset = s; } n.setOffset(newOffset); } // add back to the index by offset with the new offsets nodesAfterReplacementIter = nodesAfterReplacement.iterator(); while(nodesAfterReplacementIter.hasNext()) { NodeImpl n = (NodeImpl)nodesAfterReplacementIter.next(); nodesByOffset.put(n.getOffset(), n); } // //rebuild the indices with the new offsets // nodesByOffset = null; // annotsByStartNode = null; // annotsByEndNode = null; // indexByStartOffset(); // indexByEndOffset(); } // edit(start,end,replacement) /** Get the name of this set. */ @Override public String getName() { return name; } /** Get the document this set is attached to. */ @Override public Document getDocument() { return doc; } /** * Get a set of java.lang.String objects representing all the annotation types * present in this annotation set. 
*/
  @Override
  public Set<String> getAllTypes() {
    // builds the type index on first use
    indexByType();
    return Collections.unmodifiableSet(annotsByType.keySet());
  }

  /**
   * Returns a set of annotations starting at that position This intermediate
   * method is used to simplify the code as the values of the annotsByStartNode
   * hashmap can be Annotations or a Collection of Annotations. Returns null if
   * there are no Annotations at that position
   *
   * @param id the id of the start node to look up
   * @return the stored collection itself, a fresh single-element list when a
   *         lone annotation is stored, or null when nothing is stored
   */
  @SuppressWarnings("unchecked")
  private final Collection<Annotation> getAnnotsByStartNode(Integer id) {
    Object objFromPoint = annotsByStartNode.get(id);
    if(objFromPoint == null) return null;
    if(objFromPoint instanceof Annotation) {
      List<Annotation> al = new ArrayList<Annotation>(2);
      al.add((Annotation)objFromPoint);
      return al;
    }
    // it is already a collection
    // return it
    return (Collection<Annotation>)objFromPoint;
  }

  /**
   *
   * @return a clone of this set.
   * @throws CloneNotSupportedException
   */
  @Override
  public Object clone() throws CloneNotSupportedException {
    return super.clone();
  }

  /**
   * Unregisters an annotation set listener. The listener vector is replaced
   * by a modified copy rather than changed in place, so a vector already
   * picked up by one of the fire methods is never mutated.
   */
  @Override
  public synchronized void removeAnnotationSetListener(AnnotationSetListener l) {
    if(annotationSetListeners != null && annotationSetListeners.contains(l)) {
      @SuppressWarnings("unchecked")
      Vector<AnnotationSetListener> v =
              (Vector<AnnotationSetListener>)annotationSetListeners.clone();
      v.removeElement(l);
      annotationSetListeners = v;
    }
  }

  /**
   * Registers an annotation set listener unless it is already registered.
   * Works on a copy of the listener vector, like the remove method.
   */
  @Override
  public synchronized void addAnnotationSetListener(AnnotationSetListener l) {
    @SuppressWarnings("unchecked")
    Vector<AnnotationSetListener> v = annotationSetListeners == null
            ? new Vector<AnnotationSetListener>(2)
            : (Vector<AnnotationSetListener>)annotationSetListeners.clone();
    if(!v.contains(l)) {
      v.addElement(l);
      annotationSetListeners = v;
    }
  }

  /** Notifies every registered annotation set listener of an addition. */
  protected void fireAnnotationAdded(AnnotationSetEvent e) {
    if(annotationSetListeners != null) {
      // work on the vector reference read here; add/remove swap in a copy
      Vector<AnnotationSetListener> listeners = annotationSetListeners;
      int count = listeners.size();
      for(int i = 0; i < count; i++) {
        listeners.elementAt(i).annotationAdded(e);
      }
    }
  }

  /** Notifies every registered annotation set listener of a removal. */
  protected void fireAnnotationRemoved(AnnotationSetEvent e) {
    if(annotationSetListeners != null) {
      Vector<AnnotationSetListener> listeners = annotationSetListeners;
      int count = listeners.size();
      for(int i = 0; i < count; i++) {
        listeners.elementAt(i).annotationRemoved(e);
      }
    }
  }

  /** Unregisters a GATE listener; works on a copy of the listener vector. */
  @Override
  public synchronized void removeGateListener(GateListener l) {
    if(gateListeners != null && gateListeners.contains(l)) {
      @SuppressWarnings("unchecked")
      Vector<GateListener> v = (Vector<GateListener>)gateListeners.clone();
      v.removeElement(l);
      gateListeners = v;
    }
  }

  /** Registers a GATE listener unless it is already registered. */
  @Override
  public synchronized void addGateListener(GateListener l) {
    @SuppressWarnings("unchecked")
    Vector<GateListener> v = gateListeners == null
            ? new Vector<GateListener>(2)
            : (Vector<GateListener>)gateListeners.clone();
    if(!v.contains(l)) {
      v.addElement(l);
      gateListeners = v;
    }
  }

  /** Passes the event to every registered GATE listener. */
  protected void fireGateEvent(GateEvent e) {
    if(gateListeners != null) {
      Vector<GateListener> listeners = gateListeners;
      int count = listeners.size();
      for(int i = 0; i < count; i++) {
        listeners.elementAt(i).processGateEvent(e);
      }
    }
  }

  // how to serialize this object?
// there is no need to serialize the indices // so it's probably as fast to just recreate them // if required private void writeObject(java.io.ObjectOutputStream out) throws IOException { ObjectOutputStream.PutField pf = out.putFields(); pf.put("name", this.name); pf.put("doc", this.doc); // // out.writeObject(this.name); // out.writeObject(this.doc); // save only the annotations // in an array that will prevent the need for casting // when deserializing annotations = new Annotation[this.annotsById.size()]; annotations = this.annotsById.values().toArray(annotations); // out.writeObject(annotations); pf.put("annotations", this.annotations); pf.put("relations", this.relations); out.writeFields(); annotations = null; boolean isIndexedByType = (this.annotsByType != null); boolean isIndexedByStartNode = (this.annotsByStartNode != null); out.writeBoolean(isIndexedByType); out.writeBoolean(isIndexedByStartNode); } private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { this.longestAnnot = 0l; ObjectInputStream.GetField gf = in.readFields(); this.name = (String)gf.get("name", null); this.doc = (DocumentImpl)gf.get("doc", null); boolean isIndexedByType = false; boolean isIndexedByStartNode = false; this.annotations = (Annotation[])gf.get("annotations", null); if(this.annotations == null) { // old style serialised version @SuppressWarnings("unchecked") Map annotsByIdMap = (Map)gf .get("annotsById", null); if(annotsByIdMap == null) throw new IOException( "Invalid serialised data: neither annotations array or map by id" + " are present."); annotations = annotsByIdMap.values().toArray(new Annotation[]{}); } else { // new style serialised version isIndexedByType = in.readBoolean(); isIndexedByStartNode = in.readBoolean(); } // this.name = (String)in.readObject(); // this.doc = (DocumentImpl)in.readObject(); // Annotation[] annotations = (Annotation[])in.readObject(); // do we need to create the indices? 
// boolean isIndexedByType = in.readBoolean(); // boolean isIndexedByStartNode = in.readBoolean(); this.annotsById = new HashMap(annotations.length); // rebuilds the indices if required if(isIndexedByType) { annotsByType = new HashMap(Gate.HASH_STH_SIZE); } if(isIndexedByStartNode) { nodesByOffset = new RBTreeMap(); annotsByStartNode = new HashMap(annotations.length); } // add all the annotations one by one for(int i = 0; i < annotations.length; i++) { add(annotations[i]); } this.relations = (RelationSet)gf.get("relations", null); annotations = null; } @Override public RelationSet getRelations() { if (relations == null) { relations = new RelationSet(this); } return relations; } // utility method that replaces the former static singleton member ImmutableAnnotationSet(null,null). // We should not give back annotation sets which have a null document, so instead we return // as an empty annotation set one that does not have annotations, but points to the same document // as the one it was created from. protected AnnotationSet emptyAS() { return new ImmutableAnnotationSetImpl(doc, null); } } // AnnotationSetImpl





© 2015 - 2024 Weber Informatics LLC | Privacy Policy