gate.annotation.AnnotationSetImpl Maven / Gradle / Ivy

/*
 *  AnnotationSetImpl.java
 *
 *  Copyright (c) 1995-2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Hamish Cunningham, 7/Feb/2000
 *
 *  Developer notes:
 *  ---
 *
 *  the addToIndex... and indexBy... methods could be refactored as I'm
 *  sure they can be made simpler
 *
 *  every set to which annotation will be added has to have positional
 *  indexing, so that we can find or create the nodes on the new annotations
 *
 *  note that annotations added anywhere other than sets that are
 *  stored on the document will not get stored anywhere...
 *
 *  nodes aren't doing anything useful now. needs some interface that allows
 *  their creation, defaulting to no coterminous duplicates, but allowing such
 *  if required
 *
 *  $Id: AnnotationSetImpl.java 19658 2016-10-10 06:46:13Z markagreenwood $
 */
package gate.annotation;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.DocumentContent;
import gate.FeatureMap;
import gate.Gate;
import gate.GateConstants;
import gate.Node;
import gate.corpora.DocumentImpl;
import gate.event.AnnotationSetEvent;
import gate.event.AnnotationSetListener;
import gate.event.GateEvent;
import gate.event.GateListener;
import gate.relations.RelationSet;
import gate.util.InvalidOffsetException;
import gate.util.RBTreeMap;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.commons.lang.StringUtils;

/**
 * Implementation of AnnotationSet. Has a number of indices, all bar one of
 * which are null by default and are only constructed when asked for. Has lots
 * of get methods with various selection criteria; these return views into the
 * set, which are nonetheless valid sets in their own right (but will not
 * necessarily be fully indexed). Has a name, which is null by default; clients
 * of Document can request named AnnotationSets if they so desire. Has a
 * reference to the Document it is attached to. Contrary to Collections
 * convention, there is no no-arg constructor, as this would leave the set in an
 * inconsistent state.
 * 
 * There are four indices: annotation by id, annotations by type, annotations by
 * start node and nodes by offset. The last two jointly provide positional
 * indexing; construction of these is triggered by indexByStart(), or by calling
 * a get method that selects on offset. The type index is triggered by
 * indexByType(), or calling a get method that selects on type. The id index is
 * always present.
 * 

 * NOTE: equality and hashCode of this implementation are based exclusively on the annotations
 * which appear in the set (if any). The document the set comes from, the name of the set and
 * the relations stored in that set are not taken into account for equals or hashCode.
 *
 * 
 */
public class AnnotationSetImpl extends AbstractSet implements
                                                              AnnotationSet {
  /** Freeze the serialization UID. */
  static final long serialVersionUID = 1479426765310434166L;
  /** The name of this set; null unless one is supplied at construction. */
  String name = null;
  /** The document this set belongs to */
  DocumentImpl doc;
  /**
   * Maps annotation ids (Integers) to Annotations. This is the only index
   * that is created by the constructors; size() and iterator() are backed
   * by it.
   */
  transient protected HashMap annotsById;
  /** Maps offsets (Longs) to nodes; null until a positional index is built */
  transient RBTreeMap nodesByOffset = null;
  /**
   * This field is used temporarily during serialisation to store all the
   * annotations that need to be saved. At all other times, this will be null;
   */
  private Annotation[] annotations;
  /**
   * Maps annotation types (Strings) to AnnotationSets; null until the type
   * index is built
   */
  transient Map annotsByType = null;
  /**
   * Maps node ids (Integers) to Annotations or a Collection of Annotations that
   * start from that node. Null until the positional index is built.
   */
  transient Map annotsByStartNode;
  // NOTE(review): presumably the listeners notified by fireAnnotationAdded /
  // fireAnnotationRemoved; registration is outside this excerpt - confirm
  protected transient Vector annotationSetListeners;
  // NOTE(review): presumably the listeners notified by fireGateEvent - confirm
  private transient Vector gateListeners;

  /**
   * A caching value that greatly improves the performance of get
   * methods that have a defined beginning and end. By tracking the
   * maximum length that an annotation can be, we know the maximum
   * amount of nodes outside of a specified range that must be checked
   * to see if an annotation starting at one of those nodes crosses into
   * the range. This mechanism is not perfect because we do not check if
   * we have to decrease it if an annotation is removed from the set.
   * However, usually annotations are removed because they are about to
   * be replaced with another one that is >= to the length of the one
   * being replaced, so this isn't a big deal. At worst, it means that
   * the get methods simply checks a few more start positions than it
   * needs to.
   */
  protected transient Long longestAnnot = 0l;
  
  // relations attached to this set; not consulted by any code in this part
  // of the class
  protected RelationSet relations = null;

  // Empty AnnotationSet to be returned instead of null
   //public final static AnnotationSet emptyAS;

   //static {
   //emptyAnnotationSet = new ImmutableAnnotationSetImpl(null,null);
   //}

  /**
   * Creates an empty, unnamed annotation set attached to a document. Only the
   * id index is created here; the other indices stay null.
   *
   * @param doc the owning document; it is cast to {@link DocumentImpl}
   */
  public AnnotationSetImpl(Document doc) {
    this.doc = (DocumentImpl)doc;
    this.annotsById = new HashMap();
  } // construction from document

  /**
   * Creates an empty annotation set attached to a document and gives it a
   * name.
   *
   * @param doc the owning document
   * @param name the name of the new set
   */
  public AnnotationSetImpl(Document doc, String name) {
    // the single-argument constructor sets up the document and the id index
    this(doc);
    this.name = name;
  } // construction from document and name

  /**
   * Construction from an existing AnnotationSet. The new set takes the
   * document and name of the original.
   * <p>
   * When the original is an {@code AnnotationSetImpl} its indices are copied
   * directly (the index maps are new, their values are the same objects as in
   * the original); otherwise the annotations are added one by one through
   * {@link #add(Annotation)}.
   *
   * @param c the annotation set to copy
   */
  @SuppressWarnings("unchecked")
  public AnnotationSetImpl(AnnotationSet c) throws ClassCastException {
    this(c.getDocument(), c.getName());
    // the original annotationset is of the same implementation
    if(c instanceof AnnotationSetImpl) {
      AnnotationSetImpl theC = (AnnotationSetImpl)c;
      annotsById.putAll(theC.annotsById);
      if(theC.annotsByStartNode != null) {
        annotsByStartNode = new HashMap(Gate.HASH_STH_SIZE);
        annotsByStartNode.putAll(theC.annotsByStartNode);
        // the range queries use longestAnnot to decide how far before the
        // requested start offset they have to look; a copied start index
        // combined with the default value of 0 would make them miss every
        // annotation that starts before the requested range
        if(theC.longestAnnot != null) longestAnnot = theC.longestAnnot;
      }
      if(theC.annotsByType != null) {
        annotsByType = new HashMap(Gate.HASH_STH_SIZE);
        annotsByType.putAll(theC.annotsByType);
      }
      if(theC.nodesByOffset != null) {
        nodesByOffset = (RBTreeMap)theC.nodesByOffset.clone();
      }
    }
    // the implementation is not the default one
    // let's add the annotations one by one
    else {
      Iterator iterannots = c.iterator();
      while(iterannots.hasNext()) {
        add((Annotation)iterannots.next());
      }
    }
  }
  
  /**
   * Removes every annotation from this set and discards all indices.
   */
  @Override
  public void clear() {
    // let AbstractSet.clear() remove the elements through the iterator first:
    // simply dropping the indices would empty the set without firing the
    // removal events
    super.clear();

    // now start again from pristine indices so nothing stale survives
    longestAnnot = 0l;
    annotsByType = null;
    annotsByStartNode = null;
    nodesByOffset = null;
    annotsById = new HashMap();
  }

  /**
   * The iterator handed out by {@link #iterator()}. It walks the values of
   * the id index and keeps the other indices in step when an element is
   * removed through it.
   */
  class AnnotationSetIterator implements Iterator {
    /** Iterator over the values of the id index. */
    private Iterator idIndexIter;
    /** The annotation most recently returned by next(), if any. */
    protected Annotation lastNext = null;

    AnnotationSetIterator() {
      idIndexIter = annotsById.values().iterator();
    }

    @Override
    public boolean hasNext() {
      return idIndexIter.hasNext();
    }

    @Override
    public Annotation next() {
      lastNext = (Annotation)idIndexIter.next();
      return lastNext;
    }

    /**
     * Removes the last returned annotation from the id index and then from
     * the type and offset indices, and notifies the annotation set listeners.
     */
    @Override
    public void remove() {
      // the underlying iterator takes care of the ID index
      idIndexIter.remove();

      if(lastNext != null) {
        removeFromTypeIndex(lastNext);
        removeFromOffsetIndex(lastNext);
        // this is the second way of removing annotations from a set,
        // besides calling remove() on the set itself, so fire here as well
        AnnotationSetEvent removal = new AnnotationSetEvent(
                AnnotationSetImpl.this,
                AnnotationSetEvent.ANNOTATION_REMOVED, getDocument(),
                lastNext);
        fireAnnotationRemoved(removal);
      }
    } // remove()
  } // AnnotationSetIterator

  /**
   * Returns a new {@link AnnotationSetIterator} over the annotations of this
   * set.
   */
  @Override
  public Iterator iterator() {
    Iterator annotationIterator = new AnnotationSetIterator();
    return annotationIterator;
  }

  /**
   * Remove an element from this set. The annotation is looked up by its id;
   * when it is found it is also removed from the type and offset indices and
   * the annotation set listeners are notified.
   *
   * @param o the annotation to remove
   * @return true if an annotation with that id was present and has been
   *         removed, false if the set was left unchanged
   * @throws ClassCastException if o is not an {@link Annotation}
   */
  @Override
  public boolean remove(Object o) throws ClassCastException {
    Annotation a = (Annotation)o;
    boolean wasPresent = removeFromIdIndex(a);
    if(wasPresent) {
      removeFromTypeIndex(a);
      removeFromOffsetIndex(a);
      // only announce a removal that actually happened, as the iterator's
      // remove() does; listeners must not be told about annotations that
      // were never part of this set
      fireAnnotationRemoved(new AnnotationSetEvent(AnnotationSetImpl.this,
              AnnotationSetEvent.ANNOTATION_REMOVED, getDocument(), a));
    }
    return wasPresent;
  } // remove(o)

  /**
   * Remove from the ID index.
   *
   * @return true if the index held a non-null entry for the annotation's id
   */
  protected boolean removeFromIdIndex(Annotation a) {
    Object previous = annotsById.remove(a.getId());
    return previous != null;
  } // removeFromIdIndex(a)

  /**
   * Remove from the type index. Does nothing when the type index has not been
   * built or holds no set for the annotation's type.
   */
  protected void removeFromTypeIndex(Annotation a) {
    if(annotsByType == null) return;
    AnnotationSet sameType = (AnnotationSet)annotsByType.get(a.getType());
    if(sameType == null) return;
    sameType.remove(a);
    // none left of this type: drop the per-type set altogether
    if(sameType.isEmpty()) annotsByType.remove(a.getType());
  } // removeFromTypeIndex(a)

  /**
   * Remove from the offset indices. Only the start-node index is updated;
   * nodesByOffset is left alone because knowing when a node is no longer
   * needed would require a reference count on annotations, or weak references
   * to the nodes.
   * <p>
   * Does nothing when the start-node index has not been built or when nothing
   * is indexed at the annotation's start node.
   */
  protected void removeFromOffsetIndex(Annotation a) {
    if(annotsByStartNode == null) return;
    Integer id = a.getStartNode().getId();
    // might be an annotation or a collection of annotations
    Object objectAtNode = annotsByStartNode.get(id);
    // nothing is indexed at this node, so there is nothing to remove
    // (previously this case failed with a NullPointerException)
    if(objectAtNode == null) return;
    if(objectAtNode instanceof Annotation) {
      // no annotations start here any more
      annotsByStartNode.remove(id);
      return;
    }
    // otherwise it is a Collection
    @SuppressWarnings("unchecked")
    Collection starterAnnots = (Collection)objectAtNode;
    starterAnnots.remove(a);
    if(starterAnnots.isEmpty()) {
      // do not leave an empty collection behind in the index
      annotsByStartNode.remove(id);
    } else if(starterAnnots.size() == 1) {
      // if there is only one annotation left
      // we discard the collection and put directly the annotation
      annotsByStartNode.put(id, starterAnnots.iterator().next());
    }
  } // removeFromOffsetIndex(a)

  /** The number of annotations in this set, i.e. the size of the id index. */
  @Override
  public int size() {
    final int annotationCount = annotsById.size();
    return annotationCount;
  }

  /**
   * Find an annotation by id.
   *
   * @return whatever the id index holds for the given id
   */
  @Override
  public Annotation get(Integer id) {
    Annotation found = (Annotation)annotsById.get(id);
    return found;
  } // get(id)

  /**
   * Get all annotations.
   *
   * @return the result of emptyAS() when this set is empty, otherwise a new
   *         ImmutableAnnotationSetImpl built from all annotations of this set
   */
  @Override
  public AnnotationSet get() {
    if(!annotsById.isEmpty()) {
      return new ImmutableAnnotationSetImpl(doc, annotsById.values());
    }
    return emptyAS();
  } // get()

  /**
   * Select annotations by type. The type index is built on first use.
   *
   * @return the result of emptyAS() when no annotation of that type is
   *         indexed, otherwise the result of get() on the per-type set
   */
  @Override
  public AnnotationSet get(String type) {
    if(annotsByType == null) indexByType();
    AnnotationSet byType = (AnnotationSet)annotsByType.get(type);
    if(byType != null) {
      // hand out an immutable view rather than the mutable index entry
      return byType.get();
    }
    return emptyAS();
  } // get(type)

  /**
   * Select annotations by a set of types. Expects a Set of String. The type
   * index is built on first use.
   *
   * @return the result of emptyAS() when nothing matches, otherwise a new
   *         ImmutableAnnotationSetImpl holding the annotations of all the
   *         requested types
   */
  @Override
  public AnnotationSet get(Set types) throws ClassCastException {
    if(annotsByType == null) indexByType();
    List collected = new ArrayList();
    for(Iterator typeIter = types.iterator(); typeIter.hasNext();) {
      String wantedType = (String)typeIter.next();
      AnnotationSet ofType = (AnnotationSet)annotsByType.get(wantedType);
      // types that are not present in this set contribute nothing
      if(ofType == null) continue;
      for(Iterator annIter = ofType.iterator(); annIter.hasNext();) {
        collected.add(annIter.next());
      }
    }
    if(collected.isEmpty()) return emptyAS();
    return new ImmutableAnnotationSetImpl(doc, collected);
  } // get(types)

  /**
   * Select annotations by type and features
   *
   * This will return an annotation set containing just those annotations of a
   * particular type (i.e. with a particular name) and which have features with
   * specific names and values. (It will also return annotations that have
   * features besides those specified, but it will not return any annotations
   * that do not have all the specified feature-value pairs.) The matching
   * itself is delegated to {@code FeatureMap.subsumes(constraints)}.
   *
   * However, if constraints contains a feature whose name is equal to
   * gate.creole.ANNIEConstants.LOOKUP_CLASS_FEATURE_NAME (which is normally
   * "class"), then GATE will attempt to match that feature using an ontology
   * which it will try to retrieve from a feature on both the annotation and
   * constraints. If these do not return identical ontologies, or if either
   * the annotation or constraints does not contain an ontology, then matching
   * will fail, and the annotation will not be added. In summary, this method
   * will not work normally for features with the name "class".
   *
   * @param type
   *          The name of the annotations to return.
   * @param constraints
   *          A feature map containing all of the feature value pairs that the
   *          annotation must have in order for them to be returned.
   * @return An annotation set containing only those annotations with the given
   *         name and which have the specified set of feature-value pairs;
   *         never null, the result of emptyAS() is returned when nothing
   *         matches.
   */
  @Override
  public AnnotationSet get(String type, FeatureMap constraints) {
    if(annotsByType == null) indexByType();
    AnnotationSet typeSet = get(type);
    // get(type) answers with an empty set rather than null; should that ever
    // change, stay consistent with the other getters and never return null
    if(typeSet == null) return emptyAS();
    Iterator iter = typeSet.iterator();
    List annotationsToAdd = new ArrayList();
    while(iter.hasNext()) {
      Annotation a = (Annotation)iter.next();
      // an annotation qualifies when its feature map subsumes the
      // constraints
      if(a.getFeatures().subsumes(constraints)) annotationsToAdd.add(a);
    } // while
    if(annotationsToAdd.isEmpty()) return emptyAS();
    return new ImmutableAnnotationSetImpl(doc, annotationsToAdd);
  } // get(type, constraints)

  /**
   * Select annotations by type and feature names. An annotation is selected
   * when the key set of its feature map contains all the given feature names;
   * feature values are not inspected.
   *
   * @param type
   *          the annotation type to select from; when null, all annotations
   *          of this set are considered
   * @param featureNames
   *          the feature names every returned annotation must have
   * @return the matching annotations; never null, the result of emptyAS() is
   *         returned when nothing matches
   */
  @Override
  public AnnotationSet get(String type, Set featureNames) {
    if(annotsByType == null) indexByType();
    Iterator iter;
    if(type != null) {
      // a type was provided, so only look at annotations of this type
      AnnotationSet typeSet = get(type);
      // get(type) answers with an empty set rather than null; should that
      // ever change, stay consistent with the other getters and never
      // return null
      if(typeSet == null) return emptyAS();
      iter = typeSet.iterator();
    } else {
      iter = annotsById.values().iterator();
    }
    List annotationsToAdd = new ArrayList();
    while(iter.hasNext()) {
      Annotation a = (Annotation)iter.next();
      // only the feature names are compared here, not the values
      if(a.getFeatures().keySet().containsAll(featureNames))
        annotationsToAdd.add(a);
    } // while
    if(annotationsToAdd.isEmpty()) return emptyAS();
    return new ImmutableAnnotationSetImpl(doc, annotationsToAdd);
  } // get(type, featureNames)

  /**
   * Select annotations by offset. Starting from the first node at or after
   * the given offset, this looks for the first node at which annotations
   * start and returns those annotations. If a positional index doesn't exist
   * it is created. If there are no nodes at or beyond the offset param, or
   * none of them starts an annotation, the result of emptyAS() is returned.
   */
  @Override
  public AnnotationSet get(Long offset) {
    if(annotsByStartNode == null) indexByStartOffset();
    Node candidate = (Node)nodesByOffset.getNextOf(offset);
    while(candidate != null) {
      Collection startingHere = getAnnotsByStartNode(candidate.getId());
      if(startingHere != null) {
        return new ImmutableAnnotationSetImpl(doc, startingHere);
      }
      // no annotation starts at this node: move on to the next one
      candidate = (Node)nodesByOffset.getNextOf(candidate.getOffset()
              .longValue() + 1);
    }
    // ran out of nodes without finding any starting annotations
    return emptyAS();
  }

  
  /**
   * Select annotations by offset. This returns the set of annotations that
   * start exactly at the given offset. If a positional index doesn't exist
   * it is created.
   *
   * @param offset The starting offset for which to return annotations
   * @return the result of emptyAS() when there is no node at the given
   *   offset, otherwise a new ImmutableAnnotationSetImpl built from the
   *   annotations indexed at that node
   */
  public AnnotationSet getStartingAt(long offset) {
    if(annotsByStartNode == null) indexByStartOffset();
    Node nodeAtOffset = (Node)nodesByOffset.get(offset);
    // no node sits exactly at this offset
    if(nodeAtOffset == null) return emptyAS();
    Collection startingHere = getAnnotsByStartNode(nodeAtOffset.getId());
    return new ImmutableAnnotationSetImpl(doc, startingHere);
  }
  
  /**
   * Return a list of annotations sorted by increasing start offset, i.e. in
   * the order they appear in the document. If more than one annotation starts
   * at a specific offset the order of these annotations is unspecified. If a
   * positional index does not exist, it is created.
   *
   * @return a new list of annotations ordered by increasing start offset
   */
  @Override
  public List inDocumentOrder() {
    if(annotsByStartNode == null) indexByStartOffset();
    List ordered = new ArrayList();
    // visit the nodes in the order the offset index yields them and append
    // whatever starts at each one
    Iterator nodeIter = nodesByOffset.values().iterator();
    while(nodeIter.hasNext()) {
      Node node = (Node)nodeIter.next();
      Collection startingHere = getAnnotsByStartNode(node.getId());
      if(startingHere == null) continue;
      ordered.addAll(startingHere);
    }
    return ordered;
  }
  
  /**
   * Select annotations by offset. This returns the set of annotations that
   * overlap totally or partially with the interval defined by the two
   * provided offsets. The result will include all the annotations that
   * either:
   *
   * start before the start offset and end strictly after it
   * OR
   * start at a position between the start and the end offsets
   *
   * This is the typed variant called with no type restriction.
   *
   * @return the annotations of any type overlapping the interval
   */
  @Override
  public AnnotationSet get(Long startOffset, Long endOffset) {
    final String anyType = null;
    return get(anyType, startOffset, endOffset);
  } // get(startOffset, endOffset)

  /**
   * Select annotations by offset. The result holds exactly those annotations
   * that start at the start offset and end at the end offset. If a positional
   * index doesn't exist it is created.
   */
  public AnnotationSet getStrict(Long startOffset, Long endOffset) {
    if(annotsByStartNode == null) indexByStartOffset();
    // stays null unless at least one annotation matches
    List matching = null;
    Node startNode = (Node)nodesByOffset.get(startOffset);
    Collection startingHere = (startNode == null)
            ? null
            : getAnnotsByStartNode(startNode.getId());
    if(startingHere != null) {
      // among the annotations starting at the start offset keep those that
      // end exactly at the end offset
      for(Iterator it = startingHere.iterator(); it.hasNext();) {
        Annotation candidate = (Annotation)it.next();
        if(candidate.getEndNode().getOffset().compareTo(endOffset) != 0)
          continue;
        if(matching == null) matching = new ArrayList();
        matching.add(candidate);
      }
    }
    return new ImmutableAnnotationSetImpl(doc, matching);
  } // getStrict(startOffset, endOffset)

  /**
   * Select annotations by offset. This returns the set of annotations of the
   * given type that overlap totally or partially with the interval defined by
   * the two provided offsets. The result will include all the annotations that
   * either:
   *
   * start before the start offset and end strictly after it
   * OR
   * start at a position between the start and the end offsets
   *
   * If a positional index doesn't exist it is created.
   *
   * @param neededType
   *          the type of the annotations to return; when blank, annotations
   *          of all types are returned
   * @param startOffset
   *          the start of the interval
   * @param endOffset
   *          the end of the interval
   * @return the overlapping annotations; the result of emptyAS() when
   *         endOffset lies before startOffset
   */
  @Override
  public AnnotationSet get(String neededType, Long startOffset, Long endOffset) {
    // check the range, as getCovering and getContained do: nothing can
    // overlap an inverted interval, and such a range must not be handed to
    // subMap below
    if(endOffset < startOffset) return emptyAS();
    if(annotsByStartNode == null) indexByStartOffset();
    List annotationsToAdd = new ArrayList();
    boolean checkType = StringUtils.isNotBlank(neededType);

    // find all the annots that start strictly before the start offset and
    // end strictly after it. No annotation is longer than longestAnnot, so
    // nodes further back than that cannot start an annotation reaching into
    // the range
    Long searchStart = (startOffset - longestAnnot);
    if(searchStart < 0) searchStart = 0l;
    Iterator nodesIter =
            nodesByOffset.subMap(searchStart, startOffset).values().iterator();
    while(nodesIter.hasNext()) {
      Node currentNode = (Node)nodesIter.next();
      Collection objFromPoint = getAnnotsByStartNode(currentNode.getId());
      if(objFromPoint == null) continue;
      Iterator annotsIter = objFromPoint.iterator();
      while(annotsIter.hasNext()) {
        Annotation currentAnnot = (Annotation)annotsIter.next();
        // if neededType is set, make sure this is the right type
        if(checkType && !currentAnnot.getType().equals(neededType)) continue;
        if(currentAnnot.getEndNode().getOffset().compareTo(startOffset) > 0) {
          annotationsToAdd.add(currentAnnot);
        }
      } // while
    }

    // find all the annots that start at or after the start offset but
    // before the end offset
    nodesIter =
            nodesByOffset.subMap(startOffset, endOffset).values().iterator();
    while(nodesIter.hasNext()) {
      Node currentNode = (Node)nodesIter.next();
      Collection objFromPoint = getAnnotsByStartNode(currentNode.getId());
      if(objFromPoint == null) continue;
      if(!checkType) {
        // no specific type requested, add all of the annots
        annotationsToAdd.addAll(objFromPoint);
        continue;
      }
      // check the type of each annot
      Iterator annotsIter = objFromPoint.iterator();
      while(annotsIter.hasNext()) {
        Annotation currentAnnot = (Annotation)annotsIter.next();
        if(currentAnnot.getType().equals(neededType))
          annotationsToAdd.add(currentAnnot);
      } // while
    }
    return new ImmutableAnnotationSetImpl(doc, annotationsToAdd);
  } // get(type, startOffset, endOffset)

  /**
   * Select annotations of the given type that completely span the range.
   * Formally, for any annotation a, a will be included in the return
   * set if:
   *
   * {@code a.getStartNode().getOffset() <= startOffset}
   * and
   * {@code a.getEndNode().getOffset() >= endOffset}
   *
   * If a positional index doesn't exist it is created.
   *
   * @param neededType Type of annotation to return. If blank, all
   *          annotation types will be returned.
   * @return annotations of the given type that completely span the range;
   *         the result of emptyAS() when the range is inverted or longer
   *         than the longest annotation recorded for this set.
   */
  @Override
  public AnnotationSet getCovering(String neededType, Long startOffset, Long endOffset) {
    // an inverted range cannot be covered by anything
    if(endOffset < startOffset) return emptyAS();
    if(annotsByStartNode == null) indexByStartOffset();
    // nothing can cover a range that is longer than the longest annotation
    if(endOffset - startOffset > longestAnnot) return emptyAS();

    final boolean filterByType = StringUtils.isNotBlank(neededType);
    // an annotation that starts earlier than this cannot be long enough to
    // reach the end offset, so the search can begin here
    Long lowestStart = ((endOffset - 1) - longestAnnot);
    if(lowestStart < 0) lowestStart = 0l;

    List covering = new ArrayList();
    // candidate start nodes are those at or before the start offset; the
    // "+ 1" is needed because the upper bound of subMap is exclusive
    Iterator nodeIter =
            nodesByOffset.subMap(lowestStart, startOffset + 1).values().iterator();
    while(nodeIter.hasNext()) {
      Node startNode = (Node)nodeIter.next();
      Collection startingHere = getAnnotsByStartNode(startNode.getId());
      if(startingHere == null) continue;
      for(Iterator annIter = startingHere.iterator(); annIter.hasNext();) {
        Annotation candidate = (Annotation)annIter.next();
        boolean typeOk =
                !filterByType || candidate.getType().equals(neededType);
        // keep it only when it also ends at or after the end offset
        if(typeOk
                && candidate.getEndNode().getOffset().compareTo(endOffset) >= 0) {
          covering.add(candidate);
        }
      }
    }
    return new ImmutableAnnotationSetImpl(doc, covering);
  } // getCovering(type, startOffset, endOffset)

  /**
   * Select annotations by type, features and offset. The selection by offset
   * is made first, via get(offset); the type and the constraints are then
   * applied to that result.
   */
  @Override
  public AnnotationSet get(String type, FeatureMap constraints, Long offset) {
    return get(offset).get(type, constraints);
  } // get(type, constraints, offset)

  /**
   * Select annotations contained within an interval, i.e. those annotations
   * whose start position is {@code >= startOffset} and whose end position is
   * {@code <= endOffset}. Start nodes are taken from
   * {@code subMap(startOffset, endOffset)} of the offset index. If a
   * positional index doesn't exist it is created.
   *
   * @return the contained annotations; the result of emptyAS() when the range
   *         is inverted
   */
  @Override
  public AnnotationSet getContained(Long startOffset, Long endOffset) {
    // check the range
    if(endOffset < startOffset) return emptyAS();
    // ensure index
    if(annotsByStartNode == null) indexByStartOffset();
    // stays null unless at least one annotation qualifies
    List contained = null;
    // walk the nodes that lie at or after the start offset but strictly
    // before the end offset
    for(Iterator nodeIter =
            nodesByOffset.subMap(startOffset, endOffset).values().iterator();
            nodeIter.hasNext();) {
      Node startNode = (Node)nodeIter.next();
      Collection startingHere = getAnnotsByStartNode(startNode.getId());
      if(startingHere == null) continue;
      for(Iterator annIter = startingHere.iterator(); annIter.hasNext();) {
        Annotation annot = (Annotation)annIter.next();
        // skip the ones that run past the end offset
        if(annot.getEndNode().getOffset().compareTo(endOffset) > 0) continue;
        if(contained == null) contained = new ArrayList();
        contained.add(annot);
      }
    }
    return new ImmutableAnnotationSetImpl(doc, contained);
  } // getContained(startOffset, endOffset)

  /**
   * Get the node with the smallest offset.
   *
   * @return the node stored under the first key of the offset index, or null
   *         when that index is empty
   */
  @Override
  public Node firstNode() {
    indexByStartOffset();
    if(!nodesByOffset.isEmpty()) {
      Object smallestOffset = nodesByOffset.firstKey();
      return (Node)nodesByOffset.get(smallestOffset);
    }
    return null;
  } // firstNode

  /**
   * Get the node with the largest offset.
   *
   * @return the node stored under the last key of the offset index, or null
   *         when that index is empty
   */
  @Override
  public Node lastNode() {
    indexByStartOffset();
    if(!nodesByOffset.isEmpty()) {
      Object largestOffset = nodesByOffset.lastKey();
      return (Node)nodesByOffset.get(largestOffset);
    }
    return null;
  } // lastNode

  /**
   * Get the first node that is relevant for this annotation set and which has
   * an offset larger than that of the node provided. The lookup asks the
   * offset index for the next node at the provided node's offset plus one.
   */
  @Override
  public Node nextNode(Node node) {
    indexByStartOffset();
    long justAfter = node.getOffset().longValue() + 1;
    return (Node)nodesByOffset.getNextOf(justAfter);
  }

  /**
   * The factory used by add(Node, Node, String, FeatureMap) to create
   * annotation objects. Shared by all annotation sets; replaced through
   * setAnnotationFactory.
   */
  protected static AnnotationFactory annFactory;

  /**
   * Set the annotation factory used to create annotation objects. The default
   * factory is {@link DefaultAnnotationFactory}. The factory is held in a
   * static field, so the change applies to every annotation set.
   *
   * @param newFactory the factory to use from now on
   */
  public static void setAnnotationFactory(AnnotationFactory newFactory) {
    AnnotationSetImpl.annFactory = newFactory;
  }

  static {
    // install the default factory, which always creates AnnotationImpl
    // objects
    AnnotationFactory defaultFactory = new DefaultAnnotationFactory();
    setAnnotationFactory(defaultFactory);
  }

  /**
   * Create and add an annotation with pre-existing nodes, and return its id.
   * The id is obtained from the document and the annotation is created in
   * this set by the current annotation factory.
   * <p>
   * Note that only Nodes retrieved from the same annotation set should be used
   * to create a new annotation using this method. Using Nodes from other
   * annotation sets may lead to undefined behaviour. If in any doubt use the
   * Long based add method instead of this one.
   *
   * @return the id of the new annotation
   */
  @Override
  public Integer add(Node start, Node end, String type, FeatureMap features) {
    final Integer newId = doc.getNextAnnotationId();
    annFactory.createAnnotationInSet(this, newId, start, end, type, features);
    return newId;
  } // add(Node, Node, String, FeatureMap)

  /**
   * Add an existing annotation. The annotation is stored under its id; an
   * annotation previously stored under the same id is replaced and removed
   * from whichever of the type and offset indices exist. The listeners are
   * notified of the addition in every case.
   *
   * @return false only when this very object was already stored under its
   *         id, true otherwise
   */
  @Override
  public boolean add(Annotation a) throws ClassCastException {
    Annotation oldValue = (Annotation)annotsById.put(a.getId(), a);
    final boolean replaced = oldValue != null;

    // forget the replaced annotation in the indices that have been built
    if(replaced && annotsByType != null) removeFromTypeIndex(oldValue);
    if(replaced && annotsByStartNode != null) removeFromOffsetIndex(oldValue);

    // and register the new one in them
    if(annotsByType != null) addToTypeIndex(a);
    if(annotsByStartNode != null) addToStartOffsetIndex(a);

    AnnotationSetEvent added = new AnnotationSetEvent(this,
            AnnotationSetEvent.ANNOTATION_ADDED, doc, a);
    fireAnnotationAdded(added);
    fireGateEvent(added);
    return oldValue != a;
  } // add(o)

  /**
   * Adds multiple annotations to this set in one go. All the objects in the
   * provided collection should be of {@link gate.Annotation} type, otherwise a
   * ClassCastException will be thrown. The provided annotations will be used to
   * create new annotations using the appropriate add() methods from this set.
   * The new annotations will have different IDs from the old ones (which is
   * required in order to preserve the uniqueness of IDs inside an annotation
   * set).
   *
   * @param c
   *          a collection of annotations
   * @return true if the set has been modified as a result of this
   *         call.
   * @throws IllegalArgumentException
   *           if the offsets of one of the annotations are rejected by this
   *           set; the underlying InvalidOffsetException is attached as the
   *           cause
   */
  @Override
  public boolean addAll(Collection c) {
    Iterator annIter = c.iterator();
    boolean changed = false;
    while(annIter.hasNext()) {
      Annotation a = (Annotation)annIter.next();
      try {
        add(a.getStartNode().getOffset(), a.getEndNode().getOffset(), a
                .getType(), a.getFeatures());
        changed = true;
      } catch(InvalidOffsetException ioe) {
        // same message as before, but keep the original exception as the
        // cause so its stack trace is not lost
        throw new IllegalArgumentException(ioe.toString(), ioe);
      }
    }
    return changed;
  }

  /**
   * Adds multiple annotations to this set in one go. All the objects in the
   * provided collection should be of {@link gate.Annotation} type, otherwise a
   * ClassCastException will be thrown. Unlike addAll(), this method does not
   * create copies: each annotation is passed as it is to
   * {@link #add(Annotation)}, so it keeps its id. It is intended to be used
   * solely by annotation sets in order to construct the results for various
   * get(...) methods.
   *
   * @param c
   *          a collection of annotations
   * @return true if at least one of the add calls reported a modification of
   *         the set.
   */
  protected boolean addAllKeepIDs(Collection c) {
    boolean changed = false;
    for(Iterator annIter = c.iterator(); annIter.hasNext();) {
      Annotation next = (Annotation)annIter.next();
      // every annotation is added, whether or not an earlier one already
      // changed the set
      boolean addedNow = add(next);
      changed = changed || addedNow;
    }
    return changed;
  }

  /**
   * Finds or creates the pair of nodes delimiting the given offset range.
   * Nodes already registered in the offset index are reused; a missing node
   * is created with the next node ID supplied by the document. When both
   * offsets are equal the very same node is returned in both positions.
   *
   * @param start the start offset
   * @param end the end offset
   * @return a two-element array holding the start node and the end node, in
   *         that order
   * @throws InvalidOffsetException if the document does not accept the range
   **/
  private final Node[] getNodes(Long start, Long end) throws InvalidOffsetException
  {
    // reject ranges the document does not consider valid
    if(!doc.isValidOffsetRange(start, end)) {
      throw new InvalidOffsetException("Offsets [" + start + ":" + end +
              "] not valid for this document of size " + doc.getContent().size());
    }
    // the offset index is needed to tell existing nodes from missing ones
    if(nodesByOffset == null) {
      indexByStartOffset();
    }

    Node startNode = (Node)nodesByOffset.get(start);
    if(startNode == null) {
      startNode = new NodeImpl(doc.getNextNodeId(), start);
    }

    Node endNode;
    if(start.equals(end)) {
      // zero-length range: both ends share one node
      endNode = startNode;
    } else {
      endNode = (Node)nodesByOffset.get(end);
      if(endNode == null) {
        endNode = new NodeImpl(doc.getNextNodeId(), end);
      }
    }
    return new Node[]{startNode, endNode};
  }


  /**
   * Creates an annotation spanning the given offsets and adds it to this set.
   * The offsets are first resolved to nodes, then the work is handed over to
   * the add method that takes nodes.
   *
   * @return the value returned by the node-based add method (the ID of the
   *         new annotation)
   * @throws InvalidOffsetException if the offsets cannot be resolved to nodes
   */
  @Override
  public Integer add(Long start, Long end, String type, FeatureMap features)
          throws InvalidOffsetException {
    Node[] endpoints = getNodes(start, end);
    Node startNode = endpoints[0];
    Node endNode = endpoints[1];
    return add(startNode, endNode, type, features);
  } // add(start, end, type, features)

  /**
   * Creates and adds an annotation whose ID is already known, e.g. because
   * it was previously fetched from a database. The annotation is built by the
   * annotation factory directly inside this set.
   *
   * @throws InvalidOffsetException if the offsets cannot be resolved to nodes
   */
  @Override
  public void add(Integer id, Long start, Long end, String type,
          FeatureMap features) throws InvalidOffsetException {
    Node[] endpoints = getNodes(start, end);
    Node startNode = endpoints[0];
    Node endNode = endpoints[1];
    annFactory.createAnnotationInSet(this, id, startNode, endNode, type,
            features);

    // an explicitly chosen ID must not be handed out again later on, so
    // push the document's ID counter past it when necessary
    boolean counterNotPastId = id >= doc.peakAtNextAnnotationId();
    if(counterNotPastId) {
      doc.setNextAnnotationId(id + 1);
    }
  } // add(id, start, end, type, features)

  /**
   * Constructs the index by annotation type. Does nothing if the index
   * already exists; otherwise every annotation currently held in the ID map
   * is registered in a freshly created type index.
   */
  protected void indexByType() {
    if(annotsByType == null) {
      annotsByType = new HashMap(Gate.HASH_STH_SIZE);
      for(Object existing : annotsById.values()) {
        addToTypeIndex((Annotation)existing);
      }
    }
  } // indexByType()

  /**
   * Constructs the positional indices for annotation start. Does nothing if
   * the start-node index already exists; otherwise the node-by-offset map is
   * created if missing and every annotation held in the ID map is indexed.
   */
  protected void indexByStartOffset() {
    if(annotsByStartNode == null) {
      if(nodesByOffset == null) {
        nodesByOffset = new RBTreeMap();
      }
      annotsByStartNode = new HashMap(annotsById.size());
      for(Object existing : annotsById.values()) {
        addToStartOffsetIndex((Annotation)existing);
      }
    }
  } // indexByStartOffset()

  /**
   * Registers an annotation in the type index, creating the per-type
   * annotation set on first use. Does nothing if the type index has not been
   * built.
   */
  void addToTypeIndex(Annotation a) {
    if(annotsByType != null) {
      String annType = a.getType();
      AnnotationSet bucket = (AnnotationSet)annotsByType.get(annType);
      if(bucket == null) {
        // first annotation of this type: open a new set for it
        bucket = new AnnotationSetImpl(doc);
        annotsByType.put(annType, bucket);
      }
      bucket.add(a);
    }
  } // addToTypeIndex(a)

  /**
   * Registers an annotation in the positional bookkeeping of this set.
   * The annotation's nodes are recorded in the node-by-offset map (when that
   * map exists) and the longest annotation length is updated, regardless of
   * whether the start-node index exists. The annotation itself is only
   * entered into the start-node index if that index has been built.
   *
   * A start-node index entry holds either a single Annotation or, once a
   * second annotation starts at the same node, a Set of annotations.
   */
  @SuppressWarnings("unchecked")
  void addToStartOffsetIndex(Annotation a) {
    Node firstNode = a.getStartNode();
    Node lastNode = a.getEndNode();
    Long startOffset = firstNode.getOffset();
    Long endOffset = lastNode.getOffset();

    // record both nodes under their offsets
    if(nodesByOffset != null) {
      nodesByOffset.put(startOffset, firstNode);
      nodesByOffset.put(endOffset, lastNode);
    }

    // keep track of the longest annotation seen so far
    long span = endOffset - startOffset;
    if(span > longestAnnot) {
      longestAnnot = span;
    }

    // without a start-node index there is nothing more to do
    if(annotsByStartNode == null) return;

    Integer startId = firstNode.getId();
    Object current = annotsByStartNode.get(startId);
    if(current == null) {
      // nothing starts at this node yet: store the annotation directly
      annotsByStartNode.put(startId, a);
      return;
    }
    if(current instanceof Annotation) {
      // the very same annotation is already indexed
      if(current.equals(a)) return;
      // second annotation at this node: upgrade the entry to a set
      Set sharedStart = new HashSet(3);
      sharedStart.add((Annotation)current);
      annotsByStartNode.put(startId, sharedStart);
      sharedStart.add(a);
    } else {
      // a set is already in place, just extend it
      ((Set)current).add(a);
    }
  } // addToStartOffsetIndex(a)

  /**
   * Propagate document content changes to this AnnotationSet. 
   * 
   * This method is called for all annotation sets of a document from 
   * DocumentImpl.edit to adapt the annotations to the text changes made through
   * the edit. The behaviour of this method is influenced by the configuration
   * setting {@link gate.GateConstants#DOCEDIT_INSERT_PREPEND GateConstants.DOCEDIT_INSERT_PREPEND }: 
   * annotations immediately 
   * ending before or starting after the point of insertion will either become
   * part of the inserted text or not. Currently it works like this:
   * 
   * PREPEND=true: annotation before will become part, annotation after not
   * 
   * PREPEND=false: annotation before will not become part, annotation after 
   * will become part
   * 
   * NOTE 1 (JP): There is another setting
   * {@link gate.GateConstants#DOCEDIT_INSERT_APPEND GateConstants.DOCEDIT_INSERT_APPEND }
   * but 
   * this setting does currently not influence the behaviour of this method. 
   * The behaviour of this method may change in the future so that 
   * DOCEDIT_INSERT_APPEND is considered separately and in addition to 
   * DOCEDIT_INSERT_PREPEND so that it can be controlled independently if 
   * the annotation before and/or after an insertion point gets expanded or not.
   * 
   * NOTE 2: This method has, unfortunately, to be
   * public, to allow DocumentImpls to get at it. Oh for a "friend" declaration.
   * Doesn't throw InvalidOffsetException as DocumentImpl is the only client,
   * and that checks the offsets before calling this method.
   *
   * The method works in two phases. If the replaced range is not empty
   * (end &gt; start), all nodes inside the range are collapsed onto the first
   * of them and annotations that thereby become zero-length are removed.
   * Afterwards every node at or after the start offset is shifted by the
   * difference between the replacement length and the replaced length.
   *
   * @param start the start offset of the replaced range
   * @param end the end offset of the replaced range
   * @param replacement the new content; null is treated as content of length 0
   */
  public void edit(Long start, Long end, DocumentContent replacement) {
    // make sure we have the indices computed
    indexByStartOffset();
    // phase 1: only needed when some text is actually being removed
    if(end.compareTo(start) > 0) {
      // get the nodes that need to be processed (the nodes internal to
      // the removed section plus the marginal ones); the upper bound is
      // end + 1, presumably because subMap excludes its upper bound as in
      // java.util.SortedMap -- TODO confirm against RBTreeMap
      List affectedNodes = new ArrayList(nodesByOffset.subMap(start,
              end.longValue() + 1).values());
      // if we have more than 1 node we need to delete all apart from
      // the first and move the annotations so that they refer to the
      // one we keep (the first)
      NodeImpl firstNode = null;
      if(!affectedNodes.isEmpty()) {
        firstNode = (NodeImpl)affectedNodes.get(0);
        List startingAnnotations = new ArrayList();
        List endingAnnotations = new ArrayList();
        // now we need to find all the annotations ending in the zone:
        // look at every annotation starting at a node between offset 0
        // and the end of the zone
        List beforeNodes = new ArrayList(nodesByOffset.subMap(0L,
                end.longValue() + 1).values());
        Iterator beforeNodesIter = beforeNodes.iterator();
        while(beforeNodesIter.hasNext()) {
          Node currentNode = beforeNodesIter.next();
          Collection annotations = getAnnotsByStartNode(currentNode.getId());
          // no annotation starts at this node
          if(annotations == null) continue;
          // iterates on the annotations in this set
          Iterator localIterator = annotations.iterator();
          while(localIterator.hasNext()) {
            Annotation annotation = localIterator.next();
            long offsetEndAnnotation = annotation.getEndNode().getOffset()
                    .longValue();
            // we are interested only in the annotations ending
            // inside the zone (both bounds inclusive)
            if(offsetEndAnnotation >= start.longValue()
                    && offsetEndAnnotation <= end.longValue())
              endingAnnotations.add(annotation);
          }
        }
        // collect the annotations starting at any affected node other than
        // the first one (index 0 is the node we keep)
        for(int i = 1; i < affectedNodes.size(); i++) {
          Node aNode = affectedNodes.get(i);
          Collection annSet = getAnnotsByStartNode(aNode.getId());
          if(annSet != null) {
            startingAnnotations.addAll(annSet);
          }
          // the nodes themselves are removed further down, once the
          // annotations have been re-pointed:
          // nodesByOffset.remove(aNode.getOffset());
          // annotsByStartNode.remove(aNode);
        }
        // modify the annotations so they point to the saved node
        Iterator annIter = startingAnnotations.iterator();
        while(annIter.hasNext()) {
          AnnotationImpl anAnnot = (AnnotationImpl)annIter.next();
          anAnnot.start = firstNode;
          // remove the modified annotation if it has just become
          // zero-length (start and end are the same node object)
          if(anAnnot.start == anAnnot.end) {
            remove(anAnnot);
          } else {
            // otherwise index it under its new start node
            addToStartOffsetIndex(anAnnot);
          }
        }
        // same treatment for the end node of annotations ending in the zone
        annIter = endingAnnotations.iterator();
        while(annIter.hasNext()) {
          AnnotationImpl anAnnot = (AnnotationImpl)annIter.next();
          anAnnot.end = firstNode;
          // remove the modified annotation if it has just become
          // zero-length
          if(anAnnot.start == anAnnot.end) {
            remove(anAnnot);
          }
        }
        // remove the unused nodes inside the area
        for(int i = 1; i < affectedNodes.size(); i++) {
          Node aNode = affectedNodes.get(i);
          nodesByOffset.remove(aNode.getOffset());
          annotsByStartNode.remove(aNode.getId());
        }
        // repair the first node: the offset map is keyed by offset, so the
        // node has to be taken out before its offset changes
        // remove from offset index
        nodesByOffset.remove(firstNode.getOffset());
        // change the offset for the saved node
        firstNode.setOffset(start);
        // add back to the offset index
        nodesByOffset.put(firstNode.getOffset(), firstNode);
      }
    }
    // phase 2: now handle the insert and/or update the rest of the nodes'
    // position
    // read the user-configured insertion behaviour
    boolean shouldPrepend = Gate.getUserConfig().getBoolean(
            GateConstants.DOCEDIT_INSERT_PREPEND).booleanValue();
    long s = start.longValue(), e = end.longValue();
    long rlen = // length of the replacement value
    ((replacement == null) ? 0 : replacement.size().longValue());
    // update the offsets and the index by offset for the rest of the
    // nodes; a snapshot list is taken because the map is modified below
    List nodesAfterReplacement = new ArrayList(nodesByOffset.tailMap(start)
            .values());
    // remove from the index by offset
    Iterator nodesAfterReplacementIter = nodesAfterReplacement.iterator();
    while(nodesAfterReplacementIter.hasNext()) {
      NodeImpl n = (NodeImpl)nodesAfterReplacementIter.next();
      nodesByOffset.remove(n.getOffset());
    }
    // change the offsets
    nodesAfterReplacementIter = nodesAfterReplacement.iterator();
    while(nodesAfterReplacementIter.hasNext()) {
      NodeImpl n = (NodeImpl)nodesAfterReplacementIter.next();
      long oldOffset = n.getOffset().longValue();
      // by default we shift all nodes by (replacement length - removed
      // length)
      long newOffset = oldOffset - (e - s) + rlen;
      // for the first node we need behave differently
      if(oldOffset == s) {
        // the first offset never moves back
        if(newOffset < s) newOffset = s;
        // if we're prepending we don't move forward
        if(shouldPrepend) newOffset = s;
      }
      n.setOffset(newOffset);
    }
    // add back to the index by offset with the new offsets
    nodesAfterReplacementIter = nodesAfterReplacement.iterator();
    while(nodesAfterReplacementIter.hasNext()) {
      NodeImpl n = (NodeImpl)nodesAfterReplacementIter.next();
      nodesByOffset.put(n.getOffset(), n);
    }
    // //rebuild the indices with the new offsets
    // nodesByOffset = null;
    // annotsByStartNode = null;
    // annotsByEndNode = null;
    // indexByStartOffset();
    // indexByEndOffset();
  } // edit(start,end,replacement)

  /**
   * Returns the name of this annotation set.
   *
   * @return the value of the name field
   */
  @Override
  public String getName() {
    return this.name;
  }

  /**
   * Returns the document this annotation set is attached to.
   *
   * @return the value of the doc field
   */
  @Override
  public Document getDocument() {
    return this.doc;
  }

  /**
   * Returns the names of all annotation types present in this set, as
   * java.lang.String objects. The type index is built first if necessary and
   * the result is an unmodifiable view of its key set.
   */
  @Override
  public Set getAllTypes() {
    indexByType();
    Set typeNames = annotsByType.keySet();
    return Collections.unmodifiableSet(typeNames);
  }

  /**
   * Returns the annotations starting at the node with the given ID. This
   * helper hides the fact that an entry of the annotsByStartNode map is
   * either a single Annotation or a Collection of Annotations: a single
   * annotation is wrapped into a new list, a collection is returned as it is
   * stored.
   *
   * @param id the ID of the start node
   * @return the annotations starting at that node, or null if there are none
   */
  @SuppressWarnings("unchecked")
  private final Collection getAnnotsByStartNode(Integer id) {
    Object stored = annotsByStartNode.get(id);
    if(stored instanceof Annotation) {
      // lone annotation: wrap it so callers always deal with a collection
      List single = new ArrayList(2);
      single.add((Annotation)stored);
      return single;
    }
    // either null (no entry for this node) or the stored collection
    return (Collection)stored;
  }

  /**
   * Clones this set by delegating to the superclass implementation; no
   * additional copying of fields is done here.
   *
   * @return a clone of this set.
   * @throws CloneNotSupportedException
   *           if the superclass implementation throws it
   */
  @Override
  public Object clone() throws CloneNotSupportedException {
    Object copy = super.clone();
    return copy;
  }

  /**
   * Unregisters an annotation set listener. The listener vector is never
   * modified in place: a copy without the listener replaces it. Nothing
   * happens if the listener is not registered.
   */
  @Override
  public synchronized void removeAnnotationSetListener(AnnotationSetListener l) {
    if(annotationSetListeners == null || !annotationSetListeners.contains(l)) {
      return;
    }
    @SuppressWarnings("unchecked")
    Vector copy = (Vector)annotationSetListeners.clone();
    copy.removeElement(l);
    annotationSetListeners = copy;
  }

  /**
   * Registers an annotation set listener. The listener vector is never
   * modified in place: a copy containing the new listener replaces it.
   * Nothing changes if the listener is already registered.
   */
  @Override
  public synchronized void addAnnotationSetListener(AnnotationSetListener l) {
    @SuppressWarnings("unchecked")
    Vector copy;
    if(annotationSetListeners == null) {
      copy = new Vector(2);
    } else {
      copy = (Vector)annotationSetListeners.clone();
    }
    if(copy.contains(l)) {
      return;
    }
    copy.addElement(l);
    annotationSetListeners = copy;
  }

  /**
   * Notifies every registered annotation set listener, in registration
   * order, that an annotation has been added. Does nothing when no listener
   * vector exists.
   */
  protected void fireAnnotationAdded(AnnotationSetEvent e) {
    if(annotationSetListeners == null) {
      return;
    }
    // work on the vector that is current at this moment
    Vector snapshot = annotationSetListeners;
    int total = snapshot.size();
    for(int idx = 0; idx < total; idx++) {
      AnnotationSetListener listener =
              (AnnotationSetListener)snapshot.elementAt(idx);
      listener.annotationAdded(e);
    }
  }

  /**
   * Notifies every registered annotation set listener, in registration
   * order, that an annotation has been removed. Does nothing when no
   * listener vector exists.
   */
  protected void fireAnnotationRemoved(AnnotationSetEvent e) {
    if(annotationSetListeners == null) {
      return;
    }
    // work on the vector that is current at this moment
    Vector snapshot = annotationSetListeners;
    int total = snapshot.size();
    for(int idx = 0; idx < total; idx++) {
      AnnotationSetListener listener =
              (AnnotationSetListener)snapshot.elementAt(idx);
      listener.annotationRemoved(e);
    }
  }

  /**
   * Unregisters a GATE listener. The listener vector is never modified in
   * place: a copy without the listener replaces it. Nothing happens if the
   * listener is not registered.
   */
  @Override
  public synchronized void removeGateListener(GateListener l) {
    if(gateListeners == null || !gateListeners.contains(l)) {
      return;
    }
    @SuppressWarnings("unchecked")
    Vector copy = (Vector)gateListeners.clone();
    copy.removeElement(l);
    gateListeners = copy;
  }

  /**
   * Registers a GATE listener. The listener vector is never modified in
   * place: a copy containing the new listener replaces it. Nothing changes
   * if the listener is already registered.
   */
  @Override
  public synchronized void addGateListener(GateListener l) {
    @SuppressWarnings("unchecked")
    Vector copy;
    if(gateListeners == null) {
      copy = new Vector(2);
    } else {
      copy = (Vector)gateListeners.clone();
    }
    if(copy.contains(l)) {
      return;
    }
    copy.addElement(l);
    gateListeners = copy;
  }

  /**
   * Passes the event to every registered GATE listener, in registration
   * order. Does nothing when no listener vector exists.
   */
  protected void fireGateEvent(GateEvent e) {
    if(gateListeners == null) {
      return;
    }
    // work on the vector that is current at this moment
    Vector snapshot = gateListeners;
    int total = snapshot.size();
    for(int idx = 0; idx < total; idx++) {
      GateListener listener = (GateListener)snapshot.elementAt(idx);
      listener.processGateEvent(e);
    }
  }

  /**
   * Custom serialisation. The indices are not written out, as it is probably
   * as fast to recreate them when required; only the name, the document, the
   * annotations (as an array, which avoids casting when deserialising) and
   * the relations are stored as serial fields. Two booleans follow the
   * fields, recording whether the type index and the start-node index
   * existed, so that they can be rebuilt on reading.
   *
   * The annotations field is only used as scratch space while the fields
   * are written; it is reset to null afterwards, also when writing fails.
   *
   * @param out the stream to write to
   * @throws IOException if writing to the stream fails
   */
  private void writeObject(java.io.ObjectOutputStream out) throws IOException {
    ObjectOutputStream.PutField pf = out.putFields();
    pf.put("name", this.name);
    pf.put("doc", this.doc);
    // snapshot the annotations into a typed array
    annotations = (Annotation[])this.annotsById.values().toArray(
            new Annotation[this.annotsById.size()]);
    try {
      pf.put("annotations", this.annotations);
      pf.put("relations", this.relations);
      out.writeFields();
    } finally {
      // never keep the scratch array around, not even if writing failed
      annotations = null;
    }
    // record which indices were present
    boolean isIndexedByType = (this.annotsByType != null);
    boolean isIndexedByStartNode = (this.annotsByStartNode != null);
    out.writeBoolean(isIndexedByType);
    out.writeBoolean(isIndexedByStartNode);
  }

  /**
   * Custom deserialisation, the counterpart of writeObject. Two layouts are
   * understood: the current one, which stores the annotations as an array
   * followed by two booleans telling which indices existed, and an older one
   * which stored the map of annotations by ID and no index flags. After the
   * serial fields are read, the ID map (and the indices, if flagged) are
   * recreated empty and all annotations are added back one by one.
   *
   * @param in the stream to read from
   * @throws IOException if reading fails or neither the annotations array
   *           nor the map by ID is present
   * @throws ClassNotFoundException if a serialised class cannot be found
   */
  private void readObject(java.io.ObjectInputStream in) throws IOException,
          ClassNotFoundException {
    this.longestAnnot = 0L;
    ObjectInputStream.GetField fields = in.readFields();
    this.name = (String)fields.get("name", null);
    this.doc = (DocumentImpl)fields.get("doc", null);
    this.annotations = (Annotation[])fields.get("annotations", null);

    boolean rebuildTypeIndex = false;
    boolean rebuildStartNodeIndex = false;
    if(this.annotations != null) {
      // new style serialised version: the index flags follow the fields
      rebuildTypeIndex = in.readBoolean();
      rebuildStartNodeIndex = in.readBoolean();
    } else {
      // old style serialised version: recover the annotations from the map
      @SuppressWarnings("unchecked")
      Map legacyById = (Map)fields.get("annotsById", null);
      if(legacyById == null) {
        throw new IOException(
                "Invalid serialised data: neither annotations array or map by id"
                        + " are present.");
      }
      annotations = (Annotation[])legacyById.values().toArray(
              new Annotation[]{});
    }

    // start from empty structures, creating only the indices that existed
    this.annotsById = new HashMap(annotations.length);
    if(rebuildTypeIndex) {
      annotsByType = new HashMap(Gate.HASH_STH_SIZE);
    }
    if(rebuildStartNodeIndex) {
      nodesByOffset = new RBTreeMap();
      annotsByStartNode = new HashMap(annotations.length);
    }
    // add all the annotations one by one
    for(Annotation restored : annotations) {
      add(restored);
    }

    this.relations = (RelationSet)fields.get("relations", null);

    // the array was only needed while reading
    annotations = null;
  }
  
  /**
   * Returns the relation set belonging to this annotation set, creating it
   * on first access.
   */
  @Override
  public RelationSet getRelations() {
    if (relations != null) {
      return relations;
    }
    relations = new RelationSet(this);
    return relations;
  }
  
  /**
   * Utility method that replaces the former static singleton member
   * ImmutableAnnotationSet(null,null). Annotation sets with a null document
   * should not be handed out, so the empty set returned here is a new
   * immutable set created with no annotations but pointing to the same
   * document as the set it was created from.
   */
  protected AnnotationSet emptyAS() {
    AnnotationSet empty = new ImmutableAnnotationSetImpl(doc, null);
    return empty;
  }
  
  
} // AnnotationSetImpl