
// Source listing of de.julielab.jcore.utility.JCoReAnnotationTools (artifact: jcore-utilities; Maven / Gradle / Ivy).
/**
* AnnotationTools.java
*
* Copyright (c) 2006, JULIE Lab.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Common Public License v1.0
*
* Author: tomanek
*
* Current version: 1.3
* Since version: 1.0
*
* Creation date: Feb 19, 2006
*
* Tool for creating new UIMA annotation Objects and other annotation related things
*
* //TODO: we may move some functions from JulesTools here...
**/
package de.julielab.jcore.utility;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.Range;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
// import de.julielab.jcore.types.Annotation;
import org.apache.uima.jcas.tcas.Annotation;
public class JCoReAnnotationTools {
/**
 * Creates a new annotation object whose concrete class is named by {@code fullAnnotationClassName}.
 * The class is loaded dynamically and its public constructor taking a single {@link JCas} argument is
 * invoked through reflection.
 *
 * @param aJCas
 *            the JCas that is passed to the annotation's constructor
 * @param fullAnnotationClassName
 *            the fully qualified class name of the annotation to instantiate
 * @return the freshly constructed object, cast to {@link Annotation}
 * @throws ClassNotFoundException
 *             if no class with the given name can be loaded
 * @throws NoSuchMethodException
 *             if the class has no public constructor accepting a single {@link JCas}
 * @throws InstantiationException
 *             if the class cannot be instantiated (e.g. it is abstract)
 * @throws IllegalAccessException
 *             if the constructor is not accessible
 * @throws InvocationTargetException
 *             if the constructor itself throws an exception
 */
public static Annotation getAnnotationByClassName(JCas aJCas, String fullAnnotationClassName)
        throws ClassNotFoundException, SecurityException, NoSuchMethodException, IllegalArgumentException,
        InstantiationException, IllegalAccessException, InvocationTargetException {
    Class<?> annotationClass = Class.forName(fullAnnotationClassName);
    // Look up the (JCas) constructor that JCas cover classes are expected to offer.
    Constructor<?> jcasConstructor = annotationClass.getConstructor(JCas.class);
    return (Annotation) jcasConstructor.newInstance(aJCas);
}
/**
 * Looks up an annotation of the type named by {@code fullAnnotationClassName} whose begin and end offsets
 * are exactly {@code startOffset} and {@code endOffset}.
 *
 * @param aJCas
 *            the CAS to search in
 * @param fullAnnotationClassName
 *            the fully qualified class name of the annotation type to look for
 * @param startOffset
 *            the required begin offset
 * @param endOffset
 *            the required end offset
 * @return the first annotation in the type's annotation index with exactly these offsets, or
 *         {@code null} if there is none
 */
public static Annotation getAnnotationAtOffset(JCas aJCas, String fullAnnotationClassName, int startOffset,
        int endOffset) throws SecurityException, IllegalArgumentException, ClassNotFoundException,
        NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
    // An instance is created via reflection solely to learn the index ID of the requested type.
    Annotation prototype = getAnnotationByClassName(aJCas, fullAnnotationClassName);
    JFSIndexRepository indexRepository = aJCas.getJFSIndexRepository();
    for (Iterator candidates = indexRepository.getAnnotationIndex(prototype.getTypeIndexID())
            .iterator(); candidates.hasNext();) {
        Annotation candidate = (Annotation) candidates.next();
        if (candidate.getBegin() != startOffset || candidate.getEnd() != endOffset) {
            continue;
        }
        return candidate;
    }
    return null;
}
/**
 * Returns the first annotation of type {@code cls} found in the JCas' annotation index that has exactly
 * the same begin and end offsets as {@code focusAnnotation}.
 *
 * The search starts at the first indexed annotation whose begin offset is not lower than the begin offset
 * of {@code focusAnnotation} and proceeds to the right. {@code focusAnnotation} itself is not excluded
 * from the search, so it may be returned if it is indexed and an instance of {@code cls}.
 *
 * @param <T>
 *            the annotation type to look for
 * @param aJCas
 *            the CAS to search in
 * @param focusAnnotation
 *            the annotation whose offsets must be matched
 * @param cls
 *            the class of the annotation to return
 * @return the first matching annotation, or {@code null} if there is none
 * @throws IllegalArgumentException
 *             if the index iterator is invalid after having been moved to {@code focusAnnotation}
 */
public static <T extends Annotation> T getAnnotationAtMatchingOffsets(JCas aJCas, Annotation focusAnnotation,
        Class<T> cls) {
    FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
    cursor.moveTo(focusAnnotation);
    if (!cursor.isValid()) {
        throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: "
                + focusAnnotation);
    }
    // Rewind past every annotation that begins at or after the focus annotation's begin offset ...
    while (cursor.isValid() && cursor.get().getBegin() >= focusAnnotation.getBegin()) {
        cursor.moveToPrevious();
    }
    // ... and step back onto the first annotation with begin >= focus begin.
    if (!cursor.isValid()) {
        cursor.moveToFirst();
    } else {
        cursor.moveToNext();
    }
    // Now that we have our starting point, we go to the right until we find an annotation of the correct
    // type and the same offsets as focusAnnotation.
    Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
    while (cursor.isValid()) {
        Annotation currentAnnotation = cursor.get();
        if (currentAnnotation.getBegin() > focusAnnotation.getEnd()) {
            break;
        }
        if (cls.isInstance(currentAnnotation)) {
            Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                    currentAnnotation.getEnd());
            if (focusRange.equals(currentRange)) {
                return cls.cast(currentAnnotation);
            }
        }
        cursor.moveToNext();
    }
    return null;
}
/**
 * Returns an annotation of the type named by {@code fullAnnotationClassName} that overlaps the span
 * given by {@code startOffset} and {@code endOffset}. An annotation qualifies if it covers the whole
 * span, lies completely inside the span, or has the span's start or end offset strictly inside it.
 *
 * @param aJCas
 *            the CAS to search in
 * @param fullAnnotationClassName
 *            the fully qualified class name of the annotation type to look for
 * @param startOffset
 *            begin of the span to test against
 * @param endOffset
 *            end of the span to test against
 * @return the first annotation in the type's annotation index that overlaps the span, or {@code null}
 *         if there is none
 */
public static Annotation getOverlappingAnnotation(JCas aJCas, String fullAnnotationClassName, int startOffset,
        int endOffset) throws SecurityException, IllegalArgumentException, ClassNotFoundException,
        NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
    // An instance is created via reflection solely to learn the index ID of the requested type.
    Annotation prototype = getAnnotationByClassName(aJCas, fullAnnotationClassName);
    JFSIndexRepository indexRepository = aJCas.getJFSIndexRepository();
    for (Iterator candidates = indexRepository.getAnnotationIndex(prototype.getTypeIndexID())
            .iterator(); candidates.hasNext();) {
        Annotation candidate = (Annotation) candidates.next();
        int begin = candidate.getBegin();
        int end = candidate.getEnd();
        boolean coversSpan = begin <= startOffset && end >= endOffset;
        boolean insideSpan = begin >= startOffset && end <= endOffset;
        boolean straddlesSpanEnd = begin < endOffset && end > endOffset;
        boolean straddlesSpanStart = begin < startOffset && end > startOffset;
        if (coversSpan || insideSpan || straddlesSpanEnd || straddlesSpanStart) {
            return candidate;
        }
    }
    return null;
}
/**
 * Returns an annotation of the type named by {@code fullAnnotationClassName} that has the given
 * {@code endOffset} or the given {@code startOffset} strictly inside it, i.e. strictly between its
 * begin and end offsets.
 *
 * @param aJCas
 *            the CAS to search in
 * @param fullAnnotationClassName
 *            the fully qualified class name of the annotation type to look for
 * @param startOffset
 *            begin of the span to test against
 * @param endOffset
 *            end of the span to test against
 * @return the first annotation in the type's annotation index that straddles one of the two offsets,
 *         or {@code null} if there is none
 */
public static Annotation getPartiallyOverlappingAnnotation(JCas aJCas, String fullAnnotationClassName,
        int startOffset, int endOffset) throws SecurityException, IllegalArgumentException, ClassNotFoundException,
        NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
    // An instance is created via reflection solely to learn the index ID of the requested type.
    Annotation prototype = getAnnotationByClassName(aJCas, fullAnnotationClassName);
    JFSIndexRepository indexRepository = aJCas.getJFSIndexRepository();
    for (Iterator candidates = indexRepository.getAnnotationIndex(prototype.getTypeIndexID())
            .iterator(); candidates.hasNext();) {
        Annotation candidate = (Annotation) candidates.next();
        int begin = candidate.getBegin();
        int end = candidate.getEnd();
        boolean straddlesSpanEnd = begin < endOffset && end > endOffset;
        boolean straddlesSpanStart = begin < startOffset && end > startOffset;
        if (straddlesSpanEnd || straddlesSpanStart) {
            return candidate;
        }
    }
    return null;
}
/**
 * Returns the leftmost annotation of type {@code cls} that overlaps {@code focusAnnotation}. That is,
 * if multiple annotations of type {@code cls} overlap with {@code focusAnnotation}, the one positioned
 * first in the annotation index (lowest begin offset) will be chosen.
 *
 * The two annotations may overlap in any way (partial, nested, inclusion, exact match). The scan starts
 * at the very beginning of the annotation index, so the runtime is O(n) with n being the number of
 * annotations in the index. {@code focusAnnotation} itself is not excluded from the search.
 *
 * NOTE(review): the final check uses {@code Range.isOverlappedBy}, which treats ranges as inclusive, so a
 * candidate beginning exactly at the focus annotation's end offset is accepted - confirm this is intended.
 *
 * TODO: A start offset parameter could be introduced from where to start looking. This way, when
 * iterating over a number of different focusAnnotations in ascending order, one would have only to check
 * from focusAnnotation to focusAnnotation and not always from the very beginning of the annotation index.
 * Same thing for getIncludingAnnotation().
 *
 * @param <T>
 *            the annotation type to look for
 * @param aJCas
 *            the CAS to search in
 * @param focusAnnotation
 *            the annotation for which an overlapping annotation is sought
 * @param cls
 *            the class of the annotation to return
 * @return the leftmost annotation of type {@code cls} that overlaps {@code focusAnnotation}, or
 *         {@code null} if there is none (including the case of an empty annotation index)
 */
public static <T extends Annotation> T getPartiallyOverlappingAnnotation(JCas aJCas, Annotation focusAnnotation,
        Class<T> cls) {
    FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
    // Annotations are sorted by begin offset and may be arbitrarily long. Thus we just have to start
    // from the beginning.
    cursor.moveToFirst();
    while (cursor.isValid()) {
        Annotation currentAnnotation = cursor.get();
        // Skip everything of the wrong type or ending at or before the focus annotation's begin.
        if (cls.isInstance(currentAnnotation) && currentAnnotation.getEnd() > focusAnnotation.getBegin()) {
            // This is the only candidate that is examined: the result is decided by whether it
            // overlaps, and the scan does not continue past it.
            Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                    currentAnnotation.getEnd());
            Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
            return currentRange.isOverlappedBy(focusRange) ? cls.cast(currentAnnotation) : null;
        }
        cursor.moveToNext();
    }
    // Index exhausted (or empty) without finding a candidate; previously an empty index caused a
    // NullPointerException here.
    return null;
}
/**
 * Returns, in index order (ascending begin offset), all annotations of type {@code cls} that are
 * completely included in {@code focusAnnotation} - perhaps sharing its begin and/or end offset.
 * {@code focusAnnotation} itself is not excluded, so it is part of the result if it is indexed and an
 * instance of {@code cls}.
 *
 * @param <T>
 *            the annotation type to collect
 * @param aJCas
 *            the CAS to search in
 * @param focusAnnotation
 *            the annotation whose span must contain the returned annotations
 * @param cls
 *            the class of the annotations to return
 * @return the included annotations; an empty list if there are none
 * @throws IllegalArgumentException
 *             if the index iterator is invalid after having been moved to {@code focusAnnotation}
 */
public static <T extends Annotation> List<T> getIncludedAnnotations(JCas aJCas, Annotation focusAnnotation,
        Class<T> cls) {
    FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
    cursor.moveTo(focusAnnotation);
    if (!cursor.isValid()) {
        throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: "
                + focusAnnotation);
    }
    // The annotations are sorted by begin offset. So go to the first annotation with a lower begin
    // offset compared to the focusAnnotation ...
    while (cursor.isValid() && cursor.get().getBegin() >= focusAnnotation.getBegin()) {
        cursor.moveToPrevious();
    }
    // ... and then one step to the right again, onto the first annotation with begin >= focus begin.
    if (!cursor.isValid()) {
        cursor.moveToFirst();
    } else {
        cursor.moveToNext();
    }
    // Now that we have our starting point, we go to the right as long as there is a possibility to
    // still find annotations included in the focusAnnotation, i.e. as long as the current begin offset
    // is still lower than (or equal to, for zero-length annotations) the end offset of focusAnnotation.
    Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
    List<T> includedAnnotations = new ArrayList<>();
    while (cursor.isValid()) {
        Annotation currentAnnotation = cursor.get();
        if (currentAnnotation.getBegin() > focusAnnotation.getEnd()) {
            break;
        }
        if (cls.isInstance(currentAnnotation)) {
            Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                    currentAnnotation.getEnd());
            if (focusRange.containsRange(currentRange)) {
                includedAnnotations.add(cls.cast(currentAnnotation));
            }
        }
        cursor.moveToNext();
    }
    return includedAnnotations;
}
/**
 * Returns the leftmost annotation of type {@code cls} that completely includes {@code focusAnnotation}.
 * That is, if multiple annotations of type {@code cls} include {@code focusAnnotation}, the one
 * positioned first in the annotation index (lowest begin offset) will be chosen. Identical offsets count
 * as inclusion, and {@code focusAnnotation} itself is not excluded from the search.
 *
 * The scan starts at the very beginning of the annotation index, so the runtime is O(n) with n being the
 * number of annotations in the index.
 *
 * TODO: A start offset parameter could be introduced from where to start looking. This way, when
 * iterating over a number of different focusAnnotations in ascending order, one would have only to check
 * from focusAnnotation to focusAnnotation and not always from the very beginning of the annotation index.
 * Same thing for getPartiallyOverlappingAnnotation().
 *
 * @param <T>
 *            the annotation type to look for
 * @param aJCas
 *            the CAS to search in
 * @param focusAnnotation
 *            the annotation that must be included by the returned annotation
 * @param cls
 *            the class of the annotation to return
 * @return the leftmost annotation of type {@code cls} that completely includes {@code focusAnnotation},
 *         or {@code null} if there is none (including the case of an empty annotation index)
 */
public static <T extends Annotation> T getIncludingAnnotation(JCas aJCas, Annotation focusAnnotation,
        Class<T> cls) {
    FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
    // Annotations are sorted by begin offset and may be arbitrarily long. Thus we just have to start
    // from the beginning.
    cursor.moveToFirst();
    while (cursor.isValid()) {
        Annotation currentAnnotation = cursor.get();
        // Skip everything of the wrong type or ending before the focus annotation ends.
        if (cls.isInstance(currentAnnotation) && currentAnnotation.getEnd() >= focusAnnotation.getEnd()) {
            // This is the only candidate that is examined: the result is decided by whether it
            // contains the focus range, and the scan does not continue past it.
            Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                    currentAnnotation.getEnd());
            Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
            return currentRange.containsRange(focusRange) ? cls.cast(currentAnnotation) : null;
        }
        cursor.moveToNext();
    }
    // Index exhausted (or empty) without finding a candidate; previously an empty index caused a
    // NullPointerException here.
    return null;
}
/**
 * Returns the nearest annotation of class {@code cls} to {@code focusAnnotation}, i.e. the one (or just
 * one, if multiple exist) with the highest begin offset that completely includes {@code focusAnnotation}.
 * Identical offsets count as inclusion, and {@code focusAnnotation} itself is not excluded from the
 * search.
 *
 * The search walks leftwards from the last indexed annotation whose begin offset does not exceed the
 * begin offset of {@code focusAnnotation}. This method has nice performance properties when it is known
 * that the annotation looked for is near, e.g. finding the nearest token or sentence.
 *
 * @param <T>
 *            the annotation type to look for
 * @param aJCas
 *            the CAS to search in
 * @param focusAnnotation
 *            the annotation that must be included by the returned annotation
 * @param cls
 *            the class of the annotation to return
 * @return the nearest annotation of type {@code cls} that completely includes {@code focusAnnotation},
 *         or {@code null} if there is none
 * @throws IllegalArgumentException
 *             if the freshly created index iterator is not valid
 */
public static <T extends Annotation> T getNearestIncludingAnnotation(JCas aJCas, Annotation focusAnnotation,
        Class<T> cls) {
    FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
    // NOTE(review): this check runs before the cursor is moved to focusAnnotation, so it reflects the
    // iterator's initial state (presumably only an empty index) rather than whether focusAnnotation
    // is indexed. Kept unchanged for backward compatibility.
    if (!cursor.isValid()) {
        throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: "
                + focusAnnotation);
    }
    // The annotations are sorted by begin offset. So go to the first annotation with a larger begin
    // offset compared to the focusAnnotation. Afterwards we search for an including annotation to the
    // left.
    cursor.moveTo(focusAnnotation);
    while (cursor.isValid() && cursor.get().getBegin() <= focusAnnotation.getBegin()) {
        cursor.moveToNext();
    }
    if (!cursor.isValid()) {
        // We ran off the right end, so no annotation begins after the focus annotation: the leftward
        // search must start at the very last annotation. Re-positioning at focusAnnotation would skip
        // annotations sorted after it that share its offsets.
        cursor.moveToLast();
    } else {
        cursor.moveToPrevious();
    }
    // Now that we have our starting point, we go to the left until we find the first annotation of
    // correct type completely including the focus annotation.
    Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
    while (cursor.isValid()) {
        Annotation currentAnnotation = cursor.get();
        if (cls.isInstance(currentAnnotation)) {
            Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
                    currentAnnotation.getEnd());
            if (currentRange.containsRange(focusRange)) {
                return cls.cast(currentAnnotation);
            }
        }
        cursor.moveToPrevious();
    }
    return null;
}
/**
 * Returns the nearest annotations of class {@code cls} overlapping {@code focusAnnotation}. The index is
 * walked leftwards, starting at the last annotation whose begin offset does not exceed the end offset of
 * {@code focusAnnotation}; every annotation of type {@code cls} that overlaps the focus annotation
 * (begin strictly before the focus end and end strictly after the focus begin) is collected. The walk
 * stops after the first collected annotation that begins strictly before {@code focusAnnotation}, so
 * overlapping annotations positioned further to the left in the index are not returned.
 *
 * This method has nice performance properties when it is known that the annotations looked for are near,
 * e.g. finding overlapping tokens. {@code focusAnnotation} itself is not excluded from the search.
 *
 * @param <T>
 *            the annotation type to collect
 * @param aJCas
 *            the CAS to search in
 * @param focusAnnotation
 *            the annotation for which overlapping annotations are sought
 * @param cls
 *            the class of the annotations to return
 * @return the collected overlapping annotations in index order (ascending begin offset); an empty list
 *         if there are none
 * @throws IllegalArgumentException
 *             if the index iterator is invalid after having been moved to {@code focusAnnotation}
 */
public static <T extends Annotation> List<T> getNearestOverlappingAnnotations(JCas aJCas,
        Annotation focusAnnotation, Class<T> cls) {
    FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
    cursor.moveTo(focusAnnotation);
    if (!cursor.isValid()) {
        throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: "
                + focusAnnotation);
    }
    // The annotations are sorted by begin offset. So go to the first annotation with a larger begin
    // offset compared to the focusAnnotation's end offset since then there won't be any more
    // overlapping annotations to the right.
    while (cursor.isValid() && cursor.get().getBegin() <= focusAnnotation.getEnd()) {
        cursor.moveToNext();
    }
    if (!cursor.isValid()) {
        cursor.moveToLast();
    } else {
        cursor.moveToPrevious();
    }
    List<T> overlappingAnnotations = new ArrayList<>();
    while (cursor.isValid()) {
        Annotation currentAnnotation = cursor.get();
        boolean overlapsFocus = currentAnnotation.getBegin() < focusAnnotation.getEnd()
                && currentAnnotation.getEnd() > focusAnnotation.getBegin();
        if (cls.isInstance(currentAnnotation) && overlapsFocus) {
            overlappingAnnotations.add(cls.cast(currentAnnotation));
            // As soon as we have an overlapping annotation of the correct type that begins strictly
            // before the begin offset of the focusAnnotation, we are finished.
            if (currentAnnotation.getBegin() < focusAnnotation.getBegin()) {
                break;
            }
        }
        cursor.moveToPrevious();
    }
    // Annotations were collected right-to-left; order by ascending begin offsets.
    Collections.reverse(overlappingAnnotations);
    return overlappingAnnotations;
}
/**
 * Returns the annotation of type {@code cls} overlapping {@code focusAnnotation} that is positioned last
 * in the annotation index, i.e. the rightmost overlapping annotation in index order. An annotation
 * overlaps if it begins strictly before the focus end and ends strictly after the focus begin.
 * {@code focusAnnotation} itself is not excluded from the search.
 *
 * This method is very similar to {@link #getNearestOverlappingAnnotations(JCas, Annotation, Class)}.
 * Actually, the last result element of {@link #getNearestOverlappingAnnotations(JCas, Annotation, Class)}
 * equals the returned annotation from this method.
 *
 * @param <T>
 *            the annotation type to look for
 * @param aJCas
 *            the CAS to search in
 * @param focusAnnotation
 *            the annotation for which an overlapping annotation is sought
 * @param cls
 *            the class of the annotation to return
 * @return the last overlapping annotation of type {@code cls} in index order, or {@code null} if there
 *         is none
 * @throws IllegalArgumentException
 *             if the index iterator is invalid after having been moved to {@code focusAnnotation}
 */
public static <T extends Annotation> T getLastOverlappingAnnotation(JCas aJCas, Annotation focusAnnotation,
        Class<T> cls) {
    FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();
    cursor.moveTo(focusAnnotation);
    if (!cursor.isValid()) {
        throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: "
                + focusAnnotation);
    }
    // The annotations are sorted by begin offset. So go to the first annotation with a larger begin
    // offset compared to the focusAnnotation's end offset since then there won't be any more
    // overlapping annotations to the right.
    while (cursor.isValid() && cursor.get().getBegin() <= focusAnnotation.getEnd()) {
        cursor.moveToNext();
    }
    if (!cursor.isValid()) {
        cursor.moveToLast();
    } else {
        cursor.moveToPrevious();
    }
    // Walk leftwards; the first overlapping annotation of the right type is the result.
    while (cursor.isValid()) {
        Annotation currentAnnotation = cursor.get();
        boolean overlapsFocus = currentAnnotation.getBegin() < focusAnnotation.getEnd()
                && currentAnnotation.getEnd() > focusAnnotation.getBegin();
        if (cls.isInstance(currentAnnotation) && overlapsFocus) {
            return cls.cast(currentAnnotation);
        }
        cursor.moveToPrevious();
    }
    return null;
}
}