All downloads are free. The search and download functionality uses the official Maven repository.

de.julielab.jcore.utility.JCoReAnnotationTools Maven / Gradle / Ivy

The newest version!
/** 
 * AnnotationTools.java
 * 
 * Copyright (c) 2006, JULIE Lab. 
 * All rights reserved. This program and the accompanying materials 
 * are made available under the terms of the Common Public License v1.0 
 * 
 * Author: tomanek
 * 
 * Current version: 1.3	
 * Since version:   1.0
 *
 * Creation date: Feb 19, 2006 
 * 
 * Tool for creating new UIMA annotation Objects and other annotation related things
 * 
 *  //TODO: we may move some functions from JulesTools here...
 **/
package de.julielab.jcore.utility;

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.lang3.Range;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
// import de.julielab.jcore.types.Annotation;
import org.apache.uima.jcas.tcas.Annotation;

public class JCoReAnnotationTools {

	/**
	 * Creates a new annotation object of the class named by {@code fullAnnotationClassName}. The class is loaded
	 * dynamically and instantiated by reflection through its public constructor that takes a single {@link JCas}
	 * argument.
	 * 
	 * @param aJCas
	 *            the JCas that is passed to the annotation's constructor
	 * @param fullAnnotationClassName
	 *            the fully qualified class name of the annotation to create
	 * @return the newly created annotation object
	 * @throws ClassNotFoundException
	 *             if no class with the given name can be loaded
	 * @throws NoSuchMethodException
	 *             if the class has no public constructor taking a single {@link JCas}
	 * @throws InvocationTargetException
	 *             if the constructor itself throws an exception
	 * @throws ClassCastException
	 *             if the created object is not an {@link Annotation}
	 */
	public static Annotation getAnnotationByClassName(JCas aJCas, String fullAnnotationClassName)
			throws ClassNotFoundException, SecurityException, NoSuchMethodException, IllegalArgumentException,
			InstantiationException, IllegalAccessException, InvocationTargetException {

		// Wildcard types instead of raw types: the concrete class is only known at runtime.
		Class<?> annotationClass = Class.forName(fullAnnotationClassName);
		Constructor<?> jcasConstructor = annotationClass.getConstructor(JCas.class);
		return (Annotation) jcasConstructor.newInstance(aJCas);
	}

	/**
	 * Looks up an annotation of the class named by {@code fullAnnotationClassName} that spans exactly the given
	 * offsets.
	 * <p>
	 * To obtain the type's index ID, a new annotation object of the requested class is created via
	 * {@link #getAnnotationByClassName(JCas, String)}; the checked exceptions declared here stem from that call.
	 * </p>
	 * 
	 * @param aJCas
	 *            the JCas to search in
	 * @param fullAnnotationClassName
	 *            the fully qualified class name of the annotation type to look for
	 * @param startOffset
	 *            the begin offset the annotation must have
	 * @param endOffset
	 *            the end offset the annotation must have
	 * @return the first annotation of the given type, in iteration order of the type's annotation index, whose begin
	 *         and end equal the given offsets, or {@code null} if there is none
	 */
	public static Annotation getAnnotationAtOffset(JCas aJCas, String fullAnnotationClassName, int startOffset,
			int endOffset) throws SecurityException, IllegalArgumentException, ClassNotFoundException,
			NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {

		Annotation prototype = getAnnotationByClassName(aJCas, fullAnnotationClassName);
		JFSIndexRepository indexRepository = aJCas.getJFSIndexRepository();
		for (Iterator<?> candidates = indexRepository.getAnnotationIndex(prototype.getTypeIndexID())
				.iterator(); candidates.hasNext();) {
			Annotation candidate = (Annotation) candidates.next();
			boolean sameBegin = candidate.getBegin() == startOffset;
			if (sameBegin && candidate.getEnd() == endOffset) {
				return candidate;
			}
		}
		return null;
	}

	/**
	 * Returns the first annotation of type {@code cls} that has exactly the same begin and end offsets as
	 * {@code focusAnnotation}.
	 * <p>
	 * The search starts at the first annotation in the index whose begin offset is not lower than the begin offset
	 * of {@code focusAnnotation} and proceeds to the right until the begin offsets exceed the end offset of
	 * {@code focusAnnotation}. If {@code focusAnnotation} itself is an instance of {@code cls} and is encountered
	 * first, it is the annotation that is returned.
	 * </p>
	 * 
	 * @param <T>
	 *            the annotation type to look for
	 * @param aJCas
	 *            the JCas whose annotation index is searched
	 * @param focusAnnotation
	 *            the annotation whose offsets must be matched
	 * @param cls
	 *            the class of the annotation to return
	 * @return the first annotation of type {@code cls} with the same offsets as {@code focusAnnotation}, or
	 *         {@code null} if there is none
	 * @throws IllegalArgumentException
	 *             if the index iterator is not valid after being moved to {@code focusAnnotation}
	 */
	public static <T extends Annotation> T getAnnotationAtMatchingOffsets(JCas aJCas, Annotation focusAnnotation,
			Class<T> cls) {
		FSIterator<Annotation> cursor = aJCas.getAnnotationIndex().iterator();

		cursor.moveTo(focusAnnotation);

		if (!cursor.isValid()) {
			throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: "
					+ focusAnnotation);
		}

		// The annotations are sorted by begin offset. Rewind to the first annotation that does not begin before
		// the focus annotation.
		while (cursor.isValid() && cursor.get().getBegin() >= focusAnnotation.getBegin()) {
			cursor.moveToPrevious();
		}
		if (!cursor.isValid()) {
			cursor.moveToFirst();
		} else {
			cursor.moveToNext();
		}

		// Now that we have our starting point, we go to the right until we find an annotation of the correct type and
		// the same offsets as focusAnnotation. The focus range never changes, so it is built only once.
		Range<Integer> focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd());
		while (cursor.isValid()) {
			Annotation currentAnnotation = cursor.get();
			if (currentAnnotation.getBegin() > focusAnnotation.getEnd()) {
				break;
			}
			if (cls.isInstance(currentAnnotation)) {
				Range<Integer> currentRange = Range.between(currentAnnotation.getBegin(),
						currentAnnotation.getEnd());
				if (focusRange.equals(currentRange)) {
					return cls.cast(currentAnnotation);
				}
			}
			cursor.moveToNext();
		}
		return null;
	}

	/**
	 * Returns an annotation of the class named by {@code fullAnnotationClassName} that overlaps the given span in
	 * any of these ways: it covers the span, it lies within the span, or it crosses the span's end or start offset.
	 * <p>
	 * To obtain the type's index ID, a new annotation object of the requested class is created via
	 * {@link #getAnnotationByClassName(JCas, String)}; the checked exceptions declared here stem from that call.
	 * </p>
	 * 
	 * @param aJCas
	 *            the JCas to search in
	 * @param fullAnnotationClassName
	 *            the fully qualified class name of the annotation type to look for
	 * @param startOffset
	 *            the begin offset of the span
	 * @param endOffset
	 *            the end offset of the span
	 * @return the first annotation of the given type, in iteration order of the type's annotation index, that
	 *         overlaps the span as described above, or {@code null} if there is none
	 */
	public static Annotation getOverlappingAnnotation(JCas aJCas, String fullAnnotationClassName, int startOffset,
			int endOffset) throws SecurityException, IllegalArgumentException, ClassNotFoundException,
			NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {

		Annotation prototype = getAnnotationByClassName(aJCas, fullAnnotationClassName);
		JFSIndexRepository indexRepository = aJCas.getJFSIndexRepository();
		for (Iterator<?> candidates = indexRepository.getAnnotationIndex(prototype.getTypeIndexID())
				.iterator(); candidates.hasNext();) {
			Annotation candidate = (Annotation) candidates.next();
			int begin = candidate.getBegin();
			int end = candidate.getEnd();

			boolean coversSpan = begin <= startOffset && end >= endOffset;
			boolean liesWithinSpan = begin >= startOffset && end <= endOffset;
			boolean crossesSpanEnd = begin < endOffset && end > endOffset;
			boolean crossesSpanStart = begin < startOffset && end > startOffset;

			if (coversSpan || liesWithinSpan || crossesSpanEnd || crossesSpanStart) {
				return candidate;
			}
		}
		return null;
	}

	/**
	 * Returns an annotation of the class named by {@code fullAnnotationClassName} that crosses one of the borders
	 * of the given span, i.e. that begins before and ends after the span's end offset or begins before and ends
	 * after the span's start offset.
	 * <p>
	 * To obtain the type's index ID, a new annotation object of the requested class is created via
	 * {@link #getAnnotationByClassName(JCas, String)}; the checked exceptions declared here stem from that call.
	 * </p>
	 * 
	 * @param aJCas
	 *            the JCas to search in
	 * @param fullAnnotationClassName
	 *            the fully qualified class name of the annotation type to look for
	 * @param startOffset
	 *            the begin offset of the span
	 * @param endOffset
	 *            the end offset of the span
	 * @return the first annotation of the given type, in iteration order of the type's annotation index, that
	 *         crosses the span's end or start offset, or {@code null} if there is none
	 */
	public static Annotation getPartiallyOverlappingAnnotation(JCas aJCas, String fullAnnotationClassName,
			int startOffset, int endOffset) throws SecurityException, IllegalArgumentException, ClassNotFoundException,
			NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {

		Annotation prototype = getAnnotationByClassName(aJCas, fullAnnotationClassName);
		JFSIndexRepository indexRepository = aJCas.getJFSIndexRepository();
		for (Iterator<?> candidates = indexRepository.getAnnotationIndex(prototype.getTypeIndexID())
				.iterator(); candidates.hasNext();) {
			Annotation candidate = (Annotation) candidates.next();
			int begin = candidate.getBegin();
			int end = candidate.getEnd();

			boolean crossesSpanEnd = begin < endOffset && end > endOffset;
			boolean crossesSpanStart = begin < startOffset && end > startOffset;

			if (crossesSpanEnd || crossesSpanStart) {
				return candidate;
			}
		}
		return null;
	}

	/**
	 * Returns the leftmost annotation of type cls that overlaps focusAnnotation. That is, if multiple
	 * annotations of type cls overlap with focusAnnotation, the one with the lowest begin offset will
	 * be chosen.
	 * 

* The two annotations may overlap in any way (partial, nested, inclusion, exact match). This algorithm has * O(n) runtime with n being the number of annotations in the annotation index. *

* * *

* TODO: A start offset parameter could be introduced from where to start looking. This way, when iterating over a * number of different focusAnnotations in ascending order, one would have only to check from focusAnnotation to * focusAnnotation and not always from the very beginning of the annotation index. Same thing for * getIncludingAnnotation(). *

* * @param aJCas * @param focusAnnotation * @param cls * @return the leftmost annotation of type cls that overlaps focusAnnotation. */ @SuppressWarnings("unchecked") public static T getPartiallyOverlappingAnnotation(JCas aJCas, Annotation focusAnnotation, Class cls) { FSIterator cursor = aJCas.getAnnotationIndex().iterator(); // Annotations are sorted by begin offset and may be arbitrarily long. Thus we just have to start from the // beginning. cursor.moveToFirst(); // Now go to the right as long as we don't yet overlap with the focus annotation, then stop. Annotation currentAnnotation = null; while (cursor.isValid() && ((currentAnnotation = cursor.get()).getEnd() <= focusAnnotation.getBegin() || !cls .isInstance(currentAnnotation))) { cursor.moveToNext(); } // Check whether we have found an overlapping annotation. Range currentRange = Range.between(currentAnnotation.getBegin(), currentAnnotation.getEnd()); Range focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd()); if (cursor.isValid() && cls.isInstance(currentAnnotation) && currentRange.isOverlappedBy(focusRange)) return (T) cursor.get(); return null; } /** * Returns, in ascending order, all annotations of type cls that are completely included - perhaps with * having the same begin and/or end as the focusAnnotation - in focusAnnotation. * * @param aJCas * @param focusAnnotation * @param cls * @return */ @SuppressWarnings("unchecked") public static List getIncludedAnnotations(JCas aJCas, Annotation focusAnnotation, Class cls) { FSIterator cursor = aJCas.getAnnotationIndex().iterator(); // for debugging: print out absolutely all annotations // cursor.moveToFirst(); // while (cursor.isValid()) { // System.out.println(cursor.get()); // cursor.moveToNext(); // } cursor.moveTo(focusAnnotation); if (!cursor.isValid()) throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: " + focusAnnotation); // The annotations are sorted by begin offset. 
So go to the first annotation with a lower begin offset compared // to the focusAnnotation. while (cursor.isValid() && cursor.get().getBegin() >= focusAnnotation.getBegin()) { cursor.moveToPrevious(); } if (!cursor.isValid()) cursor.moveToFirst(); else cursor.moveToNext(); // Now that we have our starting point, we go to the right as long as there is a possibility to still find // annotations included in the focusAnnotation, i.e. as long the current begin offset is still lower (or equal // for the weird case of zero-length-annotations) than the // end offset of the focusAnnotation Annotation currentAnnotation = null; List includedAnnotations = new ArrayList<>(); while (cursor.isValid() && (currentAnnotation = cursor.get()).getBegin() <= focusAnnotation.getEnd()) { if (!cls.isInstance(currentAnnotation)) { cursor.moveToNext(); continue; } Range currentRange = Range.between(currentAnnotation.getBegin(), currentAnnotation.getEnd()); Range focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd()); if (cursor.isValid() && cls.isInstance(currentAnnotation) && focusRange.containsRange(currentRange)) includedAnnotations.add((T) currentAnnotation); cursor.moveToNext(); } return includedAnnotations; } /** * Returns the leftmost annotation of type cls that completely includes focusAnnotation. That is, * if multiple annotations of type cls include focusAnnotation, the one with the lowest begin * offset will be chosen. *

* This algorithm has O(n) runtime with n being the number of annotations in the annotation index. *

*

* TODO: A start offset parameter could be introduced from where to start looking. This way, when iterating over a * number of different focusAnnotations in ascending order, one would have only to check from focusAnnotation to * focusAnnotation and not always from the very beginning of the annotation index. Same thing for * getPartiallyOverlappingAnnotation(). *

* * @param aJCas * @param focusAnnotation * @param cls * @return the leftmost annotation of type cls that completely includes focusAnnotation. */ @SuppressWarnings("unchecked") public static T getIncludingAnnotation(JCas aJCas, Annotation focusAnnotation, Class cls) { FSIterator cursor = aJCas.getAnnotationIndex().iterator(); // Annotations are sorted by begin offset and may be arbitrarily long. Thus we just have to start from the // beginning. cursor.moveToFirst(); // Now go to the right as long as we don't yet overlap with the focus annotation, then stop. Annotation currentAnnotation = null; while (cursor.isValid() && ((currentAnnotation = cursor.get()).getEnd() < focusAnnotation.getEnd() || !cls .isInstance(currentAnnotation))) { cursor.moveToNext(); } // Check whether we have found an overlapping annotation. Range currentRange = Range.between(currentAnnotation.getBegin(), currentAnnotation.getEnd()); Range focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd()); if (cursor.isValid() && cls.isInstance(currentAnnotation) && currentRange.containsRange(focusRange)) return (T) cursor.get(); return null; } /** * Returns the nearest annotation of class cls to focusAnnotation, i.e. the one (or just one, if * multiple exist) with the highest start-offset that completely overlaps focusAnnotation. *

* This method has nice performance properties when it is known that the annotation looked for is near, e.g. finding * the nearest token or sentence. *

* * @param aJCas * @param focusAnnotation * @param cls * @return the leftmost annotation of type cls that completely includes focusAnnotation. */ @SuppressWarnings("unchecked") public static T getNearestIncludingAnnotation(JCas aJCas, Annotation focusAnnotation, Class cls) { FSIterator cursor = aJCas.getAnnotationIndex().iterator(); if (!cursor.isValid()) throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: " + focusAnnotation); // The annotations are sorted by begin offset. So go to the first annotation with a larger begin offset compared // to the focusAnnotation. Afterwards we we search for an including annotation to the left. cursor.moveTo(focusAnnotation); while (cursor.isValid() && cursor.get().getBegin() <= focusAnnotation.getBegin()) { cursor.moveToNext(); } if (!cursor.isValid()) cursor.moveTo(focusAnnotation); else cursor.moveToPrevious(); // Now that we have our starting point, we go to the left until we find the first annotation of correct type // completely overlapping the focus annotation. while (cursor.isValid()) { Annotation currentAnnotation = cursor.get(); if (!cls.isInstance(currentAnnotation)) { cursor.moveToPrevious(); continue; } Range currentRange = Range.between(currentAnnotation.getBegin(), currentAnnotation.getEnd()); Range focusRange = Range.between(focusAnnotation.getBegin(), focusAnnotation.getEnd()); if (cursor.isValid() && cls.isInstance(currentAnnotation) && currentRange.containsRange(focusRange)) return (T) currentAnnotation; cursor.moveToPrevious(); } return null; } /** * Returns the nearest annotations of class cls to focusAnnotation, i.e. all annotations * overlapping focusAnnotation where between the leftest returned annotation and the next returned * annotation (that is, returned offsets 0 and 1, if there are multiple returned annotations) there is no other * annotation of type cls. *

* This method has nice performance properties when it is known that the annotation looked for is near, e.g. finding * overlapping tokens. *

* * @param aJCas * @param focusAnnotation * @param cls * @return the leftmost annotation of type cls that completely includes focusAnnotation. */ @SuppressWarnings("unchecked") public static List getNearestOverlappingAnnotations(JCas aJCas, Annotation focusAnnotation, Class cls) { FSIterator cursor = aJCas.getAnnotationIndex().iterator(); // for debugging: print out absolutely all annotations // cursor.moveToFirst(); // while (cursor.isValid()) { // System.out.println(cursor.get()); // cursor.moveToNext(); // } cursor.moveTo(focusAnnotation); if (!cursor.isValid()) throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: " + focusAnnotation); // The annotations are sorted by begin offset. So go to the first annotation with a larger begin offset compared // to the focusAnnotation's end offset since then there won't be any more overlapping annotations to the right. while (cursor.isValid() && cursor.get().getBegin() <= focusAnnotation.getEnd()) { cursor.moveToNext(); } if (!cursor.isValid()) cursor.moveToLast(); else cursor.moveToPrevious(); List overlappingAnnotations = new ArrayList<>(); while (cursor.isValid()) { Annotation currentAnnotation = cursor.get(); if (!cls.isInstance(currentAnnotation)) { cursor.moveToPrevious(); continue; } if (cursor.isValid() && currentAnnotation.getBegin() < focusAnnotation.getEnd() && currentAnnotation.getEnd() > focusAnnotation.getBegin()) { overlappingAnnotations.add((T) currentAnnotation); // As soon as we have an overlapping annotation of the correct type that begins at or before the begin // offset of the // focusAnnotation, we are finished. if (currentAnnotation.getBegin() < focusAnnotation.getBegin()) { Collections.reverse(overlappingAnnotations); return overlappingAnnotations; } } cursor.moveToPrevious(); } // Order by ascending begin offsets. 
Collections.reverse(overlappingAnnotations); return overlappingAnnotations; } /** * Returns the annotation with the highest end offset of type cls overlapping focusAnnotation. *

* This method is very similar to {@link #getNearestOverlappingAnnotations(JCas, Annotation, Class)}. Actually, the * last result element of {@link #getNearestOverlappingAnnotations(JCas, Annotation, Class)} equals the returned * annotation from this method. *

* * @param aJCas * @param focusAnnotation * @param cls * @return */ @SuppressWarnings("unchecked") public static T getLastOverlappingAnnotation(JCas aJCas, Annotation focusAnnotation, Class cls) { FSIterator cursor = aJCas.getAnnotationIndex().iterator(); // for debugging: print out absolutely all annotations // cursor.moveToFirst(); // while (cursor.isValid()) { // System.out.println(cursor.get()); // cursor.moveToNext(); // } cursor.moveTo(focusAnnotation); if (!cursor.isValid()) throw new IllegalArgumentException("Given FocusAnnotation was not found in the JCas' annotation index: " + focusAnnotation); // The annotations are sorted by begin offset. So go to the first annotation with a larger begin offset compared // to the focusAnnotation's end offset since then there won't be any more overlapping annotations to the right. while (cursor.isValid() && cursor.get().getBegin() <= focusAnnotation.getEnd()) { cursor.moveToNext(); } if (!cursor.isValid()) cursor.moveToLast(); else cursor.moveToPrevious(); while (cursor.isValid()) { Annotation currentAnnotation = cursor.get(); if (!cls.isInstance(currentAnnotation)) { cursor.moveToPrevious(); continue; } if (cursor.isValid() && currentAnnotation.getBegin() < focusAnnotation.getEnd() && currentAnnotation.getEnd() > focusAnnotation.getBegin()) { return (T) currentAnnotation; } cursor.moveToPrevious(); } return null; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy