All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.scie.util.CachedJCasUtil Maven / Gradle / Ivy

/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package de.citec.scie.util;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;

/**
 * This is a custom wrapper implementation to reduce the necessary calls to UIMA
 * JCasUtil, which seems to be terribly slow. An instance can be retrieved using
 * the "getInstance" function. Note that you do not need to fill the cache
 * explicitly. This is done in the background at the first query. Note that the
 * cache will not be updated afterwards! This is only done if
 * "update" is called.
 *
 * @author Benjamin Paassen - [email protected]
 */
public class CachedJCasUtil {

	/*
	 * Right now we do not need multiple caches for any application. Thus we
	 * choose the mos space-efficient variant of only caching information
	 * for one document at a time.
	 */
	public static final int MAX_NUM_INSTANCES = 1;

	private static final ReentrantLock cacheLock = new ReentrantLock();
	private static final ArrayList cachedDocuments = new ArrayList<>();
	private static final ArrayList cachedInstances = new ArrayList<>();

	private final JCas jcas;

	private CachedJCasUtil(JCas jcas) {
		this.jcas = jcas;
	}

	public JCas getJcas() {
		return jcas;
	}

	/**
	 * Returns the cache for the given JCas instance. Please note that this is
	 * only thread-safe with regard to the runtime of this function. The
	 * returned cache itself is not synchronized. Please ensure
	 * that only one annotator processes a given jcas instance at a time.
	 *
	 * @param jcas a jcas instance.
	 * @return the corresponding CachedJCasUtil.
	 */
	public static CachedJCasUtil getInstance(JCas jcas) {
		cacheLock.lock();
		try {
			int cacheIdx = cachedDocuments.indexOf(jcas);
			final CachedJCasUtil instance;
			if (cacheIdx > -1) {
				/*
				 * If we have a cache for this document, move it to the end of
				 * the list to indicate that it was used recently.
				 */
				cachedDocuments.remove(cacheIdx);
				instance = cachedInstances.remove(cacheIdx);
				cachedDocuments.add(jcas);
				cachedInstances.add(instance);
			} else {
				/*
				 * Otherwise create a new cache.
				 */
				instance = new CachedJCasUtil(jcas);
				cachedDocuments.add(jcas);
				cachedInstances.add(instance);
				/*
				 * If we have now more caches than we want we remove the cache
				 * that was used longest ago.
				 */
				if (cachedDocuments.size() > MAX_NUM_INSTANCES) {
					cachedDocuments.remove(0);
					cachedInstances.remove(0);
				}
			}
			return instance;
		} finally {
			cacheLock.unlock();
		}
	}
	/**
	 * We store a Tree-based cache for each class that is queried.
	 *
	 * Each cache stores the Annotations for a given gives in order of their
	 * starting position.
	 */
	private final HashMap, AnnotationCache> caches
			= new HashMap<>();

	/**
	 * Manually adds the given instance to the respective cache.
	 *
	 * Please note that this constructs the cache if it is not there yet. During
	 * construction all Annotions of the given class will be added to the cache.
	 * If the given Annotation is part of the UIMA indices already, it will thus
	 * be added twice.
	 *
	 * In short: Please do only use this method if you know, what you are doing.
	 * Otherwise use "update" after you have added new Annotations.
	 *
	 * @param 
	 * @param annotationInstance
	 */
	public  void add(X annotationInstance) {
		add((Class) annotationInstance.getClass(), annotationInstance);
	}

	/**
	 * Manually adds the given instance to the respective cache.
	 *
	 * Please note that this constructs the cache if it is not there yet. During
	 * construction all Annotions of the given class will be added to the cache.
	 * If the given Annotation is part of the UIMA indices already, it will thus
	 * be added twice.
	 *
	 * In short: Please do only use this method if you know, what you are doing.
	 * Otherwise use "update" after you have added new Annotations.
	 *
	 * @param 
	 * @param annotationInstance
	 */
	public  void add(Class annotationClass, X annotationInstance) {
		//get the cache
		final AnnotationCache cache = getCache(annotationClass);
		cache.add(annotationInstance);
	}

	public  boolean remove(Class annotationClass, X annotationInstance) {
		//get the cache
		final AnnotationCache cache = getCache(annotationClass);
		return cache.remove(annotationInstance);
	}

	/**
	 * Updates the cache for the given class and inserts all new instances of
	 * the given class.
	 *
	 * @param 
	 * @param annotationClass
	 */
	public  void update(Class annotationClass) {
		final Collection annoInstances = JCasUtil.select(jcas, annotationClass);
		final AnnotationCache cache = new AnnotationCache<>();
		for (final X anno : annoInstances) {
			cache.add(anno);
		}
		caches.put(annotationClass, cache);
	}

	/**
	 * Returns all annotations of the given class.
	 *
	 * The returned list has a well defined order: The output Annotations are
	 * first ordered according to their begin, than according to their end
	 * index.
	 */
	public  List select(Class annotationClass) {
		final AnnotationCache cache = getCache(annotationClass);
		return cache.getAll();
	}

	/**
	 * Returns all annotations of the given class that are contained in the text
	 * region specified by the given begin and end (inclusive).
	 *
	 * The returned list has a well defined order: The output Annotations are
	 * first ordered according to their begin, than according to their end
	 * index.
	 */
	public  List selectCovered(Class annotationClass, int begin, int end) {
		final AnnotationCache cache = getCache(annotationClass);
		return cache.getCovered(begin, end);
	}

	/**
	 * Returns all annotations of the given class that contain at least the text
	 * region specified by the given begin and end (inclusive).
	 *
	 * The returned list has a well defined order: The output Annotations are
	 * first ordered according to their begin, than according to their end
	 * index.
	 */
	public  List selectCovering(Class annotationClass, int begin,
			int end) {
		final AnnotationCache cache = getCache(annotationClass);
		return cache.getCovering(begin, end);
	}

	/**
	 * Returns all annotations of the given class that are overlapping with the
	 * text region specified by the given begin and end (inclusive).
	 *
	 * The returned list has a well defined order: The output Annotations are
	 * first ordered according to their begin, than according to their end
	 * index.
	 */
	public  List selectOverlapping(Class annotationClass, int begin,
			int end) {
		final AnnotationCache cache = getCache(annotationClass);
		return cache.getOverlapping(begin, end);
	}

	/**
	 * This constructs a cache for the given class if necessary or returns it if
	 * it is already cached.
	 */
	private  AnnotationCache getCache(Class annotationClass) {
		AnnotationCache cache = caches.get(annotationClass);
		if (cache == null) {
			final Collection annoInstances = JCasUtil.select(jcas, annotationClass);
			cache = new AnnotationCache<>();
			for (final X anno : annoInstances) {
				cache.add(anno);
			}
			caches.put(annotationClass, cache);
		}
		return cache;
	}

	private static class AnnotationCache {

		/**
		 * We store the annotations themselves in a TreeMap indiced by their
		 * start position. For the case of overlapping annotations we have an
		 * additional hierarchy level of a TreeMap that stores the Annotations
		 * with the same start positions indiced according to their end
		 * position. Unfortunately, even that is not enough, because several
		 * annotations of the same class can exist for the same begin and end
		 * index. Thus we need a LinkedList at that last level of hierarchy.
		 */
		private final TreeMap>> actualCashe
				= new TreeMap<>();
		/**
		 * For the special case of overlapping annotations however we also need
		 * to now how many annotations we need to check in linear time to ensure
		 * that given criteria are met.
		 */
		private int maxLength = 0;

		public AnnotationCache() {
		}

		public void add(X anno) {
			final int length = anno.getEnd() - anno.getBegin();
			if (length > maxLength) {
				maxLength = length;
			}
			TreeMap> overlappingAnnos = actualCashe.get(anno.getBegin());
			if (overlappingAnnos == null) {
				overlappingAnnos = new TreeMap<>();
				actualCashe.put(anno.getBegin(), overlappingAnnos);
			}
			LinkedList samePosAnnos = overlappingAnnos.get(anno.getEnd());
			if (samePosAnnos == null) {
				samePosAnnos = new LinkedList<>();
				overlappingAnnos.put(anno.getEnd(), samePosAnnos);
			}

			samePosAnnos.add(anno);
		}

		public boolean remove(X anno) {
			TreeMap> overlappingAnnos = actualCashe.get(anno.getBegin());
			if (overlappingAnnos == null) {
				return false;
			}
			LinkedList samePosAnnos = overlappingAnnos.get(anno.getEnd());
			if (samePosAnnos == null) {
				return false;
			}
			return samePosAnnos.remove(anno);
		}

		/**
		 * Returns all Annotations. The returned list has a well defined order:
		 * The output Annotations are first ordered according to their begin,
		 * than according to their end index.
		 *
		 * @return
		 */
		public List getAll() {
			final ArrayList returnList = new ArrayList<>(actualCashe.size());
			for (TreeMap> overlappingAnnos : actualCashe.values()) {
				for (LinkedList samePosAnnos : overlappingAnnos.values()) {
					returnList.addAll(samePosAnnos);
				}
			}
			return returnList;
		}

		/**
		 * Returns all annotations stored in this cache that start before (or
		 * at) the start index and end after (or at) the end index.
		 *
		 * The returned list has a well defined order: The output Annotations
		 * are first ordered according to their begin, than according to their
		 * end index.
		 *
		 * @param start
		 * @param end
		 * @return
		 */
		public List getCovering(int start, int end) {
			final ArrayList returnList = new ArrayList<>();
			final int lowerKey = end - maxLength;
			if (lowerKey > start) {
				return returnList;
			}
			/*
			 * We only care about the SubMap of Annotations that begin from
			 *
			 * end-maxLength
			 *
			 * to
			 *
			 * start
			 *
			 */
			final NavigableMap>> subMap
					= actualCashe.subMap(lowerKey, true, start, true);
			//for that subMap we check if the given criteria are met.
			for (final TreeMap> overlappingAnnos : subMap.values()) {
				//get only the annotations that end at or after the specified end.
				final NavigableMap> samePosAnnoMap = overlappingAnnos.
						tailMap(end, true);
				//and add them all.
				for (LinkedList samePosAnnos : samePosAnnoMap.values()) {
					returnList.addAll(samePosAnnos);
				}
			}
			return returnList;
		}

		/**
		 * Returns all annotations stored in this cache that start after (or at)
		 * the start index and end before (or at) the end index.
		 *
		 * The returned list has a well defined order: The output Annotations
		 * are first ordered according to their begin, than according to their
		 * end index.
		 *
		 * @param start
		 * @param end
		 * @return
		 */
		public List getCovered(int start, int end) {
			/*
			 * We only care about the SubMap of Annotations that begin from
			 *
			 * begin
			 *
			 * to
			 *
			 * end
			 */
			final NavigableMap>> subMap
					= actualCashe.subMap(start, true, end, true);
			final ArrayList returnList = new ArrayList<>();
			//for that subMap we check if the given criteria are met.
			for (final TreeMap> overlappingAnnos : subMap.values()) {
				//get only the annotations that end before or at the specified end.
				final NavigableMap> samePosAnnoMap = overlappingAnnos.
						headMap(end, true);
				//and add them all.
				for (LinkedList samePosAnnos : samePosAnnoMap.values()) {
					returnList.addAll(samePosAnnos);
				}
			}
			return returnList;
		}

		/**
		 * Returns all annotations stored in this cache that start before (or
		 * at) the end index and end after (or at) the start index.
		 *
		 * The returned list has a well defined order: The output Annotations
		 * are first ordered according to their begin, than according to their
		 * end index.
		 *
		 * @param start
		 * @param end
		 * @return
		 */
		public List getOverlapping(int start, int end) {
			final ArrayList returnList = new ArrayList<>();
			final int lowerKey = end - maxLength;
			if (lowerKey > start) {
				return returnList;
			}
			/*
			 * We only care about the SubMap of Annotations that begin from
			 *
			 * end-maxLength
			 *
			 * to
			 *
			 * end
			 *
			 */
			final NavigableMap>> subMap
					= actualCashe.subMap(lowerKey, true, end, true);
			//for that subMap we check if the given criteria are met.
			for (final TreeMap> overlappingAnnos : subMap.values()) {
				//get only the annotations that end at or after the specified end.
				final NavigableMap> samePosAnnoMap = overlappingAnnos.
						tailMap(start, true);
				//and add them all.
				for (LinkedList samePosAnnos : samePosAnnoMap.values()) {
					returnList.addAll(samePosAnnos);
				}
			}
			return returnList;
		}

		public List getFollowing(int index, int count) {
			//get the sub map following the given index.
			final NavigableMap>> tailMap
					= actualCashe.tailMap(index, false);
			final ArrayList returnList = new ArrayList<>();
			for (final TreeMap> overlappingAnnos : tailMap.values()) {
				for (final LinkedList samePosAnnos : overlappingAnnos.values()) {
					for (final X anno : samePosAnnos) {
						if (returnList.size() == count) {
							return returnList;
						}
						returnList.add(anno);
					}
				}
			}
			return returnList;

		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy