// de.citec.scie.util.CachedJCasUtil Maven / Gradle / Ivy
/*
* SCIE -- Spinal Cord Injury Information Extraction
* Copyright (C) 2013, 2014
* Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.scie.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
/**
* This is a custom wrapper implementation to reduce the necessary calls to UIMA
* JCasUtil, which seems to be terribly slow. An instance can be retrieved using
* the "getInstance" function. Note that you do not need to fill the cache
* explicitly. This is done in the background at the first query. Note that the
* cache will not be updated afterwards! This is only done if
* "update" is called.
*
* @author Benjamin Paassen - [email protected]
*/
public class CachedJCasUtil {
/*
* Right now we do not need multiple caches for any application. Thus we
* choose the mos space-efficient variant of only caching information
* for one document at a time.
*/
public static final int MAX_NUM_INSTANCES = 1;
private static final ReentrantLock cacheLock = new ReentrantLock();
private static final ArrayList cachedDocuments = new ArrayList<>();
private static final ArrayList cachedInstances = new ArrayList<>();
private final JCas jcas;
private CachedJCasUtil(JCas jcas) {
this.jcas = jcas;
}
public JCas getJcas() {
return jcas;
}
/**
* Returns the cache for the given JCas instance. Please note that this is
* only thread-safe with regard to the runtime of this function. The
* returned cache itself is not synchronized. Please ensure
* that only one annotator processes a given jcas instance at a time.
*
* @param jcas a jcas instance.
* @return the corresponding CachedJCasUtil.
*/
public static CachedJCasUtil getInstance(JCas jcas) {
cacheLock.lock();
try {
int cacheIdx = cachedDocuments.indexOf(jcas);
final CachedJCasUtil instance;
if (cacheIdx > -1) {
/*
* If we have a cache for this document, move it to the end of
* the list to indicate that it was used recently.
*/
cachedDocuments.remove(cacheIdx);
instance = cachedInstances.remove(cacheIdx);
cachedDocuments.add(jcas);
cachedInstances.add(instance);
} else {
/*
* Otherwise create a new cache.
*/
instance = new CachedJCasUtil(jcas);
cachedDocuments.add(jcas);
cachedInstances.add(instance);
/*
* If we have now more caches than we want we remove the cache
* that was used longest ago.
*/
if (cachedDocuments.size() > MAX_NUM_INSTANCES) {
cachedDocuments.remove(0);
cachedInstances.remove(0);
}
}
return instance;
} finally {
cacheLock.unlock();
}
}
/**
* We store a Tree-based cache for each class that is queried.
*
* Each cache stores the Annotations for a given gives in order of their
* starting position.
*/
private final HashMap, AnnotationCache> caches
= new HashMap<>();
/**
* Manually adds the given instance to the respective cache.
*
* Please note that this constructs the cache if it is not there yet. During
* construction all Annotions of the given class will be added to the cache.
* If the given Annotation is part of the UIMA indices already, it will thus
* be added twice.
*
* In short: Please do only use this method if you know, what you are doing.
* Otherwise use "update" after you have added new Annotations.
*
* @param
* @param annotationInstance
*/
public void add(X annotationInstance) {
add((Class) annotationInstance.getClass(), annotationInstance);
}
/**
* Manually adds the given instance to the respective cache.
*
* Please note that this constructs the cache if it is not there yet. During
* construction all Annotions of the given class will be added to the cache.
* If the given Annotation is part of the UIMA indices already, it will thus
* be added twice.
*
* In short: Please do only use this method if you know, what you are doing.
* Otherwise use "update" after you have added new Annotations.
*
* @param
* @param annotationInstance
*/
public void add(Class annotationClass, X annotationInstance) {
//get the cache
final AnnotationCache cache = getCache(annotationClass);
cache.add(annotationInstance);
}
public boolean remove(Class annotationClass, X annotationInstance) {
//get the cache
final AnnotationCache cache = getCache(annotationClass);
return cache.remove(annotationInstance);
}
/**
* Updates the cache for the given class and inserts all new instances of
* the given class.
*
* @param
* @param annotationClass
*/
public void update(Class annotationClass) {
final Collection annoInstances = JCasUtil.select(jcas, annotationClass);
final AnnotationCache cache = new AnnotationCache<>();
for (final X anno : annoInstances) {
cache.add(anno);
}
caches.put(annotationClass, cache);
}
/**
* Returns all annotations of the given class.
*
* The returned list has a well defined order: The output Annotations are
* first ordered according to their begin, than according to their end
* index.
*/
public List select(Class annotationClass) {
final AnnotationCache cache = getCache(annotationClass);
return cache.getAll();
}
/**
* Returns all annotations of the given class that are contained in the text
* region specified by the given begin and end (inclusive).
*
* The returned list has a well defined order: The output Annotations are
* first ordered according to their begin, than according to their end
* index.
*/
public List selectCovered(Class annotationClass, int begin, int end) {
final AnnotationCache cache = getCache(annotationClass);
return cache.getCovered(begin, end);
}
/**
* Returns all annotations of the given class that contain at least the text
* region specified by the given begin and end (inclusive).
*
* The returned list has a well defined order: The output Annotations are
* first ordered according to their begin, than according to their end
* index.
*/
public List selectCovering(Class annotationClass, int begin,
int end) {
final AnnotationCache cache = getCache(annotationClass);
return cache.getCovering(begin, end);
}
/**
* Returns all annotations of the given class that are overlapping with the
* text region specified by the given begin and end (inclusive).
*
* The returned list has a well defined order: The output Annotations are
* first ordered according to their begin, than according to their end
* index.
*/
public List selectOverlapping(Class annotationClass, int begin,
int end) {
final AnnotationCache cache = getCache(annotationClass);
return cache.getOverlapping(begin, end);
}
/**
* This constructs a cache for the given class if necessary or returns it if
* it is already cached.
*/
private AnnotationCache getCache(Class annotationClass) {
AnnotationCache cache = caches.get(annotationClass);
if (cache == null) {
final Collection annoInstances = JCasUtil.select(jcas, annotationClass);
cache = new AnnotationCache<>();
for (final X anno : annoInstances) {
cache.add(anno);
}
caches.put(annotationClass, cache);
}
return cache;
}
private static class AnnotationCache {
/**
* We store the annotations themselves in a TreeMap indiced by their
* start position. For the case of overlapping annotations we have an
* additional hierarchy level of a TreeMap that stores the Annotations
* with the same start positions indiced according to their end
* position. Unfortunately, even that is not enough, because several
* annotations of the same class can exist for the same begin and end
* index. Thus we need a LinkedList at that last level of hierarchy.
*/
private final TreeMap>> actualCashe
= new TreeMap<>();
/**
* For the special case of overlapping annotations however we also need
* to now how many annotations we need to check in linear time to ensure
* that given criteria are met.
*/
private int maxLength = 0;
public AnnotationCache() {
}
public void add(X anno) {
final int length = anno.getEnd() - anno.getBegin();
if (length > maxLength) {
maxLength = length;
}
TreeMap> overlappingAnnos = actualCashe.get(anno.getBegin());
if (overlappingAnnos == null) {
overlappingAnnos = new TreeMap<>();
actualCashe.put(anno.getBegin(), overlappingAnnos);
}
LinkedList samePosAnnos = overlappingAnnos.get(anno.getEnd());
if (samePosAnnos == null) {
samePosAnnos = new LinkedList<>();
overlappingAnnos.put(anno.getEnd(), samePosAnnos);
}
samePosAnnos.add(anno);
}
public boolean remove(X anno) {
TreeMap> overlappingAnnos = actualCashe.get(anno.getBegin());
if (overlappingAnnos == null) {
return false;
}
LinkedList samePosAnnos = overlappingAnnos.get(anno.getEnd());
if (samePosAnnos == null) {
return false;
}
return samePosAnnos.remove(anno);
}
/**
* Returns all Annotations. The returned list has a well defined order:
* The output Annotations are first ordered according to their begin,
* than according to their end index.
*
* @return
*/
public List getAll() {
final ArrayList returnList = new ArrayList<>(actualCashe.size());
for (TreeMap> overlappingAnnos : actualCashe.values()) {
for (LinkedList samePosAnnos : overlappingAnnos.values()) {
returnList.addAll(samePosAnnos);
}
}
return returnList;
}
/**
* Returns all annotations stored in this cache that start before (or
* at) the start index and end after (or at) the end index.
*
* The returned list has a well defined order: The output Annotations
* are first ordered according to their begin, than according to their
* end index.
*
* @param start
* @param end
* @return
*/
public List getCovering(int start, int end) {
final ArrayList returnList = new ArrayList<>();
final int lowerKey = end - maxLength;
if (lowerKey > start) {
return returnList;
}
/*
* We only care about the SubMap of Annotations that begin from
*
* end-maxLength
*
* to
*
* start
*
*/
final NavigableMap>> subMap
= actualCashe.subMap(lowerKey, true, start, true);
//for that subMap we check if the given criteria are met.
for (final TreeMap> overlappingAnnos : subMap.values()) {
//get only the annotations that end at or after the specified end.
final NavigableMap> samePosAnnoMap = overlappingAnnos.
tailMap(end, true);
//and add them all.
for (LinkedList samePosAnnos : samePosAnnoMap.values()) {
returnList.addAll(samePosAnnos);
}
}
return returnList;
}
/**
* Returns all annotations stored in this cache that start after (or at)
* the start index and end before (or at) the end index.
*
* The returned list has a well defined order: The output Annotations
* are first ordered according to their begin, than according to their
* end index.
*
* @param start
* @param end
* @return
*/
public List getCovered(int start, int end) {
/*
* We only care about the SubMap of Annotations that begin from
*
* begin
*
* to
*
* end
*/
final NavigableMap>> subMap
= actualCashe.subMap(start, true, end, true);
final ArrayList returnList = new ArrayList<>();
//for that subMap we check if the given criteria are met.
for (final TreeMap> overlappingAnnos : subMap.values()) {
//get only the annotations that end before or at the specified end.
final NavigableMap> samePosAnnoMap = overlappingAnnos.
headMap(end, true);
//and add them all.
for (LinkedList samePosAnnos : samePosAnnoMap.values()) {
returnList.addAll(samePosAnnos);
}
}
return returnList;
}
/**
* Returns all annotations stored in this cache that start before (or
* at) the end index and end after (or at) the start index.
*
* The returned list has a well defined order: The output Annotations
* are first ordered according to their begin, than according to their
* end index.
*
* @param start
* @param end
* @return
*/
public List getOverlapping(int start, int end) {
final ArrayList returnList = new ArrayList<>();
final int lowerKey = end - maxLength;
if (lowerKey > start) {
return returnList;
}
/*
* We only care about the SubMap of Annotations that begin from
*
* end-maxLength
*
* to
*
* end
*
*/
final NavigableMap>> subMap
= actualCashe.subMap(lowerKey, true, end, true);
//for that subMap we check if the given criteria are met.
for (final TreeMap> overlappingAnnos : subMap.values()) {
//get only the annotations that end at or after the specified end.
final NavigableMap> samePosAnnoMap = overlappingAnnos.
tailMap(start, true);
//and add them all.
for (LinkedList samePosAnnos : samePosAnnoMap.values()) {
returnList.addAll(samePosAnnos);
}
}
return returnList;
}
public List getFollowing(int index, int count) {
//get the sub map following the given index.
final NavigableMap>> tailMap
= actualCashe.tailMap(index, false);
final ArrayList returnList = new ArrayList<>();
for (final TreeMap> overlappingAnnos : tailMap.values()) {
for (final LinkedList samePosAnnos : overlappingAnnos.values()) {
for (final X anno : samePosAnnos) {
if (returnList.size() == count) {
return returnList;
}
returnList.add(anno);
}
}
}
return returnList;
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy