org.carrot2.core.CachingProcessingComponentManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of carrot2-mini Show documentation
Show all versions of carrot2-mini Show documentation
Carrot2 search results clustering framework. Minimal functional subset
(core algorithms and infrastructure, no document sources).
/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.core;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import org.carrot2.core.Controller.IControllerStatisticsProvider;
import org.carrot2.core.attribute.Processing;
import org.carrot2.util.ExceptionUtils;
import org.carrot2.util.Pair;
import org.carrot2.util.attribute.*;
import org.carrot2.shaded.guava.common.cache.*;
import org.carrot2.shaded.guava.common.collect.*;
import org.carrot2.shaded.guava.common.util.concurrent.UncheckedExecutionException;
/**
* An {@link IProcessingComponentManager} that implements processing results caching
* functionality.
*
* This manager wraps some delegate manager (e.g. a
* {@link SimpleProcessingComponentManager} or a {@link PoolingProcessingComponentManager})
* and wraps the delegate's components with functionality that either returns the
* results from the cache or performs the processing if the results are not yet cached.
*
*/
public class CachingProcessingComponentManager implements IProcessingComponentManager,
Controller.IControllerStatisticsProvider
{
/** The delegate manager that prepares the actual processing components */
final IProcessingComponentManager delegate;
/**
* Descriptors of {@link Input} and {@link Output} {@link Processing} attributes of
* components whose output is to be cached.
*/
private final Map, String>, InputOutputAttributeDescriptors> cachedComponentAttributeDescriptors = Maps
.newHashMap();
/**
* A set of {@link IProcessingComponent}s whose data should be cached internally.
*/
final Set> cachedComponentClasses;
/**
* Populates on-demand and caches the data from components of classes provided in
* {@link #cachedComponentClasses}. The key of the cache is a map of all {@link Input}
* {@link Processing} attributes of the component for which caching is performed. The
* value of the cache is a map of all {@link Output} {@link Processing} attributes
* produced by the component.
*/
private Cache> cache;
/** Cache statistics keys. */
static final String CACHE_MISSES = "cache.misses";
static final String CACHE_HITS_TOTAL = "cache.hits.total";
/**
 * Creates a {@link CachingProcessingComponentManager}.
 *
 * @param delegate the manager to handle the preparation of the actual processing
 *            component instances
 * @param cachedComponentClasses classes of components whose output should be cached
 *            by the controller. If a superclass is provided here, e.g.
 *            {@link IDocumentSource}, all its subclasses will be subject to caching.
 *            If {@link IProcessingComponent} is provided here, output of all
 *            components will be cached.
 */
@SafeVarargs
public CachingProcessingComponentManager(IProcessingComponentManager delegate,
    Class<? extends IProcessingComponent>... cachedComponentClasses)
{
    this.delegate = delegate;

    // Take an immutable snapshot so later changes to the caller's array have no effect.
    this.cachedComponentClasses = ImmutableSet.copyOf(cachedComponentClasses);

    // Size-bounded in-memory cache; statistics recording is enabled because
    // getStatistics() reports the hit and miss counters.
    this.cache = CacheBuilder.newBuilder()
        .maximumSize(100)
        .recordStats()
        .build();
}
/**
 * Initializes this manager by forwarding the call, unchanged, to the delegate manager.
 *
 * @param context the controller context, passed through to the delegate
 * @param attributes the attributes, passed through to the delegate
 * @param configurations component configurations, passed through to the delegate
 */
@Override
public void init(IControllerContext context, Map<String, Object> attributes,
    ProcessingComponentConfiguration... configurations)
{
    delegate.init(context, attributes, configurations);
}
public IProcessingComponent prepare(Class extends IProcessingComponent> clazz,
String id, Map inputAttributes,
Map outputAttributes)
{
// If the processing component is to be cached, wrap with our internal
// processing component implementation that will do the caching.
// One very important implementation detail is that the only moment we can pass
// all input attributes (including the processing-time ones) to the component
// wrapper is here when we create it. For this reason, the controller needs to
// pass all attributes at this state, even though the other manager will likely
// use only the init-time attributes. The same goes for output attributes,
// these will be collected to the map we provide during the creation of the
// wrapper.
for (Class> cachedClass : cachedComponentClasses)
{
if (cachedClass.isAssignableFrom(clazz))
{
return new CachedProcessingComponent(clazz, id, inputAttributes,
outputAttributes);
}
}
// Otherwise, return the original component
return delegate.prepare(clazz, id, inputAttributes, outputAttributes);
}
@Override
public void recycle(IProcessingComponent component, String id)
{
    // Our own caching wrappers never go back to the delegate: the actual components
    // they stand for are recycled in ValueProducer when they're asked to perform
    // processing.
    if (component instanceof CachedProcessingComponent)
    {
        return;
    }

    // Anything else came straight from the delegate, so hand it back.
    delegate.recycle(component, id);
}
/**
 * Disposes of the delegate manager first, then drops all cached entries. The cache
 * reference is cleared in every case, also when the delegate or the invalidation
 * throws.
 */
public void dispose()
{
    try
    {
        delegate.dispose();

        // Already disposed of earlier: nothing left to invalidate.
        if (cache == null)
        {
            return;
        }
        cache.invalidateAll();
    }
    finally
    {
        cache = null;
    }
}
/**
 * Returns the statistics of the delegate (if it is an
 * {@link IControllerStatisticsProvider}) together with this manager's cache miss and
 * hit counts, stored under {@link #CACHE_MISSES} and {@link #CACHE_HITS_TOTAL}.
 * The cache counters are put last, so they win should the delegate use the same keys.
 *
 * @return a new, mutable map of statistics
 */
@Override
public Map<String, Object> getStatistics()
{
    // Return some custom statistics
    final CacheStats cacheStats = cache.stats();

    final Map<String, Object> stats = Maps.newHashMap();
    if (delegate instanceof IControllerStatisticsProvider)
    {
        stats.putAll(((IControllerStatisticsProvider) delegate).getStatistics());
    }

    stats.put(CACHE_MISSES, cacheStats.missCount());
    stats.put(CACHE_HITS_TOTAL, cacheStats.hitCount());
    return stats;
}
// Keys of two extra entries that are added to the map of input processing attributes
// before the cache key is built from it: the component class and the component id.
// This way they also become part of the cache key, which is what we need.
private static final String COMPONENT_CLASS_KEY =
CachingProcessingComponentManager.class.getName() + ".componentClass";
private static final String COMPONENT_ID_KEY =
CachingProcessingComponentManager.class.getName() + ".componentId";
/**
 * Name of the input attribute that requests a cache bypass. When the attribute is
 * present and the string form of its value parses as <code>true</code> (see
 * {@link Boolean#valueOf(String)}), the entry cached for the request is invalidated
 * before the cache lookup, so the stale value is dropped and the result is computed
 * again. Any other value leaves the cache untouched.
 */
public static final String CACHE_BYPASS_ATTR =
CachingProcessingComponentManager.class.getName() + ".cacheBypass";
/**
* A stub component that fetches the data from the cache and adds the results to the
* attribute map.
*/
private final class CachedProcessingComponent extends ProcessingComponentBase
{
private final Class extends IProcessingComponent> componentClass;
private final String componentId;
/** All input attributes, including processing-time ones. */
private final Map inputAttributes;
/** A map to store the output attributes in. */
private final Map outputAttributes;
CachedProcessingComponent(Class extends IProcessingComponent> componentClass,
String componentId, Map inputAttributes,
Map outputAttributes)
{
this.componentClass = componentClass;
this.inputAttributes = inputAttributes;
this.outputAttributes = outputAttributes;
this.componentId = componentId;
}
@Override
public void process() throws ProcessingException
{
final InputOutputAttributeDescriptors descriptors = prepareAttributeDescriptors();
// Copy the output attributes produced by the preceding components. Normally,
// this could be done by ControllerUtils, but the wrapper was created before
// any processing took place anyway, so the inputAttributes did not have any
// results yet.
inputAttributes.putAll(outputAttributes);
// We'll need @Input @Processing attributes for the cache key
final Map inputProcessingAttributes = getAttributesForDescriptors(
descriptors.inputProcessingDescriptors, inputAttributes);
// Plus component class and id
inputProcessingAttributes.put(COMPONENT_CLASS_KEY, componentClass);
inputProcessingAttributes.put(COMPONENT_ID_KEY, componentId);
// Get data from cache. If the result is not in the cache yet, it will
// be created by the ValueProducer.
final AttributeMapCacheKey key = new AttributeMapCacheKey(
inputProcessingAttributes, inputAttributes);
// Cache bypass.
if (inputAttributes.containsKey(CACHE_BYPASS_ATTR) &&
Boolean.valueOf(inputAttributes.get(CACHE_BYPASS_ATTR).toString())) {
cache.invalidate(key);
}
try
{
final Map processingResult = cache.get(key, new ValueProducer(key));
// Copy the results @Output @Processing attributes back to the result
outputAttributes.putAll(getAttributesForDescriptors(
descriptors.outputDescriptors, processingResult));
}
catch (UncheckedExecutionException e)
{
throw ExceptionUtils.wrapAs(ProcessingException.class, e.getCause());
}
catch (ExecutionException e)
{
throw ExceptionUtils.wrapAs(ProcessingException.class, e.getCause());
}
}
/**
* Returns attribute descriptors for {@link Input} {@link Processing} and
* {@link Output} {@link Processing} attributes of the component whose results
* will be cached.
*/
private InputOutputAttributeDescriptors prepareAttributeDescriptors()
{
InputOutputAttributeDescriptors descriptors = null;
synchronized (cachedComponentAttributeDescriptors)
{
descriptors = cachedComponentAttributeDescriptors
.get(new Pair, String>(
componentClass, componentId));
if (descriptors == null)
{
// Need to borrow a component for a while to build descriptors
IProcessingComponent component = null;
try
{
component = delegate.prepare(componentClass, componentId,
inputAttributes, Maps. newHashMap());
// Build and store descriptors
descriptors = new InputOutputAttributeDescriptors(
BindableDescriptorBuilder.buildDescriptor(component)
.only(Input.class, Processing.class).flatten().attributeDescriptors,
BindableDescriptorBuilder.buildDescriptor(component)
.only(Output.class).flatten().attributeDescriptors);
cachedComponentAttributeDescriptors.put(
new Pair, String>(
componentClass, componentId), descriptors);
}
finally
{
if (component != null)
{
delegate.recycle(component, componentId);
}
}
}
}
return descriptors;
}
/**
* Returns a map with only with values corresponding to the provided descriptors.
*/
Map getAttributesForDescriptors(
final Map inputDescriptors,
Map attributes)
{
final Map attributesForDrescriptors = Maps.newHashMap();
for (AttributeDescriptor descriptor : inputDescriptors.values())
{
if (attributes.containsKey(descriptor.key))
{
attributesForDrescriptors.put(descriptor.key, attributes
.get(descriptor.key));
}
}
return attributesForDrescriptors;
}
}
/**
 * A compound cache key based on the input attributes map that ensures that possible
 * modifications to the attributes map or its values do not change the hashCode and
 * equality behavior of the key: the hash code is captured once, at construction time,
 * and equality is decided on that captured hash code alone.
 */
private static final class AttributeMapCacheKey
{
    /** Input processing attributes, the key for the cache */
    private Map<String, Object> inputProcessingAttributes;

    /** Hash code of the input processing attributes, computed at construction time */
    private int hashCode;

    /**
     * All input attributes. This map is not part of the cache key, but we will need
     * it to properly retrieve entries from the cache.
     */
    private Map<String, Object> inputAttributes;

    private AttributeMapCacheKey(
        Map<String, Object> inputProcessingAttributes,
        Map<String, Object> inputAttributes)
    {
        /*
         * Empty attributes should never happen because the attributes object must
         * hold component identifiers, etc.
         */
        assert inputProcessingAttributes != null
            && !inputProcessingAttributes.isEmpty();

        /*
         * In theory, we could make a shallow copy of the provided map, but if someone
         * wants to make modifications they'll make them anyway on the objects
         * contained in the map. To be completely safe, we'd have to make a deep copy.
         * To prevent simple errors, we make the map unmodifiable.
         */
        this.inputProcessingAttributes = Collections
            .unmodifiableMap(inputProcessingAttributes);
        this.hashCode = inputProcessingAttributes.hashCode();
        this.inputAttributes = inputAttributes;
    }

    /*
     * We assume that equal hash codes means equal objects, which is not true in case
     * of conflicts, but there is no other way really if we don't want to make deep
     * copies of the attribute map. If a conflict occurs, we would retrieve a stale
     * result from the cache (a result associated with a different query, possibly a
     * different component even). The cache is in-memory only and is rather small (so
     * that re-querying for documents and clusters does not cause duplicated
     * processing), conflicts do not seem like a big problem.
     */
    @Override
    public boolean equals(Object obj)
    {
        if (!(obj instanceof AttributeMapCacheKey))
        {
            return false;
        }

        final boolean result = (obj.hashCode() == this.hashCode);
        if (result)
        {
            // Only verified with assertions enabled; see the comment above.
            assert ((AttributeMapCacheKey) obj).inputProcessingAttributes
                .equals(this.inputProcessingAttributes);
        }
        return result;
    }

    @Override
    public int hashCode()
    {
        return hashCode;
    }
}
/**
* A cached data factory that actually performs the processing. This factory is called
* only if the cache does not contain the requested value.
*/
private final class ValueProducer
implements Callable