All downloads are free. Search and download functionality uses the official Maven repository.

org.carrot2.core.CachingProcessingComponentManager Maven / Gradle / Ivy

Go to download

Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).

There is a newer version: 3.16.3
Show newest version

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2019, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.core;

import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;

import org.carrot2.core.Controller.IControllerStatisticsProvider;
import org.carrot2.core.attribute.Processing;
import org.carrot2.util.ExceptionUtils;
import org.carrot2.util.Pair;
import org.carrot2.util.attribute.*;

import org.carrot2.shaded.guava.common.cache.*;
import org.carrot2.shaded.guava.common.collect.*;
import org.carrot2.shaded.guava.common.util.concurrent.UncheckedExecutionException;

/**
 * An {@link IProcessingComponentManager} that implements processing results caching
 * functionality.
 * 

* This manager wraps some delegate manager (e.g. a * {@link SimpleProcessingComponentManager} or a {@link PoolingProcessingComponentManager} * ) and wraps the components the delegate with a functionality that either returns the * results from cache or performs the processing if the result are not yet cached. *

*/ public class CachingProcessingComponentManager implements IProcessingComponentManager, Controller.IControllerStatisticsProvider { /** The delegate manager that prepares the actual processing components */ final IProcessingComponentManager delegate; /** * Descriptors of {@link Input} and {@link Output} {@link Processing} attributes of * components whose output is to be cached. */ private final Map, String>, InputOutputAttributeDescriptors> cachedComponentAttributeDescriptors = Maps .newHashMap(); /** * A set of {@link IProcessingComponent}s whose data should be cached internally. */ final Set> cachedComponentClasses; /** * Populates on-demand and caches the data from components of classes provided in * {@link #cachedComponentClasses}. The key of the cache is a map of all {@link Input} * {@link Processing} attributes of the component for which caching is performed. The * value of the cache is a map of all {@link Output} {@link Processing} attributes * produced by the component. */ private Cache> cache; /** Cache statistics keys. */ static final String CACHE_MISSES = "cache.misses"; static final String CACHE_HITS_TOTAL = "cache.hits.total"; /** * Creates a {@link CachingProcessingComponentManager}. * * @param delegate the manager to handle the preparation of the actual processing * component instances * @param cachedComponentClasses classes of components whose output should be cached * by the controller. If a superclass is provided here, e.g. * {@link IDocumentSource}, all its subclasses will be subject to caching. * If {@link IProcessingComponent} is provided here, output of all * components will be cached. */ @SafeVarargs public CachingProcessingComponentManager(IProcessingComponentManager delegate, Class... cachedComponentClasses) { this.delegate = delegate; this.cachedComponentClasses = ImmutableSet.copyOf(cachedComponentClasses); // Initialize cache. 
cache = CacheBuilder.newBuilder() .maximumSize(100) .recordStats() .build(); } public void init(IControllerContext context, Map attributes, ProcessingComponentConfiguration... configurations) { delegate.init(context, attributes, configurations); } public IProcessingComponent prepare(Class clazz, String id, Map inputAttributes, Map outputAttributes) { // If the processing component is to be cached, wrap with our internal // processing component implementation that will do the caching. // One very important implementation detail is that the only moment we can pass // all input attributes (including the processing-time ones) to the component // wrapper is here when we create it. For this reason, the controller needs to // pass all attributes at this state, even though the other manager will likely // use only the init-time attributes. The same goes for output attributes, // these will be collected to the map we provide during the creation of the // wrapper. for (Class cachedClass : cachedComponentClasses) { if (cachedClass.isAssignableFrom(clazz)) { return new CachedProcessingComponent(clazz, id, inputAttributes, outputAttributes); } } // Otherwise, return the original component return delegate.prepare(clazz, id, inputAttributes, outputAttributes); } @Override public void recycle(IProcessingComponent component, String id) { // If not our wrapper, recycle. if (!(component instanceof CachedProcessingComponent)) { delegate.recycle(component, id); } // The wrapped actual components are recycled in ValueProducer when // they're asked to perform processing. 
} public void dispose() { try { delegate.dispose(); if (cache != null) { cache.invalidateAll(); } } finally { cache = null; } } public Map getStatistics() { // Return some custom statistics final CacheStats cacheStats = cache.stats(); final Map stats = Maps.newHashMap(); if (delegate instanceof IControllerStatisticsProvider) { stats.putAll(((IControllerStatisticsProvider) delegate).getStatistics()); } stats.put(CACHE_MISSES, cacheStats.missCount()); stats.put(CACHE_HITS_TOTAL, cacheStats.hitCount()); return stats; } // Two extra attributes to add to the input map. This way, they will also become // part of the cache key, which is what we need. private static final String COMPONENT_CLASS_KEY = CachingProcessingComponentManager.class.getName() + ".componentClass"; private static final String COMPONENT_ID_KEY = CachingProcessingComponentManager.class.getName() + ".componentId"; /** Any values put under this attribute will cause a cache bypass (dropping of the stale value). */ public static final String CACHE_BYPASS_ATTR = CachingProcessingComponentManager.class.getName() + ".cacheBypass"; /** * A stub component that fetches the data from the cache and adds the results to the * attribute map. */ private final class CachedProcessingComponent extends ProcessingComponentBase { private final Class componentClass; private final String componentId; /** All input attributes, including processing-time ones. */ private final Map inputAttributes; /** A map to store the output attributes in. 
*/ private final Map outputAttributes; CachedProcessingComponent(Class componentClass, String componentId, Map inputAttributes, Map outputAttributes) { this.componentClass = componentClass; this.inputAttributes = inputAttributes; this.outputAttributes = outputAttributes; this.componentId = componentId; } @Override public void process() throws ProcessingException { final InputOutputAttributeDescriptors descriptors = prepareAttributeDescriptors(); // Copy the output attributes produced by the preceding components. Normally, // this could be done by ControllerUtils, but the wrapper was created before // any processing took place anyway, so the inputAttributes did not have any // results yet. inputAttributes.putAll(outputAttributes); // We'll need @Input @Processing attributes for the cache key final Map inputProcessingAttributes = getAttributesForDescriptors( descriptors.inputProcessingDescriptors, inputAttributes); // Plus component class and id inputProcessingAttributes.put(COMPONENT_CLASS_KEY, componentClass); inputProcessingAttributes.put(COMPONENT_ID_KEY, componentId); // Get data from cache. If the result is not in the cache yet, it will // be created by the ValueProducer. final AttributeMapCacheKey key = new AttributeMapCacheKey( inputProcessingAttributes, inputAttributes); // Cache bypass. 
if (inputAttributes.containsKey(CACHE_BYPASS_ATTR) && Boolean.valueOf(inputAttributes.get(CACHE_BYPASS_ATTR).toString())) { cache.invalidate(key); } try { final Map processingResult = cache.get(key, new ValueProducer(key)); // Copy the results @Output @Processing attributes back to the result outputAttributes.putAll(getAttributesForDescriptors( descriptors.outputDescriptors, processingResult)); } catch (UncheckedExecutionException e) { throw ExceptionUtils.wrapAs(ProcessingException.class, e.getCause()); } catch (ExecutionException e) { throw ExceptionUtils.wrapAs(ProcessingException.class, e.getCause()); } } /** * Returns attribute descriptors for {@link Input} {@link Processing} and * {@link Output} {@link Processing} attributes of the component whose results * will be cached. */ private InputOutputAttributeDescriptors prepareAttributeDescriptors() { InputOutputAttributeDescriptors descriptors = null; synchronized (cachedComponentAttributeDescriptors) { descriptors = cachedComponentAttributeDescriptors .get(new Pair, String>( componentClass, componentId)); if (descriptors == null) { // Need to borrow a component for a while to build descriptors IProcessingComponent component = null; try { component = delegate.prepare(componentClass, componentId, inputAttributes, Maps. newHashMap()); // Build and store descriptors descriptors = new InputOutputAttributeDescriptors( BindableDescriptorBuilder.buildDescriptor(component) .only(Input.class, Processing.class).flatten().attributeDescriptors, BindableDescriptorBuilder.buildDescriptor(component) .only(Output.class).flatten().attributeDescriptors); cachedComponentAttributeDescriptors.put( new Pair, String>( componentClass, componentId), descriptors); } finally { if (component != null) { delegate.recycle(component, componentId); } } } } return descriptors; } /** * Returns a map with only with values corresponding to the provided descriptors. 
*/ Map getAttributesForDescriptors( final Map inputDescriptors, Map attributes) { final Map attributesForDrescriptors = Maps.newHashMap(); for (AttributeDescriptor descriptor : inputDescriptors.values()) { if (attributes.containsKey(descriptor.key)) { attributesForDrescriptors.put(descriptor.key, attributes .get(descriptor.key)); } } return attributesForDrescriptors; } } /** * A compound cache key based on the input attributes map that ensures that possible * modifications to the attributes map or its values do not change the hashCode and * equality behavior of the key. */ private static final class AttributeMapCacheKey { /** Input processing attributes, the key for the cache */ private Map inputProcessingAttributes; /** Hash code for input processing attributes */ private int hashCode; /** * All input attributes. This map is not part of the cache key, but we will need * it to properly retrieve entries from the cache. */ private Map inputAttributes; private AttributeMapCacheKey( Map inputProcessingAttributes, Map inputAttributes) { /* * Empty attributes should never happen because the attributes object must * hold component identifiers, etc. */ assert inputProcessingAttributes != null && inputProcessingAttributes.size() > 0; /* * In theory, we could make a shallow copy of the provided map, but if someone * wants to make modifications they'll make them anyway on the objects * contained in the map. To be completely safe, we'd have to make a deep copy. * To prevent simple errors, we make the map unmodifiable. */ this.inputProcessingAttributes = Collections.unmodifiableMap(inputProcessingAttributes); this.hashCode = inputProcessingAttributes.hashCode(); this.inputAttributes = inputAttributes; } /* * We assume that equal hash codes means equal objects, which is not true in case * of conflicts, but there is no other way really if we don't want to make deep * copies of the attribute map. 
If a conflict occurs, we would retrieve a stale * result from the cache (a result associated with a different query, possibly a * different component even). The cache is in-memory only and is rather small (so * that re-querying for documents and clusters does not cause duplicated * processing), conflicts do not seem like a big problem. */ @Override public boolean equals(Object obj) { if (!(obj instanceof AttributeMapCacheKey)) { return false; } final boolean result = (obj.hashCode() == this.hashCode); if (result) { assert ((AttributeMapCacheKey) obj).inputProcessingAttributes .equals(this.inputProcessingAttributes); } return result; } @Override public int hashCode() { return hashCode; } } /** * A cached data factory that actually performs the processing. This factory is called * only if the cache does not contain the requested value. */ private final class ValueProducer implements Callable> { private final AttributeMapCacheKey key; public ValueProducer(AttributeMapCacheKey key) { this.key = key; } @Override public Map call() throws Exception { final Map inputProcessingAttributes = key.inputProcessingAttributes; @SuppressWarnings("unchecked") final Class componentClass = (Class) inputProcessingAttributes.get(COMPONENT_CLASS_KEY); final String componentId = (String) inputProcessingAttributes.get(COMPONENT_ID_KEY); IProcessingComponent component = null; try { final Map attributes = Maps.newHashMap(); component = delegate.prepare(componentClass, componentId, key.inputAttributes, attributes); ControllerUtils.performProcessing(component, inputProcessingAttributes, attributes); return attributes; } finally { if (component != null) { delegate.recycle(component, componentId); } } } } /** * Stores a pair of maps of {@link Input} and {@link Output} descriptors. 
*/ private final static class InputOutputAttributeDescriptors { final Map inputProcessingDescriptors; final Map outputDescriptors; InputOutputAttributeDescriptors( Map inputDescriptors, Map outputDescriptors) { this.inputProcessingDescriptors = inputDescriptors; this.outputDescriptors = outputDescriptors; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy