/*
* Copyright (c) 2016 The original author or authors
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* and Apache License v2.0 which accompanies this distribution.
*
* The Eclipse Public License is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* The Apache License v2.0 is available at
* http://www.opensource.org/licenses/apache2.0.php
*
* You may elect to redistribute this code under either of these licenses.
*/
package org.dataloader;
import org.dataloader.impl.CompletableFutureKit;
import org.dataloader.stats.Statistics;
import org.dataloader.stats.StatisticsCollector;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionStage;
import java.util.stream.Collectors;
import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static org.dataloader.impl.Assertions.assertState;
import static org.dataloader.impl.Assertions.nonNull;
/**
* Data loader is a utility class that allows batch loading of data that is identified by a set of unique keys. For
* each key that is loaded a separate {@link CompletableFuture} is returned, that completes as the batch function completes.
*
* With batching enabled the execution will start after calling {@link DataLoader#dispatch()}, causing the queue of
* loaded keys to be sent to the batch function, clears the queue, and returns a promise to the values.
*
* As {@link org.dataloader.BatchLoader} batch functions are executed the resulting futures are cached using a cache
* implementation of choice, so they will only execute once. Individual cache keys can be cleared, so they will
* be re-fetched when referred to again.
*
* It is also possible to clear the cache entirely, and prime it with values before they are used.
*
* Both caching and batching can be disabled. Configuration of the data loader is done by providing a
* {@link DataLoaderOptions} instance on creation.
*
* A call to the batch loader might result in individual exception failures for items within the returned list. If
* you want to capture these specific item failures then use {@link org.dataloader.Try} as a return value and
* create the data loader with the {@link #newDataLoaderWithTry(BatchLoader)} form. The Try values will be interpreted
* as either success values or cause the {@link #load(Object)} promise to complete exceptionally.
*
* @param <K> type parameter indicating the type of the data load keys
* @param <V> type parameter indicating the type of the data that is returned
*
* @author Arnold Schrijver
* @author Brad Baker
*/
public class DataLoader {
private final BatchLoader batchLoadFunction;
private final DataLoaderOptions loaderOptions;
private final CacheMap> futureCache;
private final List>> loaderQueue;
private final StatisticsCollector stats;
/**
* Creates new DataLoader with the specified batch loader function and default options
* (batching, caching and unlimited batch size).
*
* @param batchLoadFunction the batch load function to use
* @param the key type
* @param the value type
*
* @return a new DataLoader
*/
public static DataLoader newDataLoader(BatchLoader batchLoadFunction) {
return newDataLoader(batchLoadFunction, null);
}
/**
* Creates new DataLoader with the specified batch loader function with the provided options
*
* @param batchLoadFunction the batch load function to use
* @param options the options to use
* @param the key type
* @param the value type
*
* @return a new DataLoader
*/
public static DataLoader newDataLoader(BatchLoader batchLoadFunction, DataLoaderOptions options) {
return new DataLoader<>(batchLoadFunction, options);
}
/**
* Creates new DataLoader with the specified batch loader function and default options
* (batching, caching and unlimited batch size) where the batch loader function returns a list of
* {@link org.dataloader.Try} objects.
*
* This allows you to capture both the value that might be returned and also whether exception that might have occurred getting that individual value. If its important you to
* know gther exact status of each item in a batch call and whether it threw exceptions when fetched then
* you can use this form to create the data loader.
*
* @param batchLoadFunction the batch load function to use that uses {@link org.dataloader.Try} objects
* @param the key type
* @param the value type
*
* @return a new DataLoader
*/
public static DataLoader newDataLoaderWithTry(BatchLoader> batchLoadFunction) {
return newDataLoaderWithTry(batchLoadFunction, null);
}
/**
* Creates new DataLoader with the specified batch loader function and with the provided options
* where the batch loader function returns a list of
* {@link org.dataloader.Try} objects.
*
* @param batchLoadFunction the batch load function to use that uses {@link org.dataloader.Try} objects
* @param options the options to use
* @param the key type
* @param the value type
*
* @return a new DataLoader
*
* @see #newDataLoaderWithTry(BatchLoader)
*/
@SuppressWarnings("unchecked")
public static DataLoader newDataLoaderWithTry(BatchLoader> batchLoadFunction, DataLoaderOptions options) {
return new DataLoader<>((BatchLoader) batchLoadFunction, options);
}
/**
* Creates a new data loader with the provided batch load function, and default options.
*
* @param batchLoadFunction the batch load function to use
*/
public DataLoader(BatchLoader batchLoadFunction) {
this(batchLoadFunction, null);
}
/**
* Creates a new data loader with the provided batch load function and options.
*
* @param batchLoadFunction the batch load function to use
* @param options the batch load options
*/
public DataLoader(BatchLoader batchLoadFunction, DataLoaderOptions options) {
this.batchLoadFunction = nonNull(batchLoadFunction);
this.loaderOptions = options == null ? new DataLoaderOptions() : options;
this.futureCache = determineCacheMap(loaderOptions);
// order of keys matter in data loader
this.loaderQueue = new ArrayList<>();
this.stats = nonNull(this.loaderOptions.getStatisticsCollector());
}
@SuppressWarnings("unchecked")
private CacheMap> determineCacheMap(DataLoaderOptions loaderOptions) {
return loaderOptions.cacheMap().isPresent() ? (CacheMap>) loaderOptions.cacheMap().get() : CacheMap.simpleMap();
}
/**
* Requests to load the data with the specified key asynchronously, and returns a future of the resulting value.
*
* If batching is enabled (the default), you'll have to call {@link DataLoader#dispatch()} at a later stage to
* start batch execution. If you forget this call the future will never be completed (unless already completed,
* and returned from cache).
*
* @param key the key to load
*
* @return the future of the value
*/
public CompletableFuture load(K key) {
synchronized (this) {
Object cacheKey = getCacheKey(nonNull(key));
stats.incrementLoadCount();
boolean batchingEnabled = loaderOptions.batchingEnabled();
boolean cachingEnabled = loaderOptions.cachingEnabled();
if (cachingEnabled) {
if (futureCache.containsKey(cacheKey)) {
stats.incrementCacheHitCount();
return futureCache.get(cacheKey);
}
}
CompletableFuture future = new CompletableFuture<>();
if (batchingEnabled) {
loaderQueue.add(new SimpleImmutableEntry<>(key, future));
} else {
stats.incrementBatchLoadCountBy(1);
// immediate execution of batch function
CompletableFuture> batchedLoad = batchLoadFunction
.load(singletonList(key))
.toCompletableFuture();
future = batchedLoad
.thenApply(list -> list.get(0));
}
if (cachingEnabled) {
futureCache.set(cacheKey, future);
}
return future;
}
}
/**
* Requests to load the list of data provided by the specified keys asynchronously, and returns a composite future
* of the resulting values.
*
* If batching is enabled (the default), you'll have to call {@link DataLoader#dispatch()} at a later stage to
* start batch execution. If you forget this call the future will never be completed (unless already completed,
* and returned from cache).
*
* @param keys the list of keys to load
*
* @return the composite future of the list of values
*/
public CompletableFuture> loadMany(List keys) {
synchronized (this) {
List> collect = keys.stream()
.map(this::load)
.collect(Collectors.toList());
return CompletableFutureKit.allOf(collect);
}
}
/**
* Dispatches the queued load requests to the batch execution function and returns a promise of the result.
*
* If batching is disabled, or there are no queued requests, then a succeeded promise is returned.
*
* @return the promise of the queued load requests
*/
public CompletableFuture> dispatch() {
boolean batchingEnabled = loaderOptions.batchingEnabled();
//
// we copy the pre-loaded set of futures ready for dispatch
final List keys = new ArrayList<>();
final List> queuedFutures = new ArrayList<>();
synchronized (this) {
loaderQueue.forEach(entry -> {
keys.add(entry.getKey());
queuedFutures.add(entry.getValue());
});
loaderQueue.clear();
}
if (!batchingEnabled || keys.size() == 0) {
return CompletableFuture.completedFuture(emptyList());
}
//
// order of keys -> values matter in data loader hence the use of linked hash map
//
// See https://github.com/facebook/dataloader/blob/master/README.md for more details
//
//
// when the promised list of values completes, we transfer the values into
// the previously cached future objects that the client already has been given
// via calls to load("foo") and loadMany(["foo","bar"])
//
int maxBatchSize = loaderOptions.maxBatchSize();
if (maxBatchSize > 0 && maxBatchSize < keys.size()) {
return sliceIntoBatchesOfBatches(keys, queuedFutures, maxBatchSize);
} else {
return dispatchQueueBatch(keys, queuedFutures);
}
}
private CompletableFuture> sliceIntoBatchesOfBatches(List keys, List> queuedFutures, int maxBatchSize) {
// the number of keys is > than what the batch loader function can accept
// so make multiple calls to the loader
List>> allBatches = new ArrayList<>();
int len = keys.size();
int batchCount = (int) Math.ceil(len / (double) maxBatchSize);
for (int i = 0; i < batchCount; i++) {
int fromIndex = i * maxBatchSize;
int toIndex = Math.min((i + 1) * maxBatchSize, len);
List subKeys = keys.subList(fromIndex, toIndex);
List> subFutures = queuedFutures.subList(fromIndex, toIndex);
allBatches.add(dispatchQueueBatch(subKeys, subFutures));
}
//
// now reassemble all the futures into one that is the complete set of results
return CompletableFuture.allOf(allBatches.toArray(new CompletableFuture[allBatches.size()]))
.thenApply(v -> allBatches.stream()
.map(CompletableFuture::join)
.flatMap(Collection::stream)
.collect(Collectors.toList()));
}
@SuppressWarnings("unchecked")
private CompletableFuture> dispatchQueueBatch(List keys, List> queuedFutures) {
stats.incrementBatchLoadCountBy(keys.size());
CompletionStage> batchLoad;
try {
batchLoad = nonNull(batchLoadFunction.load(keys), "Your batch loader function MUST return a non null CompletionStage promise");
} catch (Exception e) {
batchLoad = CompletableFutureKit.failedFuture(e);
}
return batchLoad
.toCompletableFuture()
.thenApply(values -> {
assertState(keys.size() == values.size(), "The size of the promised values MUST be the same size as the key list");
for (int idx = 0; idx < queuedFutures.size(); idx++) {
Object value = values.get(idx);
CompletableFuture future = queuedFutures.get(idx);
if (value instanceof Throwable) {
stats.incrementLoadErrorCount();
future.completeExceptionally((Throwable) value);
// we don't clear the cached view of this entry to avoid
// frequently loading the same error
} else if (value instanceof Try) {
// we allow the batch loader to return a Try so we can better represent a computation
// that might have worked or not.
Try tryValue = (Try) value;
if (tryValue.isSuccess()) {
future.complete(tryValue.get());
} else {
stats.incrementLoadErrorCount();
future.completeExceptionally(tryValue.getThrowable());
}
} else {
V val = (V) value;
future.complete(val);
}
}
return values;
}).exceptionally(ex -> {
stats.incrementBatchLoadExceptionCount();
for (int idx = 0; idx < queuedFutures.size(); idx++) {
K key = keys.get(idx);
CompletableFuture future = queuedFutures.get(idx);
future.completeExceptionally(ex);
// clear any cached view of this key because they all failed
clear(key);
}
return emptyList();
});
}
/**
* Normally {@link #dispatch()} is an asynchronous operation but this version will 'join' on the
* results if dispatch and wait for them to complete. If the {@link CompletableFuture} callbacks make more
* calls to this data loader then the {@link #dispatchDepth()} will be > 0 and this method will loop
* around and wait for any other extra batch loads to occur.
*
* @return the list of all results when the {@link #dispatchDepth()} reached 0
*/
public List dispatchAndJoin() {
List results = new ArrayList<>();
List joinedResults = dispatch().join();
results.addAll(joinedResults);
while (this.dispatchDepth() > 0) {
joinedResults = dispatch().join();
results.addAll(joinedResults);
}
return results;
}
/**
* @return the depth of the batched key loads that need to be dispatched
*/
public int dispatchDepth() {
synchronized (this) {
return loaderQueue.size();
}
}
/**
* Clears the future with the specified key from the cache, if caching is enabled, so it will be re-fetched
* on the next load request.
*
* @param key the key to remove
*
* @return the data loader for fluent coding
*/
public DataLoader clear(K key) {
Object cacheKey = getCacheKey(key);
synchronized (this) {
futureCache.delete(cacheKey);
}
return this;
}
/**
* Clears the entire cache map of the loader.
*
* @return the data loader for fluent coding
*/
public DataLoader clearAll() {
synchronized (this) {
futureCache.clear();
}
return this;
}
/**
* Primes the cache with the given key and value.
*
* @param key the key
* @param value the value
*
* @return the data loader for fluent coding
*/
public DataLoader prime(K key, V value) {
Object cacheKey = getCacheKey(key);
synchronized (this) {
if (!futureCache.containsKey(cacheKey)) {
futureCache.set(cacheKey, CompletableFuture.completedFuture(value));
}
}
return this;
}
/**
* Primes the cache with the given key and error.
*
* @param key the key
* @param error the exception to prime instead of a value
*
* @return the data loader for fluent coding
*/
public DataLoader prime(K key, Exception error) {
Object cacheKey = getCacheKey(key);
if (!futureCache.containsKey(cacheKey)) {
futureCache.set(cacheKey, CompletableFutureKit.failedFuture(error));
}
return this;
}
/**
* Gets the object that is used in the internal cache map as key, by applying the cache key function to
* the provided key.
*
* If no cache key function is present in {@link DataLoaderOptions}, then the returned value equals the input key.
*
* @param key the input key
*
* @return the cache key after the input is transformed with the cache key function
*/
@SuppressWarnings("unchecked")
public Object getCacheKey(K key) {
return loaderOptions.cacheKeyFunction().isPresent() ?
loaderOptions.cacheKeyFunction().get().getKey(key) : key;
}
/**
* Gets the statistics associated with this data loader. These will have been gather via
* the {@link org.dataloader.stats.StatisticsCollector} passed in via {@link DataLoaderOptions#getStatisticsCollector()}
*
* @return statistics for this data loader
*/
public Statistics getStatistics() {
return stats.getStatistics();
}
}