All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dataloader.DataLoader Maven / Gradle / Ivy

There is a newer version: 2022-09-12T23-25-35-08559ba
Show newest version
/*
 * Copyright (c) 2016 The original author or authors
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * and Apache License v2.0 which accompanies this distribution.
 *
 *      The Eclipse Public License is available at
 *      http://www.eclipse.org/legal/epl-v10.html
 *
 *      The Apache License v2.0 is available at
 *      http://www.opensource.org/licenses/apache2.0.php
 *
 * You may elect to redistribute this code under either of these licenses.
 */

package org.dataloader;

import org.dataloader.impl.CompletableFutureKit;

import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;

import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static org.dataloader.impl.Assertions.assertState;
import static org.dataloader.impl.Assertions.nonNull;

/**
 * Data loader is a utility class that allows batch loading of data that is identified by a set of unique keys. For
 * each key that is loaded a separate {@link CompletableFuture} is returned, that completes as the batch function completes.
 * 

* With batching enabled the execution will start after calling {@link DataLoader#dispatch()}, causing the queue of * loaded keys to be sent to the batch function, clears the queue, and returns a promise to the values. *

* As batch functions are executed the resulting futures are cached using a cache implementation of choice, so they * will only execute once. Individual cache keys can be cleared, so they will be re-fetched when referred to again. * It is also possible to clear the cache entirely, and prime it with values before they are used. *

* Both caching and batching can be disabled. Configuration of the data loader is done by providing a * {@link DataLoaderOptions} instance on creation. * * @param type parameter indicating the type of the data load keys * @param type parameter indicating the type of the data that is returned * * @author Arnold Schrijver * @author Brad Baker */ public class DataLoader { private final BatchLoader batchLoadFunction; private final DataLoaderOptions loaderOptions; private final CacheMap> futureCache; private final Map> loaderQueue; /** * Creates a new data loader with the provided batch load function, and default options. * * @param batchLoadFunction the batch load function to use */ public DataLoader(BatchLoader batchLoadFunction) { this(batchLoadFunction, null); } /** * Creates a new data loader with the provided batch load function and options. * * @param batchLoadFunction the batch load function to use * @param options the batch load options */ public DataLoader(BatchLoader batchLoadFunction, DataLoaderOptions options) { this.batchLoadFunction = nonNull(batchLoadFunction); this.loaderOptions = options == null ? new DataLoaderOptions() : options; this.futureCache = determineCacheMap(loaderOptions); // order of keys matter in data loader this.loaderQueue = new LinkedHashMap<>(); } @SuppressWarnings("unchecked") private CacheMap> determineCacheMap(DataLoaderOptions loaderOptions) { return loaderOptions.cacheMap().isPresent() ? (CacheMap>) loaderOptions.cacheMap().get() : CacheMap.simpleMap(); } /** * Requests to load the data with the specified key asynchronously, and returns a future of the resulting value. *

* If batching is enabled (the default), you'll have to call {@link DataLoader#dispatch()} at a later stage to * start batch execution. If you forget this call the future will never be completed (unless already completed, * and returned from cache). * * @param key the key to load * * @return the future of the value */ public CompletableFuture load(K key) { Object cacheKey = getCacheKey(nonNull(key)); synchronized (futureCache) { if (loaderOptions.cachingEnabled() && futureCache.containsKey(cacheKey)) { return futureCache.get(cacheKey); } } CompletableFuture future = new CompletableFuture<>(); if (loaderOptions.batchingEnabled()) { synchronized (loaderQueue) { loaderQueue.put(key, future); } } else { // immediate execution of batch function CompletableFuture> batchedLoad = batchLoadFunction .load(singletonList(key)) .toCompletableFuture(); future = batchedLoad .thenApply(list -> list.get(0)); } if (loaderOptions.cachingEnabled()) { synchronized (futureCache) { futureCache.set(cacheKey, future); } } return future; } /** * Requests to load the list of data provided by the specified keys asynchronously, and returns a composite future * of the resulting values. *

* If batching is enabled (the default), you'll have to call {@link DataLoader#dispatch()} at a later stage to * start batch execution. If you forget this call the future will never be completed (unless already completed, * and returned from cache). * * @param keys the list of keys to load * * @return the composite future of the list of values */ public CompletableFuture> loadMany(List keys) { synchronized (loaderQueue) { List> collect = keys.stream() .map(this::load) .collect(Collectors.toList()); return CompletableFutureKit.allOf(collect); } } /** * Dispatches the queued load requests to the batch execution function and returns a promise of the result. *

* If batching is disabled, or there are no queued requests, then a succeeded promise is returned. * * @return the promise of the queued load requests */ public CompletableFuture> dispatch() { // // we copy the pre-loaded set of futures ready for dispatch final List keys = new ArrayList<>(); final List> queuedFutures = new ArrayList<>(); synchronized (loaderQueue) { loaderQueue.forEach((key, future) -> { keys.add(key); queuedFutures.add(future); }); loaderQueue.clear(); } if (!loaderOptions.batchingEnabled() || keys.size() == 0) { return CompletableFuture.completedFuture(emptyList()); } // // order of keys -> values matter in data loader hence the use of linked hash map // // See https://github.com/facebook/dataloader/blob/master/README.md for more details // // // when the promised list of values completes, we transfer the values into // the previously cached future objects that the client already has been given // via calls to load("foo") and loadMany(["foo","bar"]) // int maxBatchSize = loaderOptions.maxBatchSize(); if (maxBatchSize > 0 && maxBatchSize < keys.size()) { return sliceIntoBatchesOfBatches(keys, queuedFutures, maxBatchSize); } else { return dispatchQueueBatch(keys, queuedFutures); } } private CompletableFuture> sliceIntoBatchesOfBatches(List keys, List> queuedFutures, int maxBatchSize) { // the number of keys is > than what the batch loader function can accept // so make multiple calls to the loader List>> allBatches = new ArrayList<>(); int len = keys.size(); int batchCount = (int) Math.ceil(len / (double) maxBatchSize); for (int i = 0; i < batchCount; i++) { int fromIndex = i * maxBatchSize; int toIndex = Math.min((i + 1) * maxBatchSize, len); List subKeys = keys.subList(fromIndex, toIndex); List> subFutures = queuedFutures.subList(fromIndex, toIndex); allBatches.add(dispatchQueueBatch(subKeys, subFutures)); } // // now reassemble all the futures into one that is the complete set of results return CompletableFuture.allOf(allBatches.toArray(new CompletableFuture[allBatches.size()])) .thenApply(v -> allBatches.stream() .map(CompletableFuture::join) .flatMap(Collection::stream) .collect(Collectors.toList())); } private CompletableFuture> dispatchQueueBatch(List keys, List> queuedFutures) { return batchLoadFunction.load(keys) .toCompletableFuture() .thenApply(values -> { assertState(keys.size() == values.size(), "The size of the promised values MUST be the same size as the key list"); for (int idx = 0; idx < queuedFutures.size(); idx++) { Object value = values.get(idx); CompletableFuture future = queuedFutures.get(idx); if (value instanceof Throwable) { future.completeExceptionally((Throwable) value); } else { @SuppressWarnings("unchecked") V val = (V) value; future.complete(val); } } return values; }).exceptionally(ex -> { for (int idx = 0; idx < queuedFutures.size(); idx++) { K key = keys.get(idx); CompletableFuture future = queuedFutures.get(idx); future.completeExceptionally(ex); // clear any cached view of this key clear(key); } return emptyList(); }); } /** * Normally {@link #dispatch()} is an asynchronous operation but this version will 'join' on the * results if dispatch and wait for them to complete. If the {@link CompletableFuture} callbacks make more * calls to this data loader then the {@link #dispatchDepth()} will be > 0 and this method will loop * around and wait for any other extra batch loads to occur. * * @return the list of all results when the {@link #dispatchDepth()} reached 0 */ public List dispatchAndJoin() { List results = new ArrayList<>(); List joinedResults = dispatch().join(); results.addAll(joinedResults); while (this.dispatchDepth() > 0) { joinedResults = dispatch().join(); results.addAll(joinedResults); } return results; } /** * @return the depth of the batched key loads that need to be dispatched */ public int dispatchDepth() { synchronized (loaderQueue) { return loaderQueue.size(); } } /** * Clears the future with the specified key from the cache, if caching is enabled, so it will be re-fetched * on the next load request. * * @param key the key to remove * * @return the data loader for fluent coding */ public DataLoader clear(K key) { Object cacheKey = getCacheKey(key); synchronized (futureCache) { futureCache.delete(cacheKey); } return this; } /** * Clears the entire cache map of the loader. * * @return the data loader for fluent coding */ public DataLoader clearAll() { synchronized (futureCache) { futureCache.clear(); } return this; } /** * Primes the cache with the given key and value. * * @param key the key * @param value the value * * @return the data loader for fluent coding */ public DataLoader prime(K key, V value) { Object cacheKey = getCacheKey(key); synchronized (futureCache) { if (!futureCache.containsKey(cacheKey)) { futureCache.set(cacheKey, CompletableFuture.completedFuture(value)); } } return this; } /** * Primes the cache with the given key and error. * * @param key the key * @param error the exception to prime instead of a value * * @return the data loader for fluent coding */ public DataLoader prime(K key, Exception error) { Object cacheKey = getCacheKey(key); if (!futureCache.containsKey(cacheKey)) { futureCache.set(cacheKey, CompletableFutureKit.failedFuture(error)); } return this; } /** * Gets the object that is used in the internal cache map as key, by applying the cache key function to * the provided key. *

* If no cache key function is present in {@link DataLoaderOptions}, then the returned value equals the input key. * * @param key the input key * * @return the cache key after the input is transformed with the cache key function */ @SuppressWarnings("unchecked") public Object getCacheKey(K key) { return loaderOptions.cacheKeyFunction().isPresent() ? loaderOptions.cacheKeyFunction().get().getKey(key) : key; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy