package com.wavefront.fdb.utils;

import com.apple.foundationdb.KeySelector;
import com.apple.foundationdb.KeyValue;
import com.apple.foundationdb.StreamingMode;

import javax.annotation.Nullable;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionStage;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Supplier;

import static com.apple.foundationdb.KeySelector.firstGreaterOrEqual;
import static com.apple.foundationdb.KeySelector.lastLessThan;
import static com.apple.foundationdb.tuple.ByteArrayUtil.strinc;

/**
 * Helper class to collect the results of a large scan (i.e., a key range scan that cannot be
 * fetched in a single transaction due to FDB transaction deadline limitations). Under the hood,
 * it uses {@link BatchReader}, which means the scan is only piece-wise transactionally consistent
 * (or perhaps just consider these non-snapshot scans altogether). For scanning data that is
 * written in batches, it doesn't truly matter to us whether all points, for instance, are fetched
 * from the same snapshot.
 *
 * @author Clement Pang ([email protected]).
 */
public abstract class CollectKeyValuesUntilDone {

  /**
   * Scan a large range of keys, instantiate a data structure to collect key values (supplier),
   * accumulate results (accumulator) and then finalize it into a data structure to return.
   *
   * @param prefix      Scan prefix. Keys returned must start with this byte prefix.
   * @param batchSize   Batch size (e.g. 1000). If this value is too large, the transaction may
   *                    time-out.
   * @param metrics     Scan metrics.
   * @param batchReader {@link BatchReader} to execute scans.
   * @param supplier    Supplier of the data structure for the accumulator to work on.
   * @param accumulator Accumulator that takes in the supplier's data structure and a list of key
   *                    values.
   * @param executor    Executor for callbacks.
   * @param <T>         The returned object.
   * @return CompletableFuture of T that is completed (exceptionally or successfully) based on the
   * result of the finalizer. Any errors during iteration or runtime errors in the supplied lambdas
   * will also complete this future exceptionally.
   */
  public static <T> CompletableFuture<T> collect(
      byte[] prefix, int batchSize,
      @Nullable Metrics metrics, BatchReader batchReader,
      Supplier<T> supplier, BiFunction<T, List<KeyValue>, T> accumulator, Executor executor,
      boolean batchPriority) {
    return collect(prefix, batchSize, metrics, batchReader, supplier, accumulator,
        Function.identity(), executor, batchPriority);
  }
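
  // Usage sketch (illustrative only, not part of the original class): collecting every KeyValue
  // under a prefix into a single List via the overload above. The names "prefixBytes",
  // "batchReader" and "executor" are assumptions supplied by the caller.
  //
  //   CompletableFuture<List<KeyValue>> all = CollectKeyValuesUntilDone.<List<KeyValue>>collect(
  //       prefixBytes, 500, null /* no metrics */, batchReader,
  //       ArrayList::new,
  //       (list, kvs) -> { list.addAll(kvs); return list; },
  //       executor, false);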

  /**
   * Scan a large range of keys, instantiate a data structure to collect key values (supplier),
   * accumulate results (accumulator) and then finalize it into a data structure to return.
   *
   * @param prefix      Scan prefix. Keys returned must start with this byte prefix.
   * @param batchSize   Batch size (e.g. 1000). If this value is too large, the transaction may
   *                    time-out.
   * @param metrics     Scan metrics.
   * @param batchReader {@link BatchReader} to execute scans.
   * @param supplier    Supplier of the data structure for the accumulator to work on.
   * @param accumulator Accumulator that takes in the supplier's data structure and a list of key
   *                    values.
   * @param finalizer   Finalizer that prepares the result for setting the returned {@link
   *                    CompletableFuture}
   * @param executor    Executor for callbacks.
   * @param <T>         The returned object.
   * @param <U>         The intermediate object used for accumulation of results.
   * @return CompletableFuture of T that is completed (exceptionally or successfully) based on the
   * result of the finalizer. Any errors during iteration or runtime errors in the supplied lambdas
   * will also complete this future exceptionally.
   */
  public static <T, U> CompletableFuture<T> collect(
      byte[] prefix, int batchSize,
      @Nullable Metrics metrics, BatchReader batchReader,
      Supplier<U> supplier, BiFunction<U, List<KeyValue>, U> accumulator,
      Function<U, T> finalizer, Executor executor, boolean batchPriority) {
    return collect(firstGreaterOrEqual(prefix), lastLessThan(strinc(prefix)).add(1),
        batchSize, metrics, batchReader, supplier, accumulator, finalizer, executor, batchPriority);
  }
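
  // Usage sketch (illustrative only): counting the keys under a prefix, using the finalizer
  // overload above to turn a mutable long[] accumulator into the returned Long. The names
  // "prefixBytes", "batchReader" and "executor" are assumptions supplied by the caller.
  //
  //   CompletableFuture<Long> count = CollectKeyValuesUntilDone.<Long, long[]>collect(
  //       prefixBytes, 1000, null, batchReader,
  //       () -> new long[]{0L},
  //       (acc, kvs) -> { acc[0] += kvs.size(); return acc; },
  //       acc -> acc[0],
  //       executor, false);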

  /**
   * Scan a large range of keys, instantiate a data structure to collect key values (supplier),
   * accumulate results (accumulator) and then finalize it into a data structure to return.
   *
   * @param startRange  Start key range.
   * @param endRange    End key range.
   * @param batchSize   Batch size (e.g. 1000). If this value is too large, the transaction may
   *                    time-out.
   * @param metrics     Scan metrics.
   * @param batchReader {@link BatchReader} to execute scans.
   * @param supplier    Supplier of the data structure for the accumulator to work on.
   * @param accumulator Accumulator that takes in the supplier's data structure and a list of key
   *                    values.
   * @param executor    Executor for callbacks.
   * @param <T>         The returned object.
   * @return CompletableFuture of T that is completed (exceptionally or successfully) based on the
   * result of the finalizer. Any errors during iteration or runtime errors in the supplied lambdas
   * will also complete this future exceptionally.
   */
  public static <T> CompletableFuture<T> collect(
      KeySelector startRange, KeySelector endRange, int batchSize,
      @Nullable Metrics metrics, BatchReader batchReader,
      Supplier<T> supplier, BiFunction<T, List<KeyValue>, T> accumulator, Executor executor,
      boolean batchPriority) {
    return collect(startRange, endRange, batchSize, metrics, batchReader, supplier, accumulator,
        i -> i, executor, batchPriority);
  }
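
  // Usage sketch (illustrative only): scanning an explicit KeySelector range rather than a
  // prefix. The names "startKey", "endKey", "batchReader" and "executor" are assumptions.
  //
  //   CompletableFuture<List<KeyValue>> rows = CollectKeyValuesUntilDone.<List<KeyValue>>collect(
  //       firstGreaterOrEqual(startKey), firstGreaterOrEqual(endKey), 1000,
  //       null, batchReader,
  //       ArrayList::new,
  //       (list, kvs) -> { list.addAll(kvs); return list; },
  //       executor, false);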

  /**
   * Scan a large range of keys, instantiate a data structure to collect key values (supplier),
   * accumulate results (accumulator) and then finalize it into a data structure to return.
   *
   * @param <T>           The returned object.
   * @param <U>           The intermediate object used for accumulation of results.
   * @param startRange    Start key range.
   * @param endRange      End key range.
   * @param batchSize     Batch size (e.g. 1000). If this value is too large, the transaction may
   *                      time-out.
   * @param metrics       Scan metrics.
   * @param batchReader   {@link BatchReader} to execute scans.
   * @param supplier      Supplier of the data structure for the accumulator to work on.
   * @param accumulator   Accumulator that takes in the supplier's data structure and a list of key
   *                      values.
   * @param finalizer     Finalizer that prepares the result for setting the returned {@link
   *                      CompletableFuture}
   * @param executor      Executor for callbacks.
   * @param batchPriority Batch priority (for FDB). Unused right now.
   * @return CompletableFuture of T that is completed (exceptionally or successfully) based on the
   * result of the finalizer. Any errors during iteration or runtime errors in the supplied lambdas
   * will also complete this future exceptionally.
   */
  public static <T, U> CompletableFuture<T> collect(
      KeySelector startRange, KeySelector endRange, int batchSize,
      @Nullable Metrics metrics, BatchReader batchReader,
      Supplier<U> supplier, BiFunction<U, List<KeyValue>, U> accumulator,
      Function<U, T> finalizer, Executor executor,
      @SuppressWarnings("unused") boolean batchPriority) {
    AtomicReference<U> intermediate = new AtomicReference<>(supplier.get());
    AtomicLong start = new AtomicLong(System.currentTimeMillis());
    if (metrics != null) {
      metrics.scanIssued();
    }
    return batchReader.getRangeAsync(tx ->
        tx.getRange(startRange, endRange, batchSize, false, StreamingMode.WANT_ALL)).
        thenApply(keyValues -> {
          if (metrics != null) {
            long elapsed = System.currentTimeMillis() - start.get();
            metrics.keyValuesScanned(keyValues.size(), elapsed);
          }
          return keyValues;
        }).thenComposeAsync(
        new Function<List<KeyValue>, CompletionStage<T>>() {
          @Override
          public CompletionStage<T> apply(List<KeyValue> keyValues) {
            CompletableFuture<List<KeyValue>> nextBatch = null;
            boolean done = keyValues.size() < batchSize;
            if (!done) {
              if (metrics != null) {
                start.set(System.currentTimeMillis());
                metrics.scanIssued();
              }
              KeyValue lastKeyValue = keyValues.get(keyValues.size() - 1);
              nextBatch = batchReader.getRangeAsync(
                  tx -> tx.getRange(KeySelector.firstGreaterThan(lastKeyValue.getKey()),
                      endRange, batchSize, false, StreamingMode.WANT_ALL)).
                  thenApply(kvs -> {
                    if (metrics != null) {
                      long elapsed = System.currentTimeMillis() - start.get();
                      metrics.keyValuesScanned(kvs.size(), elapsed);
                    }
                    return kvs;
                  });
            }
            if (!keyValues.isEmpty()) {
              intermediate.set(accumulator.apply(intermediate.get(), keyValues));
            }
            if (done) {
              return CompletableFuture.completedFuture(finalizer.apply(intermediate.get()));
            }
            return nextBatch.thenComposeAsync(this, executor);
          }
        }, executor);
  }

  /**
   * Interface to handle generating metrics for the operations.
   */
  public interface Metrics {
    default void scanIssued() {
    }

    default void keyValuesScanned(int size, long millisecondsElapsed) {
    }
  }
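
  // Example Metrics implementation (illustrative sketch, not part of the original class): a
  // simple recorder an application might pass in to track how many scans were issued and how
  // quickly key-values came back.
  //
  //   Metrics metrics = new Metrics() {
  //     private final AtomicLong scansIssued = new AtomicLong();
  //     @Override public void scanIssued() { scansIssued.incrementAndGet(); }
  //     @Override public void keyValuesScanned(int size, long millisecondsElapsed) {
  //       System.out.println(size + " key-values scanned in " + millisecondsElapsed + "ms");
  //     }
  //   };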
}