All Downloads are FREE. Search and download functionalities are using the official Maven repository.

oracle.kv.impl.api.bulk.BulkMultiGet Maven / Gradle / Ivy

Go to download

NoSQL Database Server - supplies build and runtime support for the server (store) side of the Oracle NoSQL Database.

The newest version!
 * Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
 * This file was distributed by Oracle as part of a version of Oracle NoSQL
 * Database made available at:
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle NoSQL Database for a copy of the license and
 * additional information.

package oracle.kv.impl.api.bulk;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;

import oracle.kv.Consistency;
import oracle.kv.Depth;
import oracle.kv.Direction;
import oracle.kv.Key;
import oracle.kv.KeyRange;
import oracle.kv.KeyValueVersion;
import oracle.kv.ParallelScanIterator;
import oracle.kv.StoreIteratorConfig;
import oracle.kv.impl.api.KVStoreImpl;
import oracle.kv.impl.api.KeySerializer;
import oracle.kv.impl.api.Request;
import oracle.kv.impl.api.StoreIteratorParams;
import oracle.kv.impl.api.ops.InternalOperation;
import oracle.kv.impl.api.ops.MultiGetBatchIterate;
import oracle.kv.impl.api.ops.MultiGetBatchKeysIterate;
import oracle.kv.impl.api.ops.Result;
import oracle.kv.impl.api.ops.ResultKey;
import oracle.kv.impl.api.ops.ResultKeyValueVersion;
import oracle.kv.impl.api.parallelscan.BaseParallelScanIteratorImpl;
import oracle.kv.impl.api.parallelscan.DetailedMetricsImpl;
import oracle.kv.impl.async.IterationHandleNotifier;
import oracle.kv.impl.topo.PartitionId;
import oracle.kv.impl.topo.RepGroupId;
import oracle.kv.impl.topo.Topology;
import oracle.kv.impl.topo.TopologyUtil;
import oracle.kv.impl.util.KVThreadFactory;
import oracle.kv.stats.DetailedMetrics;


/**
 * Implementation of a bulk get storeIterator and storeKeysIterator.
 *
 * {@code BulkMultiGet.BulkGetIterator<K, V>}: Implements the bulk get
 * operation. It provides the common underpinnings for batching both rows and
 * KV pairs.
 */
public class BulkMultiGet {

    /**
     * Creates a bulk get iterator returning KeyValueVersion.
     */
    public static ParallelScanIterator
        createBulkMultiGetIterator(final KVStoreImpl storeImpl,
                                   final List> parentKeyIterators,
                                   final int batchSize,
                                   final KeyRange subRange,
                                   final Depth depth,
                                   final Consistency consistency,
                                   final long timeout,
                                   final TimeUnit timeoutUnit,
                                   final StoreIteratorConfig config) {

        /* Prohibit iteration of internal keyspace (//). */
        final KeyRange useRange =
            storeImpl.getKeySerializer().restrictRange(null, subRange);

        final StoreIteratorParams params =
            new StoreIteratorParams(Direction.UNORDERED,

        return new BulkGetIterator(
            storeImpl, parentKeyIterators, params, config,
            null /* iterationHandle */) {

            public void validateKey(Key key) {

            public Key getKey(Key key) {
                return key;

            protected InternalOperation
                generateBulkGetOp(List parentKeys, byte[] resumeKey) {

                return new MultiGetBatchIterate(parentKeys,

            protected void convertResult(Result result,
                                         List elementList) {
                final List results =
                if (results.size() == 0) {
                for (ResultKeyValueVersion entry: results) {
                    final byte[] keyBytes = entry.getKeyBytes();
                    final Key key = keySerializer.fromByteArray(keyBytes);
                    final KeyValueVersion value =

    /**
     * Creates a bulk get iterator returning Keys.
     */
    public static ParallelScanIterator createBulkMultiGetKeysIterator
        (final KVStoreImpl storeImpl,
         final List> parentKeyiterators,
         final int batchSize,
         final KeyRange subRange,
         final Depth depth,
         final Consistency consistency,
         final long timeout,
         final TimeUnit timeoutUnit,
         final StoreIteratorConfig config) {

        /* Prohibit iteration of internal keyspace (//). */
        final KeyRange useRange =
            storeImpl.getKeySerializer().restrictRange(null, subRange);

        final StoreIteratorParams params =
            new StoreIteratorParams(Direction.UNORDERED,

        return new BulkGetIterator(storeImpl, parentKeyiterators,
                                             params, config,
                                             null /* iterationHandle */) {
            public void validateKey(Key key) {

            public Key getKey(Key key) {
                return key;

            protected InternalOperation
                generateBulkGetOp(List parentKeys, byte[] resumeKey) {

                return new MultiGetBatchKeysIterate(parentKeys,

            protected void convertResult(Result result, List elementList) {
                final List byteKeyResults = result.getKeyList();

                int cnt = byteKeyResults.size();
                if (cnt == 0) {
                    assert (!result.hasMoreElements());
                for (int i = 0; i < cnt; i += 1) {
                    final byte[] entry = byteKeyResults.get(i).getKeyBytes();

    /**
     * This class implements the bulk get operation. It provides the common
     * underpinnings for batching both rows and KV pairs.
     *
     * In general, the bulk get operation works as follows:
     * 1) Multiple reader tasks read the keys supplied by the user-supplied
     * iterators; the keys are grouped by partition, sorted, and wrapped
     * into batches.
     * 2) Multiple bulk get tasks each take a key batch and perform a get batch
     * operation.
     * 3) The get batch operation retrieves the records associated with the
     * keys, and may also return their children or dependents depending on the
     * configuration of Depth for the KV API or child/ancestor tables for the
     * Table API.
     *
     * The general flow of a key entry is as follows:
     * 1) The key entries are supplied by the user-supplied iterators. Multiple
     * reader threads read the input keys in parallel.
     * 2) Each reader accumulates the key entries, along with earlier entries,
     * in a sorted tree associated with each partition.
     * 3) When the threshold associated with the partition is exceeded, the
     * leading elements in the sorted tree are assembled into a batch and placed
     * into a queue associated with the partition.
     * 4) The ShardGetStream associated with the queue takes the batch and
     * retrieves the rows associated with the keys in the batch from the store.
     * 5) Multiple ShardGetStreams perform the bulk get operation in parallel;
     * the number of streams is configurable.
     *
     * Two key parameters are used in this operation:
     * 1) Threshold for each partition batch = batchSize
     * 2) Number of shard get tasks =
     *      StoreIteratorConfig.maxConcurrentRequests > 0 ?
     *          StoreIteratorConfig.maxConcurrentRequests :
     *          MIN(#available RNs, nProcessors).
     *
     * @param <K> must be a PrimaryKey or a Key
     * @param <V> must be a Row or a KeyValueVersion
     */
    public static abstract class BulkGetIterator
            extends BaseParallelScanIteratorImpl {

         * The Key comparator used to group keys associated with a partition,
         * so that they can be sent as a contiguous batch.
        private final static Comparator KEY_BYTES_COMPARATOR =
            new Key.BytesComparator();

         * Canonical PartitionBatch object to signify EOF in the partition
         * batch queue.
        private final PartitionBatch partitionBatchEOF =
            new PartitionBatch(null, null);

         * The topology associated with the store.
        private final Topology topology;

         * A map indexed by partition id which yields the keys that are being
         * aggregated for that partition.
        private final PartitionValues pMap[];

         * Used to manage key reader threads.
        private ExecutorService readerExecutor;
        private HashMap, KeysReader> readerTasks;
        private final AtomicInteger remainingReaders = new AtomicInteger();
        private final Set getStreams =
            new HashSet();

        private final Consistency consistency;
        private final TimeUnit timeoutUnit;
        private final long timeout;

        private final Map shardMetrics;

         * Used to hold the aggregate statistics associated with this operation
        private final AggregateStatistics statistics;

        protected final KeySerializer keySerializer;

        public BulkGetIterator(KVStoreImpl store,
                               List> parentKeyIterators,
                               StoreIteratorParams params,
                               StoreIteratorConfig config,
                               IterationHandleNotifier iterHandleNotifier) {
            super(store, store.getLogger(), getRequestTimeoutMs(store, params),
                  params.getDirection(), 32 /* maxResultsBatches */,
                  true, /* prefetch */

            topology = storeImpl.getTopology();
            keySerializer = storeImpl.getKeySerializer();
            shardMetrics = new HashMap();
            statistics = new AggregateStatistics();

            /* Initialize parameters */
            consistency = params.getConsistency();
            timeout = params.getTimeout();
            timeoutUnit = params.getTimeoutUnit();

            /* Create partition values array */
            final int partitionThreshold = params.getBatchSize();
            final int nParts = topology.getPartitionMap().size();
            PartitionValues[] partitionValues =
                new BulkGetIterator.PartitionValues[nParts+1];
            pMap = partitionValues;
            for (int i = 0 ; i <= nParts; i++) {
                pMap[i] = new PartitionValues(i, partitionThreshold);

             * Create shard get tasks, the number of tasks is set to the value
             * of StoreIteratorConfig.getMaxConcurrentRequests() if it is set
             * and greater than 0, otherwise set to MIN(#available RNs,
             * nProcessors).  The shard get tasks need to be created before the
             * reader tasks start running so that the readers can insert stream
             * EOF markers.
            final int maxConcurrentRequests =
                (config != null) ? config.getMaxConcurrentRequests() : 0;
            final int RNThreads = getNumOfShardTasks();
            final int maxShardTasks =
                    (maxConcurrentRequests > 0) ?
                                    Math.min(maxConcurrentRequests, RNThreads) :

            /* Start reader threads */

             * Start shard get tasks now that all reader threads are started so
             * that it is more likely that keys will be available for the get
             * operations without blocking.
            for (final ShardGetStream task : getStreams) {

            /* Start a thread to monitor readers. */
            ExecutorService executor = Executors.newSingleThreadExecutor(
                new KVThreadFactory("BulkGetReadersMonitor", logger));
            executor.submit(new Runnable() {
                    public void run() {
                        try {
                            for (Future f: readerTasks.keySet()) {
                                int nKeys = f.get();
                        } catch (InterruptedException ie) {
                                " caught " + ie);
                        } catch (ExecutionException ee) {
                                " caught " + ee);

         * Abstract method to check the validation of key supplied by iterator.
        protected abstract void validateKey(K key);

         * Abstract method to abstract how the key is obtained.
        protected abstract Key getKey(K key);

         * Abstract method to create bulk get operation.
        protected abstract InternalOperation generateBulkGetOp
            (List parentKeys, byte[] resumeKey);

        protected boolean close(Throwable reason) {
            /*
             * Do nothing more when the superclass reports false; presumably
             * that means the iterator was already closed -- TODO confirm
             * against BaseParallelScanIteratorImpl.close.
             */
            if (!super.close(reason)) {
                return false;

            /* Cancel the reader tasks */
            for (Future f : readerTasks.keySet()) {

            /*
             * shutdownNow() returns the tasks that were queued but never
             * started; a non-empty list is reported as an unclean shutdown.
             */
            List unfinishedBusiness = readerExecutor.shutdownNow();
            if (!unfinishedBusiness.isEmpty()) {
                final int nRemainingTasks = unfinishedBusiness.size();
                           "Bulk get reader executor didn't shutdown cleanly. "+
                           "{0} tasks remaining.", nRemainingTasks);

            /* Same treatment for the executor running the shard get tasks. */
            unfinishedBusiness = getTaskExecutor().shutdownNow();
            if (!unfinishedBusiness.isEmpty()) {
                final int nRemainingTasks = unfinishedBusiness.size();
                           "Bulk get shard executor didn't shutdown cleanly. "+
                           "{0} tasks remaining.", nRemainingTasks);

            /* Emit the aggregate statistics for the whole operation. */
            logger.log(Level.INFO, statistics.toString());
            return true;

        public List getPartitionMetrics() {
            return Collections.emptyList();

        public List getShardMetrics() {
            synchronized (shardMetrics) {
                final ArrayList ret =
                    new ArrayList(shardMetrics.size());
                return ret;

        private static int getRequestTimeoutMs(KVStoreImpl storeImpl,
                                               StoreIteratorParams params) {
            /* A timeout of 0 means "not specified": use the store default. */
            final long timeOut = params.getTimeout();
            if (timeOut == 0) {
                return storeImpl.getDefaultRequestTimeoutMs();
            /*
             * Presumably converts the caller's timeout from its own unit to
             * milliseconds.
             * NOTE(review): PropUtil is not among the imports visible in this
             * file -- confirm it is imported.
             */
            final int requestTimeoutMs =
                PropUtil.durationToMillis(timeOut, params.getTimeoutUnit());
            /*
             * A request timeout larger than the store's read timeout is
             * rejected with an IllegalArgumentException.
             */
            if (requestTimeoutMs > storeImpl.getReadTimeoutMs()) {
                final String format = "Request timeout parameter: %,d ms " +
                    "exceeds socket read timeout: %,d ms";
                throw new IllegalArgumentException(
                    String.format(format, requestTimeoutMs,
            return requestTimeoutMs;

        /**
         * Create the tasks that will read batches of partition keys from
         * their respective shard. The number of tasks per shard is defined by
         * the configuration parameter: maxConcurrentRequests. There is at
         * least one task per shard to ensure that all shards are kept busy
         * during the load.
         *
         * The partitions of each shard are divided amongst its tasks to ensure
         * that no two tasks ever access the same partition.
         */
        private void createShardExecutor(int maxShardTasks) {
            final String fmt = "createShardExecutor #ShardTasks:%d, " +
                "#Shards:%d, base parallelism per shard:%d, " +
                "residual parallelism per shard:%d";

            final int nShards = topology.getRepGroupMap().size();
            int basePerShardParallelism;
            int residualPerShardParallelism;
            if (maxShardTasks > nShards) {
                basePerShardParallelism = maxShardTasks / nShards;
                residualPerShardParallelism = maxShardTasks % nShards;
            } else {
                basePerShardParallelism = 1;
                residualPerShardParallelism = 0;

  , maxShardTasks, nShards,

            final Map> map =

            for (RepGroupId rgId : topology.getRepGroupIds()) {
                final List list = map.get(rgId);
                final int nParts = list.size();
                int perShardParallelism = basePerShardParallelism;
                if (residualPerShardParallelism > 0) {

                /* Divide up the partitions amongst the tasks. */
                final int basePerTaskPartitions = nParts / perShardParallelism;
                int residualPerTaskPartitions = nParts % perShardParallelism;

                for (int i = 0; i < nParts;  ) {
                    int perTaskPartitions = basePerTaskPartitions;
                    if (residualPerTaskPartitions > 0) {

                    if (perTaskPartitions == 0) {
                        /* More parallelism than partitions in shard. */
                        break doneWithShard;

                    final List taskPartitions =
                        list.subList(i, i + perTaskPartitions);

          "Partitions:" +
                        Arrays.toString(taskPartitions.toArray()) +
                        " assigned to RG task");

                    final ShardGetStream task =
                        new ShardGetStream(rgId, taskPartitions.size());
                    for (PartitionId pid : taskPartitions) {
                        PartitionValues pv = pMap[pid.getPartitionId()];

                    i += perTaskPartitions;

         * Returns the consistency used for this operation.
        private Consistency getConsistency() {
            /*
             * The consistency field comes from the iterator params and may be
             * null; in that case fall back to the store's default.
             */
            return (consistency != null) ?
                    consistency : storeImpl.getDefaultConsistency();

        /**
         * Returns the total number of threads that can be used for bulk get.
         */
        private int getNumOfShardTasks() {
            final int useNumRepNodes;
            if (getConsistency() == Consistency.ABSOLUTE) {
                useNumRepNodes = topology.getRepGroupMap().size();
            } else {
                final int[] readZoneIds =
                useNumRepNodes =
                    TopologyUtil.getNumRepNodesForRead(topology, readZoneIds);
            if (useNumRepNodes == 0) {
                throw new IllegalStateException("Store not yet initialized");
            /*
             * The 2x will keep all RNs busy, with a request in transit to/from
             * the RN and a request being processed.
             */
            return useNumRepNodes * 2;

         * Start key reader tasks
        private void startReaderExecutor(List> parentKeyIterators) {
            final ThreadFactory threadFactory =
                new KVThreadFactory("BulkGetReaders", logger);
            /* One reader thread per user-supplied key iterator. */
            final int nReaders = parentKeyIterators.size();
            readerExecutor =
                Executors.newFixedThreadPool(nReaders, threadFactory);
            /* Maps each submitted task's Future to its KeysReader. */
            readerTasks = new HashMap, KeysReader>(nReaders);

            for (int i = 0; i < nReaders; i++) {
                final Iterator keyIterator = parentKeyIterators.get(i);
                final KeysReader kr = new KeysReader(keyIterator);
                Future future = null;
                try {
                    future = readerExecutor.submit(kr);
                /*
                 * NOTE(review): the handler body is not visible here; unless
                 * it exits the method, future is still null at the put below
                 * -- confirm.
                 */
                } catch (RejectedExecutionException ree) {
                readerTasks.put(future, kr);

         * Flush all residual values that were queued at their partitions to
         * their respective shards.
        private void flushPartitions()
            throws InterruptedException {

            for (PartitionValues pv : pMap) {
  "Flushed all partitions");

         * Log progress at one minute intervals until all the readers have
         * reached EOF
        private void logReaderProgress(final ExecutorService executor)
            throws InterruptedException {

            final long startMs = System.currentTimeMillis();
            long prevTotalRead = 0;

            while (!executor.awaitTermination(1, TimeUnit.MINUTES)) {
                final String fmt = "Reading continues. %,d values read. " +
                    "Throughput:%,d values/sec";
                final long totalRead = totalRead();
                final long throughput = (totalRead * 1000) /
                    (System.currentTimeMillis() - startMs);
                 * Log at warning level if there was no read progress and the
                 * operation appears to have stalled.
                logger.log((totalRead > prevTotalRead) ?
                    Level.INFO : Level.WARNING,
                    String.format(fmt, totalRead, throughput));
                prevTotalRead = totalRead;

         * Total entries read from all readers
        private long totalRead() {
            /*
             * Sums the per-reader counters. Each counter is a volatile int
             * read without further locking, so while readers are running the
             * result is a point-in-time snapshot.
             */
            long totalRead = 0;
            for (KeysReader kr: readerTasks.values()) {
                totalRead += kr.getReadCount();
            return totalRead;

         * Total aggregate statistics
        private void getStatInfo() {
            for (ShardGetStream sgs : getStreams) {
                statistics.batchCount += sgs.getBatchCount();
                statistics.batchQueueUnderflow += sgs.getBatchQueueUnderflow();
                statistics.batchQueueOverflow += sgs.getBatchQueueOverflow();
                if (statistics.maxBatchRequestRepeated <
                        sgs.getMaxBatchRequestRepeated()) {
                    statistics.maxBatchRequestRepeated =

         * The function can be ignored because bulk get doesn't support sorting
        protected int compare(V one, V two) {
            return 0;

         * Dedicated task used to read a specific Key stream
        private class KeysReader implements Callable {

             * The KeyStream being read
            private final Iterator keyIterator;

             * The number of keys read by this stream reader.
            private volatile int readCount = 0;

            KeysReader(Iterator keyIterator) {
                this.keyIterator = keyIterator;

            public Integer call()
                throws Exception {

                final KeySerializer serializer = storeImpl.getKeySerializer();
      "Started keys reader");
                try {
                    K k;
                    while (keyIterator.hasNext()) {
                        k =;
                        if (k == null) {
                            throw new IllegalArgumentException("The parent key" +
                                " should not be null");
                        /* Call validateKey() to check the key */

                        final Key key = getKey(k);
                        final byte[] bytes = serializer.toByteArray(key);
                        final PartitionId pid = topology.getPartitionId(bytes);

                     * Flush partitions and add EOF markers only after all keys
                     * have been generated.  Otherwise, the iterator might
                     * think it was done if it encountered EOFs and some
                     * streams hadn't been added yet.
                    if (remainingReaders.decrementAndGet() <= 0) {
                        for (ShardGetStream sgs: getStreams) {
                } catch (RuntimeException e) {
                } catch (Error e) {
                } finally {
          "Finished keys reader");
                return readCount;

            public int getReadCount() {
                return readCount;

         * Used to hold a sorted list of key byte array. The sorted list
         * ensures locality of reference during reading on the server.
        private class PartitionBatch {
            final PartitionId pid;
            final List entries;

            PartitionBatch(PartitionId pid, List entries) {
       = pid;
                this.entries = entries;

        /**
         * Reads the records for the partitions of a single shard that were
         * assigned to this stream.
         */
        public class ShardGetStream extends Stream {
             * The shard associated with this task
            private final RepGroupId rgId;

             *  The queue of batches to be processed by this task.
            private final ArrayBlockingQueue queuedBatchs;

             * The number of batches processed by this task.
            private long batchCount = 0 ;

             * The number of times this task was blocked because it did not have
             * a partition batch to execute. Large numbers of queue underflows
             * indicate that the user input streams are not providing data fast
             * enough and increasing stream parallelism could help.
            private long batchQueueUnderflow = 0 ;

             * The number of times a batch could not be inserted because there
             * was no space in the queue. Large numbers of queue overflows
             * indicate that performance could benefit from increased shard
             * parallelism.
            private long batchQueueOverflows = 0 ;

            /*
             * The maximum number of times a batch of parent keys was processed
             * repeatedly because the batch size was smaller than the total
             * size of the result set.
             */
            private int maxBatchRequestRepeated = 0;
            private int batchRequestRepeated = 0;

            private PartitionBatch currentBatch = null;
            private int resumeParentKeyIndex = -1;
            private byte[] resumeKey = null;

            ShardGetStream(RepGroupId rgId, int numTaskPartitions) {
                this.rgId = rgId;
                queuedBatchs = new ArrayBlockingQueue(
                    numTaskPartitions * 2);

            void add(PartitionBatch partBatch)
                throws InterruptedException {

                if (!queuedBatchs.offer(partBatch)) {

            protected void updateDetailedMetrics(long timeInMs,
                                                 long recordCount) {

                final String shardName = rgId.toString();
                DetailedMetricsImpl dmi;

                /* Shard Metrics. */
                synchronized (shardMetrics) {
                    dmi = shardMetrics.get(rgId);
                    if (dmi == null) {
                        dmi = new DetailedMetricsImpl
                            (shardName, timeInMs, recordCount);
                        shardMetrics.put(rgId, dmi);
      , recordCount);

            protected void setResumeKey(Result result) {
                /* The current batch was only partially answered. */
                if (result.hasMoreElements()) {
                    /*
                     * -1 means no resume position is recorded yet, so take
                     * the returned index as is. Otherwise add it to the
                     * stored index: makeReadRequest sends the sublist that
                     * starts at the stored index, so the returned index is
                     * presumably relative to that sublist -- TODO confirm.
                     */
                    if (resumeParentKeyIndex == -1) {
                        resumeParentKeyIndex = result.getResumeParentKeyIndex();
                    } else {
                        resumeParentKeyIndex +=result.getResumeParentKeyIndex();
                    resumeKey = result.getPrimaryResumeKey();
                } else {

            protected Request makeReadRequest() {
                final List keys;
                /* -1: not resuming, so the whole current batch is sent. */
                if (resumeParentKeyIndex == -1) {
                    if (currentBatch == null) {
                        /*
                         * Fetch the next queued batch; getPartitionBatch()
                         * returns null for the EOF marker or on interruption,
                         * and then no request is made.
                         */
                        currentBatch = getPartitionBatch();
                        if (currentBatch == null) {
                            return null;
                    keys = currentBatch.entries;

                } else {
                    /*
                     * Resuming a partially answered batch: send only the
                     * parent keys from the resume index onwards.
                     */
                    final List batchKeys = currentBatch.entries;
                    keys = batchKeys.subList(resumeParentKeyIndex,

                /* resumeKey is null unless a previous result set it. */
                final InternalOperation op = generateBulkGetOp(keys, resumeKey);
                return storeImpl.makeReadRequest(op,,
                                                 consistency, timeout,
                                                 timeoutUnit, null);

            protected boolean hasMoreElements(Result result) {
                /* The current batch still has results to return. */
                if (result.hasMoreElements()) {
                    return true;
                /*
                 * Current batch is exhausted: advance to the next queued
                 * batch. This call can block in getPartitionBatch() until a
                 * batch or the EOF marker arrives; null means the stream is
                 * finished.
                 */
                currentBatch = getPartitionBatch();
                return (currentBatch != null);

            public String toString() {
                return "ShardGetStream[" + rgId + "]";

            private PartitionBatch getPartitionBatch() {
                try {
                    PartitionBatch pbatch = queuedBatchs.poll();
                    if (pbatch == null) {

                        /*
                         * TODO: This method will block if the KeysReader
                         * threads have not been able to read enough keys from
                         * the iterators.  That blocking could end up blocking
                         * an async thread, which isn't good.  We should
                         * probably fix this.
                         */
                        pbatch = queuedBatchs.take();
                    if (pbatch == partitionBatchEOF) {
                        return null;
                    return pbatch;
                } catch (InterruptedException ie) {
           + " caught " + ie);
                return null;

            private void resetResumeKey() {
                resumeParentKeyIndex = -1;
                resumeKey = null;

            long getBatchCount() {
                return batchCount;

            long getBatchQueueUnderflow() {
                return batchQueueUnderflow;

            long getBatchQueueOverflow() {
                return batchQueueOverflows;

            int getMaxBatchRequestRepeated() {
                return maxBatchRequestRepeated;

            void setEOFPartitionBatch()
                throws InterruptedException {


            private void logMaxBatchRequestRepeated() {
                /* Nothing to record when the counter is zero. */
                if (batchRequestRepeated == 0) {
                /* Keep the largest repeat count seen so far for this stream. */
                if (batchRequestRepeated > maxBatchRequestRepeated) {
                    maxBatchRequestRepeated = batchRequestRepeated;
                /* Reset the running counter. */
                batchRequestRepeated = 0;

         * The values associated with a specific partition.
        private class PartitionValues {

             * The partition associated with the values.
            private final int partitionId;

            /*
             * The shard get stream designated to retrieve this partition's
             * keys from its shard.
             */
            private ShardGetStream getStream;

             * The number of entries that were actually inserted into the
             * partition.
            private long getCount = 0;

            /*
             * The number of duplicate entries that already existed in
             * the partition.
             */
            private long dupCount = 0;

             * Holds the sorted values that are waiting to be written to the
             * shard. Tried ConcurrentSkipListMap to eliminate use of
             * synchronized methods but it resulted in lower perf on
             * Nashua machines.
            private final Set keys =
                new TreeSet(KEY_BYTES_COMPARATOR);

            private final int threshold;

            PartitionValues(int pid, int partitionThreshold) {
                this.partitionId = pid;
                this.threshold = partitionThreshold;

            void setShardTask(ShardGetStream stream) {
                getStream = stream;

            synchronized void put(byte[] key)
                throws InterruptedException {

                if (keys.contains(key)) {

             * Flushes keys set to batch queue of shard task if needed.
             * A flush is typically done if the number of the keys exceeds the
             * threshold number.
             * @param force if true the partition is flushed even if the
             * threshold has not been reached
            void flush(boolean force)
                throws InterruptedException {

                final int maxRequestSize = 1024 * 1024;

                final String fmt =
                    "Queued Partition %d flushed. Batch size %,d; Total:%,d;" +
                        " Number of keys:%,d; request size:%,d" ;
                int numKeys = keys.size();
                while ((force && numKeys > 0) || (numKeys >= threshold)) {
                    int getBatchCount = 0;
                    int requestSize = 0;
                    final List le = new ArrayList();
                    synchronized (this) {
                        for (Iterator iter = keys.iterator();
                            iter.hasNext();) {

                            final byte[] kvBytes =;
                            requestSize += kvBytes.length;
                            if (requestSize > maxRequestSize) {
                        getCount += getBatchCount;
                    /* Can block, do it outside sync block */
                    final PartitionBatch batch =
                        new PartitionBatch(new PartitionId(partitionId),le);
                    logger.fine(String.format(fmt, partitionId,
                                              getBatchCount, getCount,
                                              keys.size(), requestSize));

         * Represents the aggregate statistics across all keyStreams.
        private class AggregateStatistics {

            private long batchCount;
            private long batchQueueUnderflow;
            private long batchQueueOverflow;
            private int maxBatchRequestRepeated;

            /*
             * The total number of entries read from all the streams supplied
             * to the operation; it may be less than the total number of keys
             * supplied by the iterators if they contain duplicate keys.
             */
            private long readCount ;

            public void aggregate(int entriesRead) {
                readCount += entriesRead;

             * The total number of V actually retrieved from the store
             * as a result of the operation.
            public long totalGetCount() {
                /*
                 * Sums the scan record count of every per-shard metrics
                 * entry.
                 * NOTE(review): shardMetrics is iterated here without the
                 * synchronized (shardMetrics) guard that getShardMetrics()
                 * and updateDetailedMetrics() use -- confirm no stream can
                 * still be updating the map when this runs.
                 */
                long total = 0;
                for (Entry entry:
                     shardMetrics.entrySet()) {
                    total += entry.getValue().getScanRecordCount();
                return total;

            private long getTotalDupCount() {
                long total = 0;
                for (PartitionValues pv: pMap)  {
                    total += pv.dupCount;
                return total;

            public String toString() {
                final String fmt =
                    "%,d key streams; %,d shard streams; " +
                    "%,d keys read; %,d duplicated; " +
                    "%,d get; %,d batches; " +
                    "%,d batch queue underflows; " +
                    "%,d batch queue overflows; "  +
                    "%,d av batch size; " +
                    "%,d max batch request repeated;";
                final long getCount = totalGetCount();
                return String.format(fmt, readerTasks.size(), getStreams.size(),
                                     readCount, getTotalDupCount(),
                                     getCount, batchCount,
                                     ((batchCount > 0) ?
                                         (getCount / batchCount) : 0),

© 2015 - 2025 Weber Informatics LLC | Privacy Policy