package org.infinispan.iteration.impl;
import org.infinispan.AdvancedCache;
import org.infinispan.commands.CommandsFactory;
import org.infinispan.commons.CacheException;
import org.infinispan.commons.util.CloseableIterator;
import org.infinispan.commons.util.CollectionFactory;
import org.infinispan.commons.util.concurrent.ParallelIterableMap;
import org.infinispan.container.entries.CacheEntry;
import org.infinispan.container.entries.InternalCacheEntry;
import org.infinispan.context.Flag;
import org.infinispan.distribution.DistributionManager;
import org.infinispan.distribution.ch.ConsistentHash;
import org.infinispan.factories.annotations.ComponentName;
import org.infinispan.factories.annotations.Inject;
import org.infinispan.factories.annotations.Start;
import org.infinispan.filter.CollectionKeyFilter;
import org.infinispan.filter.CompositeKeyFilter;
import org.infinispan.filter.CompositeKeyValueFilter;
import org.infinispan.filter.KeyFilter;
import org.infinispan.filter.KeyFilterAsKeyValueFilter;
import org.infinispan.filter.KeyValueFilter;
import org.infinispan.filter.KeyValueFilterAsKeyFilter;
import org.infinispan.filter.KeyValueFilterConverter;
import org.infinispan.lifecycle.ComponentStatus;
import org.infinispan.filter.Converter;
import org.infinispan.notifications.Listener;
import org.infinispan.notifications.cachelistener.annotation.DataRehashed;
import org.infinispan.notifications.cachelistener.annotation.TopologyChanged;
import org.infinispan.notifications.cachelistener.event.DataRehashedEvent;
import org.infinispan.notifications.cachelistener.event.TopologyChangedEvent;
import org.infinispan.persistence.manager.PersistenceManager;
import org.infinispan.remoting.responses.ExceptionResponse;
import org.infinispan.remoting.responses.Response;
import org.infinispan.remoting.rpc.ResponseMode;
import org.infinispan.remoting.rpc.RpcManager;
import org.infinispan.remoting.rpc.RpcOptions;
import org.infinispan.remoting.transport.Address;
import org.infinispan.remoting.transport.jgroups.SuspectException;
import org.infinispan.util.concurrent.ConcurrentHashSet;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.atomic.AtomicReferenceArray;
import static org.infinispan.factories.KnownComponentNames.REMOTE_COMMAND_EXECUTOR;
/**
* This is an entry retriever designed to retrieve values from a distributed cache. It requests entries by segment
* so that, when a rehash occurs, only the missing segments have to be requested again.
* When a new entry retriever is acquired it first determines the remote node that owns the most segments and sends
* it a request asynchronously. Another thread is also spawned off to process the local data asynchronously. When
* either finishes (the remote node sends its response), the entries found are processed and all segments that were
* marked as completed are completed. For a remote invocation a new remote request is then sent to the node that now
* owns the most remaining segments. For a local invocation the segments are completed and processing stops, unless
* a rehash has caused this node to gain new local segments. If a rehash occurs while a remote node is processing,
* the affected segments are automatically marked as suspect so they are not completed.
* Values retrieved for suspect segments are still sent to the iterator and it is noted that they were raised. When
* these segments are requested again the noted keys are sent along to reduce the value payload size.
*
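* <p>A rough usage sketch (illustrative only; the {@code entryRetriever} reference and the use of {@code null}
* for the filter, converter, flags and listener are assumptions, not part of this class):
* <pre>{@code
* CloseableIterator<CacheEntry<Object, Object>> it =
*       entryRetriever.retrieveEntries(null, null, null, null);
* try {
*    while (it.hasNext()) {
*       CacheEntry<Object, Object> entry = it.next();
*       // consume entry.getKey() / entry.getValue()
*    }
* } finally {
*    it.close();
* }
* }</pre>
*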
* @author wburns
* @since 7.0
*/
@Listener
public class DistributedEntryRetriever<K, V> extends LocalEntryRetriever<K, V> {
private final AtomicReference<ConsistentHash> currentHash = new AtomicReference<ConsistentHash>();
private DistributionManager distributionManager;
private PersistenceManager persistenceManager;
private CommandsFactory commandsFactory;
private Address localAddress;
private RpcManager rpcManager;
private ExecutorService remoteExecutorService;
class IterationStatus<C> {
final DistributedItr<C> ongoingIterator;
final SegmentListener segmentListener;
final KeyValueFilter<? super K, ? super V> filter;
final Converter<? super K, ? super V, ? extends C> converter;
final Set<Flag> flags;
final AtomicReferenceArray<Set<K>> processedKeys;
final AtomicReference<Address> awaitingResponseFrom = new AtomicReference<>();
final AtomicReference<LocalStatus> localRunning = new AtomicReference<>(LocalStatus.IDLE);
public IterationStatus(DistributedItr<C> ongoingIterator, SegmentListener segmentListener,
KeyValueFilter<? super K, ? super V> filter,
Converter<? super K, ? super V, ? extends C> converter,
Set<Flag> flags, AtomicReferenceArray<Set<K>> processedKeys) {
this.ongoingIterator = ongoingIterator;
this.segmentListener = segmentListener;
this.filter = filter;
this.converter = converter;
this.flags = flags;
this.processedKeys = processedKeys;
}
}
private Map<UUID, IterationStatus<? extends Object>> iteratorDetails = CollectionFactory.makeConcurrentMap();
// This map keeps track of a listener when it is provided; this is useful to let the caller know when a segment is
// completed so they can do additional optimizations. This is used in both local and remote iteration processing.
private ConcurrentMap<UUID, SegmentChangeListener> changeListener = CollectionFactory.makeConcurrentMap();
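// Tracks where the local iteration thread is for a given request (see IterationStatus.localRunning). The intended
// transitions, as implemented by updatedLocalAndRun and shouldRepeatApplication below, are: IDLE -> RUNNING when a
// local retrieval is kicked off; RUNNING -> REPEAT when new local segments show up while a retrieval is still in
// progress; REPEAT -> RUNNING when the running thread picks up that extra work; RUNNING -> IDLE when it finishes
// with nothing left to repeat.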
private enum LocalStatus {
RUNNING,
REPEAT,
IDLE
}
public DistributedEntryRetriever(int batchSize, long timeout, TimeUnit unit) {
super(batchSize, timeout, unit);
}
/**
* We need to listen to data rehash events in case data moves while we are iterating over it. This is important
* both for the originator of the entry retriever request and for remote nodes. If a rehash causes this node to
* lose a segment while something is iterating over the data container looking for values of that segment, we
* can't guarantee that all of the segment's data has been seen, so we must mark that segment as suspect and
* subsequently request it again from the new owner later.
* @param event The data rehash event
*/
@DataRehashed
public void dataRehashed(DataRehashedEvent event) {
ConsistentHash startHash = event.getConsistentHashAtStart();
ConsistentHash endHash = event.getConsistentHashAtEnd();
boolean trace = log.isTraceEnabled();
if (event.isPre() && startHash != null && endHash != null) {
log.tracef("Data rehash occurring startHash: %s and endHash: %s", startHash, endHash);
if (!changeListener.isEmpty()) {
if (trace) {
log.tracef("Previous segments %s ", startHash.getPrimarySegmentsForOwner(localAddress));
log.tracef("After segments %s ", endHash.getPrimarySegmentsForOwner(localAddress));
}
// we don't care about newly added segments, since that means our run wouldn't include them anyways
Set beforeSegments = new HashSet(startHash.getPrimarySegmentsForOwner(localAddress));
// Now any that were there before but aren't there now should be added - we don't care about new segments
// since our current request shouldn't be working on it - it will have to retrieve it later
beforeSegments.removeAll(endHash.getPrimarySegmentsForOwner(localAddress));
if (!beforeSegments.isEmpty()) {
// We have to make sure all current listeners get the newest hashes updated. This has to occur for
// new nodes and nodes leaving as the hash segments will change in both cases.
for (Map.Entry<UUID, SegmentChangeListener> entry : changeListener.entrySet()) {
if (trace) {
log.tracef("Notifying %s through SegmentChangeListener", entry.getKey());
}
entry.getValue().changedSegments(beforeSegments);
}
} else if (trace) {
log.tracef("No segments have been removed from data rehash, no notification required");
}
}
}
}
/**
* We need to listen for topology change events. This is important for the originator so that, when a node goes
* down, it knows it must send the new request to the next remote node. Also, if the originator has acquired some
* of the segments itself due to the topology change, it starts processing those segments locally.
* @param event The topology change event
*/
@TopologyChanged
public void topologyChanged(TopologyChangedEvent event) {
if (event.isPre()) {
ConsistentHash beforeHash = event.getConsistentHashAtStart();
ConsistentHash afterHash = event.getConsistentHashAtEnd();
currentHash.set(afterHash);
boolean trace = log.isTraceEnabled();
if (beforeHash != null && afterHash != null) {
if (trace) {
log.tracef("Rehash hashes before %s after %s", beforeHash, afterHash);
}
Set leavers = new HashSet(beforeHash.getMembers());
leavers.removeAll(afterHash.getMembers());
if (!leavers.isEmpty() && trace) {
log.tracef("Found leavers are %s", leavers);
}
for (Map.Entry<UUID, IterationStatus<? extends Object>> details : iteratorDetails.entrySet()) {
UUID identifier = details.getKey();
final IterationStatus<? extends Object> status = details.getValue();
Set<Integer> remoteSegments = findMissingRemoteSegments(status.processedKeys, afterHash);
if (!remoteSegments.isEmpty()) {
Map.Entry<Address, Set<Integer>> route = findOptimalRoute(remoteSegments, afterHash);
boolean sendRequest;
AtomicReference<Address> awaitingResponsefrom = status.awaitingResponseFrom;
Address waitingFor = awaitingResponsefrom.get();
// If the node we are waiting for a response from has gone down we have to resubmit it - note we just
// call sendRequest without checking awaitingResponseFrom
if (sendRequest = leavers.contains(waitingFor)) {
if (trace) {
log.tracef("Resending new segment request %s for identifier %s since node %s has gone down",
route.getValue(), identifier, waitingFor);
}
} else if (sendRequest = (waitingFor == null && awaitingResponsefrom.compareAndSet(null, route.getKey()))) {
// This clause is in case if we finished all remote segment retrievals and now we need to send
// a new one due to rehash
if (trace) {
log.tracef("There is no pending remote request for identifier %s, sending new one for segments %s",
identifier, route.getValue());
}
}
if (sendRequest) {
if (status.ongoingIterator != null) {
// We don't have to call the eventuallySendRequest, because if the node we are sending to
// is now gone we will get another topology update and retry again - also this is async
// so we aren't blocking during an update
sendRequest(false, route, identifier, status);
} else {
// Just in case if we did the putIfAbsent to free up reference if the iterator was shutdown
awaitingResponsefrom.set(null);
if (trace) {
log.tracef("Not sending request since iterator has been closed for identifier %s", identifier);
}
}
}
} else {
// If we get in here it means that all remaining segments are local - so we aren't waiting
// for a response any longer
details.getValue().awaitingResponseFrom.set(null);
}
Set processSegments = findMissingLocalSegments(status.processedKeys, afterHash);
if (!processSegments.isEmpty()) {
if (trace) {
log.tracef("Rehash caused our local node to acquire new segments %s for iteration %s processing",
processSegments, identifier);
}
startRetrievingValuesLocal(identifier, processSegments, status, new SegmentBatchHandler<K, Object>() {
@Override
public void handleBatch(UUID identifier, boolean complete, Set<Integer> completedSegments, Set<Integer> inDoubtSegments, Collection<CacheEntry<K, Object>> entries) {
processData(identifier, localAddress, completedSegments, inDoubtSegments, entries);
}
@Override
public void handleException(CacheException e) {
status.ongoingIterator.close(e);
}
});
}
}
}
}
}
@Inject
public void initialize(DistributionManager distributionManager,
PersistenceManager persistenceManager, CommandsFactory commandsFactory,
RpcManager rpcManager,
@ComponentName(REMOTE_COMMAND_EXECUTOR) ExecutorService remoteExecutorService) {
this.distributionManager = distributionManager;
this.persistenceManager = persistenceManager;
this.commandsFactory = commandsFactory;
this.rpcManager = rpcManager;
this.remoteExecutorService = remoteExecutorService;
}
@Start
public void start() {
super.start();
cache.addListener(this);
localAddress = rpcManager.getAddress();
}
@Override
public <C> void startRetrievingValues(final UUID identifier, final Address origin, final Set<Integer> segments,
KeyValueFilter<? super K, ? super V> filter,
Converter<? super K, ? super V, C> converter, Set<Flag> flags) {
if (log.isTraceEnabled()) {
log.tracef("Received entry request for %s from node %s for segments %s", identifier, origin, segments);
}
wireFilterAndConverterDependencies(filter, converter);
startRetrievingValues(identifier, segments, filter, converter, flags, new SegmentBatchHandler<K, C>() {
@Override
public void handleBatch(UUID identifier, boolean complete, Set<Integer> completedSegments,
Set<Integer> inDoubtSegments, Collection<CacheEntry<K, C>> entries) {
if (cache.getStatus() != ComponentStatus.RUNNING) {
if (log.isTraceEnabled()) {
log.tracef("Cache status is no longer running, all segments are now suspect");
}
inDoubtSegments.addAll(completedSegments);
completedSegments.clear();
}
if (log.isTraceEnabled()) {
log.tracef("Sending batch response for %s to origin %s with %s completed segments, %s in doubt segments and %s values",
identifier, origin, completedSegments, inDoubtSegments, entries.size());
}
EntryResponseCommand command = commandsFactory.buildEntryResponseCommand(identifier, completedSegments,
inDoubtSegments, entries, null);
rpcManager.invokeRemotely(Collections.singleton(origin), command, rpcManager.getRpcOptionsBuilder(
ResponseMode.SYNCHRONOUS).timeout(Long.MAX_VALUE, TimeUnit.SECONDS).build());
}
@Override
public void handleException(CacheException e) {
EntryResponseCommand command = commandsFactory.buildEntryResponseCommand(identifier, null, null, null, e);
rpcManager.invokeRemotely(Collections.singleton(origin), command, rpcManager.getRpcOptionsBuilder(
ResponseMode.SYNCHRONOUS).timeout(Long.MAX_VALUE, TimeUnit.SECONDS).build());
}
});
}
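// Iterates over the local data container (and, when allowed by the flags, the configured stores) for the requested
// segments, feeding batches of entries to the handler. Segments whose ownership changes while we are iterating are
// reported as in doubt rather than completed, and for a local invocation the whole pass is repeated if a rehash
// handed this node new segments in the meantime.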
private <C> void startRetrievingValues(final UUID identifier, final Set<Integer> segments,
final KeyValueFilter<? super K, ? super V> filter,
final Converter<? super K, ? super V, C> converter,
final Set<Flag> flags, final SegmentBatchHandler<K, C> handler) {
ConsistentHash hash = getCurrentHash();
final Set<Integer> inDoubtSegments = new HashSet<>(segments.size());
boolean canTryProcess = false;
Iterator<Integer> iter = segments.iterator();
while (iter.hasNext()) {
Integer segment = iter.next();
// If we still own any segments try to process
if (localAddress.equals(hash.locatePrimaryOwnerForSegment(segment))) {
canTryProcess = true;
} else {
inDoubtSegments.add(segment);
iter.remove();
}
}
if (canTryProcess) {
executorService.execute(new Runnable() {
@Override
public void run() {
Set<Integer> segmentsToUse = segments;
Set<Integer> inDoubtSegmentsToUse = inDoubtSegments;
ConsistentHash hashToUse = getCurrentHash();
// this will stay as true for a local invocation until all local segments have been processed
// a non local will set this to false at the end every time
boolean repeat = true;
while (repeat) {
if (log.isTraceEnabled()) {
log.tracef("Starting retrieval of values for identifier %s", identifier);
}
SegmentChangeListener segmentChangeListener = new SegmentChangeListener();
changeListener.put(identifier, segmentChangeListener);
try {
final Set<K> processedKeys = CollectionFactory.makeSet(keyEquivalence);
Queue<CacheEntry<K, C>> queue = new ConcurrentLinkedQueue<CacheEntry<K, C>>() {
@Override
public boolean add(CacheEntry<K, C> kcEntry) {
processedKeys.add(kcEntry.getKey());
return super.add(kcEntry);
}
};
ParallelIterableMap.KeyValueAction<? super K, CacheEntry<? super K, ? super V>> action =
new MapAction(identifier, segmentsToUse, inDoubtSegmentsToUse, batchSize, converter, handler,
queue);
PassivationListener listener = null;
long currentTime = timeService.wallClockTime();
try {
for (InternalCacheEntry entry : dataContainer) {
if (!entry.isExpired(currentTime)) {
InternalCacheEntry clone = entryFactory.create(unwrapMarshalledvalue(entry.getKey()),
unwrapMarshalledvalue(entry.getValue()), entry);
K key = clone.getKey();
if (filter != null) {
if (converter == null && filter instanceof KeyValueFilterConverter) {
C converted = ((KeyValueFilterConverter)filter).filterAndConvert(
key, clone.getValue(), clone.getMetadata());
if (converted != null) {
clone.setValue((V) converted);
} else {
continue;
}
}
else if (!filter.accept(key, clone.getValue(), clone.getMetadata())) {
continue;
}
}
action.apply(key, clone);
}
}
if (shouldUseLoader(flags) && persistenceManager.getStoresAsString().size() > 0) {
KeyFilter loaderFilter;
if (passivationEnabled) {
listener = new PassivationListener();
cache.addListener(listener);
}
if (filter == null || converter == null && filter instanceof KeyValueFilterConverter) {
loaderFilter = new CompositeKeyFilter(new SegmentFilter(hashToUse, segmentsToUse),
// We rely on this keeping a reference and not copying
// contents
new CollectionKeyFilter(processedKeys));
} else {
loaderFilter = new CompositeKeyFilter(new SegmentFilter(hashToUse, segmentsToUse),
new CollectionKeyFilter(processedKeys),
new KeyValueFilterAsKeyFilter(filter));
}
if (converter == null && filter instanceof KeyValueFilterConverter) {
action = new MapAction(identifier, segmentsToUse, inDoubtSegmentsToUse, batchSize, (KeyValueFilterConverter) filter, handler, queue);
}
persistenceManager.processOnAllStores(withinThreadExecutor, loaderFilter,
new KeyValueActionForCacheLoaderTask(action), true, true);
}
} finally {
if (listener != null) {
cache.removeListener(listener);
AdvancedCache advancedCache = cache.getAdvancedCache();
// Now we have to check all the activated keys, as it is possible it got promoted to the
// in memory data container after we would have seen it
for (K key : listener.activatedKeys) {
// If we didn't process it already we have to look it up
if (!processedKeys.contains(key)) {
CacheEntry entry = advancedCache.getCacheEntry(key);
if (entry != null) {
// We don't want to modify the entry itself
CacheEntry clone = entry.clone();
if (filter != null) {
if (converter == null && filter instanceof KeyValueFilterConverter) {
C converted = ((KeyValueFilterConverter)filter).filterAndConvert(
key, clone.getValue(), clone.getMetadata());
if (converted != null) {
clone.setValue((V) converted);
} else {
continue;
}
}
else if (!filter.accept(key, clone.getValue(), clone.getMetadata())) {
continue;
}
}
action.apply(clone.getKey(), clone);
}
}
}
}
}
Set completedSegments = new HashSet();
for (Integer segment : segmentsToUse) {
if (localAddress.equals(getCurrentHash().locatePrimaryOwnerForSegment(segment)) &&
!segmentChangeListener.changedSegments.contains(segment)) {
// this segment should be complete then.
completedSegments.add(segment);
} else {
inDoubtSegmentsToUse.add(segment);
}
}
// No type to work around generics in sub sub types :)
Collection entriesToSend = new ArrayList<>(queue);
handler.handleBatch(identifier, true, completedSegments, inDoubtSegmentsToUse, entriesToSend);
if (log.isTraceEnabled()) {
log.tracef("Completed data iteration for request %s with segments %s", identifier, segmentsToUse);
}
} catch (Throwable t) {
CacheException e = log.exceptionProcessingEntryRetrievalValues(t);
handler.handleException(e);
} finally {
changeListener.remove(identifier);
}
repeat = shouldRepeatApplication(identifier);
if (repeat) {
// Only local would ever go into here
hashToUse = getCurrentHash();
IterationStatus<? extends Object> status = iteratorDetails.get(identifier);
if (status != null) {
segmentsToUse = findMissingLocalSegments(status.processedKeys, hashToUse);
inDoubtSegmentsToUse.clear();
if (log.isTraceEnabled()) {
if (!segmentsToUse.isEmpty()) {
log.tracef("Local retrieval found it should rerun - now finding segments %s for identifier %s",
segmentsToUse, identifier);
} else {
log.tracef("Local retrieval for identifier %s was told to rerun - however no new segments " +
"were found, looping around to try again", identifier);
}
}
} else {
log.tracef("Not repeating local retrieval since iteration was completed");
repeat = false;
}
} else {
if (log.isTraceEnabled()) {
log.tracef("Completed request %s for segments %s", identifier, segmentsToUse);
}
repeat = false;
}
}
}
});
} else {
if (log.isTraceEnabled()) {
log.tracef("Our node no longer has any of the segments %s that were requested for %s", inDoubtSegments,
identifier);
}
executorService.execute(new Runnable() {
@Override
public void run() {
// If we don't have any of those segments just send back a response saying they are suspect with no values
Collection<CacheEntry<K, C>> emptyEntries = Collections.emptySet();
handler.handleBatch(identifier, true, segments, inDoubtSegments, emptyEntries);
}
});
}
}
private void startRetrievingValuesLocal(final UUID identifier, final Set<Integer> segments,
IterationStatus status,
final SegmentBatchHandler handler) {
boolean shouldRun = updatedLocalAndRun(identifier);
if (shouldRun) {
if (log.isTraceEnabled()) {
log.tracef("Starting local request to retrieve segments %s for identifier %s", segments, identifier);
}
startRetrievingValues(identifier, segments, status.filter, status.converter, status.flags, handler);
} else if (log.isTraceEnabled()) {
log.tracef("Not running local retrieval as another thread is handling it for identifier %s.", identifier);
}
}
@Override
public <C> CloseableIterator<CacheEntry<K, C>> retrieveEntries(KeyValueFilter<? super K, ? super V> filter,
Converter<? super K, ? super V, ? extends C> converter,
Set<Flag> flags,
SegmentListener listener) {
// If we are marked as local don't process distributed entries
if (flags != null && flags.contains(Flag.CACHE_MODE_LOCAL)) {
log.trace("Skipping distributed entry retrieval and processing local only as CACHE_MODE_LOCAL flag was set");
return super.retrieveEntries(filter, converter, flags, listener);
}
ConsistentHash hash = getCurrentHash();
// If we aren't in the hash then just run the command locally
if (!hash.getMembers().contains(localAddress)) {
log.trace("Skipping distributed entry retrieval and processing local since we are not part of the consistent hash");
return super.retrieveEntries(filter, converter, flags, listener);
}
UUID identifier = UUID.randomUUID();
final Converter<? super K, ? super V, ? extends C> usedConverter = checkForKeyValueFilterConverter(filter,
converter);
if (log.isTraceEnabled()) {
log.tracef("Processing entry retrieval request with identifier %s with filter %s and converter %s", identifier,
filter, usedConverter);
}
DistributedItr<C> itr = new DistributedItr<>(batchSize, identifier, listener, hash);
registerIterator(itr, flags);
Set<Integer> remoteSegments = new HashSet<>();
AtomicReferenceArray<Set<K>> processedKeys = new AtomicReferenceArray<Set<K>>(hash.getNumSegments());
for (int i = 0; i < processedKeys.length(); ++i) {
// Normally we only work on a single segment per thread. But since there is an edge case where
// a node that has left can still send a response, we need this to be a CHS.
processedKeys.set(i, new ConcurrentHashSet());
remoteSegments.add(i);
}
final IterationStatus<C> status = new IterationStatus<>(itr, listener, filter, usedConverter, flags, processedKeys);
iteratorDetails.put(identifier, status);
Set ourSegments = hash.getPrimarySegmentsForOwner(localAddress);
remoteSegments.removeAll(ourSegments);
if (!remoteSegments.isEmpty()) {
eventuallySendRequest(identifier, status);
}
if (!ourSegments.isEmpty()) {
wireFilterAndConverterDependencies(filter, usedConverter);
startRetrievingValuesLocal(identifier, ourSegments, status, new SegmentBatchHandler<K, C>() {
@Override
public void handleBatch(UUID identifier, boolean complete, Set<Integer> completedSegments, Set<Integer> inDoubtSegments, Collection<CacheEntry<K, C>> entries) {
processData(identifier, localAddress, completedSegments, inDoubtSegments, entries);
}
@Override
public void handleException(CacheException e) {
status.ongoingIterator.close(e);
}
});
}
return itr;
}
private ConsistentHash getCurrentHash() {
ConsistentHash hash = currentHash.get();
if (hash == null) {
currentHash.compareAndSet(null, distributionManager.getReadConsistentHash());
hash = currentHash.get();
}
return hash;
}
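// Keeps trying to reserve awaitingResponseFrom and send a remote request for whatever remote segments are still
// missing. Returns true only if a request was actually sent; returns false if the iterator was closed concurrently,
// another request is already pending or no remote segments remain.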
private boolean eventuallySendRequest(UUID identifier, IterationStatus<? extends Object> status) {
boolean sent = false;
while (!sent) {
// This means our iterator was closed explicitly
if (!iteratorDetails.containsKey(identifier)) {
if (log.isTraceEnabled()) {
log.tracef("Cannot send remote request as our iterator was concurrently closed for %s", identifier);
}
return false;
}
ConsistentHash hash = getCurrentHash();
Set<Integer> missingRemoteSegments = findMissingRemoteSegments(status.processedKeys, hash);
if (!missingRemoteSegments.isEmpty()) {
Map.Entry<Address, Set<Integer>> route = findOptimalRoute(missingRemoteSegments, hash);
// If another request came in we don't want to keep on trying. This could happen if a rehash caused
// our existing node request to go away.
if (status.awaitingResponseFrom.compareAndSet(null, route.getKey())) {
sent = sendRequest(true, route, identifier, status);
} else {
break;
}
} else {
if (log.isTraceEnabled()) {
log.tracef("Cannot send remote request as there are no longer any remote segments missing for %s", identifier);
}
break;
}
}
return sent;
}
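// Sends an EntryRequestCommand for the given route's segments. Keys already seen for those segments are folded
// into the filter shipped with the request so the remote node doesn't resend them. Returns false (and clears
// awaitingResponseFrom) if the target was suspected or answered unsuccessfully, so the caller can pick a new node.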
private boolean sendRequest(boolean sync, Map.Entry<Address, Set<Integer>> route, final UUID identifier,
IterationStatus<? extends Object> status) {
if (log.isTraceEnabled()) {
log.tracef("Sending request to %s for identifier %s", route, identifier);
}
Address address = route.getKey();
status.awaitingResponseFrom.set(address);
Set<Integer> segments = route.getValue();
Set<K> keysToFilter = new HashSet<K>();
AtomicReferenceArray<Set<K>> ourEntries = status.processedKeys;
for (Integer segment : segments) {
Set<K> valuesSeen = ourEntries.get(segment);
if (valuesSeen != null) {
keysToFilter.addAll(valuesSeen);
}
}
KeyValueFilter<? super K, ? super V> filterToSend;
if (status.filter == null) {
if (!keysToFilter.isEmpty()) {
if (log.isTraceEnabled()) {
log.tracef("Applying filter for %s of keys", keysToFilter.size());
}
filterToSend = new KeyFilterAsKeyValueFilter(new CollectionKeyFilter(keysToFilter));
} else {
if (log.isTraceEnabled()) {
log.trace("No filter applied");
}
filterToSend = null;
}
} else {
if (!keysToFilter.isEmpty()) {
if (log.isTraceEnabled()) {
log.tracef("Applying filter for %s of keys with provided filter %s" , keysToFilter.size(), status.filter);
}
filterToSend = new CompositeKeyValueFilter(
new KeyFilterAsKeyValueFilter(new CollectionKeyFilter(keysToFilter)), status.filter);
} else {
if (log.isTraceEnabled()) {
log.tracef("Using provided filter %s", status.filter);
}
filterToSend = status.filter;
}
}
EntryRequestCommand command = commandsFactory.buildEntryRequestCommand(identifier, segments,
filterToSend, status.converter,
status.flags);
try {
// We don't want async with sync marshalling as we don't want the extra overhead time
RpcOptions options = rpcManager.getRpcOptionsBuilder(sync ? ResponseMode.SYNCHRONOUS :
ResponseMode.ASYNCHRONOUS).build();
Map<Address, Response> responseMap = rpcManager.invokeRemotely(Collections.singleton(address), command, options);
if (sync) {
Response response = responseMap.values().iterator().next();
if (!response.isSuccessful()) {
Throwable cause = response instanceof ExceptionResponse ? ((ExceptionResponse) response).getException() : null;
if (log.isTraceEnabled()) {
log.tracef(cause, "Unsuccessful response received from node %s for %s, must resend to a new node!",
route.getKey(), identifier);
}
atomicRemove(status.awaitingResponseFrom, address);
return false;
}
}
return true;
} catch (SuspectException e) {
if (log.isTraceEnabled()) {
log.tracef("Request to %s for %s was suspect, must resend to a new node!", route, identifier);
}
atomicRemove(status.awaitingResponseFrom, address);
return false;
}
}
private Set<Integer> findMissingLocalSegments(AtomicReferenceArray<Set<K>> processValues, ConsistentHash hash) {
Set<Integer> ourSegments = hash.getPrimarySegmentsForOwner(localAddress);
Set<Integer> returnSegments = new HashSet<>();
for (Integer segment : ourSegments) {
if (processValues.get(segment) != null) {
returnSegments.add(segment);
}
}
return returnSegments;
}
private boolean updatedLocalAndRun(UUID identifier) {
boolean shouldRun = false;
boolean updated = false;
IterationStatus<?> details = iteratorDetails.get(identifier);
if (details != null) {
AtomicReference<LocalStatus> localRunning = details.localRunning;
while (!updated) {
LocalStatus status = localRunning.get();
// Just ignore a null status
if (status == null) {
updated = true;
continue;
}
switch (status) {
case IDLE:
// If idle we try to update to running which means we should fire it off
updated = shouldRun = localRunning.compareAndSet(LocalStatus.IDLE, LocalStatus.RUNNING);
break;
case REPEAT:
// If is repeat then we don't worry since it will have to be repeated still
updated = true;
break;
case RUNNING:
// If it is running try to set to repeat to make sure they know about the new segments
updated = localRunning.compareAndSet(LocalStatus.RUNNING, LocalStatus.REPEAT);
break;
}
}
}
return shouldRun;
}
private boolean shouldRepeatApplication(UUID identifier) {
boolean shouldRun = false;
boolean updated = false;
IterationStatus<?> details = iteratorDetails.get(identifier);
if (details != null) {
AtomicReference<LocalStatus> localRunning = details.localRunning;
while (!updated) {
LocalStatus status = localRunning.get();
if (status == null) {
throw new IllegalStateException("Status should never be null");
} else {
switch (status) {
case IDLE:
throw new IllegalStateException("This should never be seen as IDLE by the running thread");
case REPEAT:
updated = shouldRun = localRunning.compareAndSet(LocalStatus.REPEAT, LocalStatus.RUNNING);
break;
case RUNNING:
updated = localRunning.compareAndSet(LocalStatus.RUNNING, LocalStatus.IDLE);
break;
}
}
}
}
return shouldRun;
}
private boolean missingRemoteSegment(AtomicReferenceArray<Set<K>> processValues, ConsistentHash hash) {
boolean missingRemote = false;
if (processValues != null) {
Set localSegments = hash.getPrimarySegmentsForOwner(localAddress);
for (int i = 0; i < processValues.length(); ++i) {
if (processValues.get(i) != null) {
if (!localSegments.contains(i)) {
missingRemote = true;
break;
}
}
}
}
return missingRemote;
}
private Set<Integer> findMissingRemoteSegments(AtomicReferenceArray<Set<K>> processValues, ConsistentHash hash) {
Set<Integer> localSegments = hash.getPrimarySegmentsForOwner(localAddress);
Set<Integer> segments = new HashSet<>();
if (processValues != null) {
for (int i = 0; i < processValues.length(); ++i) {
if (processValues.get(i) != null) {
if (!localSegments.contains(i)) {
segments.add(i);
}
}
}
}
return segments;
}
/**
* Finds the address with the most segments to request and returns it. Note this will never return
* the local address.
* @param segmentsToFind The segments to find, or null if all segments are desired
* @return the address to send the next request to and the segments to ask it for, or null if no remote node owns any of the requested segments
*/
private Map.Entry<Address, Set<Integer>> findOptimalRoute(Set<Integer> segmentsToFind, ConsistentHash hash) {
Map.Entry<Address, Set<Integer>> route = null;
Map<Address, Set<Integer>> routes;
int segmentCount = hash.getNumSegments();
routes = new HashMap<>();
for (int i = 0; i < segmentCount; ++i) {
if (segmentsToFind == null || segmentsToFind.contains(i)) {
Address address = hash.locatePrimaryOwnerForSegment(i);
Set segments = routes.get(address);
if (segments == null) {
segments = new HashSet<>();
routes.put(address, segments);
}
segments.add(i);
}
}
for (Map.Entry<Address, Set<Integer>> mappedRoute : routes.entrySet()) {
if (mappedRoute.getKey().equals(localAddress)) {
continue;
}
if (route == null) {
route = mappedRoute;
} else if (route.getValue().size() > mappedRoute.getValue().size()) {
route = mappedRoute;
}
}
return route;
}
@Override
public <C> void receiveResponse(UUID identifier, Address origin, Set<Integer> completedSegments,
Set<Integer> inDoubtSegments, Collection<CacheEntry<K, C>> entries, CacheException e) {
if (log.isTraceEnabled()) {
log.tracef("Processing response for identifier %s", identifier);
}
if (e != null) {
log.tracef("Response for identifier %s contained exception", identifier, e);
} else {
try {
processData(identifier, origin, completedSegments, inDoubtSegments, entries);
} catch (Throwable t) {
e = log.exceptionProcessingIteratorResponse(identifier, t);
}
}
if (e != null) {
IterationStatus<?> status = iteratorDetails.get(identifier);
if (status != null) {
status.ongoingIterator.close(e);
}
}
}
/**
* This method is only called on the originator node to process values either retrieved remotely or locally.
* After processing the data this method determines whether it needs to send another request to another remote
* node and also whether it needs to run another local data pass in case the topology changed.
* @param origin Where the data request came from
* @param identifier The unique identifier for this iteration request
* @param completedSegments The segments that were completed
* @param inDoubtSegments The segments that were found to be in doubt due to a rehash while iterating over them
* @param entries The entries retrieved
* @param <C> The type for the entries value
*/
private <C> void processData(final UUID identifier, Address origin, Set<Integer> completedSegments, Set<Integer> inDoubtSegments,
Collection<CacheEntry<K, C>> entries) {
final IterationStatus<C> status = (IterationStatus<C>) iteratorDetails.get(identifier);
// This is possible if the iterator was closed early or we had duplicate requests due to a rehash.
if (status != null) {
final AtomicReferenceArray<Set<K>> processedKeys = status.processedKeys;
final DistributedItr<C> itr = status.ongoingIterator;
if (log.isTraceEnabled()) {
log.tracef("Processing data for identifier %s completedSegments: %s inDoubtSegments: %s entryCount: %s", identifier,
completedSegments, inDoubtSegments, entries.size());
}
// Normally we shouldn't have duplicates, but rehash can cause that
Collection<CacheEntry<K, C>> nonDuplicateEntries = new ArrayList<>(entries.size());
Map<Integer, ConcurrentHashSet<K>> finishedKeysForSegment = new HashMap<>();
// We have to put the empty hash set, or else segments with no values would complete
for (int completedSegment : completedSegments) {
// Only notify segments that have completed once! Technically this can still occur twice, since the
// segments aren't completed until later, but this happening is not an issue since we only raise a key once,
// but this is here to reduce tracing output and false positives in tests.
if (processedKeys.get(completedSegment) != null) {
finishedKeysForSegment.put(completedSegment, new ConcurrentHashSet());
}
}
// We need to keep track of what we have seen in case they become in doubt
ConsistentHash hash = getCurrentHash();
for (CacheEntry<K, C> entry : entries) {
K key = entry.getKey();
int segment = hash.getSegment(key);
Set<K> seenSet = processedKeys.get(segment);
// If the set is null it means that this segment was already finished... so don't worry about those values
if (seenSet != null) {
// If we already saw the value don't raise it again
if (seenSet.add(key)) {
ConcurrentHashSet finishedKeys = finishedKeysForSegment.get(segment);
if (finishedKeys != null) {
finishedKeys.add(key);
}
nonDuplicateEntries.add(entry);
}
}
}
itr.addKeysForSegment(finishedKeysForSegment);
try {
itr.addEntries(nonDuplicateEntries);
} catch (InterruptedException e) {
if (log.isTraceEnabled()) {
// If we were interrupted then just shut down this processing completely
log.tracef("Iteration thread was interrupted, stopping iteration for identifier %s", identifier);
}
completeIteration(identifier);
}
// We complete the segments after setting the entries
if (!completedSegments.isEmpty()) {
if (log.isTraceEnabled()) {
log.tracef("Completing segments %s for identifier %s", completedSegments, identifier);
}
for (Integer completeSegment : completedSegments) {
// Null out the set saying we completed this segment
processedKeys.set(completeSegment, null);
}
}
// If we are finished we need to request the next segments - currently the indoubt and completed are
// sent at the end
// Also don't check completion if we know we are waiting for another node to respond
if (!completedSegments.isEmpty() || !inDoubtSegments.isEmpty()) {
boolean complete = true;
// We have to use the same hash for both local and remote just in case - note both will check the updated
// hash later
hash = getCurrentHash();
boolean isMissingRemoteSegments = missingRemoteSegment(processedKeys, hash);
if (isMissingRemoteSegments) {
if (log.isTraceEnabled()) {
// Note if a rehash occurs here and all our segments become local this could be an empty set
log.tracef("Request %s not yet complete, remote segments %s are still missing", identifier,
findMissingRemoteSegments(processedKeys, hash));
}
complete = false;
if (origin != localAddress) {
// Only perform if the awaitingResponse is still not null, which is our current response. If it
// is null that means the iterator was closed, if it was non null means this node went down while
// processing response
if (atomicRemove(status.awaitingResponseFrom, origin)) {
if (log.isTraceEnabled()) {
log.tracef("Sending request for %s via remote transport thread", identifier);
}
remoteExecutorService.submit(new Runnable() {
@Override
public void run() {
eventuallySendRequest(identifier, status);
}
});
} else if (log.isTraceEnabled()) {
log.tracef("Not sending new remote request as %s was either stopped or %s went down", identifier,
origin);
}
}
} else if (origin != localAddress) {
// If we don't have another node to send to mark the response as no longer required
status.awaitingResponseFrom.set(null);
remoteExecutorService.submit(new Runnable() {
@Override
public void run() {
// We have to keep trying until either there are no more missing segments or we have sent a request
while (missingRemoteSegment(processedKeys, getCurrentHash()) && iteratorDetails.containsKey(identifier)) {
if (!eventuallySendRequest(identifier, status)) {
// We couldn't send a remote request, so remove the awaitingResponse and make sure there
// are no more missing remote segments
status.awaitingResponseFrom.set(null);
} else {
// This means we successfully sent the request so our job is done!
break;
}
}
}
});
}
Set localSegments = findMissingLocalSegments(processedKeys, hash);
if (!localSegments.isEmpty()) {
if (log.isTraceEnabled()) {
log.tracef("Request %s not yet complete, local segments %s are still missing", identifier, localSegments);
}
complete = false;
// Have the local request check its values again
startRetrievingValuesLocal(identifier, localSegments, status, new SegmentBatchHandler<K, C>() {
@Override
public void handleBatch(UUID identifier, boolean complete, Set<Integer> completedSegments, Set<Integer> inDoubtSegments, Collection<CacheEntry<K, C>> entries) {
processData(identifier, localAddress, completedSegments, inDoubtSegments, entries);
}
@Override
public void handleException(CacheException e) {
itr.close(e);
}
});
}
if (complete) {
completeIteration(identifier);
}
}
} else if (log.isTraceEnabled()) {
log.tracef("Ignoring values as identifier %s was marked as complete", identifier);
}
}
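// Atomically clears the reference, but only if it still holds the given value; returns whether it was cleared.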
private static <V> boolean atomicRemove(AtomicReference<V> ref, V object) {
V refObject = ref.get();
if (object.equals(refObject)) {
return ref.compareAndSet(refObject, null);
} else {
return false;
}
}
private void completeIteration(UUID identifier) {
if (log.isTraceEnabled()) {
log.tracef("Processing complete for identifier %s", identifier);
}
IterationStatus> status = iteratorDetails.get(identifier);
if (status != null) {
Itr> itr = status.ongoingIterator;
partitionListener.iterators.remove(itr);
itr.close();
}
}
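// Iterator handed back to the caller. On top of the batching done by Itr it tracks, per segment, which keys still
// have to be returned before that segment may be reported to the SegmentListener as completed.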
protected class DistributedItr<C> extends Itr<C> {
private final UUID identifier;
private final ConsistentHash hash;
private final ConcurrentMap<Integer, Set<K>> keysNeededToComplete = new ConcurrentHashMap<>();
private final SegmentListener segmentListener;
public DistributedItr(int batchSize, UUID identifier, SegmentListener segmentListener, ConsistentHash hash) {
super(batchSize);
this.identifier = identifier;
this.hash = hash;
this.segmentListener = segmentListener;
}
@Override
public CacheEntry<K, C> next() {
CacheEntry<K, C> entry = super.next();
K key = entry.getKey();
int segment = hash.getSegment(key);
Set<K> keys = keysNeededToComplete.get(segment);
if (keys != null) {
keys.remove(key);
if (keys.isEmpty()) {
notifyListenerCompletedSegment(segment, true);
}
}
return entry;
}
private void notifyListenerCompletedSegment(int segment, boolean fromIterator) {
if (segmentListener != null) {
if (log.isTraceEnabled()) {
log.tracef("Notifying listener of segment %s being completed for %s", segment, identifier);
}
segmentListener.segmentTransferred(segment, fromIterator);
}
}
public void addKeysForSegment(Map<Integer, ConcurrentHashSet<K>> keysForSegment) {
for (Map.Entry<Integer, ConcurrentHashSet<K>> entry : keysForSegment.entrySet()) {
Set<K> values = entry.getValue();
// If it is empty just notify right away
if (values.isEmpty()) {
// If we have keys to be notified, then don't complete the segment due to this response having no valid
// keys. This means a previous response came for this segment that had keys.
if (!keysNeededToComplete.containsKey(entry.getKey())) {
notifyListenerCompletedSegment(entry.getKey(), false);
} else {
if (log.isTraceEnabled()) {
log.tracef("No keys found for segment %s, but previous response had keys - so cannot complete " +
"segment", entry.getKey());
}
}
}
// Else we have to wait until we iterate over the values first
else {
Set prevValues = keysNeededToComplete.putIfAbsent(entry.getKey(), values);
if (prevValues != null) {
// Can't use addAll due to CHS impl
for (K value : values) {
prevValues.add(value);
}
}
}
}
}
protected void close(CacheException e) {
super.close(e);
// When the iterator is closed we have to stop all other processing and remove any references to our identifier
iteratorDetails.remove(identifier);
}
@Override
protected void finalize() throws Throwable {
super.finalize();
close();
}
}
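// Action applied to each entry during local iteration: entries outside the requested segments are skipped, the
// converter (if any) is applied, and every batchSize insertions a batch is flushed to the SegmentBatchHandler with
// empty completed/in-doubt segment sets.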
private class MapAction<C> implements ParallelIterableMap.KeyValueAction<K, CacheEntry<K, V>> {
final UUID identifier;
final Set<Integer> segments;
final int batchSize;
final Converter<? super K, ? super V, C> converter;
final SegmentBatchHandler<K, C> handler;
final Queue<CacheEntry<K, C>> queue;
final AtomicInteger insertionCount = new AtomicInteger();
public MapAction(UUID identifier, Set<Integer> segments, Set<Integer> inDoubtSegments,
int batchSize, Converter<? super K, ? super V, C> converter, SegmentBatchHandler<K, C> handler,
Queue<CacheEntry<K, C>> queue) {
this.identifier = identifier;
this.segments = segments;
this.batchSize = batchSize;
this.converter = converter;
this.handler = handler;
this.queue = queue;
}
@Override
public void apply(K k, CacheEntry<K, V> kvInternalCacheEntry) {
ConsistentHash hash = getCurrentHash();
if (segments.contains(hash.getSegment(k))) {
CacheEntry clone = (CacheEntry)kvInternalCacheEntry.clone();
if (converter != null) {
C value = converter.convert(k, kvInternalCacheEntry.getValue(), kvInternalCacheEntry.getMetadata());
if (value == null && converter instanceof KeyValueFilterConverter) {
return;
}
clone.setValue(value);
}
queue.add(clone);
if (insertionCount.incrementAndGet() % batchSize == 0) {
Collection<CacheEntry<K, C>> entriesToSend = new ArrayList<>(batchSize);
while (entriesToSend.size() != batchSize) {
entriesToSend.add(queue.poll());
}
Set<Integer> emptySet = Collections.emptySet();
// We always send back empty set for both completed and in doubt segments
handler.handleBatch(identifier, false, emptySet, emptySet, entriesToSend);
}
}
}
}
interface SegmentBatchHandler<K, C> {
public void handleBatch(UUID identifier, boolean complete, Set<Integer> completedSegments,
Set<Integer> inDoubtSegments, Collection<CacheEntry<K, C>> entries);
public void handleException(CacheException e);
}
private static class SegmentFilter<K> implements KeyFilter<K> {
private final ConsistentHash hash;
private final Set<Integer> segments;
public SegmentFilter(ConsistentHash hash, Set<Integer> segments) {
this.hash = hash;
this.segments = segments;
}
@Override
public boolean accept(K key) {
return segments.contains(hash.getSegment(key));
}
}
private class SegmentChangeListener {
private final Set<Integer> changedSegments = new ConcurrentHashSet<Integer>();
public void changedSegments(Set<Integer> changedSegments) {
if (log.isTraceEnabled()) {
log.tracef("Adding changed segments %s so iteration can properly suspect them", changedSegments);
}
// Can't use addAll due to CHS impl
for (Integer segment : changedSegments) {
this.changedSegments.add(segment);
}
}
}
}