All downloads are free. Search and download functionality uses the official Maven repository.

org.infinispan.stream.impl.LocalStreamManagerImpl Maven / Gradle / Ivy

There is a newer version: 9.1.7.Final
Show newest version
package org.infinispan.stream.impl;

import org.infinispan.AdvancedCache;
import org.infinispan.Cache;
import org.infinispan.CacheSet;
import org.infinispan.commands.CommandsFactory;
import org.infinispan.commons.CacheException;
import org.infinispan.commons.equivalence.AnyEquivalence;
import org.infinispan.commons.equivalence.AnyServerEquivalence;
import org.infinispan.commons.equivalence.Equivalence;
import org.infinispan.commons.equivalence.EquivalentHashSet;
import org.infinispan.commons.util.CollectionFactory;
import org.infinispan.configuration.cache.Configuration;
import org.infinispan.container.entries.CacheEntry;
import org.infinispan.context.Flag;
import org.infinispan.distribution.ch.ConsistentHash;
import org.infinispan.factories.ComponentRegistry;
import org.infinispan.factories.annotations.Inject;
import org.infinispan.factories.annotations.Start;
import org.infinispan.lifecycle.ComponentStatus;
import org.infinispan.notifications.Listener;
import org.infinispan.notifications.cachelistener.annotation.DataRehashed;
import org.infinispan.notifications.cachelistener.event.DataRehashedEvent;
import org.infinispan.persistence.manager.PersistenceManager;
import org.infinispan.remoting.rpc.RpcManager;
import org.infinispan.remoting.transport.Address;
import org.infinispan.statetransfer.StateTransferManager;
import org.infinispan.topology.CacheTopology;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ForkJoinPool;
import java.util.stream.Stream;

/**
 * Local stream manager implementation that handles injection of the stream supplier, invoking the operation and
 * subsequently notifying the operation if a rehash has changed one of its segments.
 * @param  key type of underlying cache
 * @param  value type of underlying cache
 */
@Listener(observation = Listener.Observation.POST)
public class LocalStreamManagerImpl implements LocalStreamManager {
   private final static Log log = LogFactory.getLog(LocalStreamManagerImpl.class);
   private static final boolean trace = log.isTraceEnabled();

   private AdvancedCache cache;
   private ComponentRegistry registry;
   private StateTransferManager stm;
   private RpcManager rpc;
   private CommandsFactory factory;
   private boolean hasLoader;
   private Equivalence keyEquivalence;

   private Address localAddress;

   private final ConcurrentMap changeListener = CollectionFactory.makeConcurrentMap();

   class SegmentListener {
      private final Set segments;
      private final SegmentAwareOperation op;
      private final Set segmentsLost;

      SegmentListener(Set segments, SegmentAwareOperation op) {
         this.segments = new HashSet<>(segments);
         this.op = op;
         this.segmentsLost = new HashSet<>();
      }

      public void localSegments(Set localSegments) {
         segments.forEach(s -> {
            if (!localSegments.contains(s)) {
               log.tracef("Could not process segment %s", s);
               segmentsLost.add(s);
            }
         });
      }

      public void lostSegments(Set lostSegments) {
         for (Integer segment : lostSegments) {
            if (segments.contains(segment)) {
               log.tracef("Lost segment %s", segment);
               if (op.lostSegment(false)) {
                  if (segmentsLost.add(segment) && segmentsLost.size() == segments.size()) {
                     log.tracef("All segments %s are now lost", segments);
                     op.lostSegment(true);
                  }
               }
            }
         }
      }
   }

   @Inject
   public void inject(Cache cache, ComponentRegistry registry, StateTransferManager stm, RpcManager rpc,
           Configuration configuration, CommandsFactory factory) {
      this.cache = cache.getAdvancedCache().withFlags(Flag.CACHE_MODE_LOCAL);
      this.registry = registry;
      this.stm = stm;
      this.rpc = rpc;
      this.factory = factory;
      this.hasLoader = configuration.persistence().usingStores();
      this.keyEquivalence = configuration.dataContainer().keyEquivalence();
   }

   @Start
   public void start() {
      localAddress = rpc.getAddress();
      cache.addListener(this);
   }

   /**
    * We need to listen to data rehash events in case if data moves while we are iterating over it.
    * If a rehash occurs causing this node to lose a segment and there is something iterating over the stream looking
    * for values of that segment, we can't guarantee that the data has all been seen correctly, so we must therefore
    * suspect that node by sending it back to the owner.
    * @param event The data rehash event
    */
   @DataRehashed
   public void dataRehashed(DataRehashedEvent event) {
      ConsistentHash startHash = event.getConsistentHashAtStart();
      ConsistentHash endHash = event.getConsistentHashAtEnd();
      boolean trace = log.isTraceEnabled();
      if (startHash != null && endHash != null) {
         log.tracef("Data rehash occurred startHash: %s and endHash: %s with new topology %s and was pre %s", startHash, endHash,
                 event.getNewTopologyId(), event.isPre());

         if (!changeListener.isEmpty()) {
            if (trace) {
               log.tracef("Previous segments %s ", startHash.getSegmentsForOwner(localAddress));
               log.tracef("After segments %s ", endHash.getSegmentsForOwner(localAddress));
            }
            // we don't care about newly added segments, since that means our run wouldn't include them anyways
            Set beforeSegments = new HashSet<>(startHash.getSegmentsForOwner(localAddress));
            // Now any that were there before but aren't there now should be added - we don't care about new segments
            // since our current request shouldn't be working on it - it will have to retrieve it later
            beforeSegments.removeAll(endHash.getSegmentsForOwner(localAddress));
            if (!beforeSegments.isEmpty()) {
               // We have to make sure all current listeners get the newest hashes updated.  This has to occur for
               // new nodes and nodes leaving as the hash segments will change in both cases.
               for (Map.Entry entry : changeListener.entrySet()) {
                  if (trace) {
                     log.tracef("Notifying %s through SegmentChangeListener", entry.getKey());
                  }
                  entry.getValue().lostSegments(beforeSegments);
               }
            } else if (trace) {
               log.tracef("No segments have been removed from data rehash, no notification required");
            }
         } else {
            log.tracef("No change listeners present!");
         }
      }
   }

   private AdvancedCache getCacheRespectingLoader(boolean includeLoader) {
      // We only need to "skip" the loader if there is one and we were told to skip it
      if (hasLoader && !includeLoader) {
         return cache.getAdvancedCache().withFlags(Flag.SKIP_CACHE_LOAD);
      }
      return cache;
   }

   private Stream> getStream(CacheSet> cacheEntrySet, boolean parallelStream,
           Set segments, Set keysToInclude, Set keysToExclude) {
      Stream> stream = (parallelStream ? cacheEntrySet.parallelStream() : cacheEntrySet.stream())
              .filterKeys(keysToInclude).filterKeySegments(segments);
      if (!keysToExclude.isEmpty()) {
         // AnyEquivalence is how HashSet works so we don't need to worry then
         if (!(keyEquivalence instanceof AnyEquivalence)) {
            // We have to add all the keys into an equivalent hash set to make sure we are excluding them properly
            Set equivKeys = new EquivalentHashSet<>(keyEquivalence);
            keysToExclude.forEach(equivKeys::add);
            return stream.filter(e -> !equivKeys.contains(e.getKey()));
         } else {
            return stream.filter(e -> !keysToExclude.contains(e.getKey()));
         }
      }
      return stream;
   }

   private Stream> getRehashStream(CacheSet> cacheEntrySet, Object requestId,
           SegmentListener listener, boolean parallelStream, Set segments, Set keysToInclude,
           Set keysToExclude) {
      CacheTopology topology = stm.getCacheTopology();
      log.tracef("Topology for supplier is %s for id %s", topology, requestId);
      ConsistentHash readCH = topology.getCurrentCH();
      ConsistentHash pendingCH = topology.getPendingCH();
      if (pendingCH != null) {
         Set lostSegments = new HashSet<>();
         Iterator iterator = segments.iterator();
         while (iterator.hasNext()) {
            Integer segment = iterator.next();
            // If the segment is not owned by both CHs we can't use it during rehash
            if (!pendingCH.locateOwnersForSegment(segment).contains(localAddress)
                    || !readCH.locateOwnersForSegment(segment).contains(localAddress)) {
               iterator.remove();
               lostSegments.add(segment);
            }
         }
         if (!lostSegments.isEmpty()) {
            log.tracef("Lost segments %s during rehash for id %s", lostSegments, requestId);
            listener.lostSegments(lostSegments);
         } else {
            log.tracef("Currently in the middle of a rehash for id %s", requestId);
         }
      } else {
         Set ourSegments = readCH.getSegmentsForOwner(localAddress);
         if (segments.retainAll(ourSegments)) {
            log.tracef("We found to be missing some segments requested for id %s", requestId);
            listener.localSegments(ourSegments);
         } else {
            log.tracef("Hash %s for id %s", readCH, requestId);
         }
      }

      return getStream(cacheEntrySet, parallelStream, segments, keysToInclude, keysToExclude);
   }

   @Override
   public  void streamOperation(Object requestId, Address origin, boolean parallelStream, Set segments,
           Set keysToInclude, Set keysToExclude, boolean includeLoader, TerminalOperation operation) {
      log.tracef("Received operation request for id %s from %s for segments %s", requestId, origin, segments);
      CacheSet> cacheEntrySet = getCacheRespectingLoader(includeLoader).cacheEntrySet();
      operation.setSupplier(() -> getStream(cacheEntrySet, parallelStream, segments, keysToInclude, keysToExclude));
      operation.handleInjection(registry);
      R value = operation.performOperation();
      rpc.invokeRemotely(Collections.singleton(origin), factory.buildStreamResponseCommand(requestId, true,
              Collections.emptySet(), value), rpc.getDefaultRpcOptions(true));
   }

   @Override
   public  void streamOperationRehashAware(Object requestId, Address origin, boolean parallelStream,
           Set segments, Set keysToInclude, Set keysToExclude, boolean includeLoader,
           TerminalOperation operation) {
      log.tracef("Received rehash aware operation request for id %s from %s for segments %s", requestId, origin, segments);
      CacheSet> cacheEntrySet = getCacheRespectingLoader(includeLoader).cacheEntrySet();
      SegmentListener listener = new SegmentListener(segments, operation);
      R value;

      operation.handleInjection(registry);
      // We currently only allow 1 request per id (we may change this later)
      changeListener.put(requestId, listener);
      log.tracef("Registered change listener for %s", requestId);
      try {
         operation.setSupplier(() -> getRehashStream(cacheEntrySet, requestId, listener, parallelStream, segments,
                 keysToInclude, keysToExclude));
         value = operation.performOperation();
         log.tracef("Request %s completed for segments %s with %s suspected segments", requestId, segments,
                 listener.segmentsLost);
      } finally {
         changeListener.remove(requestId);
         log.tracef("UnRegistered change listener for %s", requestId);
      }
      if (cache.getStatus() != ComponentStatus.RUNNING) {
         if (trace) {
            log.tracef("Cache status is no longer running, all segments are now suspect for %s", requestId);
         }
         listener.segmentsLost.addAll(segments);
         value = null;
      }

      log.tracef("Sending response for %s", requestId);
      rpc.invokeRemotely(Collections.singleton(origin), factory.buildStreamResponseCommand(requestId, true,
              listener.segmentsLost, value), rpc.getDefaultRpcOptions(true));
      log.tracef("Sent response for %s", requestId);
   }

   @Override
   public  void streamOperation(Object requestId, Address origin, boolean parallelStream, Set segments,
           Set keysToInclude, Set keysToExclude, boolean includeLoader,
           KeyTrackingTerminalOperation operation) {
      log.tracef("Received key aware operation request for id %s from %s for segments %s", requestId, origin, segments);
      CacheSet> cacheEntrySet = getCacheRespectingLoader(includeLoader).cacheEntrySet();
      operation.setSupplier(() -> getStream(cacheEntrySet, parallelStream, segments, keysToInclude, keysToExclude));
      operation.handleInjection(registry);
      Collection value = operation.performOperation(new NonRehashIntermediateCollector<>(origin, requestId,
              parallelStream));
      rpc.invokeRemotely(Collections.singleton(origin), factory.buildStreamResponseCommand(requestId, true,
              Collections.emptySet(), value), rpc.getDefaultRpcOptions(true));
   }

   @Override
   public  void streamOperationRehashAware(Object requestId, Address origin, boolean parallelStream,
           Set segments, Set keysToInclude, Set keysToExclude, boolean includeLoader,
           KeyTrackingTerminalOperation operation) {
      log.tracef("Received key rehash aware operation request for id %s from %s for segments %s", requestId, origin, segments);
      CacheSet> cacheEntrySet = getCacheRespectingLoader(includeLoader).cacheEntrySet();
      SegmentListener listener = new SegmentListener(segments, operation);
      Collection> results;

      operation.handleInjection(registry);
      // We currently only allow 1 request per id (we may change this later)
      changeListener.put(requestId, listener);
      if (trace) log.tracef("Registered change listener for %s", requestId);
      try {
         operation.setSupplier(() -> getRehashStream(cacheEntrySet, requestId, listener, parallelStream, segments,
                 keysToInclude, keysToExclude));
         results = operation.performOperationRehashAware(new NonRehashIntermediateCollector<>(origin, requestId,
                 parallelStream));
         // TODO: need to remove the full trace later
         if (trace) log.tracef("Request %s completed segments %s with %s suspected segments", requestId, segments,
                 listener.segmentsLost);
      } finally {
         changeListener.remove(requestId);
         if (trace) log.tracef("UnRegistered change listener for %s", requestId);
      }
      if (cache.getStatus() != ComponentStatus.RUNNING) {
         if (trace) {
            log.tracef("Cache status is no longer running, all segments are now suspect for %s", requestId);
         }
         listener.segmentsLost.addAll(segments);
         results = null;
      }

      rpc.invokeRemotely(Collections.singleton(origin), factory.buildStreamResponseCommand(requestId, true,
              listener.segmentsLost, results), rpc.getDefaultRpcOptions(true));
   }

   class NonRehashIntermediateCollector implements KeyTrackingTerminalOperation.IntermediateCollector {
      private final Address origin;
      private final Object requestId;
      private final boolean useManagedBlocker;

      NonRehashIntermediateCollector(Address origin, Object requestId, boolean useManagedBlocker) {
         this.origin = origin;
         this.requestId = requestId;
         this.useManagedBlocker = useManagedBlocker;
      }

      @Override
      public void sendDataResonse(R response) {
         // If we know we were in a parallel stream we should use a managed blocker to not consume core fork join
         // threads if applicable.
         if (useManagedBlocker) {
            try {
               // We use a managed blocker just in case if this invoked in the common thread pool
               ForkJoinPool.managedBlock(new ResponseBlocker(response));
            } catch (InterruptedException e) {
               throw new CacheException(e);
            }
         } else {
            rpc.invokeRemotely(Collections.singleton(origin), new StreamResponseCommand<>(cache.getName(), localAddress,
                    requestId, false, response), rpc.getDefaultRpcOptions(true));
         }
      }

      class ResponseBlocker implements ForkJoinPool.ManagedBlocker {
         private final R response;
         private boolean completed = false;

         ResponseBlocker(R response) {
            this.response = response;
         }

         @Override
         public boolean block() throws InterruptedException {
            if (!completed) {
               // This way we don't send more than 1 response to the originating node but still inside managed blocker
               // so we don't consume a thread
               synchronized (NonRehashIntermediateCollector.this) {
                  rpc.invokeRemotely(Collections.singleton(origin), new StreamResponseCommand<>(cache.getName(), localAddress,
                          requestId, false, response), rpc.getDefaultRpcOptions(true));
               }
            }
            completed = true;
            return completed;
         }

         @Override
         public boolean isReleasable() {
            return completed;
         }
      }
   }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy