org.infinispan.topology.ClusterTopologyManagerImpl

/*
 * JBoss, Home of Professional Open Source
 * Copyright 2012 Red Hat Inc. and/or its affiliates and other contributors
 * as indicated by the @author tags. All rights reserved.
 * See the copyright.txt in the distribution for a
 * full listing of individual contributors.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License, v. 2.1.
 * This program is distributed in the hope that it will be useful, but WITHOUT A
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
 * You should have received a copy of the GNU Lesser General Public License,
 * v.2.1 along with this distribution; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA  02110-1301, USA.
 */

package org.infinispan.topology;


import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import org.infinispan.CacheException;
import org.infinispan.commands.ReplicableCommand;
import org.infinispan.configuration.global.GlobalConfiguration;
import org.infinispan.distribution.ch.ConsistentHash;
import org.infinispan.distribution.ch.ConsistentHashFactory;
import org.infinispan.factories.GlobalComponentRegistry;
import org.infinispan.factories.annotations.ComponentName;
import org.infinispan.factories.annotations.Inject;
import org.infinispan.factories.annotations.Start;
import org.infinispan.factories.annotations.Stop;
import org.infinispan.notifications.Listener;
import org.infinispan.notifications.cachemanagerlistener.CacheManagerNotifier;
import org.infinispan.notifications.cachemanagerlistener.annotation.Merged;
import org.infinispan.notifications.cachemanagerlistener.annotation.ViewChanged;
import org.infinispan.notifications.cachemanagerlistener.event.ViewChangedEvent;
import org.infinispan.remoting.responses.ExceptionResponse;
import org.infinispan.remoting.responses.Response;
import org.infinispan.remoting.responses.SuccessfulResponse;
import org.infinispan.remoting.rpc.ResponseMode;
import org.infinispan.remoting.transport.Address;
import org.infinispan.remoting.transport.Transport;
import org.infinispan.util.concurrent.ConcurrentMapFactory;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;

import static org.infinispan.factories.KnownComponentNames.ASYNC_TRANSPORT_EXECUTOR;

/**
 * The {@code ClusterTopologyManager} implementation. It is created on every node, but it only does
 * real work on the coordinator: it processes join and leave requests for each cache, installs
 * cluster-wide consistent hash updates, and relies on the {@link RebalancePolicy} to trigger
 * rebalancing when a cache's membership changes.
 *
 * @author Dan Berindei
 * @since 5.2
 */
public class ClusterTopologyManagerImpl implements ClusterTopologyManager {
   private static final Log log = LogFactory.getLog(ClusterTopologyManagerImpl.class);

   private Transport transport;
   private RebalancePolicy rebalancePolicy;
   private GlobalConfiguration globalConfiguration;
   private GlobalComponentRegistry gcr;
   private CacheManagerNotifier cacheManagerNotifier;
   private ExecutorService asyncTransportExecutor;
   private volatile boolean isCoordinator;
   private volatile boolean isShuttingDown;
   // Installed cluster view id; guarded by viewUpdateLock, which is notified on every change so
   // that handleJoin() can block in waitForView() until the relevant view is installed locally.
   private volatile int viewId = -1;
   private final Object viewUpdateLock = new Object();
   // Serializes the handling of cluster view changes in handleNewView().
   private final Object viewHandlingLock = new Object();


   // One entry per cache, holding its join info, current members and cache topology.
   private final ConcurrentMap<String, ClusterCacheStatus> cacheStatusMap = ConcurrentMapFactory.makeConcurrentMap();
   private ClusterViewListener viewListener;

   @Inject
   public void inject(Transport transport, RebalancePolicy rebalancePolicy,
                      @ComponentName(ASYNC_TRANSPORT_EXECUTOR) ExecutorService asyncTransportExecutor,
                      GlobalConfiguration globalConfiguration, GlobalComponentRegistry gcr,
                      CacheManagerNotifier cacheManagerNotifier) {
      this.transport = transport;
      this.rebalancePolicy = rebalancePolicy;
      this.asyncTransportExecutor = asyncTransportExecutor;
      this.globalConfiguration = globalConfiguration;
      this.gcr = gcr;
      this.cacheManagerNotifier = cacheManagerNotifier;
   }

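   /**
    * Registers the cluster view listener and then processes the current view directly, since the
    * listener is installed too late to see the initial view.
    */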
   @Start(priority = 100)
   public void start() {
      isShuttingDown = false;
      isCoordinator = transport.isCoordinator();

      viewListener = new ClusterViewListener();
      cacheManagerNotifier.addListener(viewListener);
      // The listener already missed the initial view
      handleNewView(transport.getMembers(), false, transport.getViewId());
   }

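   /**
    * Deregisters the view listener and unblocks any thread waiting in {@code waitForView()} by
    * bumping the view id to {@code Integer.MAX_VALUE}.
    */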
   @Stop(priority = 100)
   public void stop() {
      isShuttingDown = true;
      cacheManagerNotifier.removeListener(viewListener);

      // Stop blocking cache topology commands.
      // The synchronization also ensures that the listener has finished executing
      // so we don't get InterruptedExceptions when the notification thread pool shuts down
      synchronized (viewUpdateLock) {
         viewId = Integer.MAX_VALUE;
         viewUpdateLock.notifyAll();
      }
   }

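   /**
    * Starts a rebalance for {@code cacheName} asynchronously on the transport executor, so the
    * caller (normally the {@link RebalancePolicy}) is never blocked; failures are logged and
    * rethrown inside the submitted task.
    */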
   @Override
   public void triggerRebalance(final String cacheName) {
      asyncTransportExecutor.submit(new Callable<Object>() {
         @Override
         public Object call() throws Exception {
            try {
               startRebalance(cacheName);
               return null;
            } catch (Throwable t) {
               log.rebalanceStartError(cacheName, t);
               throw new Exception(t);
            }
         }
      });
   }

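   /**
    * Handles a join request from {@code joiner}. The call blocks until the coordinator has
    * installed the view the joiner saw (and, if this node has just become coordinator, recovered
    * the cluster state). The first member of a cache gets a freshly created consistent hash;
    * later joiners only receive the current topology, and rebalancing is left to the
    * {@link RebalancePolicy}.
    */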
   @Override
   public CacheTopology handleJoin(String cacheName, Address joiner, CacheJoinInfo joinInfo, int viewId) throws Exception {
      waitForView(viewId);
      if (isShuttingDown) {
         log.debugf("Ignoring join request from %s for cache %s, the local cache manager is shutting down",
               joiner, cacheName);
         return null;
      }

      ClusterCacheStatus cacheStatus = initCacheStatusIfAbsent(cacheName, joinInfo);
      boolean hadEmptyConsistentHashes;
      synchronized (cacheStatus) {
         hadEmptyConsistentHashes = cacheStatus.getCacheTopology().getMembers().isEmpty();
         cacheStatus.addMember(joiner);
         if (hadEmptyConsistentHashes) {
            // This node was the first to join. We need to install the initial CH
            int newTopologyId = cacheStatus.getCacheTopology().getTopologyId() + 1;
            List<Address> initialMembers = cacheStatus.getMembers();
            ConsistentHash initialCH = joinInfo.getConsistentHashFactory().create(
                  joinInfo.getHashFunction(), joinInfo.getNumOwners(), joinInfo.getNumSegments(), initialMembers);
            CacheTopology initialTopology = new CacheTopology(newTopologyId, initialCH, null);
            cacheStatus.updateCacheTopology(initialTopology);
            // Don't need to broadcast the initial CH, just return the cache topology to the joiner
         } else {
            // Do nothing. The rebalance policy will trigger a rebalance later.
         }
      }
      if (hadEmptyConsistentHashes) {
         rebalancePolicy.initCache(cacheName, cacheStatus);
      } else {
         rebalancePolicy.updateCacheStatus(cacheName, cacheStatus);
      }
      return cacheStatus.getCacheTopology();
   }

   @Override
   public void handleLeave(String cacheName, Address leaver, int viewId) throws Exception {
      if (isShuttingDown) {
         log.debugf("Ignoring leave request from %s for cache %s, the local cache manager is shutting down",
               leaver, cacheName);
         return;
      }

      ClusterCacheStatus cacheStatus = cacheStatusMap.get(cacheName);
      if (cacheStatus == null) {
         // This can happen if we've just become coordinator
         log.tracef("Ignoring leave request from %s for cache %s because it doesn't have a cache status entry",
               leaver, cacheName);
         return;
      }
      boolean actualLeaver = cacheStatus.removeMember(leaver);
      if (!actualLeaver)
         return;

      onCacheMembershipChange(cacheName, cacheStatus);
   }

   @Override
   public void handleRebalanceCompleted(String cacheName, Address node, int topologyId, Throwable throwable, int viewId) throws Exception {
      if (throwable != null) {
         // TODO We could try to update the pending CH such that nodes reporting errors are not considered to hold any state
         // For now we are just logging the error and proceeding as if the rebalance was successful everywhere
         log.rebalanceError(cacheName, node, throwable);
      }
      log.debugf("Finished local rebalance for cache %s on node %s, topology id = %d", cacheName, node, topologyId);

      ClusterCacheStatus cacheStatus = cacheStatusMap.get(cacheName);
      if (cacheStatus == null || !cacheStatus.isRebalanceInProgress()) {
         throw new CacheException(String.format("Received invalid rebalance confirmation from %s " +
               "for cache %s, we don't have a rebalance in progress", node, cacheName));
      }

      boolean rebalanceCompleted = cacheStatus.confirmRebalanceOnNode(node, topologyId);
      if (rebalanceCompleted) {
         endRebalance(cacheName, cacheStatus);
         broadcastConsistentHashUpdate(cacheName, cacheStatus);
         rebalancePolicy.updateCacheStatus(cacheName, cacheStatus);
      }
   }

   protected void handleNewView(List<Address> ignored, boolean mergeView, int newViewId) {
      synchronized (viewHandlingLock) {
         // check to ensure this is not an older view
         if (newViewId <= viewId) {
            log.tracef("Ignoring old cluster view notification: %s", newViewId);
            return;
         }

         boolean becameCoordinator = !isCoordinator && transport.isCoordinator();
         isCoordinator = transport.isCoordinator();
         log.tracef("Received new cluster view: %s, isCoordinator = %s, becameCoordinator = %s", newViewId,
               isCoordinator, becameCoordinator);

         if ((isCoordinator && mergeView) || becameCoordinator) {
            try {
               Map<String, List<CacheTopology>> clusterCacheMap = recoverClusterStatus(newViewId);

               for (Map.Entry<String, List<CacheTopology>> entry : clusterCacheMap.entrySet()) {
                  String cacheName = entry.getKey();
                  List<CacheTopology> topologyList = entry.getValue();
                  try {
                     updateCacheStatusAfterMerge(cacheName, transport.getMembers(), topologyList);
                  } catch (Exception e) {
                     log.failedToRecoverCacheState(cacheName, e);
                  }
               }
            } catch (InterruptedException e) {
               log.tracef("Cluster state recovery interrupted because the coordinator is shutting down");
               // the CTMI has already stopped, no need to update the view id or notify waiters
               return;
            } catch (Exception e) {
               // TODO Retry?
               log.failedToRecoverClusterState(e);
            }
         } else if (isCoordinator) {
            try {
               updateClusterMembers(transport.getMembers());
            } catch (Exception e) {
               log.errorUpdatingMembersList(e);
            }
         }

         // update the view id last, so join requests from other nodes wait until we recovered existing members' info
         synchronized (viewUpdateLock) {
            viewId = newViewId;
            viewUpdateLock.notifyAll();
         }
      }
   }

   private ClusterCacheStatus initCacheStatusIfAbsent(String cacheName, CacheJoinInfo joinInfo) {
      ClusterCacheStatus cacheStatus = cacheStatusMap.get(cacheName);
      if (cacheStatus == null) {
         ClusterCacheStatus newCacheStatus = new ClusterCacheStatus(cacheName, joinInfo);
         cacheStatus = cacheStatusMap.putIfAbsent(cacheName, newCacheStatus);
         if (cacheStatus == null) {
            cacheStatus = newCacheStatus;
         }
      }
      return cacheStatus;
   }

   public void updateCacheStatusAfterMerge(String cacheName, List<Address> clusterMembers,
                                           List<CacheTopology> partitionTopologies) throws Exception {
      log.tracef("Initializing rebalance policy for cache %s, pre-existing partitions are %s",
            cacheName, partitionTopologies);
      ClusterCacheStatus cacheStatus = cacheStatusMap.get(cacheName);
      if (partitionTopologies.isEmpty())
         return;

      synchronized (cacheStatus) {
         int unionTopologyId = 0;
         // We only use the currentCH, we ignore any ongoing rebalance in the partitions
         ConsistentHash currentCHUnion = null;
         ConsistentHashFactory chFactory = cacheStatus.getJoinInfo().getConsistentHashFactory();
         for (CacheTopology topology : partitionTopologies) {
            if (topology.getTopologyId() > unionTopologyId) {
               unionTopologyId = topology.getTopologyId();
            }
            if (currentCHUnion == null) {
               currentCHUnion = topology.getCurrentCH();
            } else {
               currentCHUnion = chFactory.union(currentCHUnion, topology.getCurrentCH());
            }
         }

         // We have added each node to the cache status when we received its status response
         // Prune those that have left the cluster.
         cacheStatus.updateClusterMembers(clusterMembers);
         List<Address> members = cacheStatus.getMembers();
         if (members.isEmpty()) {
            log.tracef("Cache %s has no members left, skipping topology update", cacheName);
            return;
         }
         if (currentCHUnion != null) {
            currentCHUnion = chFactory.updateMembers(currentCHUnion, members);
         }

         // Make sure the topology id is higher than any topology id we had before in the cluster
         unionTopologyId += 2;
         CacheTopology cacheTopology = new CacheTopology(unionTopologyId, currentCHUnion, null);

         // End any running rebalance
         if (cacheStatus.isRebalanceInProgress()) {
            cacheStatus.endRebalance();
         }
         cacheStatus.updateCacheTopology(cacheTopology);
      }

      // End any rebalance that was running in the other partitions
      broadcastConsistentHashUpdate(cacheName, cacheStatus);

      // Trigger another rebalance in case the CH is not balanced
      rebalancePolicy.updateCacheStatus(cacheName, cacheStatus);
   }

   private void broadcastConsistentHashUpdate(String cacheName, ClusterCacheStatus cacheStatus) throws Exception {
      CacheTopology cacheTopology = cacheStatus.getCacheTopology();
      log.debugf("Updating cluster-wide consistent hash for cache %s, topology = %s", cacheName, cacheTopology);
      ReplicableCommand command = new CacheTopologyControlCommand(cacheName,
            CacheTopologyControlCommand.Type.CH_UPDATE, transport.getAddress(), cacheTopology,
            transport.getViewId());
      executeOnClusterAsync(command, getGlobalTimeout());
   }

   private void startRebalance(String cacheName) throws Exception {
      ClusterCacheStatus cacheStatus = cacheStatusMap.get(cacheName);
      synchronized (cacheStatus) {
         CacheTopology cacheTopology = cacheStatus.getCacheTopology();
         if (cacheStatus.isRebalanceInProgress()) {
            log.tracef("Ignoring request to rebalance cache %s, there's already a rebalance in progress: %s",
                  cacheName, cacheTopology);
            return;
         }

         List<Address> newMembers = new ArrayList<Address>(cacheStatus.getMembers());
         if (newMembers.isEmpty()) {
            log.tracef("Ignoring request to rebalance cache %s, it doesn't have any member", cacheName);
            return;
         }

         log.tracef("Rebalancing consistent hash for cache %s, members are %s", cacheName, newMembers);
         int newTopologyId = cacheTopology.getTopologyId() + 1;
         ConsistentHash currentCH = cacheTopology.getCurrentCH();
         if (currentCH == null) {
            // There was one node in the cache before, and it left after the rebalance was triggered
            // but before the rebalance actually started.
            log.tracef("Ignoring request to rebalance cache %s, it doesn't have a consistent hash", cacheName);
            return;
         }
         if (!newMembers.containsAll(currentCH.getMembers())) {
            newMembers.removeAll(currentCH.getMembers());
            log.tracef("Ignoring request to rebalance cache %s, we have new leavers: %s", cacheName, newMembers);
            return;
         }

         ConsistentHashFactory chFactory = cacheStatus.getJoinInfo().getConsistentHashFactory();
         // This update will only add the joiners to the CH, we have already checked that we don't have leavers
         ConsistentHash updatedMembersCH = chFactory.updateMembers(currentCH, newMembers);
         ConsistentHash balancedCH = chFactory.rebalance(updatedMembersCH);
         if (balancedCH.equals(currentCH)) {
            log.tracef("The balanced CH is the same as the current CH, not rebalancing");
            return;
         }

         CacheTopology newTopology = new CacheTopology(newTopologyId, currentCH, balancedCH);
         log.tracef("Updating cache %s topology for rebalance: %s", cacheName, newTopology);
         newTopology.logRoutingTableInformation();
         cacheStatus.startRebalance(newTopology);
      }
      rebalancePolicy.updateCacheStatus(cacheName, cacheStatus);
      broadcastRebalanceStart(cacheName, cacheStatus);
   }

   private void broadcastRebalanceStart(String cacheName, ClusterCacheStatus cacheStatus) throws Exception {
      CacheTopology cacheTopology = cacheStatus.getCacheTopology();
      log.debugf("Starting cluster-wide rebalance for cache %s, topology = %s", cacheName, cacheTopology);
      ReplicableCommand command = new CacheTopologyControlCommand(cacheName,
            CacheTopologyControlCommand.Type.REBALANCE_START, transport.getAddress(), cacheTopology,
            transport.getViewId());
      executeOnClusterAsync(command, getGlobalTimeout());
   }

   private void endRebalance(String cacheName, ClusterCacheStatus cacheStatus) {
      synchronized (cacheStatus) {
         CacheTopology currentTopology = cacheStatus.getCacheTopology();
         int currentTopologyId = currentTopology.getTopologyId();
         log.debugf("Finished cluster-wide rebalance for cache %s, topology id = %d", cacheName, currentTopologyId);
         int newTopologyId = currentTopologyId + 1;
         ConsistentHash newCurrentCH = currentTopology.getPendingCH();
         CacheTopology newTopology = new CacheTopology(newTopologyId, newCurrentCH, null);
         cacheStatus.updateCacheTopology(newTopology);
         cacheStatus.endRebalance();
      }
   }

   private HashMap<String, List<CacheTopology>> recoverClusterStatus(int newViewId) throws Exception {
      log.debugf("Recovering running caches in the cluster");
      ReplicableCommand command = new CacheTopologyControlCommand(null,
            CacheTopologyControlCommand.Type.GET_STATUS, transport.getAddress(), newViewId);
      Map<Address, Object> statusResponses = executeOnClusterSync(command, getGlobalTimeout());

      HashMap<String, List<CacheTopology>> clusterCacheMap = new HashMap<String, List<CacheTopology>>();
      for (Map.Entry<Address, Object> responseEntry : statusResponses.entrySet()) {
         Address sender = responseEntry.getKey();
         Map<String, Object[]> nodeStatus = (Map<String, Object[]>) responseEntry.getValue();

         for (Map.Entry<String, Object[]> statusEntry : nodeStatus.entrySet()) {
            String cacheName = statusEntry.getKey();
            CacheJoinInfo joinInfo = (CacheJoinInfo) statusEntry.getValue()[0];
            CacheTopology cacheTopology = (CacheTopology) statusEntry.getValue()[1];

            List<CacheTopology> topologyList = clusterCacheMap.get(cacheName);
            if (topologyList == null) {
               // This is the first CacheJoinInfo we got for this cache, initialize its ClusterCacheStatus
               initCacheStatusIfAbsent(cacheName, joinInfo);

               topologyList = new ArrayList<CacheTopology>();
               clusterCacheMap.put(cacheName, topologyList);
            }

            // The cache topology could be null if the new node sent a join request to the old coordinator
            // but didn't get a response back yet
            if (cacheTopology != null) {
               topologyList.add(cacheTopology);
            }

            // Add all the members of the topology that have sent responses first
            // If we only added the sender, we could end up with a different member order
            for (Address member : cacheTopology.getMembers()) {
               if (statusResponses.containsKey(member)) {
                  cacheStatusMap.get(cacheName).addMember(member);
               }
            }

            // This node may have joined, and still not be in the current or pending CH
            // because the old coordinator didn't manage to start the rebalance before shutting down
            cacheStatusMap.get(cacheName).addMember(sender);
         }
      }
      return clusterCacheMap;
   }

   public void updateClusterMembers(List<Address> newClusterMembers) throws Exception {
      log.tracef("Updating cluster members for all the caches. New list is %s", newClusterMembers);
      for (Map.Entry<String, ClusterCacheStatus> e : cacheStatusMap.entrySet()) {
         String cacheName = e.getKey();
         ClusterCacheStatus cacheStatus = e.getValue();
         boolean cacheMembersModified = cacheStatus.updateClusterMembers(newClusterMembers);
         if (cacheMembersModified) {
            onCacheMembershipChange(cacheName, cacheStatus);
         }
      }
   }

   private boolean onCacheMembershipChange(String cacheName, ClusterCacheStatus cacheStatus) throws Exception {
      boolean topologyChanged = updateTopologyAfterMembershipChange(cacheName, cacheStatus);
      if (!topologyChanged)
         return true;

      boolean rebalanceCompleted = cacheStatus.updateRebalanceMembersList();
      if (rebalanceCompleted) {
         endRebalance(cacheName, cacheStatus);
      }

      // We need a consistent hash update even when rebalancing did end
      broadcastConsistentHashUpdate(cacheName, cacheStatus);

      rebalancePolicy.updateCacheStatus(cacheName, cacheStatus);
      return false;
   }

   /**
    * @return {@code true} if the topology was changed, {@code false} otherwise
    */
   private boolean updateTopologyAfterMembershipChange(String cacheName, ClusterCacheStatus cacheStatus) {
      synchronized (cacheStatus) {
         ConsistentHashFactory consistentHashFactory = cacheStatus.getJoinInfo().getConsistentHashFactory();
         int topologyId = cacheStatus.getCacheTopology().getTopologyId();
         ConsistentHash currentCH = cacheStatus.getCacheTopology().getCurrentCH();
         ConsistentHash pendingCH = cacheStatus.getCacheTopology().getPendingCH();
         if (!cacheStatus.needConsistentHashUpdate()) {
            log.tracef("Cache %s members list was updated, but the cache topology doesn't need to change: %s",
                  cacheName, cacheStatus.getCacheTopology());
            return false;
         }

         List<Address> newCurrentMembers = cacheStatus.pruneInvalidMembers(currentCH.getMembers());
         if (newCurrentMembers.isEmpty()) {
            CacheTopology newTopology = new CacheTopology(topologyId + 1, null, null);
            cacheStatus.updateCacheTopology(newTopology);
            log.tracef("Initial topology installed for cache %s: %s", cacheName, newTopology);
            return false;
         }
         ConsistentHash newCurrentCH = consistentHashFactory.updateMembers(currentCH, newCurrentMembers);
         ConsistentHash newPendingCH = null;
         if (pendingCH != null) {
            List<Address> newPendingMembers = cacheStatus.pruneInvalidMembers(pendingCH.getMembers());
            newPendingCH = consistentHashFactory.updateMembers(pendingCH, newPendingMembers);
         }
         CacheTopology newTopology = new CacheTopology(topologyId + 1, newCurrentCH, newPendingCH);
         cacheStatus.updateCacheTopology(newTopology);
         log.tracef("Cache %s topology updated: %s", cacheName, newTopology);
         newTopology.logRoutingTableInformation();
         return true;
      }
   }

   private void waitForView(int viewId) throws InterruptedException {
      if (this.viewId < viewId) {
         log.tracef("Received a cache topology command with a higher view id: %s, our view id is %s",
               viewId, this.viewId);
      }
      synchronized (viewUpdateLock) {
         while (this.viewId < viewId) {
            // break out of the loop after state transfer timeout expires
            viewUpdateLock.wait(1000);
         }
      }
   }

   private Map<Address, Object> executeOnClusterSync(final ReplicableCommand command, final int timeout)
         throws Exception {
      // first invoke remotely
      Future<Map<Address, Response>> remoteFuture = asyncTransportExecutor.submit(new Callable<Map<Address, Response>>() {
         @Override
         public Map<Address, Response> call() throws Exception {
            return transport.invokeRemotely(null, command,
                  ResponseMode.SYNCHRONOUS_IGNORE_LEAVERS, timeout, true, null);
         }
      });

      // invoke the command on the local node
      gcr.wireDependencies(command);
      Response localResponse;
      try {
         if (log.isTraceEnabled()) log.tracef("Attempting to execute command on self: %s", command);
         localResponse = (Response) command.perform(null);
      } catch (Throwable throwable) {
         throw new Exception(throwable);
      }
      if (!localResponse.isSuccessful()) {
         throw new CacheException("Unsuccessful local response");
      }

      // wait for the remote commands to finish
      Map<Address, Response> responseMap = remoteFuture.get(timeout, TimeUnit.MILLISECONDS);

      // parse the responses
      Map<Address, Object> responseValues = new HashMap<Address, Object>(transport.getMembers().size());
      for (Map.Entry<Address, Response> entry : responseMap.entrySet()) {
         Address address = entry.getKey();
         Response response = entry.getValue();
         if (!response.isSuccessful()) {
            Throwable cause = response instanceof ExceptionResponse ? ((ExceptionResponse) response).getException() : null;
            throw new CacheException("Unsuccessful response received from node " + address + ": " + response, cause);
         }
         responseValues.put(address, ((SuccessfulResponse) response).getResponseValue());
      }
      responseValues.put(transport.getAddress(), ((SuccessfulResponse) localResponse).getResponseValue());

      return responseValues;
   }

   private void executeOnClusterAsync(final ReplicableCommand command, final long timeout) throws Exception {
      // invoke the command on the local node
      asyncTransportExecutor.submit(new Runnable() {
         @Override
         public void run() {
            gcr.wireDependencies(command);
            try {
               if (log.isTraceEnabled()) log.tracef("Attempting to execute command on self: %s", command);
               command.perform(null);
            } catch (Throwable throwable) {
               // The command already logs any exception in perform()
            }
         }
      });

      // invoke remotely
      transport.invokeRemotely(null, command,
            ResponseMode.ASYNCHRONOUS_WITH_SYNC_MARSHALLING, timeout, true, null);
   }

   private int getGlobalTimeout() {
      // TODO Rename setting to something like globalRpcTimeout
      return (int) globalConfiguration.transport().distributedSyncTimeout();
   }

   @Listener(sync = true)
   public class ClusterViewListener {
      @SuppressWarnings("unused")
      @Merged
      @ViewChanged
      public void handleViewChange(final ViewChangedEvent e) {
         // need to recover existing caches asynchronously (in case we just became the coordinator)
         asyncTransportExecutor.submit(new Runnable() {
            public void run() {
               handleNewView(e.getNewMembers(), e.isMergeView(), e.getViewId());
            }
         });
      }
   }
}