All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.internal.partition.impl.MigrationManager Maven / Gradle / Ivy

/*
 * Copyright (c) 2008-2016, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.internal.partition.impl;

import com.hazelcast.cluster.ClusterState;
import com.hazelcast.core.HazelcastInstanceNotActiveException;
import com.hazelcast.core.MemberLeftException;
import com.hazelcast.core.MigrationEvent;
import com.hazelcast.instance.MemberImpl;
import com.hazelcast.instance.Node;
import com.hazelcast.internal.metrics.Probe;
import com.hazelcast.internal.partition.InternalPartition;
import com.hazelcast.internal.partition.InternalPartitionService;
import com.hazelcast.internal.partition.MigrationInfo;
import com.hazelcast.internal.partition.MigrationInfo.MigrationStatus;
import com.hazelcast.internal.partition.PartitionRuntimeState;
import com.hazelcast.internal.partition.PartitionStateVersionMismatchException;
import com.hazelcast.internal.partition.impl.InternalMigrationListener.MigrationParticipant;
import com.hazelcast.internal.partition.impl.MigrationPlanner.MigrationDecisionCallback;
import com.hazelcast.internal.partition.operation.FinalizeMigrationOperation;
import com.hazelcast.internal.partition.operation.MigrationCommitOperation;
import com.hazelcast.internal.partition.operation.MigrationRequestOperation;
import com.hazelcast.internal.partition.operation.PromotionCommitOperation;
import com.hazelcast.internal.partition.operation.ShutdownResponseOperation;
import com.hazelcast.logging.ILogger;
import com.hazelcast.nio.Address;
import com.hazelcast.spi.ExecutionService;
import com.hazelcast.spi.exception.TargetNotMemberException;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.spi.partition.MigrationEndpoint;
import com.hazelcast.spi.properties.GroupProperty;
import com.hazelcast.spi.properties.HazelcastProperties;
import com.hazelcast.util.Clock;
import com.hazelcast.util.MutableInteger;
import com.hazelcast.util.Preconditions;
import com.hazelcast.util.scheduler.CoalescingDelayedTrigger;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.logging.Level;

import static com.hazelcast.cluster.memberselector.MemberSelectors.DATA_MEMBER_SELECTOR;
import static com.hazelcast.spi.partition.IPartitionService.SERVICE_NAME;

/**
 *
 * Maintains migration system state and manages migration operations performed within the cluster.
 *
 */
@SuppressWarnings({"checkstyle:classdataabstractioncoupling", "checkstyle:methodcount"})
public class MigrationManager {

    private static final boolean ASSERTION_ENABLED = MigrationManager.class.desiredAssertionStatus();
    private static final int PARTITION_STATE_VERSION_INCREMENT_DELTA_ON_MIGRATION_FAILURE = 2;
    private static final int MIGRATION_PAUSE_DURATION_SECONDS_ON_MIGRATION_FAILURE = 3;
    private static final String INVALID_UUID = "";

    final long partitionMigrationInterval;

    private final Node node;
    private final NodeEngineImpl nodeEngine;
    private final InternalPartitionServiceImpl partitionService;
    private final ILogger logger;
    private final PartitionStateManager partitionStateManager;

    private final MigrationQueue migrationQueue = new MigrationQueue();

    private final MigrationThread migrationThread;

    private final AtomicBoolean migrationAllowed = new AtomicBoolean(true);

    @Probe(name = "lastRepartitionTime")
    private final AtomicLong lastRepartitionTime = new AtomicLong();

    private final long partitionMigrationTimeout;

    private final CoalescingDelayedTrigger delayedResumeMigrationTrigger;

    private final Set
shutdownRequestedAddresses = new HashSet
(); // updates will be done under lock, but reads will be multithreaded. private volatile MigrationInfo activeMigrationInfo; // both reads and updates will be done under lock! private final LinkedHashSet completedMigrations = new LinkedHashSet(); @Probe private final AtomicLong completedMigrationCounter = new AtomicLong(); private volatile InternalMigrationListener internalMigrationListener = new InternalMigrationListener.NopInternalMigrationListener(); private final Lock partitionServiceLock; private final MigrationPlanner migrationPlanner; MigrationManager(Node node, InternalPartitionServiceImpl service, Lock partitionServiceLock) { this.node = node; this.nodeEngine = node.nodeEngine; this.partitionService = service; this.logger = node.getLogger(getClass()); this.partitionServiceLock = partitionServiceLock; migrationPlanner = new MigrationPlanner(node.getLogger(MigrationPlanner.class)); HazelcastProperties properties = node.getProperties(); long intervalMillis = properties.getMillis(GroupProperty.PARTITION_MIGRATION_INTERVAL); partitionMigrationInterval = (intervalMillis > 0 ? intervalMillis : 0); partitionMigrationTimeout = properties.getMillis(GroupProperty.PARTITION_MIGRATION_TIMEOUT); partitionStateManager = partitionService.getPartitionStateManager(); ILogger migrationThreadLogger = node.getLogger(MigrationThread.class); migrationThread = new MigrationThread(this, node.getHazelcastThreadGroup(), migrationThreadLogger, migrationQueue); long migrationPauseDelayMs = TimeUnit.SECONDS.toMillis(MIGRATION_PAUSE_DURATION_SECONDS_ON_MIGRATION_FAILURE); ExecutionService executionService = nodeEngine.getExecutionService(); delayedResumeMigrationTrigger = new CoalescingDelayedTrigger( executionService, migrationPauseDelayMs, 2 * migrationPauseDelayMs, new Runnable() { @Override public void run() { resumeMigration(); } }); } @Probe(name = "migrationActive") private int migrationActiveProbe() { return migrationAllowed.get() ? 1 : 0; } void pauseMigration() { migrationAllowed.set(false); } void resumeMigration() { migrationAllowed.set(true); } private void resumeMigrationEventually() { delayedResumeMigrationTrigger.executeWithDelay(); } boolean isMigrationAllowed() { return migrationAllowed.get(); } private void finalizeMigration(MigrationInfo migrationInfo) { try { Address thisAddress = node.getThisAddress(); int partitionId = migrationInfo.getPartitionId(); boolean source = thisAddress.equals(migrationInfo.getSource()); boolean destination = thisAddress.equals(migrationInfo.getDestination()); assert migrationInfo.getStatus() == MigrationStatus.SUCCESS || migrationInfo.getStatus() == MigrationStatus.FAILED : "Invalid migration: " + migrationInfo; if (source || destination) { boolean success = migrationInfo.getStatus() == MigrationStatus.SUCCESS; MigrationParticipant participant = source ? MigrationParticipant.SOURCE : MigrationParticipant.DESTINATION; if (success) { internalMigrationListener.onMigrationCommit(participant, migrationInfo); } else { internalMigrationListener.onMigrationRollback(participant, migrationInfo); } MigrationEndpoint endpoint = source ? MigrationEndpoint.SOURCE : MigrationEndpoint.DESTINATION; FinalizeMigrationOperation op = new FinalizeMigrationOperation(migrationInfo, endpoint, success); op.setPartitionId(partitionId).setNodeEngine(nodeEngine).setValidateTarget(false) .setService(partitionService); nodeEngine.getOperationService().execute(op); removeActiveMigration(partitionId); } else { final Address partitionOwner = partitionStateManager.getPartitionImpl(partitionId).getOwnerOrNull(); if (node.getThisAddress().equals(partitionOwner)) { removeActiveMigration(partitionId); partitionStateManager.clearMigratingFlag(partitionId); } else { logger.severe("Failed to finalize migration because this member " + thisAddress + " is not a participant of the migration: " + migrationInfo); } } } catch (Exception e) { logger.warning(e); } finally { migrationInfo.doneProcessing(); } } public MigrationInfo setActiveMigration(MigrationInfo migrationInfo) { partitionServiceLock.lock(); try { if (activeMigrationInfo == null) { activeMigrationInfo = migrationInfo; return null; } if (logger.isFineEnabled()) { logger.fine("Active migration is not set: " + migrationInfo + ". Existing active migration: " + activeMigrationInfo + "\n"); } return activeMigrationInfo; } finally { partitionServiceLock.unlock(); } } MigrationInfo getActiveMigration() { return activeMigrationInfo; } private boolean removeActiveMigration(int partitionId) { partitionServiceLock.lock(); try { if (activeMigrationInfo != null) { if (activeMigrationInfo.getPartitionId() == partitionId) { activeMigrationInfo = null; return true; } if (logger.isFineEnabled()) { logger.fine("Active migration is not removed, because it has different partitionId! " + "partitionId=" + partitionId + ", active migration=" + activeMigrationInfo); } } } finally { partitionServiceLock.unlock(); } return false; } void scheduleActiveMigrationFinalization(final MigrationInfo migrationInfo) { partitionServiceLock.lock(); try { final MigrationInfo activeMigrationInfo = this.activeMigrationInfo; if (activeMigrationInfo != null && migrationInfo.equals(activeMigrationInfo)) { if (activeMigrationInfo.startProcessing()) { activeMigrationInfo.setStatus(migrationInfo.getStatus()); finalizeMigration(activeMigrationInfo); } else { logger.info("Scheduling finalization of " + migrationInfo + ", because migration process is currently running."); nodeEngine.getExecutionService().schedule(new Runnable() { @Override public void run() { scheduleActiveMigrationFinalization(migrationInfo); } }, 3, TimeUnit.SECONDS); } return; } if (migrationInfo.getSourceCurrentReplicaIndex() > 0 && node.getThisAddress().equals(migrationInfo.getSource())) { // OLD BACKUP finalizeMigration(migrationInfo); return; } } finally { partitionServiceLock.unlock(); } } private boolean commitMigrationToDestination(Address destination, MigrationInfo migration) { assert migration != null : "No migrations to commit! destination=" + destination; if (node.getThisAddress().equals(destination)) { if (logger.isFinestEnabled()) { logger.finest("Shortcutting migration commit, since destination is master. -> " + migration); } return true; } MemberImpl member = node.getClusterService().getMember(destination); if (member == null) { logger.warning("Destination " + destination + " is not member anymore"); return false; } try { if (logger.isFinestEnabled()) { logger.finest("Sending commit operation to " + destination + " for " + migration); } PartitionRuntimeState partitionState = partitionService.createMigrationCommitPartitionState(migration); String destinationUuid = member.getUuid(); MigrationCommitOperation operation = new MigrationCommitOperation(partitionState, destinationUuid); Future future = nodeEngine.getOperationService() .createInvocationBuilder(SERVICE_NAME, operation, destination) .setTryCount(Integer.MAX_VALUE) .setCallTimeout(Long.MAX_VALUE).invoke(); boolean result = future.get(); if (logger.isFinestEnabled()) { logger.finest("Migration commit result " + result + " from " + destination + " for " + migration); } return result; } catch (Throwable t) { logMigrationCommitFailure(destination, migration, t); } return false; } private void logMigrationCommitFailure(Address destination, MigrationInfo migration, Throwable t) { boolean memberLeft = t instanceof MemberLeftException || t.getCause() instanceof TargetNotMemberException || t.getCause() instanceof HazelcastInstanceNotActiveException; if (memberLeft) { if (node.getThisAddress().equals(destination)) { logger.fine("Migration commit failed for " + migration + " since this node is shutting down."); return; } logger.warning("Migration commit failed for " + migration + " since destination " + destination + " left the cluster"); } else { logger.severe("Migration commit to " + destination + " failed for " + migration, t); } } boolean addCompletedMigration(MigrationInfo migrationInfo) { if (migrationInfo.getStatus() != MigrationStatus.SUCCESS && migrationInfo.getStatus() != MigrationStatus.FAILED) { throw new IllegalArgumentException("Migration doesn't seem completed: " + migrationInfo); } partitionServiceLock.lock(); try { boolean added = completedMigrations.add(migrationInfo); if (added) { completedMigrationCounter.incrementAndGet(); } return added; } finally { partitionServiceLock.unlock(); } } void retainCompletedMigrations(Collection migrations) { partitionServiceLock.lock(); try { completedMigrations.retainAll(migrations); } finally { partitionServiceLock.unlock(); } } private void evictCompletedMigrations(MigrationInfo currentMigration) { partitionServiceLock.lock(); try { assert completedMigrations.contains(currentMigration) : currentMigration + " to evict is not in completed migrations"; Iterator iter = completedMigrations.iterator(); while (iter.hasNext()) { MigrationInfo migration = iter.next(); iter.remove(); // evict completed migrations including current migration if (migration.equals(currentMigration)) { return; } } } finally { partitionServiceLock.unlock(); } } void triggerControlTask() { migrationQueue.clear(); if (!node.joined()) { logger.fine("Node is not joined, will not trigger ControlTask"); return; } migrationQueue.add(new ControlTask()); if (logger.isFinestEnabled()) { logger.finest("Migration queue is cleared and control task is scheduled"); } } InternalMigrationListener getInternalMigrationListener() { return internalMigrationListener; } void setInternalMigrationListener(InternalMigrationListener listener) { Preconditions.checkNotNull(listener); internalMigrationListener = listener; } void resetInternalMigrationListener() { internalMigrationListener = new InternalMigrationListener.NopInternalMigrationListener(); } void onShutdownRequest(Address address) { if (!partitionStateManager.isInitialized()) { sendShutdownOperation(address); return; } ClusterState clusterState = node.getClusterService().getClusterState(); if (clusterState == ClusterState.FROZEN || clusterState == ClusterState.PASSIVE) { sendShutdownOperation(address); return; } if (shutdownRequestedAddresses.add(address)) { logger.info("Shutdown request of " + address + " is handled"); triggerControlTask(); } } void onMemberRemove(MemberImpl member) { Address deadAddress = member.getAddress(); shutdownRequestedAddresses.remove(deadAddress); final MigrationInfo activeMigration = activeMigrationInfo; if (activeMigration != null) { if (deadAddress.equals(activeMigration.getSource()) || deadAddress.equals(activeMigration.getDestination())) { activeMigration.setStatus(MigrationStatus.INVALID); } } } void schedule(MigrationRunnable runnable) { migrationQueue.add(runnable); } List getCompletedMigrationsCopy() { partitionServiceLock.lock(); try { return new ArrayList(completedMigrations); } finally { partitionServiceLock.unlock(); } } boolean hasOnGoingMigration() { return activeMigrationInfo != null || migrationQueue.hasMigrationTasks(); } int getMigrationQueueSize() { return migrationQueue.migrationTaskCount(); } void reset() { migrationQueue.clear(); activeMigrationInfo = null; completedMigrations.clear(); } void start() { migrationThread.start(); } void stop() { migrationThread.stopNow(); } void scheduleMigration(MigrationInfo migrationInfo) { migrationQueue.add(new MigrateTask(migrationInfo)); } void applyMigration(InternalPartitionImpl partition, MigrationInfo migrationInfo) { final Address[] addresses = Arrays.copyOf(partition.getReplicaAddresses(), InternalPartition.MAX_REPLICA_COUNT); if (migrationInfo.getSourceCurrentReplicaIndex() > -1) { addresses[migrationInfo.getSourceCurrentReplicaIndex()] = null; } if (migrationInfo.getDestinationCurrentReplicaIndex() > -1) { addresses[migrationInfo.getDestinationCurrentReplicaIndex()] = null; } addresses[migrationInfo.getDestinationNewReplicaIndex()] = migrationInfo.getDestination(); if (migrationInfo.getSourceNewReplicaIndex() > -1) { addresses[migrationInfo.getSourceNewReplicaIndex()] = migrationInfo.getSource(); } partition.setReplicaAddresses(addresses); } Set
getShutdownRequestedAddresses() { return shutdownRequestedAddresses; } private void sendShutdownOperation(Address address) { if (node.getThisAddress().equals(address)) { assert !node.isRunning() : "Node state: " + node.getState(); partitionService.onShutdownResponse(); } else { nodeEngine.getOperationService().send(new ShutdownResponseOperation(), address); } } MigrationRunnable getActiveTask() { return migrationThread.getActiveTask(); } private String getMemberUuid(Address address) { MemberImpl member = node.getClusterService().getMember(address); return member != null ? member.getUuid() : INVALID_UUID; } private class RepartitioningTask implements MigrationRunnable { @Override public void run() { if (!node.isMaster()) { return; } partitionServiceLock.lock(); try { Address[][] newState = repartition(); if (newState == null) { return; } lastRepartitionTime.set(Clock.currentTimeMillis()); processNewPartitionState(newState); if (ASSERTION_ENABLED) { migrationQueue.add(new AssertPartitionTableTask(partitionService.getMaxAllowedBackupCount())); } migrationQueue.add(new ProcessShutdownRequestsTask()); partitionService.syncPartitionRuntimeState(); } finally { partitionServiceLock.unlock(); } } private Address[][] repartition() { if (!isAllowed()) { return null; } Address[][] newState = partitionStateManager.repartition(shutdownRequestedAddresses); if (newState == null) { migrationQueue.add(new ProcessShutdownRequestsTask()); return null; } if (!isAllowed()) { return null; } return newState; } private void processNewPartitionState(Address[][] newState) { final MutableInteger lostCount = new MutableInteger(); final MutableInteger migrationCount = new MutableInteger(); final List> migrations = new ArrayList>(newState.length); for (int partitionId = 0; partitionId < newState.length; partitionId++) { InternalPartitionImpl currentPartition = partitionStateManager.getPartitionImpl(partitionId); Address[] currentReplicas = currentPartition.getReplicaAddresses(); Address[] newReplicas = newState[partitionId]; MigrationCollector migrationCollector = new MigrationCollector(currentPartition, migrationCount, lostCount); if (logger.isFinestEnabled()) { logger.finest("Planning migrations for partitionId=" + partitionId + ". Current replicas: " + Arrays.toString(currentReplicas) + ", New replicas: " + Arrays.toString(newReplicas)); } migrationPlanner.planMigrations(currentReplicas, newReplicas, migrationCollector); migrationPlanner.prioritizeCopiesAndShiftUps(migrationCollector.migrations); migrations.add(migrationCollector.migrations); } scheduleMigrations(migrations); logMigrationStatistics(migrationCount.value, lostCount.value); } private void scheduleMigrations(List> migrations) { boolean migrationScheduled; do { migrationScheduled = false; for (Queue queue : migrations) { MigrationInfo migration = queue.poll(); if (migration != null) { migrationScheduled = true; scheduleMigration(migration); } } } while (migrationScheduled); } private void logMigrationStatistics(int migrationCount, int lostCount) { if (lostCount > 0) { logger.warning("Assigning new owners for " + lostCount + " LOST partitions!"); } if (migrationCount > 0) { logger.info("Re-partitioning cluster data... Migration queue size: " + migrationCount); } else { logger.info("Partition balance is ok, no need to re-partition cluster data... "); } } private void assignNewPartitionOwner(int partitionId, InternalPartitionImpl currentPartition, Address newOwner) { String destinationUuid = getMemberUuid(newOwner); MigrationInfo migrationInfo = new MigrationInfo(partitionId, null, null, newOwner, destinationUuid, -1, -1, -1, 0); PartitionEventManager partitionEventManager = partitionService.getPartitionEventManager(); partitionEventManager.sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.STARTED); currentPartition.setReplicaAddress(0, newOwner); partitionEventManager.sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.COMPLETED); } private boolean isAllowed() { boolean migrationAllowed = isClusterActiveAndMigrationAllowed(); boolean hasMigrationTasks = migrationQueue.migrationTaskCount() > 1; if (migrationAllowed && !hasMigrationTasks) { return true; } triggerControlTask(); return false; } private boolean isClusterActiveAndMigrationAllowed() { if (isMigrationAllowed()) { ClusterState clusterState = node.getClusterService().getClusterState(); return clusterState == ClusterState.ACTIVE; } return false; } private class MigrationCollector implements MigrationDecisionCallback { private final int partitionId; private final InternalPartitionImpl partition; private final MutableInteger migrationCount; private final MutableInteger lostCount; private final LinkedList migrations = new LinkedList(); MigrationCollector(InternalPartitionImpl partition, MutableInteger migrationCount, MutableInteger lostCount) { partitionId = partition.getPartitionId(); this.partition = partition; this.migrationCount = migrationCount; this.lostCount = lostCount; } @Override public void migrate(Address source, int sourceCurrentReplicaIndex, int sourceNewReplicaIndex, Address destination, int destinationCurrentReplicaIndex, int destinationNewReplicaIndex) { if (logger.isFineEnabled()) { logger.fine("Planned migration -> partitionId=" + partitionId + ", source=" + source + ", sourceCurrentReplicaIndex=" + sourceCurrentReplicaIndex + ", sourceNewReplicaIndex=" + sourceNewReplicaIndex + ", destination=" + destination + ", destinationCurrentReplicaIndex=" + destinationCurrentReplicaIndex + ", destinationNewReplicaIndex=" + destinationNewReplicaIndex); } if (source == null && destinationCurrentReplicaIndex == -1 && destinationNewReplicaIndex == 0) { assert destination != null : "partitionId=" + partitionId + " destination is null"; assert sourceCurrentReplicaIndex == -1 : "partitionId=" + partitionId + " invalid index: " + sourceCurrentReplicaIndex; assert sourceNewReplicaIndex == -1 : "partitionId=" + partitionId + " invalid index: " + sourceNewReplicaIndex; lostCount.value++; assignNewPartitionOwner(partitionId, partition, destination); } else if (destination == null && sourceNewReplicaIndex == -1) { assert source != null : "partitionId=" + partitionId + " source is null"; assert sourceCurrentReplicaIndex != -1 : "partitionId=" + partitionId + " invalid index: " + sourceCurrentReplicaIndex; assert sourceCurrentReplicaIndex != 0 : "partitionId=" + partitionId + " invalid index: " + sourceCurrentReplicaIndex; final Address currentSource = partition.getReplicaAddress(sourceCurrentReplicaIndex); assert source.equals(currentSource) : "partitionId=" + partitionId + " current source=" + source + " is different than expected source=" + source; partition.setReplicaAddress(sourceCurrentReplicaIndex, null); } else { String sourceUuid = getMemberUuid(source); String destinationUuid = getMemberUuid(destination); MigrationInfo migration = new MigrationInfo(partitionId, source, sourceUuid, destination, destinationUuid, sourceCurrentReplicaIndex, sourceNewReplicaIndex, destinationCurrentReplicaIndex, destinationNewReplicaIndex); migrationCount.value++; migrations.add(migration); } } } } @SuppressWarnings({"checkstyle:npathcomplexity"}) private final class AssertPartitionTableTask implements MigrationRunnable { final int maxBackupCount; private AssertPartitionTableTask(int maxBackupCount) { this.maxBackupCount = maxBackupCount; } @Override public void run() { if (!ASSERTION_ENABLED) { return; } if (!node.isMaster()) { return; } partitionServiceLock.lock(); try { if (!partitionStateManager.isInitialized()) { logger.info("Skipping partition table assertions since partition table state is reset"); return; } final InternalPartition[] partitions = partitionStateManager.getPartitions(); final Set
replicas = new HashSet
(); for (InternalPartition partition : partitions) { replicas.clear(); for (int index = 0; index < InternalPartition.MAX_REPLICA_COUNT; index++) { final Address address = partition.getReplicaAddress(index); if (index <= maxBackupCount) { if (shutdownRequestedAddresses.isEmpty()) { assert address != null : "Repartitioning problem, missing replica! " + "Current replica: " + index + ", Max backups: " + maxBackupCount + " -> " + partition; } } else { assert address == null : "Repartitioning problem, leaking replica! " + "Current replica: " + index + ", Max backups: " + maxBackupCount + " -> " + partition; } if (address != null) { assert replicas.add(address) : "Duplicate address in " + partition; } } } } finally { partitionServiceLock.unlock(); } } } class MigrateTask implements MigrationRunnable { final MigrationInfo migrationInfo; MigrateTask(MigrationInfo migrationInfo) { this.migrationInfo = migrationInfo; migrationInfo.setMaster(node.getThisAddress()); } @Override public void run() { if (!node.isMaster()) { return; } if (migrationInfo.getSource() == null && migrationInfo.getDestinationCurrentReplicaIndex() > 0 && migrationInfo.getDestinationNewReplicaIndex() == 0) { throw new AssertionError("Promotion migrations should be handled by " + RepairPartitionTableTask.class.getSimpleName() + "! -> " + migrationInfo); } try { MemberImpl partitionOwner = checkMigrationParticipantsAndGetPartitionOwner(); if (partitionOwner == null) { return; } beforeMigration(); Boolean result = executeMigrateOperation(partitionOwner); processMigrationResult(result); } catch (Throwable t) { final Level level = migrationInfo.isValid() ? Level.WARNING : Level.FINE; logger.log(level, "Error [" + t.getClass() + ": " + t.getMessage() + "] during " + migrationInfo); logger.finest(t); migrationOperationFailed(); } } private void beforeMigration() { internalMigrationListener.onMigrationStart(MigrationParticipant.MASTER, migrationInfo); partitionService.getPartitionEventManager() .sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.STARTED); if (logger.isFineEnabled()) { logger.fine("Starting Migration: " + migrationInfo); } } private MemberImpl checkMigrationParticipantsAndGetPartitionOwner() { MemberImpl partitionOwner = getPartitionOwner(); if (partitionOwner == null) { logger.fine("Partition owner is null. Ignoring " + migrationInfo); triggerRepartitioningAfterMigrationFailure(); return null; } if (migrationInfo.getSource() != null) { if (node.getClusterService().getMember(migrationInfo.getSource()) == null) { logger.fine("Source is not member anymore. Ignoring " + migrationInfo); triggerRepartitioningAfterMigrationFailure(); return null; } } if (node.getClusterService().getMember(migrationInfo.getDestination()) == null) { logger.fine("Destination is not member anymore. Ignoring " + migrationInfo); triggerRepartitioningAfterMigrationFailure(); return null; } return partitionOwner; } private MemberImpl getPartitionOwner() { InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migrationInfo.getPartitionId()); Address owner = partition.getOwnerOrNull(); if (owner == null) { if (migrationInfo.isValid()) { logger.severe("Skipping migration! Partition owner is not set! -> partitionId=" + migrationInfo.getPartitionId() + " , " + partition + " -VS- " + migrationInfo); } return null; } return node.getClusterService().getMember(owner); } private void processMigrationResult(Boolean result) { if (Boolean.TRUE.equals(result)) { if (logger.isFineEnabled()) { logger.fine("Finished Migration: " + migrationInfo); } migrationOperationSucceeded(); } else { Level level = nodeEngine.isRunning() && migrationInfo.isValid() ? Level.WARNING : Level.FINE; if (logger.isLoggable(level)) { logger.log(level, "Migration failed: " + migrationInfo); } migrationOperationFailed(); } } private Boolean executeMigrateOperation(MemberImpl fromMember) { MigrationRequestOperation migrationRequestOp = new MigrationRequestOperation(migrationInfo, partitionService.getPartitionStateVersion()); Future future = nodeEngine.getOperationService().createInvocationBuilder(SERVICE_NAME, migrationRequestOp, fromMember.getAddress()) .setCallTimeout(partitionMigrationTimeout) .setTryCount(InternalPartitionService.MIGRATION_RETRY_COUNT) .setTryPauseMillis(InternalPartitionService.MIGRATION_RETRY_PAUSE).invoke(); try { Object response = future.get(); return (Boolean) nodeEngine.toObject(response); } catch (Throwable e) { Level level = nodeEngine.isRunning() && migrationInfo.isValid() ? Level.WARNING : Level.FINE; if (e instanceof ExecutionException && e.getCause() instanceof PartitionStateVersionMismatchException) { level = Level.FINE; } if (logger.isLoggable(level)) { logger.log(level, "Failed migration from " + fromMember + " for " + migrationRequestOp.getMigrationInfo(), e); } } return Boolean.FALSE; } private void migrationOperationFailed() { migrationInfo.setStatus(MigrationStatus.FAILED); internalMigrationListener.onMigrationComplete(MigrationParticipant.MASTER, migrationInfo, false); partitionServiceLock.lock(); try { addCompletedMigration(migrationInfo); internalMigrationListener.onMigrationRollback(MigrationParticipant.MASTER, migrationInfo); scheduleActiveMigrationFinalization(migrationInfo); int delta = PARTITION_STATE_VERSION_INCREMENT_DELTA_ON_MIGRATION_FAILURE; partitionService.getPartitionStateManager().incrementVersion(delta); if (partitionService.syncPartitionRuntimeState()) { evictCompletedMigrations(migrationInfo); } triggerRepartitioningAfterMigrationFailure(); } finally { partitionServiceLock.unlock(); } partitionService.getPartitionEventManager().sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.FAILED); } private void triggerRepartitioningAfterMigrationFailure() { // Migration failed. // Pause migration process for a small amount of time, if a migration attempt is failed. // Otherwise, migration failures can do a busy spin until migration problem is resolved. // Migration can fail either a node's just joined and not completed start yet or it's just left the cluster. // Re-execute RepartitioningTask when all other migration tasks are done, // an imbalance may occur because of this failure. partitionServiceLock.lock(); try { pauseMigration(); triggerControlTask(); resumeMigrationEventually(); } finally { partitionServiceLock.unlock(); } } private void migrationOperationSucceeded() { internalMigrationListener.onMigrationComplete(MigrationParticipant.MASTER, migrationInfo, true); boolean commitSuccessful = commitMigrationToDestination(migrationInfo.getDestination(), migrationInfo); partitionServiceLock.lock(); try { if (commitSuccessful) { migrationInfo.setStatus(MigrationStatus.SUCCESS); internalMigrationListener.onMigrationCommit(MigrationParticipant.MASTER, migrationInfo); // updates partition table after successful commit InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migrationInfo.getPartitionId()); applyMigration(partition, migrationInfo); } else { migrationInfo.setStatus(MigrationStatus.FAILED); internalMigrationListener.onMigrationRollback(MigrationParticipant.MASTER, migrationInfo); int delta = PARTITION_STATE_VERSION_INCREMENT_DELTA_ON_MIGRATION_FAILURE; partitionService.getPartitionStateManager().incrementVersion(delta); triggerRepartitioningAfterMigrationFailure(); } addCompletedMigration(migrationInfo); scheduleActiveMigrationFinalization(migrationInfo); if (partitionService.syncPartitionRuntimeState()) { evictCompletedMigrations(migrationInfo); } } finally { partitionServiceLock.unlock(); } PartitionEventManager partitionEventManager = partitionService.getPartitionEventManager(); partitionEventManager.sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.COMPLETED); } @Override public String toString() { return getClass().getSimpleName() + "{" + "migrationInfo=" + migrationInfo + '}'; } } private class RepairPartitionTableTask implements MigrationRunnable { @Override public void run() { if (!partitionStateManager.isInitialized()) { return; } Map> promotions = removeUnknownAddressesAndCollectPromotions(); boolean success = promoteBackupsForMissingOwners(promotions); partitionServiceLock.lock(); try { if (success) { if (logger.isFinestEnabled()) { logger.finest("RepartitioningTask scheduled"); } migrationQueue.add(new RepartitioningTask()); } else { triggerControlTask(); } } finally { partitionServiceLock.unlock(); } } private Map> removeUnknownAddressesAndCollectPromotions() { partitionServiceLock.lock(); try { partitionStateManager.removeUnknownAddresses(); Map> promotions = new HashMap>(); for (int partitionId = 0; partitionId < partitionService.getPartitionCount(); partitionId++) { MigrationInfo migration = createPromotionMigrationIfOwnerIsNull(partitionId); if (migration == null) { continue; } Collection migrations = promotions.get(migration.getDestination()); if (migrations == null) { migrations = new ArrayList(); promotions.put(migration.getDestination(), migrations); } migrations.add(migration); } return promotions; } finally { partitionServiceLock.unlock(); } } private boolean promoteBackupsForMissingOwners(Map> promotions) { boolean allSucceeded = true; for (Map.Entry> entry : promotions.entrySet()) { Address destination = entry.getKey(); Collection migrations = entry.getValue(); allSucceeded &= commitPromotionMigrations(destination, migrations); } return allSucceeded; } private boolean commitPromotionMigrations(Address destination, Collection migrations) { boolean success = commitPromotionsToDestination(destination, migrations); boolean local = node.getThisAddress().equals(destination); if (!local) { processPromotionCommitResult(destination, migrations, success); } partitionService.syncPartitionRuntimeState(); return success; } private void processPromotionCommitResult(Address destination, Collection migrations, boolean success) { partitionServiceLock.lock(); try { if (!partitionStateManager.isInitialized()) { // node reset/terminated while running task return; } if (success) { for (MigrationInfo migration : migrations) { InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migration.getPartitionId()); assert partition.getOwnerOrNull() == null : "Owner should be null: " + partition; assert destination.equals(partition.getReplicaAddress(migration.getDestinationCurrentReplicaIndex())) : "Invalid replica! Destination: " + destination + ", index: " + migration.getDestinationCurrentReplicaIndex() + ", " + partition; // single partition update increments partition state version by 1 partition.swapAddresses(0, migration.getDestinationCurrentReplicaIndex()); } } else { int delta = migrations.size() + 1; partitionService.getPartitionStateManager().incrementVersion(delta); } } finally { partitionServiceLock.unlock(); } } private MigrationInfo createPromotionMigrationIfOwnerIsNull(int partitionId) { InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(partitionId); if (partition.getOwnerOrNull() == null) { Address destination = null; int index = 1; for (int i = index; i < InternalPartition.MAX_REPLICA_COUNT; i++) { destination = partition.getReplicaAddress(i); if (destination != null) { index = i; break; } } if (logger.isFinestEnabled()) { if (destination != null) { logger.finest("partitionId=" + partition.getPartitionId() + " owner is removed. replicaIndex=" + index + " will be shifted up to 0. " + partition); } else { logger.finest("partitionId=" + partition.getPartitionId() + " owner is removed. there is no other replica to shift up. " + partition); } } if (destination != null) { String destinationUuid = getMemberUuid(destination); MigrationInfo migration = new MigrationInfo(partitionId, null, null, destination, destinationUuid, -1, -1, index, 0); migration.setMaster(node.getThisAddress()); migration.setStatus(MigrationInfo.MigrationStatus.SUCCESS); return migration; } } if (partition.getOwnerOrNull() == null) { logger.warning("partitionId=" + partitionId + " is completely lost!"); PartitionEventManager partitionEventManager = partitionService.getPartitionEventManager(); partitionEventManager.sendPartitionLostEvent(partitionId, InternalPartition.MAX_BACKUP_COUNT); } return null; } private boolean commitPromotionsToDestination(Address destination, Collection migrations) { assert migrations.size() > 0 : "No promotions to commit! destination=" + destination; MemberImpl member = node.getClusterService().getMember(destination); if (member == null) { logger.warning("Destination " + destination + " is not member anymore"); return false; } try { if (logger.isFinestEnabled()) { logger.finest("Sending commit operation to " + destination + " for " + migrations); } PartitionRuntimeState partitionState = partitionService.createPromotionCommitPartitionState(migrations); String destinationUuid = member.getUuid(); PromotionCommitOperation op = new PromotionCommitOperation(partitionState, migrations, destinationUuid); Future future = nodeEngine.getOperationService() .createInvocationBuilder(SERVICE_NAME, op, destination) .setTryCount(Integer.MAX_VALUE) .setCallTimeout(Long.MAX_VALUE).invoke(); boolean result = future.get(); if (logger.isFinestEnabled()) { logger.finest("Promotion commit result " + result + " from " + destination + " for migrations " + migrations); } return result; } catch (Throwable t) { logPromotionCommitFailure(destination, migrations, t); } return false; } private void logPromotionCommitFailure(Address destination, Collection migrations, Throwable t) { boolean memberLeft = t instanceof MemberLeftException || t.getCause() instanceof TargetNotMemberException || t.getCause() instanceof HazelcastInstanceNotActiveException; int migrationsSize = migrations.size(); if (memberLeft) { if (node.getThisAddress().equals(destination)) { logger.fine("Promotion commit failed for " + migrationsSize + " migrations" + " since this node is shutting down."); return; } if (logger.isFinestEnabled()) { logger.warning("Promotion commit failed for " + migrations + " since destination " + destination + " left the cluster"); } else { logger.warning("Promotion commit failed for " + (migrationsSize == 1 ? migrations.iterator().next() : migrationsSize + " migrations") + " since destination " + destination + " left the cluster"); } return; } if (logger.isFinestEnabled()) { logger.severe("Promotion commit to " + destination + " failed for " + migrations, t); } else { logger.severe("Promotion commit to " + destination + " failed for " + (migrationsSize == 1 ? migrations.iterator().next() : migrationsSize + " migrations"), t); } } } private class ControlTask implements MigrationRunnable { @Override public void run() { partitionServiceLock.lock(); try { migrationQueue.clear(); if (partitionService.scheduleFetchMostRecentPartitionTableTaskIfRequired()) { if (logger.isFinestEnabled()) { logger.finest("FetchMostRecentPartitionTableTask scheduled"); } migrationQueue.add(new ControlTask()); return; } if (logger.isFinestEnabled()) { logger.finest("RepairPartitionTableTask scheduled"); } migrationQueue.add(new RepairPartitionTableTask()); } finally { partitionServiceLock.unlock(); } } } private class ProcessShutdownRequestsTask implements MigrationRunnable { @Override public void run() { if (!node.isMaster()) { return; } partitionServiceLock.lock(); try { final int shutdownRequestCount = shutdownRequestedAddresses.size(); if (shutdownRequestCount > 0) { if (shutdownRequestCount == nodeEngine.getClusterService().getSize(DATA_MEMBER_SELECTOR)) { for (Address address : shutdownRequestedAddresses) { sendShutdownOperation(address); } } else { boolean present = false; for (Address address : shutdownRequestedAddresses) { if (partitionStateManager.isAbsentInPartitionTable(address)) { sendShutdownOperation(address); } else { logger.warning(address + " requested to shutdown but still in partition table"); present = true; } } if (present) { triggerControlTask(); } } } } finally { partitionServiceLock.unlock(); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy