/*
* Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.internal.partition.impl;
import com.hazelcast.cluster.Address;
import com.hazelcast.cluster.ClusterState;
import com.hazelcast.cluster.Member;
import com.hazelcast.config.PersistenceConfig;
import com.hazelcast.core.HazelcastInstanceNotActiveException;
import com.hazelcast.core.MemberLeftException;
import com.hazelcast.core.OperationTimeoutException;
import com.hazelcast.instance.impl.Node;
import com.hazelcast.internal.cluster.impl.ClusterServiceImpl;
import com.hazelcast.internal.metrics.Probe;
import com.hazelcast.internal.partition.IPartitionLostEvent;
import com.hazelcast.internal.partition.InternalPartition;
import com.hazelcast.internal.partition.MigrationEndpoint;
import com.hazelcast.internal.partition.MigrationInfo;
import com.hazelcast.internal.partition.MigrationInfo.MigrationStatus;
import com.hazelcast.internal.partition.MigrationStateImpl;
import com.hazelcast.internal.partition.PartitionReplica;
import com.hazelcast.internal.partition.PartitionRuntimeState;
import com.hazelcast.internal.partition.PartitionStateVersionMismatchException;
import com.hazelcast.internal.partition.PartitionTableView;
import com.hazelcast.internal.partition.impl.MigrationInterceptor.MigrationParticipant;
import com.hazelcast.internal.partition.impl.MigrationPlanner.MigrationDecisionCallback;
import com.hazelcast.internal.partition.operation.DemoteResponseOperation;
import com.hazelcast.internal.partition.operation.FinalizeMigrationOperation;
import com.hazelcast.internal.partition.operation.MigrationCommitOperation;
import com.hazelcast.internal.partition.operation.MigrationRequestOperation;
import com.hazelcast.internal.partition.operation.PromotionCommitOperation;
import com.hazelcast.internal.partition.operation.PublishCompletedMigrationsOperation;
import com.hazelcast.internal.partition.operation.ShutdownResponseOperation;
import com.hazelcast.internal.util.Clock;
import com.hazelcast.internal.util.FutureUtil;
import com.hazelcast.internal.util.Preconditions;
import com.hazelcast.internal.util.Timer;
import com.hazelcast.internal.util.collection.Int2ObjectHashMap;
import com.hazelcast.internal.util.collection.IntHashSet;
import com.hazelcast.internal.util.collection.PartitionIdSet;
import com.hazelcast.internal.util.scheduler.CoalescingDelayedTrigger;
import com.hazelcast.logging.ILogger;
import com.hazelcast.partition.ReplicaMigrationEvent;
import com.hazelcast.spi.exception.TargetNotMemberException;
import com.hazelcast.spi.impl.InternalCompletableFuture;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.spi.impl.executionservice.ExecutionService;
import com.hazelcast.spi.impl.operationservice.Operation;
import com.hazelcast.spi.impl.operationservice.OperationService;
import com.hazelcast.spi.impl.operationservice.impl.InvocationFuture;
import com.hazelcast.spi.impl.operationservice.impl.OperationServiceImpl;
import com.hazelcast.spi.properties.ClusterProperty;
import com.hazelcast.spi.properties.HazelcastProperties;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Queue;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionStage;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.function.BiFunction;
import java.util.function.IntConsumer;
import java.util.logging.Level;
import java.util.stream.Collectors;
import static com.hazelcast.cluster.memberselector.MemberSelectors.DATA_MEMBER_SELECTOR;
import static com.hazelcast.internal.metrics.MetricDescriptorConstants.MIGRATION_METRIC_MIGRATION_MANAGER_MIGRATION_ACTIVE;
import static com.hazelcast.internal.metrics.MetricDescriptorConstants.PARTITIONS_PREFIX;
import static com.hazelcast.internal.metrics.ProbeUnit.BOOLEAN;
import static com.hazelcast.internal.partition.IPartitionService.SERVICE_NAME;
import static com.hazelcast.internal.util.ConcurrencyUtil.CALLER_RUNS;
import static com.hazelcast.memory.MemoryUnit.MEGABYTES;
import static com.hazelcast.spi.impl.executionservice.ExecutionService.ASYNC_EXECUTOR;
import static com.hazelcast.spi.properties.ClusterProperty.PARTITION_CHUNKED_MAX_MIGRATING_DATA_IN_MB;
import static com.hazelcast.spi.properties.ClusterProperty.PARTITION_CHUNKED_MIGRATION_ENABLED;
import static com.hazelcast.spi.properties.ClusterProperty.PARTITION_FRAGMENTED_MIGRATION_ENABLED;
import static com.hazelcast.spi.properties.ClusterProperty.PARTITION_MIGRATION_INTERVAL;
import static com.hazelcast.spi.properties.ClusterProperty.PARTITION_MIGRATION_TIMEOUT;
/**
* Maintains migration system state and manages migration operations performed within the cluster.
*/
@SuppressWarnings({"checkstyle:classdataabstractioncoupling",
"checkstyle:methodcount", "checkstyle:classfanoutcomplexity"})
public class MigrationManagerImpl implements MigrationManager {
private static final int MIGRATION_PAUSE_DURATION_SECONDS_ON_MIGRATION_FAILURE = 3;
private static final int PUBLISH_COMPLETED_MIGRATIONS_BATCH_SIZE = 10;
private static final long CHECK_CLUSTER_PARTITION_RUNTIME_STATES_SYNC_TIMEOUT_SECONDS = 2;
private static final int COMMIT_SUCCESS = 1;
private static final int COMMIT_RETRY = 0;
private static final int COMMIT_FAILURE = -1;
final long partitionMigrationInterval;
private final Node node;
private final NodeEngineImpl nodeEngine;
private final InternalPartitionServiceImpl partitionService;
private final ILogger logger;
private final PartitionStateManager partitionStateManager;
private final MigrationQueue migrationQueue = new MigrationQueue();
private final MigrationThread migrationThread;
private final AtomicBoolean migrationTasksAllowed = new AtomicBoolean(true);
private final long partitionMigrationTimeout;
private final CoalescingDelayedTrigger delayedResumeMigrationTrigger;
private final Set<Member> shutdownRequestedMembers = new HashSet<>();
private final Set<Member> demoteRequestedMembers = new HashSet<>();
// updates will be done under lock, but reads will be multithreaded.
private final ConcurrentMap<Integer, MigrationInfo> activeMigrations = new ConcurrentHashMap<>();
// both reads and updates will be done under lock!
private final LinkedHashSet<MigrationInfo> completedMigrations = new LinkedHashSet<>();
private final AtomicBoolean promotionPermit = new AtomicBoolean(false);
private final MigrationStats stats = new MigrationStats();
private volatile MigrationInterceptor migrationInterceptor = new MigrationInterceptor.NopMigrationInterceptor();
private final Lock partitionServiceLock;
private final MigrationPlanner migrationPlanner;
private final boolean fragmentedMigrationEnabled;
private final boolean chunkedMigrationEnabled;
private final int maxTotalChunkedDataInBytes;
private final long memberHeartbeatTimeoutMillis;
private boolean triggerRepartitioningWhenClusterStateAllowsMigration;
private final int maxParallelMigrations;
private final AtomicInteger migrationCount = new AtomicInteger();
private final Set<MigrationInfo> finalizingMigrationsRegistry = Collections.newSetFromMap(new ConcurrentHashMap<>());
private final Executor asyncExecutor;
/**
* The positive number of seconds to delay triggering rebalancing,
* or 0 when no delay should be applied.
*/
private final int autoRebalanceDelaySeconds;
private volatile boolean delayNextRepartitioningExecution;
private volatile ScheduledFuture scheduledControlTaskFuture;
@SuppressWarnings("checkstyle:executablestatementcount")
MigrationManagerImpl(Node node, InternalPartitionServiceImpl service, Lock partitionServiceLock) {
this.node = node;
this.nodeEngine = node.getNodeEngine();
this.partitionService = service;
this.logger = node.getLogger(getClass());
this.partitionServiceLock = partitionServiceLock;
migrationPlanner = new MigrationPlanner(node.getLogger(MigrationPlanner.class));
HazelcastProperties properties = node.getProperties();
partitionMigrationInterval = properties.getPositiveMillisOrDefault(PARTITION_MIGRATION_INTERVAL, 0);
partitionMigrationTimeout = properties.getMillis(PARTITION_MIGRATION_TIMEOUT);
fragmentedMigrationEnabled = properties.getBoolean(PARTITION_FRAGMENTED_MIGRATION_ENABLED);
chunkedMigrationEnabled = properties.getBoolean(PARTITION_CHUNKED_MIGRATION_ENABLED);
maxTotalChunkedDataInBytes = (int) MEGABYTES.toBytes(properties.getInteger(PARTITION_CHUNKED_MAX_MIGRATING_DATA_IN_MB));
maxParallelMigrations = properties.getInteger(ClusterProperty.PARTITION_MAX_PARALLEL_MIGRATIONS);
partitionStateManager = partitionService.getPartitionStateManager();
ILogger migrationThreadLogger = node.getLogger(MigrationThread.class);
String hzName = nodeEngine.getHazelcastInstance().getName();
migrationThread = new MigrationThread(this, hzName, migrationThreadLogger, migrationQueue);
long migrationPauseDelayMs = TimeUnit.SECONDS.toMillis(MIGRATION_PAUSE_DURATION_SECONDS_ON_MIGRATION_FAILURE);
ExecutionService executionService = nodeEngine.getExecutionService();
delayedResumeMigrationTrigger = new CoalescingDelayedTrigger(
executionService, migrationPauseDelayMs, 2 * migrationPauseDelayMs, this::resumeMigration);
this.memberHeartbeatTimeoutMillis = properties.getMillis(ClusterProperty.MAX_NO_HEARTBEAT_SECONDS);
nodeEngine.getMetricsRegistry().registerStaticMetrics(stats, PARTITIONS_PREFIX);
this.autoRebalanceDelaySeconds =
node.getConfig().getPersistenceConfig().isEnabled()
? node.getConfig().getPersistenceConfig().getRebalanceDelaySeconds()
: PersistenceConfig.DEFAULT_REBALANCE_DELAY;
this.asyncExecutor = node.getNodeEngine().getExecutionService().getExecutor(ASYNC_EXECUTOR);
}
@Override
public long getPartitionMigrationInterval() {
return partitionMigrationInterval;
}
@Probe(name = MIGRATION_METRIC_MIGRATION_MANAGER_MIGRATION_ACTIVE, unit = BOOLEAN)
private int migrationActiveProbe() {
return migrationTasksAllowed.get() ? 1 : 0;
}
@Override
public void pauseMigration() {
migrationTasksAllowed.set(false);
}
@Override
public void resumeMigration() {
migrationTasksAllowed.set(true);
}
private void resumeMigrationEventually() {
delayedResumeMigrationTrigger.executeWithDelay();
}
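/*
 * Illustrative sketch (not executable code in this class): how the coalescing resume above
 * behaves. Bursts of failures call resumeMigrationEventually() repeatedly; the
 * CoalescingDelayedTrigger coalesces those calls so migration resumes once after the delay,
 * instead of once per failure. The timeline is an assumption based on the constructor
 * arguments (delay = migrationPauseDelayMs, hard bound = 2 * migrationPauseDelayMs):
 *
 *   pauseMigration();             // migrationActiveProbe() -> 0
 *   resumeMigrationEventually();  // schedules this::resumeMigration at now + delay
 *   resumeMigrationEventually();  // coalesced: pushes the resume back, but never
 *                                 //   beyond the 2 * delay hard bound
 *   // once the delay elapses, resumeMigration() runs exactly once
 */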
@Override
public boolean areMigrationTasksAllowed() {
return migrationTasksAllowed.get();
}
@Override
public void finalizeMigration(MigrationInfo migrationInfo) {
try {
PartitionReplica localReplica = PartitionReplica.from(node.getLocalMember());
int partitionId = migrationInfo.getPartitionId();
boolean source = localReplica.equals(migrationInfo.getSource());
boolean destination = localReplica.equals(migrationInfo.getDestination());
assert migrationInfo.getStatus() == MigrationStatus.SUCCESS
|| migrationInfo.getStatus() == MigrationStatus.FAILED : "Invalid migration: " + migrationInfo;
if (source || destination) {
boolean success = migrationInfo.getStatus() == MigrationStatus.SUCCESS;
MigrationParticipant participant = source ? MigrationParticipant.SOURCE : MigrationParticipant.DESTINATION;
if (success) {
migrationInterceptor.onMigrationCommit(participant, migrationInfo);
} else {
migrationInterceptor.onMigrationRollback(participant, migrationInfo);
}
MigrationEndpoint endpoint = source ? MigrationEndpoint.SOURCE : MigrationEndpoint.DESTINATION;
FinalizeMigrationOperation op = new FinalizeMigrationOperation(migrationInfo, endpoint, success);
op.setPartitionId(partitionId).setNodeEngine(nodeEngine).setValidateTarget(false).setService(partitionService);
registerFinalizingMigration(migrationInfo);
OperationServiceImpl operationService = nodeEngine.getOperationService();
if (logger.isFineEnabled()) {
logger.fine("Finalizing " + migrationInfo);
}
if (operationService.isRunAllowed(op)) {
// When migration finalization is triggered by a subsequent migration
// on the partition thread, finalization may run directly on the calling thread.
// Otherwise, if a previously completed migration and a newly submitted migration
// are on the same partition, finalizing the previous migration later would
// break the ordering and hence the safety guarantees.
operationService.run(op);
} else {
operationService.execute(op);
}
removeActiveMigration(migrationInfo);
} else {
PartitionReplica partitionOwner = partitionStateManager.getPartitionImpl(partitionId).getOwnerReplicaOrNull();
if (localReplica.equals(partitionOwner)) {
// This is the primary owner of the partition and the migration was a backup replica migration.
// In this case, primary owner has nothing to do anymore,
// just remove the active migration and clear the migrating flag.
removeActiveMigration(migrationInfo);
partitionStateManager.clearMigratingFlag(partitionId);
} else {
logger.severe("Failed to finalize migration because " + localReplica
+ " is not a participant of the migration: " + migrationInfo);
}
}
} catch (Exception e) {
logger.warning(e);
}
}
private void registerFinalizingMigration(MigrationInfo migration) {
finalizingMigrationsRegistry.add(migration);
}
@Override
public boolean isChunkedMigrationEnabled() {
return chunkedMigrationEnabled;
}
@Override
public int getMaxTotalChunkedDataInBytes() {
return maxTotalChunkedDataInBytes;
}
@Override
public boolean removeFinalizingMigration(MigrationInfo migration) {
return finalizingMigrationsRegistry.remove(migration);
}
@Override
public boolean isFinalizingMigrationRegistered(int partitionId) {
return finalizingMigrationsRegistry.stream().anyMatch(m -> partitionId == m.getPartitionId());
}
@Override
public MigrationInfo addActiveMigration(MigrationInfo migrationInfo) {
return activeMigrations.putIfAbsent(migrationInfo.getPartitionId(), migrationInfo);
}
@Override
public MigrationInfo getActiveMigration(int partitionId) {
return activeMigrations.get(partitionId);
}
@Override
public Collection<MigrationInfo> getActiveMigrations() {
return Collections.unmodifiableCollection(activeMigrations.values());
}
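/*
 * Usage sketch for the active-migration registry above (hypothetical caller, not part of this
 * class): the map is keyed by partition ID and uses putIfAbsent(), so only the first migration
 * registered for a partition becomes active, and a concurrent second registration observes it.
 *
 *   MigrationInfo existing = migrationManager.addActiveMigration(newMigration);
 *   if (existing != null) {
 *       // another migration is already in flight for this partition;
 *       // a second one must not be started concurrently
 *   }
 */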
/**
 * Removes the current {@code activeMigration} if it is equal to the given {@code migration}.
 *
 * @param migration the migration expected to be currently active
 * @return {@code true} if the active migration was removed
 */
private boolean removeActiveMigration(MigrationInfo migration) {
MigrationInfo activeMigration =
activeMigrations.computeIfPresent(migration.getPartitionId(),
(k, currentMigration) -> currentMigration.equals(migration) ? null : currentMigration);
if (activeMigration != null) {
logger.warning("Active migration could not be removed! "
+ "Current migration=" + migration + ", active migration=" + activeMigration);
return false;
}
return true;
}
@Override
public boolean acquirePromotionPermit() {
return promotionPermit.compareAndSet(false, true);
}
@Override
public void releasePromotionPermit() {
promotionPermit.set(false);
}
@Override
public void scheduleActiveMigrationFinalization(final MigrationInfo migrationInfo) {
partitionServiceLock.lock();
try {
MigrationInfo activeMigrationInfo = getActiveMigration(migrationInfo.getPartitionId());
if (migrationInfo.equals(activeMigrationInfo)) {
activeMigrationInfo.setStatus(migrationInfo.getStatus());
if (logger.isFineEnabled()) {
logger.fine("Scheduled finalization of " + activeMigrationInfo);
}
finalizeMigration(activeMigrationInfo);
return;
}
PartitionReplica source = migrationInfo.getSource();
if (source != null && migrationInfo.getSourceCurrentReplicaIndex() > 0
&& source.isIdentical(node.getLocalMember())) {
// This is the former backup replica owner.
// A former backup owner does not participate in the migration transaction, since data is
// always copied from the primary replica. The former backup replica is not notified about
// this migration until the migration is committed on the destination, so no active
// migration is set for it.
// This path can be executed multiple times, e.g. when a periodic update (the latest
// completed migrations or the whole partition table) is received while a new migration
// request is submitted concurrently.
// That's why the migration must be validated against the partition table, to determine
// whether this migration finalization has already been processed.
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migrationInfo.getPartitionId());
if (migrationInfo.getStatus() == MigrationStatus.SUCCESS
&& migrationInfo.getSourceNewReplicaIndex() != partition.getReplicaIndex(source)) {
if (logger.isFinestEnabled()) {
logger.finest("Already finalized " + migrationInfo + " on former backup replica. -> " + partition);
}
return;
}
if (logger.isFineEnabled()) {
logger.fine("Scheduled finalization of " + migrationInfo + " on former backup replica.");
}
finalizeMigration(migrationInfo);
}
} finally {
partitionServiceLock.unlock();
}
}
/**
 * Sends a {@link MigrationCommitOperation} to the destination and returns a future that completes
 * with {@code true} if the new partition state was applied on the destination.
 */
@SuppressWarnings({"checkstyle:npathcomplexity", "checkstyle:cyclomaticcomplexity", "checkstyle:methodlength"})
private CompletionStage<Boolean> commitMigrationToDestinationAsync(final MigrationInfo migration) {
PartitionReplica destination = migration.getDestination();
if (destination.isIdentical(node.getLocalMember())) {
if (logger.isFinestEnabled()) {
logger.finest("Shortcutting migration commit, since destination is master. -> " + migration);
}
return CompletableFuture.completedFuture(Boolean.TRUE);
}
Member member = node.getClusterService().getMember(destination.address(), destination.uuid());
if (member == null) {
logger.warning("Cannot commit " + migration + ". Destination " + destination + " is not a member anymore");
return CompletableFuture.completedFuture(Boolean.FALSE);
}
try {
if (logger.isFinestEnabled()) {
logger.finest("Sending migration commit operation to " + destination + " for " + migration);
}
migration.setStatus(MigrationStatus.SUCCESS);
UUID destinationUuid = member.getUuid();
MigrationCommitOperation operation = new MigrationCommitOperation(migration, destinationUuid);
InvocationFuture<Boolean> future = nodeEngine.getOperationService()
.createInvocationBuilder(SERVICE_NAME, operation, destination.address())
.setTryCount(Integer.MAX_VALUE)
.setCallTimeout(memberHeartbeatTimeoutMillis).invoke();
return future.handleAsync((done, t) -> {
// Inspect commit result;
// - if there's an exception, either retry or fail
// - if result is true then success, otherwise failure
logger.fine("Migration commit response received -> " + migration + ", success: " + done + ", failure: " + t);
if (t != null) {
logMigrationCommitFailure(migration, t);
if (t instanceof OperationTimeoutException || t.getCause() instanceof OperationTimeoutException) {
return COMMIT_RETRY;
}
return COMMIT_FAILURE;
}
return done ? COMMIT_SUCCESS : COMMIT_FAILURE;
}, asyncExecutor).thenComposeAsync(result -> {
switch (result) {
case COMMIT_SUCCESS:
return CompletableFuture.completedFuture(true);
case COMMIT_FAILURE:
return CompletableFuture.completedFuture(false);
case COMMIT_RETRY:
logger.fine("Retrying migration commit for -> " + migration);
return commitMigrationToDestinationAsync(migration);
default:
throw new IllegalArgumentException("Unknown migration commit result: " + result);
}
}, asyncExecutor).handleAsync((result, t) -> {
if (t != null) {
logMigrationCommitFailure(migration, t);
return false;
}
if (logger.isFineEnabled()) {
logger.fine("Migration commit result " + result + " from " + destination + " for " + migration);
}
return result;
}, asyncExecutor);
} catch (Throwable t) {
logMigrationCommitFailure(migration, t);
return CompletableFuture.completedFuture(Boolean.FALSE);
}
}
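/*
 * Summary of the commit pipeline above (illustrative, mirrors the stages of the future chain):
 * the first stage classifies the invocation outcome into a COMMIT_* code, the second stage
 * recurses on COMMIT_RETRY, and the last stage folds any unexpected exception into false.
 *
 *   int classify(Boolean done, Throwable t) {
 *       if (t != null) {
 *           boolean timeout = t instanceof OperationTimeoutException
 *                   || t.getCause() instanceof OperationTimeoutException;
 *           return timeout ? COMMIT_RETRY : COMMIT_FAILURE;
 *       }
 *       return done ? COMMIT_SUCCESS : COMMIT_FAILURE;
 *   }
 */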
private void logMigrationCommitFailure(MigrationInfo migration, Throwable t) {
boolean memberLeft = t instanceof MemberLeftException
|| t.getCause() instanceof TargetNotMemberException
|| t.getCause() instanceof HazelcastInstanceNotActiveException;
PartitionReplica destination = migration.getDestination();
if (memberLeft) {
if (destination.isIdentical(node.getLocalMember())) {
logger.fine("Migration commit failed for " + migration
+ " since this node is shutting down.");
return;
}
logger.warning("Migration commit failed for " + migration
+ " since destination " + destination + " left the cluster");
} else {
logger.severe("Migration commit to " + destination + " failed for " + migration, t);
}
}
@Override
public boolean addCompletedMigration(MigrationInfo migrationInfo) {
if (migrationInfo.getStatus() != MigrationStatus.SUCCESS
&& migrationInfo.getStatus() != MigrationStatus.FAILED) {
throw new IllegalArgumentException("Migration doesn't seem completed: " + migrationInfo);
}
if (migrationInfo.getInitialPartitionVersion() <= 0 || migrationInfo.getPartitionVersionIncrement() <= 0) {
throw new IllegalArgumentException("Partition state versions are not set: " + migrationInfo);
}
partitionServiceLock.lock();
try {
boolean added = completedMigrations.add(migrationInfo);
if (added) {
stats.incrementCompletedMigrations();
}
return added;
} finally {
partitionServiceLock.unlock();
}
}
@Override
public void retainCompletedMigrations(Collection<MigrationInfo> migrations) {
partitionServiceLock.lock();
try {
completedMigrations.retainAll(migrations);
} finally {
partitionServiceLock.unlock();
}
}
/**
* Evicts completed migrations from the list
*
* @param migrations completed migrations to evict
*/
private void evictCompletedMigrations(Collection<MigrationInfo> migrations) {
partitionServiceLock.lock();
try {
completedMigrations.removeAll(migrations);
} finally {
partitionServiceLock.unlock();
}
}
@Override
public void triggerControlTask() {
migrationQueue.clear();
migrationThread.abortMigrationTask();
if (stats.getRemainingMigrations() > 0) {
// triggered control task before current migrations are completed
migrationQueue.add(new PublishCompletedMigrationsTask());
}
if (!node.getClusterService().isJoined()) {
logger.fine("Node is not joined, will not trigger ControlTask");
return;
}
if (!partitionService.isLocalMemberMaster()) {
logger.fine("Node is not master, will not trigger ControlTask");
return;
}
migrationQueue.add(new ControlTask());
if (logger.isFinestEnabled()) {
logger.finest("Migration queue is cleared and control task is scheduled");
}
}
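/*
 * Big picture (informal, inferred from the tasks defined in this file): on the master, a control
 * cycle scheduled through this queue runs roughly as
 *
 *   ControlTask
 *     -> RepairPartitionTableTask      // remove unknown members, promote missing owners
 *          -> RedoPartitioningTask     // repartition() + processNewPartitionState()
 *               -> MigrationPlanTask   // run AsyncMigrationTasks under parallelism limits
 *                    -> PublishCompletedMigrationsTask
 *
 * Because triggerControlTask() clears the queue first, a newly triggered cycle supersedes any
 * migrations that were still waiting to run.
 */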
@Override
public void triggerControlTaskWithDelay() {
if (autoRebalanceDelaySeconds > 0) {
delayNextRepartitioningExecution = true;
}
triggerControlTask();
}
@Override
public MigrationInterceptor getMigrationInterceptor() {
return migrationInterceptor;
}
@Override
public void setMigrationInterceptor(MigrationInterceptor interceptor) {
Preconditions.checkNotNull(interceptor);
migrationInterceptor = interceptor;
}
@Override
public void resetMigrationInterceptor() {
migrationInterceptor = new MigrationInterceptor.NopMigrationInterceptor();
}
@Override
public boolean onDemoteRequest(Member member) {
if (this.node.getClusterService().getSize(DATA_MEMBER_SELECTOR) - getDataDisownRequestedMembers().size() <= 1) {
logger.info("Demote is not possible because there would be no data member left after demotion");
return false;
}
if (!partitionStateManager.isInitialized()) {
if (demoteRequestedMembers.add(member)) {
logger.info("Demote request of " + member + " is handled");
}
sendDemoteResponseOperation(member);
return true;
}
ClusterState clusterState = node.getClusterService().getClusterState();
if (!clusterState.isMigrationAllowed() && clusterState != ClusterState.IN_TRANSITION) {
logger.info("Demote is not possible in cluster state " + clusterState);
return false;
}
if (demoteRequestedMembers.add(member)) {
logger.info("Demote request of " + member + " is handled");
triggerControlTask();
}
return true;
}
@Override
public void onShutdownRequest(Member member) {
if (!partitionStateManager.isInitialized()) {
sendShutdownResponseOperation(member);
return;
}
ClusterState clusterState = node.getClusterService().getClusterState();
if (!clusterState.isMigrationAllowed() && clusterState != ClusterState.IN_TRANSITION) {
sendShutdownResponseOperation(member);
return;
}
if (shutdownRequestedMembers.add(member)) {
logger.info("Shutdown request of " + member + " is handled");
triggerControlTask();
}
}
@Override
public void onMemberRemove(Member member) {
shutdownRequestedMembers.remove(member);
demoteRequestedMembers.remove(member);
}
@Override
public void schedule(MigrationRunnable runnable) {
migrationQueue.add(runnable);
}
@Override
public List<MigrationInfo> getCompletedMigrationsCopy() {
partitionServiceLock.lock();
try {
return new ArrayList<>(completedMigrations);
} finally {
partitionServiceLock.unlock();
}
}
/**
* Returns a copy of the list of completed migrations for the specific partition.
* Runs under the partition service lock.
*/
private List<MigrationInfo> getCompletedMigrations(int partitionId) {
partitionServiceLock.lock();
try {
List<MigrationInfo> migrations = new LinkedList<>();
for (MigrationInfo migration : completedMigrations) {
if (partitionId == migration.getPartitionId()) {
migrations.add(migration);
}
}
return migrations;
} finally {
partitionServiceLock.unlock();
}
}
@Override
public boolean hasOnGoingMigration() {
return !activeMigrations.isEmpty() || getMigrationQueueSize() > 0;
}
@Override
public int getMigrationQueueSize() {
int migrations = migrationCount.get();
return migrations + migrationQueue.migrationTaskCount();
}
@Override
public void reset() {
try {
if (scheduledControlTaskFuture != null) {
scheduledControlTaskFuture.cancel(true);
}
} catch (Throwable t) {
logger.fine("Cancelling a scheduled control task threw an exception", t);
}
migrationQueue.clear();
migrationCount.set(0);
activeMigrations.clear();
completedMigrations.clear();
shutdownRequestedMembers.clear();
demoteRequestedMembers.clear();
migrationTasksAllowed.set(true);
}
@Override
public void start() {
migrationThread.start();
}
@Override
public void stop() {
migrationThread.stopNow();
}
@Override
public void scheduleMigration(MigrationInfo migrationInfo) {
migrationQueue.add(() -> new AsyncMigrationTask(migrationInfo).run().toCompletableFuture().join());
}
/**
* Mutates the partition state and applies the migration.
*/
static void applyMigration(InternalPartitionImpl partition, MigrationInfo migrationInfo) {
final PartitionReplica[] members = partition.getReplicasCopy();
if (migrationInfo.getSourceCurrentReplicaIndex() > -1) {
members[migrationInfo.getSourceCurrentReplicaIndex()] = null;
}
if (migrationInfo.getDestinationCurrentReplicaIndex() > -1) {
members[migrationInfo.getDestinationCurrentReplicaIndex()] = null;
}
members[migrationInfo.getDestinationNewReplicaIndex()] = migrationInfo.getDestination();
if (migrationInfo.getSourceNewReplicaIndex() > -1) {
members[migrationInfo.getSourceNewReplicaIndex()] = migrationInfo.getSource();
}
partition.setReplicas(members);
}
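/*
 * Worked example for applyMigration() (hypothetical replicas A, C and new member D): a plain
 * COPY of a backup to D uses source == null and destinationNewReplicaIndex == 1:
 *
 *   before: [A, null, C]  +  destination=D, destinationCurrentReplicaIndex=-1,
 *                            destinationNewReplicaIndex=1, source indices -1
 *   after:  [A, D, C]
 *
 * A SHIFT DOWN of owner A to index 1 while D takes ownership uses source=A,
 * sourceCurrentReplicaIndex=0, sourceNewReplicaIndex=1, destination=D,
 * destinationCurrentReplicaIndex=-1, destinationNewReplicaIndex=0:
 *
 *   before: [A, null, C]  ->  after: [D, A, C]
 */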
@Override
public Set<Member> getDataDisownRequestedMembers() {
Set<Member> result = new HashSet<>(shutdownRequestedMembers);
result.addAll(demoteRequestedMembers);
return result;
}
/**
 * Sends a {@link DemoteResponseOperation} to the given {@code member} or takes a shortcut if the
 * demoted member is the local member.
 */
private void sendDemoteResponseOperation(Member member) {
if (node.getThisAddress().equals(member.getAddress())) {
partitionService.onDemoteResponse();
} else {
nodeEngine.getOperationService().send(new DemoteResponseOperation(member.getUuid()), member.getAddress());
}
}
/**
 * Sends a {@link ShutdownResponseOperation} to the given {@code member} or takes a shortcut if the
 * shutting-down member is the local member.
 */
private void sendShutdownResponseOperation(Member member) {
if (node.getThisAddress().equals(member.getAddress())) {
assert !node.isRunning() : "Node state: " + node.getState();
partitionService.onShutdownResponse();
} else {
nodeEngine.getOperationService().send(new ShutdownResponseOperation(member.getUuid()), member.getAddress());
}
}
@Override
public boolean shouldTriggerRepartitioningWhenClusterStateAllowsMigration() {
return triggerRepartitioningWhenClusterStateAllowsMigration;
}
private void publishCompletedMigrations() {
if (!partitionService.isLocalMemberMaster()) {
return;
}
assert partitionStateManager.isInitialized();
final List<MigrationInfo> migrations = getCompletedMigrationsCopy();
if (logger.isFineEnabled()) {
logger.fine("Publishing completed migrations [" + migrations.size() + "]: " + migrations);
}
OperationService operationService = nodeEngine.getOperationService();
ClusterServiceImpl clusterService = node.clusterService;
final Collection<Member> members = clusterService.getMembers();
final AtomicInteger latch = new AtomicInteger(members.size() - 1);
for (Member member : members) {
if ((member.localMember())) {
continue;
}
Operation operation = new PublishCompletedMigrationsOperation(migrations);
InternalCompletableFuture<Boolean> f
= operationService.invokeOnTarget(SERVICE_NAME, operation, member.getAddress());
f.whenCompleteAsync((response, t) -> {
if (t == null) {
if (!Boolean.TRUE.equals(response)) {
logger.fine(member + " rejected completed migrations with response " + response);
partitionService.sendPartitionRuntimeState(member.getAddress());
return;
}
if (latch.decrementAndGet() == 0) {
logger.fine("Evicting " + migrations.size() + " completed migrations.");
evictCompletedMigrations(migrations);
}
} else {
logger.fine("Failure while publishing completed migrations to " + member, t);
partitionService.sendPartitionRuntimeState(member.getAddress());
}
}, asyncExecutor);
}
}
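/*
 * Note on the eviction protocol above (informal): the AtomicInteger acts as a countdown over the
 * non-local members. Completed migrations are evicted from the local list only after every remote
 * member has acknowledged them; a rejection or a failure instead falls back to sending the full
 * partition runtime state to that member.
 */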
@Override
public MigrationStats getStats() {
return stats;
}
/**
* Invoked on the master node. Rearranges the partition table if there is no recent activity in the cluster after
* this task has been scheduled, schedules migrations and syncs the partition state.
* Also schedules a {@link ProcessShutdownRequestsTask}. Acquires partition service lock.
*/
class RedoPartitioningTask implements MigrationRunnable {
@Override
public void run() {
if (!partitionService.isLocalMemberMaster()) {
return;
}
partitionServiceLock.lock();
try {
triggerRepartitioningWhenClusterStateAllowsMigration
= !node.getClusterService().getClusterState().isMigrationAllowed();
if (triggerRepartitioningWhenClusterStateAllowsMigration) {
if (logger.isFineEnabled()) {
logger.fine("Migrations are not allowed yet, "
+ "repartitioning will be triggered when cluster state allows migrations.");
}
assignCompletelyLostPartitions();
return;
}
PartitionReplica[][] newState = repartition();
if (newState == null) {
return;
}
processNewPartitionState(newState);
migrationQueue.add(new ProcessShutdownRequestsTask());
migrationQueue.add(new ProcessDemoteRequestsTask());
} finally {
partitionServiceLock.unlock();
}
}
/**
* Rearranges the partition table if the cluster is stable, returns the new partition table and schedules a
* {@link ProcessShutdownRequestsTask} if the repartitioning failed.
*
* @return the new partition table or {@code null} if the cluster is not stable or the repartitioning failed
*/
private PartitionReplica[][] repartition() {
if (!migrationsTasksAllowed()) {
return null;
}
PartitionReplica[][] newState = null;
if (node.getNodeExtension().getInternalHotRestartService().isEnabled()) {
// check partition table snapshots when persistence is enabled
newState = checkSnapshots();
}
if (newState != null) {
logger.info("Identified a snapshot of left member for repartition");
} else {
newState = partitionStateManager.repartition(getDataDisownRequestedMembers(), null);
}
if (newState == null) {
migrationQueue.add(new ProcessShutdownRequestsTask());
return null;
}
if (!migrationsTasksAllowed()) {
return null;
}
return newState;
}
PartitionReplica[][] checkSnapshots() {
Set<UUID> shutdownRequestedReplicas = new HashSet<>();
Set<UUID> currentReplicas = new HashSet<>();
Map<UUID, Address> currentAddressMapping = new HashMap<>();
getDataDisownRequestedMembers().forEach(member -> shutdownRequestedReplicas.add(member.getUuid()));
Collection<Member> currentMembers = node.getClusterService().getMembers(DATA_MEMBER_SELECTOR);
currentMembers.forEach(member -> currentReplicas.add(member.getUuid()));
currentMembers.forEach(member -> currentAddressMapping.put(member.getUuid(), member.getAddress()));
Set<PartitionTableView> candidates = new TreeSet<>(
new PartitionTableViewDistanceComparator(partitionStateManager.getPartitionTable()));
for (PartitionTableView partitionTableView : partitionStateManager.snapshots()) {
if (partitionTableView.composedOf(currentReplicas, shutdownRequestedReplicas)) {
candidates.add(partitionTableView);
}
}
if (candidates.isEmpty()) {
return null;
}
// find least distant
return candidates.iterator().next().toArray(currentAddressMapping);
}
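/*
 * Note on the selection above (informal): candidates is a TreeSet ordered by
 * PartitionTableViewDistanceComparator, i.e. by distance from the current partition table. Only
 * snapshots whose membership matches the current data members plus the disown-requested ones are
 * admitted, so candidates.iterator().next() yields the snapshot requiring the fewest replica
 * changes, minimizing migrations when a persistence-enabled member rejoins.
 */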
/**
* Assigns new owners to completely lost partitions (which do not have owners for any replica)
* when cluster state does not allow migrations/repartitioning but allows promotions.
*/
private void assignCompletelyLostPartitions() {
if (!node.getClusterService().getClusterState().isPartitionPromotionAllowed()) {
// Migrations and partition promotions are not allowed. We cannot modify partition table.
return;
}
logger.fine("Cluster state doesn't allow repartitioning. RedoPartitioningTask will only assign lost partitions.");
InternalPartition[] partitions = partitionStateManager.getPartitions();
PartitionIdSet partitionIds = Arrays.stream(partitions)
.filter(p -> InternalPartition.replicaIndices().allMatch(i -> p.getReplica(i) == null))
.map(InternalPartition::getPartitionId)
.collect(Collectors.toCollection(() -> new PartitionIdSet(partitions.length)));
if (!partitionIds.isEmpty()) {
PartitionReplica[][] state = partitionStateManager.repartition(getDataDisownRequestedMembers(), partitionIds);
if (state != null) {
logger.warning("Assigning new owners for " + partitionIds.size()
+ " LOST partitions, when migration is not allowed!");
int replicaUpdateCount = (int) partitionIds.stream()
.flatMap(partitionId -> Arrays.stream(state[partitionId]).filter(Objects::nonNull)).count();
MigrationStateImpl[] states = {new MigrationStateImpl(Clock.currentTimeMillis(), replicaUpdateCount, 0, 0L)};
PartitionEventManager partitionEventManager = partitionService.getPartitionEventManager();
partitionEventManager.sendMigrationProcessStartedEvent(states[0]);
partitionIds.intIterator().forEachRemaining((IntConsumer) partitionId -> {
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(partitionId);
PartitionReplica[] replicas = state[partitionId];
partition.setReplicas(replicas);
InternalPartition.replicaIndices()
.filter(i -> replicas[i] != null)
.forEach(i -> {
MigrationInfo migration = new MigrationInfo(partitionId, null, replicas[i], -1, -1, -1, i)
.setStatus(MigrationStatus.SUCCESS);
states[0] = states[0].onComplete(0L);
partitionEventManager.sendMigrationEvent(states[0], migration, 0L);
});
});
partitionEventManager.sendMigrationProcessCompletedEvent(states[0]);
node.getNodeExtension().onPartitionStateChange();
} else {
logger.warning("Unable to assign LOST partitions");
}
}
}
/**
* Processes the new partition state by planning and scheduling migrations.
*/
private void processNewPartitionState(PartitionReplica[][] newState) {
int migrationCount = 0;
// List of migration queues per-partition
List<Queue<MigrationInfo>> partitionMigrationQueues = new ArrayList<>(newState.length);
Int2ObjectHashMap<PartitionReplica> lostPartitions = new Int2ObjectHashMap<>();
for (int partitionId = 0; partitionId < newState.length; partitionId++) {
InternalPartitionImpl currentPartition = partitionStateManager.getPartitionImpl(partitionId);
PartitionReplica[] currentReplicas = currentPartition.replicas();
PartitionReplica[] newReplicas = newState[partitionId];
MigrationCollector migrationCollector = new MigrationCollector(currentPartition);
if (logger.isFinestEnabled()) {
logger.finest("Planning migrations for partitionId=" + partitionId
+ ". Current replicas: " + Arrays.toString(currentReplicas)
+ ", New replicas: " + Arrays.toString(newReplicas));
}
migrationPlanner.planMigrations(partitionId, currentReplicas, newReplicas, migrationCollector);
migrationPlanner.prioritizeCopiesAndShiftUps(migrationCollector.migrations);
if (migrationCollector.lostPartitionDestination != null) {
lostPartitions.put(partitionId, migrationCollector.lostPartitionDestination);
}
if (!migrationCollector.migrations.isEmpty()) {
partitionMigrationQueues.add(migrationCollector.migrations);
migrationCount += migrationCollector.migrations.size();
}
}
stats.markNewRepartition(migrationCount);
if (migrationCount > 0) {
partitionService.getPartitionEventManager().sendMigrationProcessStartedEvent(stats.toMigrationState());
}
if (!lostPartitions.isEmpty()) {
logger.warning("Assigning new owners for " + lostPartitions.size() + " LOST partitions!");
lostPartitions.forEach((partitionId, destination) -> {
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(partitionId);
assignLostPartitionOwner(partition, destination);
});
node.getNodeExtension().onPartitionStateChange();
}
partitionService.publishPartitionRuntimeState();
if (migrationCount > 0) {
scheduleMigrations(partitionMigrationQueues);
// Schedule a task to publish completed migrations after all migrations tasks are completed.
schedule(new PublishCompletedMigrationsTask());
}
logMigrationStatistics(migrationCount);
}
/**
* Schedules all migrations.
*/
private void scheduleMigrations(List<Queue<MigrationInfo>> partitionMigrationQueues) {
schedule(new MigrationPlanTask(partitionMigrationQueues));
}
private void logMigrationStatistics(int migrationCount) {
if (migrationCount > 0) {
logger.info("Repartitioning cluster data. Migration tasks count: " + migrationCount);
} else {
logger.info("Partition balance is ok, no need to repartition.");
}
}
private void assignLostPartitionOwner(InternalPartitionImpl partition, PartitionReplica newOwner) {
partition.setReplica(0, newOwner);
stats.incrementCompletedMigrations();
MigrationInfo migrationInfo = new MigrationInfo(partition.getPartitionId(), null, newOwner, -1, -1, -1, 0);
migrationInfo.setStatus(MigrationStatus.SUCCESS);
partitionService.getPartitionEventManager().sendMigrationEvent(stats.toMigrationState(), migrationInfo, 0L);
}
/**
 * Returns {@code true} if migration tasks are allowed and there are no other tasks in the
 * migration queue; otherwise triggers the control task and returns {@code false}.
 */
private boolean migrationsTasksAllowed() {
boolean migrationTasksAllowed = areMigrationTasksAllowed();
boolean hasMigrationTasks = migrationQueue.migrationTaskCount() > 1;
if (migrationTasksAllowed && !hasMigrationTasks) {
return true;
}
triggerControlTask();
return false;
}
private class MigrationCollector implements MigrationDecisionCallback {
private final InternalPartitionImpl partition;
private final LinkedList<MigrationInfo> migrations = new LinkedList<>();
private PartitionReplica lostPartitionDestination;
MigrationCollector(InternalPartitionImpl partition) {
this.partition = partition;
}
@Override
public void migrate(PartitionReplica source,
int sourceCurrentReplicaIndex,
int sourceNewReplicaIndex,
PartitionReplica destination,
int destinationCurrentReplicaIndex,
int destinationNewReplicaIndex) {
int partitionId = partition.getPartitionId();
if (logger.isFineEnabled()) {
logger.fine("Planned migration -> partitionId=" + partitionId
+ ", source=" + source + ", sourceCurrentReplicaIndex=" + sourceCurrentReplicaIndex
+ ", sourceNewReplicaIndex=" + sourceNewReplicaIndex + ", destination=" + destination
+ ", destinationCurrentReplicaIndex=" + destinationCurrentReplicaIndex
+ ", destinationNewReplicaIndex=" + destinationNewReplicaIndex);
}
if (source == null && destinationCurrentReplicaIndex == -1 && destinationNewReplicaIndex == 0) {
assert destination != null : "partitionId=" + partitionId + " destination is null";
assert sourceCurrentReplicaIndex == -1
: "partitionId=" + partitionId + " invalid index: " + sourceCurrentReplicaIndex;
assert sourceNewReplicaIndex == -1
: "partitionId=" + partitionId + " invalid index: " + sourceNewReplicaIndex;
assert lostPartitionDestination == null : "Current: " + lostPartitionDestination + ", New: " + destination;
lostPartitionDestination = destination;
} else if (destination == null && sourceNewReplicaIndex == -1) {
assert source != null : "partitionId=" + partitionId + " source is null";
assert sourceCurrentReplicaIndex != -1
: "partitionId=" + partitionId + " invalid index: " + sourceCurrentReplicaIndex;
assert sourceCurrentReplicaIndex != 0
: "partitionId=" + partitionId + " invalid index: " + sourceCurrentReplicaIndex;
PartitionReplica currentSource = partition.getReplica(sourceCurrentReplicaIndex);
assert source.equals(currentSource)
: "partitionId=" + partitionId + " current source="
+ currentSource + " is different than expected source=" + source;
partition.setReplica(sourceCurrentReplicaIndex, null);
} else {
MigrationInfo migration = new MigrationInfo(partitionId, source, destination,
sourceCurrentReplicaIndex, sourceNewReplicaIndex,
destinationCurrentReplicaIndex, destinationNewReplicaIndex);
migrations.add(migration);
}
}
}
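/*
 * The callback above classifies each planner decision (summary of the branches):
 *
 *   source == null, destinationNewReplicaIndex == 0, no current destination index
 *       -> the partition was completely lost; remember the destination as the new owner
 *   destination == null, sourceNewReplicaIndex == -1
 *       -> a backup replica is simply removed; cleared in place, no migration needed
 *   otherwise
 *       -> a real migration; queued for the MigrationPlanTask
 */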
}
class MigrationPlanTask implements MigrationRunnable {
/**
* List of migration queues per-partition
*/
private final List<Queue<MigrationInfo>> partitionMigrationQueues;
/**
* Queue for completed migrations.
* It will be processed concurrently while migrations are running.
*/
private final BlockingQueue<MigrationInfo> completed;
/**
* Set of currently migrating partition IDs.
* It's illegal to have concurrent migrations on the same partition.
*/
private final IntHashSet migratingPartitions;
/**
* Map of endpoint -> migration-count.
* Only {@link #maxParallelMigrations} number of migrations are allowed on a single member.
*/
private final Map<Address, Integer> endpoint2MigrationCount = new HashMap<>();
private int ongoingMigrationCount;
private boolean failed;
private volatile boolean aborted;
MigrationPlanTask(List<Queue<MigrationInfo>> partitionMigrationQueues) {
this.partitionMigrationQueues = partitionMigrationQueues;
this.completed = new ArrayBlockingQueue<>(partitionMigrationQueues.size());
this.migratingPartitions
= new IntHashSet(partitionMigrationQueues
.stream().mapToInt(Collection::size).sum(), -1);
}
@Override
public void run() {
migrationCount.set(partitionMigrationQueues
.stream().mapToInt(Collection::size).sum());
while (true) {
MigrationInfo migration = next();
if (migration == null) {
break;
}
if (failed | aborted) {
break;
}
onStart(migration);
try {
CompletionStage<Boolean> f = new AsyncMigrationTask(migration).run();
f.thenRunAsync(() -> {
logger.fine("AsyncMigrationTask completed: " + migration);
boolean offered = completed.offer(migration);
assert offered : "Failed to offer completed migration: " + migration;
}, CALLER_RUNS);
} catch (Throwable e) {
logger.warning("AsyncMigrationTask failed: " + migration, e);
boolean offered = completed.offer(migration);
assert offered : "Failed to offer completed migration: " + migration;
}
if (!migrationDelay()) {
break;
}
}
waitOngoingMigrations();
if (failed || aborted) {
logger.info("Rebalance process was " + (failed ? "failed" : "aborted")
+ ". Ignoring remaining migrations. Will recalculate the new migration plan. ("
+ stats.formatToString(logger.isFineEnabled()) + ")");
migrationCount.set(0);
partitionMigrationQueues.clear();
} else {
logger.info("All migration tasks have been completed. (" + stats.formatToString(logger.isFineEnabled()) + ")");
}
}
private void onStart(MigrationInfo migration) {
boolean added = migratingPartitions.add(migration.getPartitionId());
assert added : "Couldn't add partitionId to migrating partitions set: " + migration;
BiFunction<Address, Integer, Integer> inc = (address, current) -> current != null ? current + 1 : 1;
int count = endpoint2MigrationCount.compute(migration.getDestinationAddress(), inc);
assert count > 0 && count <= maxParallelMigrations : "Count: " + count + " -> " + migration;
count = endpoint2MigrationCount.compute(sourceAddress(migration), inc);
assert count > 0 && count <= maxParallelMigrations : "Count: " + count + " -> " + migration;
ongoingMigrationCount++;
migrationCount.decrementAndGet();
}
private void onComplete(MigrationInfo migration) {
boolean removed = migratingPartitions.remove(migration.getPartitionId());
assert removed : "Couldn't remove partitionId from migrating partitions set: " + migration;
BiFunction<Address, Integer, Integer> dec = (address, current) -> current != null ? current - 1 : -1;
long count = endpoint2MigrationCount.compute(migration.getDestinationAddress(), dec);
assert count >= 0 && count < maxParallelMigrations : "Count: " + count + " -> " + migration;
count = endpoint2MigrationCount.compute(sourceAddress(migration), dec);
assert count >= 0 && count < maxParallelMigrations : "Count: " + count + " -> " + migration;
if (migration.getStatus() != MigrationStatus.SUCCESS) {
failed = true;
}
ongoingMigrationCount--;
}
private boolean processCompleted() {
boolean ok = false;
MigrationInfo migration;
while ((migration = completed.poll()) != null) {
onComplete(migration);
ok = true;
}
return ok;
}
private MigrationInfo next() {
MigrationInfo m;
while ((m = next0()) == null) {
if (partitionMigrationQueues.isEmpty()) {
break;
}
if (!processCompleted()) {
try {
MigrationInfo migration = completed.take();
onComplete(migration);
} catch (InterruptedException e) {
onInterrupted(e);
break;
}
}
if (failed | aborted) {
break;
}
}
return m;
}
private MigrationInfo next0() {
Iterator<Queue<MigrationInfo>> iter = partitionMigrationQueues.iterator();
while (iter.hasNext()) {
Queue<MigrationInfo> q = iter.next();
if (q.isEmpty()) {
iter.remove();
continue;
}
if (!select(q.peek())) {
continue;
}
return q.poll();
}
return null;
}
private boolean select(MigrationInfo m) {
if (m == null) {
return true;
}
if (migratingPartitions.contains(m.getPartitionId())) {
return false;
}
if (endpoint2MigrationCount.getOrDefault(m.getDestinationAddress(), 0) == maxParallelMigrations) {
return false;
}
return endpoint2MigrationCount.getOrDefault(sourceAddress(m), 0) < maxParallelMigrations;
}
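/*
 * Worked example for select() (hypothetical members X, Y, Z; maxParallelMigrations assumed 2):
 * with endpoint2MigrationCount = {X=2, Y=1}
 *
 *   - a migration whose destination is X is skipped: X already runs 2 migrations (== max)
 *   - a migration from Y to Z is eligible: both counts are below the limit
 *   - any migration whose partition is in migratingPartitions is skipped, since concurrent
 *     migrations on the same partition are illegal
 *
 * next() keeps scanning the per-partition queues with this predicate and blocks on the
 * completed queue when nothing is currently eligible.
 */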
private Address sourceAddress(MigrationInfo m) {
if (m.getSourceCurrentReplicaIndex() == 0) {
return m.getSourceAddress();
}
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(m.getPartitionId());
return partition.getOwnerOrNull();
}
private boolean migrationDelay() {
if (partitionMigrationInterval > 0) {
try {
Thread.sleep(partitionMigrationInterval);
} catch (InterruptedException e) {
onInterrupted(e);
return false;
}
}
return true;
}
private void waitOngoingMigrations() {
boolean interrupted = false;
while (ongoingMigrationCount > 0) {
try {
MigrationInfo migration = completed.take();
onComplete(migration);
} catch (InterruptedException ignored) {
interrupted = true;
}
}
if (interrupted) {
Thread.currentThread().interrupt();
}
}
private void onInterrupted(InterruptedException e) {
logger.info("MigrationProcessTask is interrupted! Ignoring remaining migrations...", e);
Thread.currentThread().interrupt();
abort();
}
void abort() {
aborted = true;
}
}
/**
* Invoked on the master node to migrate a partition (excluding promotions).
* It will execute the {@link MigrationRequestOperation} on the partition owner.
*/
private class AsyncMigrationTask {
private final MigrationInfo migration;
AsyncMigrationTask(MigrationInfo migration) {
this.migration = migration;
migration.setMaster(node.getThisAddress());
}
CompletionStage<Boolean> run() {
if (!partitionService.isLocalMemberMaster()) {
return CompletableFuture.completedFuture(Boolean.FALSE);
}
if (migration.getSource() == null
&& migration.getDestinationCurrentReplicaIndex() > 0
&& migration.getDestinationNewReplicaIndex() == 0) {
throw new IllegalStateException("Promotion migrations must be handled by "
+ RepairPartitionTableTask.class.getSimpleName() + " -> " + migration);
}
Member partitionOwner = checkMigrationParticipantsAndGetPartitionOwner();
if (partitionOwner == null) {
return CompletableFuture.completedFuture(Boolean.FALSE);
}
return executeMigrateOperation(partitionOwner);
}
private void beforeMigration() {
migration.setInitialPartitionVersion(partitionStateManager.getPartitionVersion(migration.getPartitionId()));
migrationInterceptor.onMigrationStart(MigrationParticipant.MASTER, migration);
if (logger.isFineEnabled()) {
logger.fine("Starting Migration: " + migration);
}
}
/**
 * Checks that the partition owner is not {@code null} and that the source and destination are still
 * members, and returns the owner.
 * Returns {@code null} and reschedules the {@link ControlTask} if any check fails.
 */
private Member checkMigrationParticipantsAndGetPartitionOwner() {
Member partitionOwner = getPartitionOwner();
if (partitionOwner == null) {
logger.fine("Partition owner is null. Ignoring " + migration);
triggerRepartitioningAfterMigrationFailure();
return null;
}
if (migration.getSource() != null) {
PartitionReplica source = migration.getSource();
if (node.getClusterService().getMember(source.address(), source.uuid()) == null) {
logger.fine("Source is not a member anymore. Ignoring " + migration);
triggerRepartitioningAfterMigrationFailure();
return null;
}
}
PartitionReplica destination = migration.getDestination();
if (node.getClusterService().getMember(destination.address(), destination.uuid()) == null) {
logger.fine("Destination is not a member anymore. Ignoring " + migration);
triggerRepartitioningAfterMigrationFailure();
return null;
}
return partitionOwner;
}
/**
* Returns the partition owner or {@code null} if it is not set.
*/
private Member getPartitionOwner() {
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migration.getPartitionId());
PartitionReplica owner = partition.getOwnerReplicaOrNull();
if (owner == null) {
logger.warning("Skipping migration, since partition owner doesn't exist! -> " + migration + ", " + partition);
return null;
}
return node.getClusterService().getMember(owner.address(), owner.uuid());
}
/**
* Sends a {@link MigrationRequestOperation} to the {@code fromMember} and returns the migration result if the
* migration was successful.
*/
@SuppressWarnings({"checkstyle:npathcomplexity", "checkstyle:cyclomaticcomplexity"})
private CompletionStage<Boolean> executeMigrateOperation(Member fromMember) {
long start = Timer.nanos();
CompletableFuture<Boolean> future;
try {
beforeMigration();
List<MigrationInfo> completedMigrations = getCompletedMigrations(migration.getPartitionId());
Operation op = new MigrationRequestOperation(migration, completedMigrations, 0,
fragmentedMigrationEnabled, isChunkedMigrationEnabled(), maxTotalChunkedDataInBytes);
future = nodeEngine.getOperationService()
.createInvocationBuilder(SERVICE_NAME, op, fromMember.getAddress())
.setCallTimeout(partitionMigrationTimeout)
.invoke();
} catch (Throwable t) {
logger.warning("Error during " + migration, t);
future = InternalCompletableFuture.completedExceptionally(t);
}
return future.handleAsync((done, t) -> {
stats.recordMigrationOperationTime();
logger.fine("Migration operation response received -> " + migration + ", success: " + done + ", failure: " + t);
if (t != null) {
Level level = nodeEngine.isRunning() ? Level.WARNING : Level.FINE;
if (t instanceof ExecutionException && t.getCause() instanceof PartitionStateVersionMismatchException) {
level = Level.FINE;
}
if (logger.isLoggable(level)) {
logger.log(level, "Failed migration from " + fromMember + " for " + migration, t);
}
return Boolean.FALSE;
}
return done;
}, asyncExecutor).thenComposeAsync(result -> {
if (result) {
if (logger.isFineEnabled()) {
logger.fine("Finished Migration: " + migration);
}
return migrationOperationSucceeded();
} else {
Level level = nodeEngine.isRunning() ? Level.WARNING : Level.FINE;
if (logger.isLoggable(level)) {
logger.log(level, "Migration failed: " + migration);
}
migrationOperationFailed(fromMember);
return CompletableFuture.completedFuture(false);
}
}, asyncExecutor).handleAsync((result, t) -> {
stats.recordMigrationTaskTime();
partitionService.getPartitionEventManager().sendMigrationEvent(stats.toMigrationState(), migration,
TimeUnit.NANOSECONDS.toMillis(Timer.nanosElapsed(start)));
if (t != null) {
Level level = nodeEngine.isRunning() ? Level.WARNING : Level.FINE;
logger.log(level, "Error during " + migration, t);
return false;
}
return result;
}, asyncExecutor);
}
/**
 * Called on the master node when the migration operation fails. Completes the migration as failed
 * and notifies the migration listeners. It will:
 * <ul>
 * <li>set the migration status</li>
 * <li>update the completed migration list</li>
 * <li>schedule the migration for finalization</li>
 * <li>update the local partition state version</li>
 * <li>sync the partition state with cluster members</li>
 * <li>trigger the {@link ControlTask}</li>
 * <li>publish a {@link ReplicaMigrationEvent}</li>
 * </ul>
 * <p>
 * Acquires the partition state lock.
 */
private void migrationOperationFailed(Member partitionOwner) {
migration.setStatus(MigrationStatus.FAILED);
migrationInterceptor.onMigrationComplete(MigrationParticipant.MASTER, migration, false);
partitionServiceLock.lock();
try {
migrationInterceptor.onMigrationRollback(MigrationParticipant.MASTER, migration);
scheduleActiveMigrationFinalization(migration);
int delta = migration.getPartitionVersionIncrement() + 1;
partitionStateManager.incrementPartitionVersion(migration.getPartitionId(), delta);
migration.setPartitionVersionIncrement(delta);
node.getNodeExtension().onPartitionStateChange();
addCompletedMigration(migration);
if (!partitionOwner.localMember()) {
partitionService.sendPartitionRuntimeState(partitionOwner.getAddress());
}
if (!migration.getDestination().isIdentical(node.getLocalMember())) {
partitionService.sendPartitionRuntimeState(migration.getDestination().address());
}
triggerRepartitioningAfterMigrationFailure();
} finally {
partitionServiceLock.unlock();
}
}
/**
 * Pauses migrations for a short time and reruns the {@link ControlTask}.
 */
private void triggerRepartitioningAfterMigrationFailure() {
// Migration failed.
// Pause the migration process for a short time if a migration attempt fails;
// otherwise, migration failures can cause a busy spin until the underlying problem is resolved.
// Migrations can fail either because a node has just joined and has not completed its startup yet,
// or because a node has just left the cluster.
// Re-execute RedoPartitioningTask when all other migration tasks are done,
// since an imbalance may occur because of this failure.
partitionServiceLock.lock();
try {
pauseMigration();
triggerControlTask();
resumeMigrationEventually();
} finally {
partitionServiceLock.unlock();
}
}
/**
 * Called on the master node to complete the migration and notify the migration listeners that the
 * migration completed. It will:
 * <ul>
 * <li>commit the migration on the destination</li>
 * <li>set the migration status</li>
 * <li>update the local partition state</li>
 * <li>schedule the migration for finalization</li>
 * <li>sync the partition state with cluster members</li>
 * <li>update the completed migration list</li>
 * <li>publish a {@link ReplicaMigrationEvent}</li>
 * </ul>
 * <p>
 * Triggers the {@link ControlTask} if the migration failed. Acquires the partition state lock to
 * process the result of the migration commit.
 */
private CompletionStage<Boolean> migrationOperationSucceeded() {
migrationInterceptor.onMigrationComplete(MigrationParticipant.MASTER, migration, true);
CompletionStage<Boolean> f = commitMigrationToDestinationAsync(migration);
f = f.thenApplyAsync(commitSuccessful -> {
stats.recordDestinationCommitTime();
partitionServiceLock.lock();
try {
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migration.getPartitionId());
assert migration.getInitialPartitionVersion() == partition.version()
: "Migration initial version: " + migration.getInitialPartitionVersion()
+ ", Partition version: " + partition.version();
if (commitSuccessful) {
migration.setStatus(MigrationStatus.SUCCESS);
migrationInterceptor.onMigrationCommit(MigrationParticipant.MASTER, migration);
// updates partition table after successful commit
applyMigration(partition, migration);
} else {
migration.setStatus(MigrationStatus.FAILED);
migrationInterceptor.onMigrationRollback(MigrationParticipant.MASTER, migration);
// Increase the partition version by one more.
// This avoids a conflict on commit failure, where the commit runs on the destination
// but the invocation fails, so the migration is not applied to the partition table here.
// Normally this is not expected, since a commit invocation is retried when the operation
// times out. Still, this is safer...
int delta = migration.getPartitionVersionIncrement() + 1;
migration.setPartitionVersionIncrement(delta);
partitionStateManager.incrementPartitionVersion(partition.getPartitionId(), delta);
if (!migration.getDestination().isIdentical(node.getLocalMember())) {
partitionService.sendPartitionRuntimeState(migration.getDestination().address());
}
triggerRepartitioningAfterMigrationFailure();
}
assert migration.getFinalPartitionVersion() == partition.version()
: "Migration final version: " + migration.getFinalPartitionVersion()
+ ", Partition version: " + partition.version();
addCompletedMigration(migration);
scheduleActiveMigrationFinalization(migration);
node.getNodeExtension().onPartitionStateChange();
if (completedMigrations.size() >= PUBLISH_COMPLETED_MIGRATIONS_BATCH_SIZE) {
publishCompletedMigrations();
}
} finally {
partitionServiceLock.unlock();
}
return commitSuccessful;
}, asyncExecutor);
return f;
}
}
/**
* Checks if the partition table needs repairing once the partitions have been initialized (assigned).
* This means that it will:
* - remove unknown addresses from the partition table
* - promote partition replicas if necessary (when the partition owner is missing)
*
* If the promotions are successful, it schedules the {@link RedoPartitioningTask}; if the process was not successful,
* it triggers a {@link ControlTask} to restart the partition table repair process.
*
* Invoked on the master node. Acquires partition service lock when scheduling the tasks on the migration queue.
*/
private class RepairPartitionTableTask implements MigrationRunnable {
@Override
public void run() {
if (!partitionStateManager.isInitialized()) {
return;
}
ClusterState clusterState = node.getClusterService().getClusterState();
if (!clusterState.isMigrationAllowed() && !clusterState.isPartitionPromotionAllowed()) {
// If migrations and promotions are not allowed, the partition table cannot be modified and we should
// already have the most recent partition table, because the cluster state cannot be changed
// while our partition table is stale.
logger.fine("Will not repair partition table at the moment. "
+ "Cluster state does not allow to modify partition table.");
return;
}
// Schedule a control task to run after the configured delay.
// Since backups are not promoted, a crashed member's previously owned partitions
// are not available in the meantime. Recovery, however, will be faster, as no
// partition migrations will occur when the crashed member rejoins.
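// The delay flag is one-shot: it is cleared before scheduling, so the ControlTask that fires
// after autoRebalanceDelaySeconds repairs the table immediately on its next pass. E.g.
// (illustrative value), with autoRebalanceDelaySeconds = 30 the repair is retried roughly
// 30 seconds later, unless another cluster event triggers it earlier.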
if (delayNextRepartitioningExecution) {
logger.fine("Delaying next repartitioning execution");
delayNextRepartitioningExecution = false;
ExecutionService executionService = nodeEngine.getExecutionService();
scheduledControlTaskFuture = (ScheduledFuture) executionService.schedule(() -> triggerControlTask(),
autoRebalanceDelaySeconds, TimeUnit.SECONDS);
return;
}
Map<PartitionReplica, Collection<MigrationInfo>> promotions = removeUnknownMembersAndCollectPromotions();
boolean success = promoteBackupsForMissingOwners(promotions);
partitionServiceLock.lock();
try {
if (success) {
if (logger.isFinestEnabled()) {
logger.finest("RedoPartitioningTask scheduled");
}
migrationQueue.add(new RedoPartitioningTask());
} else {
triggerControlTask();
}
} finally {
partitionServiceLock.unlock();
}
}
/**
* Removes members from the partition table which are not registered as cluster data members and checks
* if any partitions need promotion (partition owners are missing).
* Invoked on the master node. Acquires partition service lock.
*
* @return promotions that need to be sent, grouped by target replica
*/
private Map<PartitionReplica, Collection<MigrationInfo>> removeUnknownMembersAndCollectPromotions() {
partitionServiceLock.lock();
try {
partitionStateManager.removeUnknownAndLiteMembers();
Map<PartitionReplica, Collection<MigrationInfo>> promotions = new HashMap<>();
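// Promotions are grouped by destination so that each destination later receives all of its
// promotions in a single PromotionCommitOperation batch.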
for (int partitionId = 0; partitionId < partitionService.getPartitionCount(); partitionId++) {
MigrationInfo migration = createPromotionMigrationIfOwnerIsNull(partitionId);
if (migration == null) {
continue;
}
Collection<MigrationInfo> migrations =
promotions.computeIfAbsent(migration.getDestination(), k -> new ArrayList<>());
migrations.add(migration);
}
return promotions;
} finally {
partitionServiceLock.unlock();
}
}
/**
* Sends promotions to the destinations and commits if the destinations successfully process these promotions.
* Called on the master node.
*
* @param promotions the promotions that need to be sent, grouped by target replica
* @return whether all promotions were successful
*/
private boolean promoteBackupsForMissingOwners(Map<PartitionReplica, Collection<MigrationInfo>> promotions) {
boolean allSucceeded = true;
for (Map.Entry<PartitionReplica, Collection<MigrationInfo>> entry : promotions.entrySet()) {
PartitionReplica destination = entry.getKey();
Collection<MigrationInfo> migrations = entry.getValue();
allSucceeded &= commitPromotionMigrations(destination, migrations);
}
return allSucceeded;
}
/**
* Sends promotions to the destination and commits the {@code migrations} if successful. Called on the master node.
*
* @param destination the promotion destination
* @param migrations the promotion migrations
* @return whether the promotions were successful
*/
private boolean commitPromotionMigrations(PartitionReplica destination, Collection<MigrationInfo> migrations) {
migrationInterceptor.onPromotionStart(MigrationParticipant.MASTER, migrations);
boolean success = commitPromotionsToDestination(destination, migrations);
boolean local = destination.isIdentical(node.getLocalMember());
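// When the destination is the local member, there is no separate commit result to process,
// presumably because the PromotionCommitOperation already ran on this node and applied
// the promotions to the local partition table.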
if (!local) {
processPromotionCommitResult(destination, migrations, success);
}
migrationInterceptor.onPromotionComplete(MigrationParticipant.MASTER, migrations, success);
partitionService.publishPartitionRuntimeState();
return success;
}
/**
* Applies the {@code migrations} to the local partition table if {@code success} is {@code true}.
* In any case it will increase the partition state version.
* Called on the master node. This method will acquire the partition service lock.
*
* @param destination the promotion destination
* @param migrations the promotions for the destination
* @param success whether the {@link PromotionCommitOperation} was successfully processed by the {@code destination}
*/
private void processPromotionCommitResult(PartitionReplica destination, Collection<MigrationInfo> migrations,
boolean success) {
partitionServiceLock.lock();
try {
if (!partitionStateManager.isInitialized()) {
// node reset/terminated while running task
return;
}
if (success) {
for (MigrationInfo migration : migrations) {
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migration.getPartitionId());
assert partition.getOwnerReplicaOrNull() == null : "Owner should be null: " + partition;
assert destination.equals(partition.getReplica(migration.getDestinationCurrentReplicaIndex()))
: "Invalid replica! Destination: " + destination + ", index: "
+ migration.getDestinationCurrentReplicaIndex() + ", " + partition;
// single partition replica swap, increments partition state version by 2
partition.swapReplicas(0, migration.getDestinationCurrentReplicaIndex());
}
} else {
// Increase the partition version by one more.
// This avoids a conflict when the commit fails on our side: the commit may have run on the
// destination, but the invocation failed, so the migration is not applied to the partition table.
// Normally this is not expected, since a commit invocation is retried when the operation
// times out. Still, this is safer...
PartitionStateManager partitionStateManager = partitionService.getPartitionStateManager();
for (MigrationInfo migration : migrations) {
int delta = migration.getPartitionVersionIncrement() + 1;
partitionStateManager.incrementPartitionVersion(migration.getPartitionId(), delta);
}
}
} finally {
partitionServiceLock.unlock();
}
}
/**
* Constructs a promotion migration if the partition owner is {@code null} and there exists a non-{@code null} replica.
* If there are no other replicas, it will send an {@link IPartitionLostEvent}.
*
* @param partitionId the partition ID to check
* @return the migration info or {@code null} if the partition owner is assigned
*/
private MigrationInfo createPromotionMigrationIfOwnerIsNull(int partitionId) {
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(partitionId);
if (partition.getOwnerReplicaOrNull() == null) {
PartitionReplica destination = null;
int index = 1;
for (int i = index; i < InternalPartition.MAX_REPLICA_COUNT; i++) {
destination = partition.getReplica(i);
if (destination != null) {
index = i;
break;
}
}
if (logger.isFinestEnabled()) {
if (destination != null) {
logger.finest("partitionId=" + partition.getPartitionId() + " owner is removed. replicaIndex=" + index
+ " will be shifted up to 0. " + partition);
} else {
logger.finest("partitionId=" + partition.getPartitionId()
+ " owner is removed. there is no other replica to shift up. " + partition);
}
}
if (destination != null) {
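// A promotion is modeled as a migration with no source (null source, source indices -1):
// the destination is shifted up from replica index 'index' to index 0, the owner slot.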
MigrationInfo migration = new MigrationInfo(partitionId, null, destination, -1, -1, index, 0);
migration.setMaster(node.getThisAddress());
migration.setStatus(MigrationInfo.MigrationStatus.SUCCESS);
migration.setInitialPartitionVersion(partition.version());
return migration;
}
}
if (partition.getOwnerReplicaOrNull() == null) {
logger.warning("partitionId=" + partitionId + " is completely lost!");
PartitionEventManager partitionEventManager = partitionService.getPartitionEventManager();
partitionEventManager.sendPartitionLostEvent(partitionId, InternalPartition.MAX_BACKUP_COUNT);
}
return null;
}
/**
* Creates a new partition table by applying the {@code migrations} and sends it via a {@link PromotionCommitOperation}
* to the destination.
*
* @return true if the promotions were applied on the destination
*/
private boolean commitPromotionsToDestination(PartitionReplica destination, Collection<MigrationInfo> migrations) {
assert !migrations.isEmpty() : "No promotions to commit! destination=" + destination;
Member member = node.getClusterService().getMember(destination.address(), destination.uuid());
if (member == null) {
logger.warning("Cannot commit promotions. Destination " + destination + " is not a member anymore");
return false;
}
try {
if (logger.isFinestEnabled()) {
logger.finest("Sending promotion commit operation to " + destination + " for " + migrations);
}
PartitionRuntimeState partitionState = partitionService.createPromotionCommitPartitionState(migrations);
UUID destinationUuid = member.getUuid();
PromotionCommitOperation op = new PromotionCommitOperation(partitionState, migrations, destinationUuid);
Future<Boolean> future = nodeEngine.getOperationService()
.createInvocationBuilder(SERVICE_NAME, op, destination.address())
.setTryCount(Integer.MAX_VALUE)
.setCallTimeout(memberHeartbeatTimeoutMillis).invoke();
boolean result = future.get();
if (logger.isFinestEnabled()) {
logger.finest("Promotion commit result " + result + " from " + destination
+ " for migrations " + migrations);
}
return result;
} catch (Throwable t) {
logPromotionCommitFailure(destination, migrations, t);
if (t.getCause() instanceof OperationTimeoutException) {
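// Retry on invocation timeout: the destination may have received and applied the commit even
// though the invocation timed out, so the actual result must be learned rather than assumed.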
return commitPromotionsToDestination(destination, migrations);
}
}
return false;
}
private void logPromotionCommitFailure(PartitionReplica destination, Collection<MigrationInfo> migrations, Throwable t) {
boolean memberLeft = t instanceof MemberLeftException
|| t.getCause() instanceof TargetNotMemberException
|| t.getCause() instanceof HazelcastInstanceNotActiveException;
int migrationsSize = migrations.size();
if (memberLeft) {
if (destination.isIdentical(node.getLocalMember())) {
logger.fine("Promotion commit failed for " + migrationsSize + " migrations"
+ " since this node is shutting down.");
return;
}
if (logger.isFinestEnabled()) {
logger.warning("Promotion commit failed for " + migrations
+ " since destination " + destination + " left the cluster");
} else {
logger.warning("Promotion commit failed for "
+ (migrationsSize == 1 ? migrations.iterator().next() : migrationsSize + " migrations")
+ " since destination " + destination + " left the cluster");
}
return;
}
if (logger.isFinestEnabled()) {
logger.severe("Promotion commit to " + destination + " failed for " + migrations, t);
} else {
logger.severe("Promotion commit to " + destination + " failed for "
+ (migrationsSize == 1 ? migrations.iterator().next() : migrationsSize + " migrations"), t);
}
}
}
/**
* Task scheduled on the master node to fetch and repair the latest partition table.
* It first checks whether the most recent partition table needs to be fetched and, if so, schedules a task to do it,
* along with a new {@link ControlTask} to be executed afterwards. If the partition table does not need to be fetched,
* it schedules a {@link RepairPartitionTableTask} to repair the existing partition table.
* Invoked on the master node. It will acquire the partition service lock.
*
* @see InternalPartitionServiceImpl#isFetchMostRecentPartitionTableTaskRequired()
*/
private class ControlTask implements MigrationRunnable {
@Override
public void run() {
partitionServiceLock.lock();
try {
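// Drop any queued migration tasks; they were planned against a partition table that is
// about to be re-fetched or repaired.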
migrationQueue.clear();
if (partitionService.scheduleFetchMostRecentPartitionTableTaskIfRequired()) {
if (logger.isFinestEnabled()) {
logger.finest("FetchMostRecentPartitionTableTask scheduled");
}
migrationQueue.add(new ControlTask());
return;
}
if (logger.isFinestEnabled()) {
logger.finest("RepairPartitionTableTask scheduled");
}
migrationQueue.add(new RepairPartitionTableTask());
} finally {
partitionServiceLock.unlock();
}
}
}
/**
* Processes demote requests, either for this node or for other members of the cluster. If a requesting member
* is still present in the partition table, triggers the control task.
* Invoked on the master node. Acquires partition service lock.
*/
private class ProcessDemoteRequestsTask implements MigrationRunnable {
@Override
public void run() {
if (!partitionService.isLocalMemberMaster()) {
return;
}
partitionServiceLock.lock();
try {
final int demoteRequestCount = demoteRequestedMembers.size();
if (demoteRequestCount > 0) {
boolean present = false;
List<Member> demotedMembers = new ArrayList<>(demoteRequestCount);
for (Member member : demoteRequestedMembers) {
if (partitionStateManager.isAbsentInPartitionTable(member)) {
demotedMembers.add(member);
} else {
logger.warning(member + " requested to demote but still in partition table");
present = true;
}
}
if (!demotedMembers.isEmpty()) {
migrationQueue.add(new SendDemoteResponses(demotedMembers));
}
if (present) {
triggerControlTask();
}
}
} finally {
partitionServiceLock.unlock();
}
}
}
private class SendDemoteResponses implements MigrationRunnable {
private final List<Member> members;
SendDemoteResponses(List<Member> members) {
this.members = members;
}
@Override
public void run() {
// make sure that the partition table is in sync on all members
List<CompletableFuture<Boolean>> futures = partitionService.checkClusterPartitionRuntimeStates();
FutureUtil.waitWithDeadline(futures, CHECK_CLUSTER_PARTITION_RUNTIME_STATES_SYNC_TIMEOUT_SECONDS,
TimeUnit.SECONDS, FutureUtil.RETHROW_ALL_EXCEPT_MEMBER_LEFT);
for (Member member : members) {
sendDemoteResponseOperation(member);
}
}
}
/**
* Processes shutdown requests, either for this node or for other members of the cluster. If all data members
* have requested to disown their data, it simply sends the shutdown responses; otherwise, it checks whether any
* requesting member is still in the partition table and, if so, triggers the control task.
* Invoked on the master node. Acquires partition service lock.
*/
private class ProcessShutdownRequestsTask implements MigrationRunnable {
@Override
public void run() {
if (!partitionService.isLocalMemberMaster()) {
return;
}
partitionServiceLock.lock();
try {
final int shutdownRequestCount = shutdownRequestedMembers.size();
final int dataDisownRequestCount = getDataDisownRequestedMembers().size();
if (shutdownRequestCount > 0) {
if (dataDisownRequestCount == nodeEngine.getClusterService().getSize(DATA_MEMBER_SELECTOR)) {
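// Every data member has requested to give up its data, so there is no member left to
// migrate partitions to; acknowledge all shutdown requests immediately.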
for (Member member : shutdownRequestedMembers) {
sendShutdownResponseOperation(member);
}
} else {
boolean present = false;
for (Member member : shutdownRequestedMembers) {
if (partitionStateManager.isAbsentInPartitionTable(member)) {
sendShutdownResponseOperation(member);
} else {
logger.warning(member + " requested to shutdown but still in partition table");
present = true;
}
}
if (present) {
triggerControlTask();
}
}
}
} finally {
partitionServiceLock.unlock();
}
}
}
/**
* Task to publish completed migrations to cluster members after all migration tasks are consumed.
*/
private class PublishCompletedMigrationsTask implements MigrationRunnable {
@Override
public void run() {
partitionService.getPartitionEventManager().sendMigrationProcessCompletedEvent(stats.toMigrationState());
publishCompletedMigrations();
}
}
/**
* Comparator that compares distance of two {@link PartitionTableView}s against a base
* {@link PartitionTableView} that is provided at construction time.
* The distance of two {@link PartitionTableView}s is the sum of the distances of their
* respective {@link InternalPartition}s. The distance between two
* {@link InternalPartition}s is calculated as follows:
*
* - If a {@link PartitionReplica} occurs in both {@link InternalPartition}s, then
* their distance is the absolute difference of their respective replica indices.
* - If {@code null} {@link PartitionReplica}s occur at the same replica index, then
* their distance is 0.
* - If a non-{@code null} {@link PartitionReplica} is present in one {@link InternalPartition}
* and not the other, then its distance is {@link InternalPartition#MAX_REPLICA_COUNT}.
*
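* For example (illustrative): if a base partition's replicas are [A, B, C] and the other
* table has [B, A, C] for the same partition, their distance is |0-1| + |1-0| + 0 = 2.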
*/
static class PartitionTableViewDistanceComparator implements Comparator<PartitionTableView> {
final PartitionTableView basePartitionTableView;
PartitionTableViewDistanceComparator(PartitionTableView basePartitionTableView) {
this.basePartitionTableView = basePartitionTableView;
}
@Override
public int compare(PartitionTableView o1, PartitionTableView o2) {
return distanceFromBase(o1) - distanceFromBase(o2);
}
int distanceFromBase(PartitionTableView partitionTableView) {
return partitionTableView.distanceOf(basePartitionTableView);
}
}
}