All downloads are free. The search and download functionality uses the official Maven repository.

com.hazelcast.internal.partition.impl.InternalPartitionServiceImpl Maven / Gradle / Ivy

/*
 * Copyright (c) 2008-2016, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.internal.partition.impl;

import com.hazelcast.cluster.ClusterState;
import com.hazelcast.core.HazelcastInstanceNotActiveException;
import com.hazelcast.core.Member;
import com.hazelcast.core.MemberLeftException;
import com.hazelcast.core.MigrationListener;
import com.hazelcast.instance.MemberImpl;
import com.hazelcast.instance.Node;
import com.hazelcast.internal.cluster.impl.ClusterServiceImpl;
import com.hazelcast.internal.metrics.MetricsRegistry;
import com.hazelcast.internal.metrics.Probe;
import com.hazelcast.internal.partition.InternalPartition;
import com.hazelcast.internal.partition.InternalPartitionService;
import com.hazelcast.internal.partition.MigrationInfo;
import com.hazelcast.internal.partition.MigrationInfo.MigrationStatus;
import com.hazelcast.internal.partition.PartitionListener;
import com.hazelcast.internal.partition.PartitionRuntimeState;
import com.hazelcast.internal.partition.PartitionServiceProxy;
import com.hazelcast.internal.partition.operation.AssignPartitions;
import com.hazelcast.internal.partition.operation.FetchPartitionStateOperation;
import com.hazelcast.internal.partition.operation.PartitionStateOperation;
import com.hazelcast.internal.partition.operation.ShutdownRequestOperation;
import com.hazelcast.logging.ILogger;
import com.hazelcast.nio.Address;
import com.hazelcast.nio.serialization.Data;
import com.hazelcast.partition.NoDataMemberInClusterException;
import com.hazelcast.partition.PartitionEvent;
import com.hazelcast.partition.PartitionEventListener;
import com.hazelcast.partition.PartitionLostListener;
import com.hazelcast.spi.EventPublishingService;
import com.hazelcast.spi.ExecutionService;
import com.hazelcast.spi.ManagedService;
import com.hazelcast.spi.NodeEngine;
import com.hazelcast.spi.OperationService;
import com.hazelcast.spi.PartitionAwareService;
import com.hazelcast.spi.exception.TargetNotMemberException;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.spi.impl.operationservice.InternalOperationService;
import com.hazelcast.spi.partition.IPartition;
import com.hazelcast.spi.partition.IPartitionLostEvent;
import com.hazelcast.spi.properties.GroupProperty;
import com.hazelcast.spi.properties.HazelcastProperties;
import com.hazelcast.util.EmptyStatement;
import com.hazelcast.util.ExceptionUtil;
import com.hazelcast.util.FutureUtil.ExceptionHandler;
import com.hazelcast.util.HashUtil;
import com.hazelcast.util.scheduler.ScheduledEntry;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Level;

import static com.hazelcast.cluster.memberselector.MemberSelectors.DATA_MEMBER_SELECTOR;
import static com.hazelcast.util.FutureUtil.logAllExceptions;
import static com.hazelcast.util.FutureUtil.returnWithDeadline;
import static java.lang.Math.ceil;
import static java.lang.Math.max;
import static java.lang.Math.min;

/**
 * The {@link InternalPartitionService} implementation.
 */
@SuppressWarnings({"checkstyle:methodcount", "checkstyle:classfanoutcomplexity", "checkstyle:classdataabstractioncoupling"})
public class InternalPartitionServiceImpl implements InternalPartitionService, ManagedService,
        EventPublishingService>, PartitionAwareService {

    // sleep between two owner look-ups while getPartitionOwnerOrWait() is polling
    private static final int PARTITION_OWNERSHIP_WAIT_MILLIS = 10;
    // message logged by the handler used when waiting for partition state sync invocations
    private static final String EXCEPTION_MSG_PARTITION_STATE_SYNC_TIMEOUT = "Partition state sync invocation timed out";
    // deadline applied to the futures collected in syncPartitionRuntimeState()
    private static final int PTABLE_SYNC_TIMEOUT_SECONDS = 10;
    // upper bound of a single latch await step in prepareToSafeShutdown()
    private static final int SAFE_SHUTDOWN_MAX_AWAIT_STEP_MILLIS = 1000;

    private final Node node;
    private final NodeEngineImpl nodeEngine;
    private final ILogger logger;

    // read once from GroupProperty.PARTITION_COUNT in the constructor
    private final int partitionCount;

    // read once from GroupProperty.PARTITION_MIGRATION_TIMEOUT (milliseconds) in the constructor
    private final long partitionMigrationTimeout;

    private final PartitionServiceProxy proxy;
    // guards partition table reads/updates in this class; the same lock instance is handed to the MigrationManager
    private final Lock lock = new ReentrantLock();
    private final InternalPartitionListener partitionListener;

    private final PartitionStateManager partitionStateManager;
    private final MigrationManager migrationManager;
    private final PartitionReplicaManager replicaManager;
    private final PartitionReplicaStateChecker partitionReplicaStateChecker;
    private final PartitionEventManager partitionEventManager;

    private final ExceptionHandler partitionStateSyncTimeoutHandler;

    // used to limit partition assignment requests sent to master
    private final AtomicBoolean triggerMasterFlag = new AtomicBoolean(false);

    // lazily initialised by getShutdownLatch(); counted down by onShutdownResponse()
    private final AtomicReference<CountDownLatch> shutdownLatchRef = new AtomicReference<CountDownLatch>();

    // master address as last observed in memberAdded()/memberRemoved()
    private volatile Address lastMaster;

    // set in memberRemoved() when this node turns out to be the new master; cleared by reset()
    private volatile boolean shouldFetchPartitionTables;

    /**
     * Wires up the partition service for the given node: reads the partition count and the migration
     * timeout from the node properties, creates the collaborating managers and registers the probe
     * sources under the {@code "partitions"} prefix.
     *
     * @param node the node this service belongs to
     */
    public InternalPartitionServiceImpl(Node node) {
        final HazelcastProperties props = node.getProperties();

        this.partitionCount = props.getInteger(GroupProperty.PARTITION_COUNT);
        this.node = node;
        this.nodeEngine = node.nodeEngine;
        this.logger = node.getLogger(InternalPartitionService.class);

        // construction order is kept as-is: the collaborators receive 'this' and the listener
        this.partitionListener = new InternalPartitionListener(node, this);
        this.partitionStateManager = new PartitionStateManager(node, this, this.partitionListener);
        this.migrationManager = new MigrationManager(node, this, this.lock);
        this.replicaManager = new PartitionReplicaManager(node, this);
        this.partitionReplicaStateChecker = new PartitionReplicaStateChecker(node, this);
        this.partitionEventManager = new PartitionEventManager(node);

        this.partitionStateSyncTimeoutHandler =
                logAllExceptions(this.logger, EXCEPTION_MSG_PARTITION_STATE_SYNC_TIMEOUT, Level.FINEST);
        this.partitionMigrationTimeout = props.getMillis(GroupProperty.PARTITION_MIGRATION_TIMEOUT);
        this.proxy = new PartitionServiceProxy(this.nodeEngine, this);

        final String metricsPrefix = "partitions";
        final MetricsRegistry registry = this.nodeEngine.getMetricsRegistry();
        registry.scanAndRegister(this, metricsPrefix);
        registry.scanAndRegister(this.partitionStateManager, metricsPrefix);
        registry.scanAndRegister(this.migrationManager, metricsPrefix);
        registry.scanAndRegister(this.replicaManager, metricsPrefix);
    }

    /**
     * Schedules the periodic partition table publication, starts the migration manager and schedules
     * the replica version sync.
     *
     * @param nodeEngine the engine whose execution service runs the scheduled tasks
     * @param properties not read by this implementation
     */
    @Override
    public void init(NodeEngine nodeEngine, Properties properties) {
        final int configuredSeconds = node.getProperties().getSeconds(GroupProperty.PARTITION_TABLE_SEND_INTERVAL);
        // a non-positive interval falls back to one second
        final int sendIntervalSeconds = configuredSeconds > 0 ? configuredSeconds : 1;

        final ExecutionService scheduler = nodeEngine.getExecutionService();
        final PublishPartitionRuntimeStateTask publishTask = new PublishPartitionRuntimeStateTask(node, this);
        scheduler.scheduleWithRepetition(publishTask, sendIntervalSeconds, sendIntervalSeconds, TimeUnit.SECONDS);

        migrationManager.start();
        replicaManager.scheduleReplicaVersionSync(scheduler);
    }

    /**
     * Returns the current owner of the given partition, or {@code null} when no owner is set.
     * <p>
     * Triggers the first arrangement when the partition table is not initialized yet. If the owner is
     * missing on a non-master node and the cluster has at least one data member, the master is asked
     * to assign partitions before the owner is read again.
     *
     * @param partitionId the partition to look up
     * @return the owner address, or {@code null}
     */
    @Override
    public Address getPartitionOwner(int partitionId) {
        if (!partitionStateManager.isInitialized()) {
            firstArrangement();
        }

        final InternalPartition target = partitionStateManager.getPartitionImpl(partitionId);
        final boolean ownerMissing = target.getOwnerOrNull() == null;
        if (ownerMissing && !node.isMaster() && !isClusterFormedByOnlyLiteMembers()) {
            triggerMasterToAssignPartitions();
        }
        // read again: the owner may have been set in the meantime
        return target.getOwnerOrNull();
    }

    /**
     * Returns the owner of the given partition, polling every {@code PARTITION_OWNERSHIP_WAIT_MILLIS}
     * milliseconds until an owner is available.
     *
     * @param partitionId the partition to look up
     * @return the owner address, never {@code null}
     * @throws HazelcastInstanceNotActiveException if the node engine is not running
     * @throws IllegalStateException               if the cluster state is not {@link ClusterState#ACTIVE}
     * @throws NoDataMemberInClusterException      if the cluster has no data member
     */
    @Override
    public Address getPartitionOwnerOrWait(int partitionId) {
        Address owner;
        while ((owner = getPartitionOwner(partitionId)) == null) {
            if (!nodeEngine.isRunning()) {
                throw new HazelcastInstanceNotActiveException();
            }

            ClusterState clusterState = node.getClusterService().getClusterState();
            if (clusterState != ClusterState.ACTIVE) {
                throw new IllegalStateException("Partitions can't be assigned since cluster-state: " + clusterState);
            }
            if (isClusterFormedByOnlyLiteMembers()) {
                throw new NoDataMemberInClusterException(
                        "Partitions can't be assigned since all nodes in the cluster are lite members");
            }

            try {
                Thread.sleep(PARTITION_OWNERSHIP_WAIT_MILLIS);
            } catch (InterruptedException e) {
                // Thread.sleep clears the interrupt status when it throws; restore it so that
                // code further up the stack can still observe the interruption
                Thread.currentThread().interrupt();
                throw ExceptionUtil.rethrow(e);
            }
        }
        return owner;
    }

    @Override
    public void firstArrangement() {
        if (partitionStateManager.isInitialized()) {
            return;
        }

        if (!node.isMaster()) {
            triggerMasterToAssignPartitions();
            return;
        }

        lock.lock();
        try {
            if (partitionStateManager.isInitialized()) {
                return;
            }
            Set
excludedAddresses = migrationManager.getShutdownRequestedAddresses(); if (!partitionStateManager.initializePartitionAssignments(excludedAddresses)) { return; } publishPartitionRuntimeState(); } finally { lock.unlock(); } } private void triggerMasterToAssignPartitions() { if (partitionStateManager.isInitialized()) { return; } if (!node.joined()) { return; } ClusterState clusterState = node.getClusterService().getClusterState(); if (clusterState != ClusterState.ACTIVE) { logger.warning("Partitions can't be assigned since cluster-state= " + clusterState); return; } if (!triggerMasterFlag.compareAndSet(false, true)) { return; } try { final Address masterAddress = node.getMasterAddress(); if (masterAddress != null && !masterAddress.equals(node.getThisAddress())) { Future f = nodeEngine.getOperationService().createInvocationBuilder(SERVICE_NAME, new AssignPartitions(), masterAddress).setTryCount(1).invoke(); f.get(1, TimeUnit.SECONDS); } } catch (Exception e) { logger.finest(e); } finally { triggerMasterFlag.set(false); } } private boolean isClusterFormedByOnlyLiteMembers() { final ClusterServiceImpl clusterService = node.getClusterService(); return clusterService.getMembers(DATA_MEMBER_SELECTOR).isEmpty(); } public void setInitialState(Address[][] newState, int partitionStateVersion) { lock.lock(); try { partitionStateManager.setInitialState(newState, partitionStateVersion); } finally { lock.unlock(); } } @Override public int getMemberGroupsSize() { return partitionStateManager.getMemberGroupsSize(); } @Probe(name = "maxBackupCount") @Override public int getMaxAllowedBackupCount() { return max(min(getMemberGroupsSize() - 1, InternalPartition.MAX_BACKUP_COUNT), 0); } @Override public boolean isMemberAllowedToJoin(Address address) { lock.lock(); try { ClusterState clusterState = node.getClusterService().getClusterState(); if (clusterState == ClusterState.FROZEN || clusterState == ClusterState.PASSIVE) { return true; } if 
(partitionStateManager.isPresentInPartitionTable(address)) { return false; } final MigrationRunnable activeTask = migrationManager.getActiveTask(); if (activeTask instanceof MigrationManager.MigrateTask) { final MigrationManager.MigrateTask migrateTask = (MigrationManager.MigrateTask) activeTask; final MigrationInfo migrationInfo = migrateTask.migrationInfo; if (address.equals(migrationInfo.getSource()) || address.equals(migrationInfo.getDestination())) { return false; } } return true; } finally { lock.unlock(); } } @Override public void memberAdded(MemberImpl member) { logger.fine("Adding " + member); lock.lock(); try { if (!member.localMember()) { partitionStateManager.updateMemberGroupsSize(); } lastMaster = node.getMasterAddress(); if (node.isMaster()) { if (partitionStateManager.isInitialized()) { final ClusterState clusterState = nodeEngine.getClusterService().getClusterState(); if (clusterState == ClusterState.ACTIVE) { migrationManager.triggerControlTask(); } } } } finally { lock.unlock(); } } @Override public void memberRemoved(final MemberImpl member) { logger.fine("Removing " + member); final Address deadAddress = member.getAddress(); final Address thisAddress = node.getThisAddress(); lock.lock(); try { partitionStateManager.updateMemberGroupsSize(); migrationManager.onMemberRemove(member); boolean isThisNodeNewMaster = node.isMaster() && !thisAddress.equals(lastMaster); if (isThisNodeNewMaster) { assert !shouldFetchPartitionTables : "SOMETHING IS WRONG! 
Removed member: " + member; shouldFetchPartitionTables = true; } lastMaster = node.getMasterAddress(); migrationManager.pauseMigration(); replicaManager.cancelReplicaSyncRequestsTo(deadAddress); if (node.isMaster()) { migrationManager.triggerControlTask(); } migrationManager.resumeMigration(); } finally { lock.unlock(); } } public void cancelReplicaSyncRequestsTo(Address deadAddress) { lock.lock(); try { replicaManager.cancelReplicaSyncRequestsTo(deadAddress); } finally { lock.unlock(); } } @Override public PartitionRuntimeState createPartitionState() { if (!isFetchMostRecentPartitionTableTaskRequired()) { return createPartitionStateInternal(); } return null; } public PartitionRuntimeState createPartitionStateInternal() { if (!partitionStateManager.isInitialized()) { return null; } lock.lock(); try { if (!partitionStateManager.isInitialized()) { return null; } List completedMigrations = migrationManager.getCompletedMigrationsCopy(); InternalPartition[] partitions = partitionStateManager.getPartitions(); PartitionRuntimeState state = new PartitionRuntimeState(partitions, completedMigrations, getPartitionStateVersion()); state.setActiveMigration(migrationManager.getActiveMigration()); return state; } finally { lock.unlock(); } } /** * Creates a transient PartitionRuntimeState to commit given migration. * Result migration is applied to partition table and migration is added to completed-migrations set. * Version of created partition table is incremented by 1. 
*/ PartitionRuntimeState createMigrationCommitPartitionState(MigrationInfo migrationInfo) { lock.lock(); try { if (!partitionStateManager.isInitialized()) { return null; } List completedMigrations = migrationManager.getCompletedMigrationsCopy(); InternalPartition[] partitions = partitionStateManager.getPartitionsCopy(); int partitionId = migrationInfo.getPartitionId(); InternalPartitionImpl partition = (InternalPartitionImpl) partitions[partitionId]; migrationManager.applyMigration(partition, migrationInfo); migrationInfo.setStatus(MigrationStatus.SUCCESS); completedMigrations.add(migrationInfo); int committedVersion = getPartitionStateVersion() + 1; return new PartitionRuntimeState(partitions, completedMigrations, committedVersion); } finally { lock.unlock(); } } /** * Creates a transient PartitionRuntimeState to commit promotions. * Results of promotions are applied to partition table. * Version of created partition table is incremented by number of promotions. */ PartitionRuntimeState createPromotionCommitPartitionState(Collection migrationInfos) { lock.lock(); try { if (!partitionStateManager.isInitialized()) { return null; } List completedMigrations = migrationManager.getCompletedMigrationsCopy(); InternalPartition[] partitions = partitionStateManager.getPartitionsCopy(); for (MigrationInfo migrationInfo : migrationInfos) { int partitionId = migrationInfo.getPartitionId(); InternalPartitionImpl partition = (InternalPartitionImpl) partitions[partitionId]; migrationManager.applyMigration(partition, migrationInfo); migrationInfo.setStatus(MigrationStatus.SUCCESS); } int committedVersion = getPartitionStateVersion() + migrationInfos.size(); return new PartitionRuntimeState(partitions, completedMigrations, committedVersion); } finally { lock.unlock(); } } @SuppressWarnings("checkstyle:npathcomplexity") void publishPartitionRuntimeState() { if (!partitionStateManager.isInitialized()) { // do not send partition state until initialized! 
return; } if (!node.isMaster()) { return; } if (!isReplicaSyncAllowed()) { // migration is disabled because of a member leave, wait till enabled! return; } PartitionRuntimeState partitionState = createPartitionStateInternal(); if (partitionState == null) { return; } if (logger.isFineEnabled()) { logger.fine("Publishing partition state, version: " + partitionState.getVersion()); } PartitionStateOperation op = new PartitionStateOperation(partitionState); OperationService operationService = nodeEngine.getOperationService(); Collection members = node.clusterService.getMemberImpls(); for (MemberImpl member : members) { if (!member.localMember()) { try { operationService.send(op, member.getAddress()); } catch (Exception e) { logger.finest(e); } } } } @SuppressWarnings("checkstyle:npathcomplexity") boolean syncPartitionRuntimeState() { if (!partitionStateManager.isInitialized()) { // do not send partition state until initialized! return false; } if (!node.isMaster()) { return false; } PartitionRuntimeState partitionState = createPartitionStateInternal(); if (partitionState == null) { return false; } if (logger.isFineEnabled()) { logger.fine("Sync'ing partition state, version: " + partitionState.getVersion()); } OperationService operationService = nodeEngine.getOperationService(); Collection members = node.clusterService.getMemberImpls(); List> calls = firePartitionStateOperation(members, partitionState, operationService); Collection results = returnWithDeadline(calls, PTABLE_SYNC_TIMEOUT_SECONDS, TimeUnit.SECONDS, partitionStateSyncTimeoutHandler); if (calls.size() != results.size()) { return false; } for (Boolean result : results) { if (!result) { if (logger.isFineEnabled()) { logger.fine("Partition state, version: " + partitionState.getVersion() + " sync failed to one of the members!"); } return false; } } return true; } private List> firePartitionStateOperation(Collection members, PartitionRuntimeState partitionState, OperationService operationService) { final 
ClusterServiceImpl clusterService = node.clusterService; List> calls = new ArrayList>(members.size()); for (MemberImpl member : members) { if (!(member.localMember() || clusterService.isMemberRemovedWhileClusterIsNotActive(member.getAddress()))) { try { Address address = member.getAddress(); PartitionStateOperation operation = new PartitionStateOperation(partitionState, true); Future f = operationService.invokeOnTarget(SERVICE_NAME, operation, address); calls.add(f); } catch (Exception e) { logger.finest(e); } } } return calls; } public boolean processPartitionRuntimeState(final PartitionRuntimeState partitionState) { final Address sender = partitionState.getEndpoint(); if (!node.getNodeExtension().isStartCompleted()) { logger.warning("Ignoring received partition table, startup is not completed yet. Sender: " + sender); return false; } final Address master = node.getMasterAddress(); if (node.isMaster() && !node.getThisAddress().equals(sender)) { logger.warning("This is the master node and received a PartitionRuntimeState from " + sender + ". Ignoring incoming state! "); return false; } else { if (sender == null || !sender.equals(master)) { if (node.clusterService.getMember(sender) == null) { logger.severe("Received a ClusterRuntimeState from an unknown member!" + " => Sender: " + sender + ", Master: " + master + "! "); return false; } else { logger.warning("Received a ClusterRuntimeState, but its sender doesn't seem to be master!" + " => Sender: " + sender + ", Master: " + master + "! " + "(Ignore if master node has changed recently.)"); return false; } } } return applyNewState(partitionState, sender); } private boolean applyNewState(PartitionRuntimeState partitionState, Address sender) { lock.lock(); try { final int newVersion = partitionState.getVersion(); final int currentVersion = partitionStateManager.getVersion(); if (newVersion < currentVersion) { logger.warning("Master version should be greater than ours! 
Local version: " + currentVersion + ", Master version: " + newVersion + " Master: " + nodeEngine.getMasterAddress()); return false; } else if (newVersion == currentVersion) { if (logger.isFineEnabled()) { logger.fine("Master version should be greater than ours! Local version: " + currentVersion + ", Master version: " + newVersion + " Master: " + nodeEngine.getMasterAddress()); } return true; } partitionStateManager.setVersion(newVersion); partitionStateManager.setInitialized(); filterAndLogUnknownAddressesInPartitionTable(sender, partitionState.getPartitionTable()); finalizeOrRollbackMigration(partitionState); return true; } finally { lock.unlock(); } } private void finalizeOrRollbackMigration(PartitionRuntimeState partitionState) { final Address[][] partitionTable = partitionState.getPartitionTable(); Collection completedMigrations = partitionState.getCompletedMigrations(); for (MigrationInfo completedMigration : completedMigrations) { assert completedMigration.getStatus() == MigrationStatus.SUCCESS || completedMigration.getStatus() == MigrationStatus.FAILED : "Invalid migration: " + completedMigration; if (migrationManager.addCompletedMigration(completedMigration)) { int partitionId = completedMigration.getPartitionId(); Address[] replicas = partitionTable[partitionId]; // mdogan: // Each partition should be updated right after migration is finalized // at the moment, it doesn't cause any harm to existing services, // because we have a `migrating` flag in partition which is cleared during migration finalization. // But from API point of view, we should provide explicit guarantees. // For the time being, leaving this stuff as is to not to change behaviour. 
partitionStateManager.updateReplicaAddresses(partitionId, replicas); migrationManager.scheduleActiveMigrationFinalization(completedMigration); } } updateAllPartitions(partitionTable); migrationManager.retainCompletedMigrations(completedMigrations); } private void updateAllPartitions(Address[][] partitionTable) { for (int partitionId = 0; partitionId < partitionCount; partitionId++) { Address[] replicas = partitionTable[partitionId]; partitionStateManager.updateReplicaAddresses(partitionId, replicas); } } private void filterAndLogUnknownAddressesInPartitionTable(Address sender, Address[][] partitionTable) { final Set
unknownAddresses = new HashSet
(); for (int partitionId = 0; partitionId < partitionTable.length; partitionId++) { Address[] replicas = partitionTable[partitionId]; searchUnknownAddressesInPartitionTable(sender, unknownAddresses, partitionId, replicas); } logUnknownAddressesInPartitionTable(sender, unknownAddresses); } private void logUnknownAddressesInPartitionTable(Address sender, Set
unknownAddresses) { if (!unknownAddresses.isEmpty() && logger.isLoggable(Level.WARNING)) { StringBuilder s = new StringBuilder("Following unknown addresses are found in partition table") .append(" sent from master[").append(sender).append("].") .append(" (Probably they have recently joined or left the cluster.)") .append(" {"); for (Address address : unknownAddresses) { s.append("\n\t").append(address); } s.append("\n}"); logger.warning(s.toString()); } } private void searchUnknownAddressesInPartitionTable(Address sender, Set
unknownAddresses, int partitionId, Address[] addresses) { final ClusterServiceImpl clusterService = node.clusterService; final ClusterState clusterState = clusterService.getClusterState(); for (int index = 0; index < InternalPartition.MAX_REPLICA_COUNT; index++) { Address address = addresses[index]; if (address != null && node.clusterService.getMember(address) == null) { if (clusterState == ClusterState.ACTIVE || !clusterService.isMemberRemovedWhileClusterIsNotActive(address)) { if (logger.isFinestEnabled()) { logger.finest( "Unknown " + address + " found in partition table sent from master " + sender + ". It has probably already left the cluster. partitionId=" + partitionId); } unknownAddresses.add(address); } } } } @Override public IPartition[] getPartitions() { IPartition[] result = new IPartition[partitionCount]; System.arraycopy(partitionStateManager.getPartitions(), 0, result, 0, partitionCount); return result; } @Override public InternalPartition[] getInternalPartitions() { return partitionStateManager.getPartitions(); } @Override public InternalPartition getPartition(int partitionId) { return getPartition(partitionId, true); } @Override public InternalPartition getPartition(int partitionId, boolean triggerOwnerAssignment) { InternalPartitionImpl p = partitionStateManager.getPartitionImpl(partitionId); if (triggerOwnerAssignment && p.getOwnerOrNull() == null) { // probably ownerships are not set yet. // force it. 
getPartitionOwner(partitionId); } return p; } @Override public boolean prepareToSafeShutdown(long timeout, TimeUnit unit) { if (!node.joined()) { return true; } if (node.isLiteMember()) { return true; } CountDownLatch latch = getShutdownLatch(); InternalOperationService operationService = nodeEngine.getOperationService(); long timeoutMillis = unit.toMillis(timeout); long awaitStep = Math.min(SAFE_SHUTDOWN_MAX_AWAIT_STEP_MILLIS, timeoutMillis); try { do { Address masterAddress = nodeEngine.getMasterAddress(); if (masterAddress == null) { logger.warning("Safe shutdown failed, master member is not known!"); return false; } if (node.getThisAddress().equals(masterAddress)) { onShutdownRequest(node.getThisAddress()); } else { operationService.send(new ShutdownRequestOperation(), masterAddress); } if (latch.await(awaitStep, TimeUnit.MILLISECONDS)) { return true; } timeoutMillis -= awaitStep; } while (timeoutMillis > 0); } catch (InterruptedException e) { logger.info("Safe shutdown is interrupted!"); } return false; } private CountDownLatch getShutdownLatch() { CountDownLatch latch = shutdownLatchRef.get(); if (latch == null) { latch = new CountDownLatch(1); if (!shutdownLatchRef.compareAndSet(null, latch)) { latch = shutdownLatchRef.get(); } } return latch; } public void onShutdownRequest(Address address) { if (lock.tryLock()) { try { migrationManager.onShutdownRequest(address); } finally { lock.unlock(); } } } public void onShutdownResponse() { CountDownLatch latch = shutdownLatchRef.get(); assert latch != null; latch.countDown(); } @Override public boolean isMemberStateSafe() { return partitionReplicaStateChecker.getPartitionServiceState() == PartitionServiceState.SAFE; } @Override public boolean hasOnGoingMigration() { return hasOnGoingMigrationLocal() || (!node.isMaster() && partitionReplicaStateChecker.hasOnGoingMigrationMaster(Level.FINEST)); } @Override public boolean hasOnGoingMigrationLocal() { return migrationManager.hasOnGoingMigration(); } @Override public 
final int getPartitionId(Data key) { return HashUtil.hashToIndex(key.getPartitionHash(), partitionCount); } @Override public final int getPartitionId(Object key) { return getPartitionId(nodeEngine.toData(key)); } @Override public final int getPartitionCount() { return partitionCount; } public long getPartitionMigrationTimeout() { return partitionMigrationTimeout; } // called in operation threads // Caution: Returning version array without copying for performance reasons. Callers must not modify this array! @Override public long[] incrementPartitionReplicaVersions(int partitionId, int backupCount) { return replicaManager.incrementPartitionReplicaVersions(partitionId, backupCount); } // called in operation threads @Override public void updatePartitionReplicaVersions(int partitionId, long[] versions, int replicaIndex) { replicaManager.updatePartitionReplicaVersions(partitionId, versions, replicaIndex); } @Override public boolean isPartitionReplicaVersionStale(int partitionId, long[] versions, int replicaIndex) { return replicaManager.isPartitionReplicaVersionStale(partitionId, versions, replicaIndex); } // called in operation threads // Caution: Returning version array without copying for performance reasons. Callers must not modify this array! @Override public long[] getPartitionReplicaVersions(int partitionId) { return replicaManager.getPartitionReplicaVersions(partitionId); } @Override public Map> getMemberPartitionsMap() { Collection dataMembers = node.getClusterService().getMembers(DATA_MEMBER_SELECTOR); int dataMembersSize = dataMembers.size(); int partitionsPerMember = (dataMembersSize > 0 ? 
(int) ceil((float) partitionCount / dataMembersSize) : 0); Map> memberPartitions = new HashMap>(dataMembersSize); for (int partitionId = 0; partitionId < partitionCount; partitionId++) { Address owner = getPartitionOwnerOrWait(partitionId); List ownedPartitions = memberPartitions.get(owner); if (ownedPartitions == null) { ownedPartitions = new ArrayList(partitionsPerMember); memberPartitions.put(owner, ownedPartitions); } ownedPartitions.add(partitionId); } return memberPartitions; } @Override public List getMemberPartitions(Address target) { List ownedPartitions = new LinkedList(); for (int i = 0; i < partitionCount; i++) { final Address owner = getPartitionOwner(i); if (target.equals(owner)) { ownedPartitions.add(i); } } return ownedPartitions; } @Override public void reset() { lock.lock(); try { shouldFetchPartitionTables = false; replicaManager.reset(); partitionStateManager.reset(); migrationManager.reset(); } finally { lock.unlock(); } } @Override public void pauseMigration() { migrationManager.pauseMigration(); } @Override public void resumeMigration() { migrationManager.resumeMigration(); } public boolean isReplicaSyncAllowed() { return migrationManager.isMigrationAllowed(); } @Override public void shutdown(boolean terminate) { logger.finest("Shutting down the partition service"); migrationManager.stop(); reset(); } @Override @Probe(name = "migrationQueueSize") public long getMigrationQueueSize() { return migrationManager.getMigrationQueueSize(); } public PartitionServiceProxy getPartitionServiceProxy() { return proxy; } @Override public String addMigrationListener(MigrationListener listener) { return partitionEventManager.addMigrationListener(listener); } @Override public boolean removeMigrationListener(String registrationId) { return partitionEventManager.removeMigrationListener(registrationId); } @Override public String addPartitionLostListener(PartitionLostListener listener) { return partitionEventManager.addPartitionLostListener(listener); } @Override 
public String addLocalPartitionLostListener(PartitionLostListener listener) { return partitionEventManager.addLocalPartitionLostListener(listener); } @Override public boolean removePartitionLostListener(String registrationId) { return partitionEventManager.removePartitionLostListener(registrationId); } @Override public void dispatchEvent(PartitionEvent partitionEvent, PartitionEventListener partitionEventListener) { partitionEventListener.onEvent(partitionEvent); } public void addPartitionListener(PartitionListener listener) { lock.lock(); try { partitionListener.addChildListener(listener); } finally { lock.unlock(); } } @Override public boolean isPartitionOwner(int partitionId) { InternalPartition partition = partitionStateManager.getPartitionImpl(partitionId); return partition.isLocal(); } @Override public int getPartitionStateVersion() { return partitionStateManager.getVersion(); } @Override public void onPartitionLost(IPartitionLostEvent event) { partitionEventManager.onPartitionLost(event); } public void setInternalMigrationListener(InternalMigrationListener listener) { migrationManager.setInternalMigrationListener(listener); } public InternalMigrationListener getInternalMigrationListener() { return migrationManager.getInternalMigrationListener(); } public void resetInternalMigrationListener() { migrationManager.resetInternalMigrationListener(); } /** * @return copy of ongoing replica-sync operations */ public List getOngoingReplicaSyncRequests() { return replicaManager.getOngoingReplicaSyncRequests(); } /** * @return copy of scheduled replica-sync requests */ public List> getScheduledReplicaSyncRequests() { return replicaManager.getScheduledReplicaSyncRequests(); } public PartitionStateManager getPartitionStateManager() { return partitionStateManager; } public MigrationManager getMigrationManager() { return migrationManager; } public PartitionReplicaManager getReplicaManager() { return replicaManager; } public PartitionReplicaStateChecker 
getPartitionReplicaStateChecker() { return partitionReplicaStateChecker; } public PartitionEventManager getPartitionEventManager() { return partitionEventManager; } boolean isFetchMostRecentPartitionTableTaskRequired() { return shouldFetchPartitionTables; } boolean scheduleFetchMostRecentPartitionTableTaskIfRequired() { lock.lock(); try { if (shouldFetchPartitionTables) { migrationManager.schedule(new FetchMostRecentPartitionTableTask()); return true; } return false; } finally { lock.unlock(); } } private class FetchMostRecentPartitionTableTask implements MigrationRunnable { private final Address thisAddress = node.getThisAddress(); private int maxVersion; private PartitionRuntimeState newState; public void run() { maxVersion = partitionStateManager.getVersion(); Collection> futures = invokeFetchPartitionStateOps(); logger.info("Fetching most recent partition table! my version: " + maxVersion); Collection allCompletedMigrations = new HashSet(); Collection allActiveMigrations = new HashSet(); processResults(futures, allCompletedMigrations, allActiveMigrations); logger.info("Most recent partition table version: " + maxVersion); processNewState(allCompletedMigrations, allActiveMigrations); syncPartitionRuntimeState(); } private Collection> invokeFetchPartitionStateOps() { Collection members = node.clusterService.getMemberImpls(); Collection> futures = new ArrayList>( members.size()); for (MemberImpl m : members) { if (m.localMember()) { continue; } Future future = nodeEngine.getOperationService() .createInvocationBuilder(SERVICE_NAME, new FetchPartitionStateOperation(), m.getAddress()).setTryCount(Integer.MAX_VALUE) .setCallTimeout(Long.MAX_VALUE).invoke(); futures.add(future); } return futures; } private void processResults(Collection> futures, Collection allCompletedMigrations, Collection allActiveMigrations) { for (Future future : futures) { try { PartitionRuntimeState state = future.get(); if (state == null) { // state can be null, if not initialized continue; } 
if (maxVersion < state.getVersion()) { newState = state; maxVersion = state.getVersion(); } allCompletedMigrations.addAll(state.getCompletedMigrations()); if (state.getActiveMigration() != null) { allActiveMigrations.add(state.getActiveMigration()); } } catch (TargetNotMemberException e) { EmptyStatement.ignore(e); } catch (MemberLeftException e) { EmptyStatement.ignore(e); } catch (InterruptedException e) { logger.fine("FetchMostRecentPartitionTableTask is interrupted."); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (!(cause instanceof TargetNotMemberException) && !(cause instanceof MemberLeftException)) { logger.warning("Failed to fetch partition table!", e); } } } } private void processNewState(Collection allCompletedMigrations, Collection allActiveMigrations) { lock.lock(); try { processMigrations(allCompletedMigrations, allActiveMigrations); if (newState != null) { newState.setCompletedMigrations(allCompletedMigrations); maxVersion = Math.max(maxVersion, getPartitionStateVersion()) + 1; newState.setVersion(maxVersion); logger.info("Applying the most recent of partition state..."); applyNewState(newState, thisAddress); } else if (partitionStateManager.isInitialized()) { partitionStateManager.incrementVersion(); for (MigrationInfo migrationInfo : allCompletedMigrations) { if (migrationManager.addCompletedMigration(migrationInfo)) { if (logger.isFinestEnabled()) { logger.finest("Scheduling migration finalization after finding most recent partition table: " + migrationInfo); } migrationManager.scheduleActiveMigrationFinalization(migrationInfo); } } } shouldFetchPartitionTables = false; } finally { lock.unlock(); } } private void processMigrations(Collection allCompletedMigrations, Collection allActiveMigrations) { allCompletedMigrations.addAll(migrationManager.getCompletedMigrationsCopy()); if (migrationManager.getActiveMigration() != null) { allActiveMigrations.add(migrationManager.getActiveMigration()); } for (MigrationInfo activeMigration 
: allActiveMigrations) { activeMigration.setStatus(MigrationStatus.FAILED); if (allCompletedMigrations.add(activeMigration)) { logger.info("Marked active migration " + activeMigration + " as " + MigrationStatus.FAILED); } } } } @Override public String toString() { return "InternalPartitionService {" + "version: " + getPartitionStateVersion() + ", migrationQ: " + getMigrationQueueSize() + "}"; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy