All downloads are free. Search and download functionality uses the official Maven repository.

com.hazelcast.internal.cluster.impl.ClusterStateManager Maven / Gradle / Ivy

There is a newer version: 62
Show newest version
/*
 * Copyright (c) 2008-2019, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.internal.cluster.impl;

import com.hazelcast.cluster.ClusterState;
import com.hazelcast.core.Member;
import com.hazelcast.core.MemberLeftException;
import com.hazelcast.instance.MemberImpl;
import com.hazelcast.instance.Node;
import com.hazelcast.internal.cluster.impl.operations.LockClusterStateOp;
import com.hazelcast.internal.partition.InternalPartitionService;
import com.hazelcast.internal.util.LockGuard;
import com.hazelcast.logging.ILogger;
import com.hazelcast.nio.Address;
import com.hazelcast.spi.Operation;
import com.hazelcast.spi.exception.TargetNotMemberException;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.transaction.TransactionException;
import com.hazelcast.transaction.TransactionOptions;
import com.hazelcast.transaction.TransactionOptions.TransactionType;
import com.hazelcast.transaction.impl.Transaction;
import com.hazelcast.transaction.impl.TransactionManagerServiceImpl;
import com.hazelcast.util.ExceptionUtil;
import com.hazelcast.util.FutureUtil;
import com.hazelcast.util.Preconditions;
import com.hazelcast.version.Version;

import java.util.ArrayList;
import java.util.Collection;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;

import static com.hazelcast.internal.cluster.impl.ClusterServiceImpl.SERVICE_NAME;
import static com.hazelcast.util.FutureUtil.waitWithDeadline;

/**
 * ClusterStateManager stores cluster state and manages cluster state transactions.
 * 

* When a node joins to the cluster, its initial state is set. *

* When a cluster state change is requested, a cluster-wide transaction is started * and state is changed all over the cluster atomically. */ @SuppressWarnings("checkstyle:methodcount") public class ClusterStateManager { private static final TransactionOptions DEFAULT_TX_OPTIONS = new TransactionOptions() .setDurability(1) .setTimeout(1, TimeUnit.MINUTES) .setTransactionType(TransactionType.TWO_PHASE); private static final long LOCK_LEASE_EXTENSION_MILLIS = TimeUnit.SECONDS.toMillis(20); // this is the version at which the cluster operates. see Cluster#getClusterVersion volatile Version clusterVersion = Version.UNKNOWN; private final Node node; private final ILogger logger; private final Lock clusterServiceLock; private final AtomicReference stateLockRef = new AtomicReference(LockGuard.NOT_LOCKED); private volatile ClusterState state = ClusterState.ACTIVE; ClusterStateManager(Node node, Lock clusterServiceLock) { this.node = node; this.clusterServiceLock = clusterServiceLock; this.logger = node.getLogger(getClass()); } public ClusterState getState() { LockGuard stateLock = getStateLock(); return stateLock.isLocked() ? ClusterState.IN_TRANSITION : state; } public Version getClusterVersion() { // if version is locked we still operate using the "old" version, so we return this one return clusterVersion; } LockGuard getStateLock() { LockGuard stateLock = stateLockRef.get(); while (stateLock.isLeaseExpired()) { if (stateLockRef.compareAndSet(stateLock, LockGuard.NOT_LOCKED)) { logger.fine("Cluster state lock: " + stateLock + " is expired."); stateLock = LockGuard.NOT_LOCKED; break; } stateLock = stateLockRef.get(); } return stateLock; } void initialClusterState(ClusterState initialState, Version version) { clusterServiceLock.lock(); try { node.getNodeExtension().onInitialClusterState(initialState); final ClusterState currentState = getState(); if (currentState != ClusterState.ACTIVE && currentState != initialState) { logger.warning("Initial state is already set! 
" + "Current state: " + currentState + ", Given state: " + initialState); return; } // no need to validate again logger.fine("Setting initial cluster state: " + initialState + " and version: " + version); validateNodeCompatibleWith(version); setClusterStateAndVersion(initialState, version, true); } finally { clusterServiceLock.unlock(); } } void setClusterState(ClusterState newState, boolean isTransient) { clusterServiceLock.lock(); try { doSetClusterState(newState, isTransient); } finally { clusterServiceLock.unlock(); } } public void setClusterVersion(Version newVersion) { clusterServiceLock.lock(); try { doSetClusterVersion(newVersion); } finally { clusterServiceLock.unlock(); } } private void setClusterStateAndVersion(ClusterState newState, Version newVersion, boolean isTransient) { this.state = newState; this.clusterVersion = newVersion; stateLockRef.set(LockGuard.NOT_LOCKED); changeNodeState(newState); node.getNodeExtension().onClusterStateChange(newState, isTransient); node.getNodeExtension().onClusterVersionChange(newVersion); } private void doSetClusterState(ClusterState newState, boolean isTransient) { this.state = newState; stateLockRef.set(LockGuard.NOT_LOCKED); changeNodeState(newState); node.getNodeExtension().onClusterStateChange(newState, isTransient); } private void doSetClusterVersion(Version newVersion) { this.clusterVersion = newVersion; stateLockRef.set(LockGuard.NOT_LOCKED); node.getNodeExtension().onClusterVersionChange(newVersion); } void reset() { clusterServiceLock.lock(); try { state = ClusterState.ACTIVE; // not notifying cluster version listeners about change to UNKNOWN. 
consider for example the following scenario: // - node starts with codebase version 3.9, overrides init cluster version via group property to 3.8 // - node joins an existing 3.8 cluster which is undergoing rolling-upgrade to 3.9 // - once all cluster members are on 3.9, cluster version is upgraded to 3.9.0 // - clusterVersion is reset to UNKNOWN // - if cluster version listener is notified of null cluster version, it should receive the overridden one (3.8) // - if 3.8 discovery & join messages are incompatible, node will not be able to join 3.9 cluster // Instead, not notifying cluster version listeners will let the node use its last set cluster version for discovery & // join messages and join the cluster. clusterVersion = Version.UNKNOWN; stateLockRef.set(LockGuard.NOT_LOCKED); } finally { clusterServiceLock.unlock(); } } /** * Validates the requested cluster state change and sets a {@code ClusterStateLock}. */ public void lockClusterState(ClusterStateChange stateChange, Address initiator, String txnId, long leaseTime, int memberListVersion, int partitionStateVersion) { Preconditions.checkNotNull(stateChange); clusterServiceLock.lock(); try { if (!node.getNodeExtension().isStartCompleted()) { throw new IllegalStateException("Can not lock cluster state! Startup is not completed yet!"); } if (node.getClusterService().getClusterJoinManager().isMastershipClaimInProgress()) { throw new IllegalStateException("Can not lock cluster state! Mastership claim is in progress!"); } if (stateChange.isOfType(Version.class)) { validateNodeCompatibleWith((Version) stateChange.getNewState()); validateClusterVersionChange((Version) stateChange.getNewState()); } checkMemberListVersion(memberListVersion); checkMigrationsAndPartitionStateVersion(stateChange, partitionStateVersion); lockOrExtendClusterState(initiator, txnId, leaseTime); try { // check migration status and partition-state version again // if partition state is changed then release the lock and fail. 
checkMigrationsAndPartitionStateVersion(stateChange, partitionStateVersion); } catch (IllegalStateException e) { stateLockRef.set(LockGuard.NOT_LOCKED); throw e; } } finally { clusterServiceLock.unlock(); } } private void checkMemberListVersion(int memberListVersion) { int thisMemberListVersion = node.getClusterService().getMemberListVersion(); if (memberListVersion != thisMemberListVersion) { throw new IllegalStateException( "Can not lock cluster state! Member list versions are not matching!" + " Expected version: " + memberListVersion + ", Current version: " + thisMemberListVersion); } } private void lockOrExtendClusterState(Address initiator, String txnId, long leaseTime) { Preconditions.checkPositive(leaseTime, "Lease time should be positive!"); LockGuard currentLock = getStateLock(); if (!currentLock.allowsLock(txnId)) { throw new TransactionException("Locking failed for " + initiator + ", tx: " + txnId + ", current state: " + toString()); } long newLeaseTime = currentLock.getRemainingTime() + leaseTime; if (newLeaseTime < 0L) { newLeaseTime = Long.MAX_VALUE; } stateLockRef.set(new LockGuard(initiator, txnId, newLeaseTime)); } // check if current node is compatible with requested cluster version // wraps NodeExtension#isNodeVersionCompatibleWith(Version) and throws a VersionMismatchException if incompatibility is found. 
private void validateNodeCompatibleWith(Version clusterVersion) { if (!node.getNodeExtension().isNodeVersionCompatibleWith(clusterVersion)) { throw new VersionMismatchException("Node's codebase version " + node.getVersion() + " is incompatible with " + "the requested cluster version " + clusterVersion); } } // validate transition from current to newClusterVersion is allowed private void validateClusterVersionChange(Version newClusterVersion) { if (!clusterVersion.isUnknown() && clusterVersion.getMajor() != newClusterVersion.getMajor()) { throw new IllegalArgumentException("Transition to requested version " + newClusterVersion + " not allowed for current cluster version " + clusterVersion); } } private void checkMigrationsAndPartitionStateVersion(ClusterStateChange stateChange, int partitionStateVersion) { final InternalPartitionService partitionService = node.getPartitionService(); final int thisPartitionStateVersion = partitionService.getPartitionStateVersion(); if (partitionService.hasOnGoingMigrationLocal()) { throw new IllegalStateException("Still have pending migration tasks, " + "cannot lock cluster state! New state: " + stateChange + ", current state: " + getState()); } else if (partitionStateVersion != thisPartitionStateVersion) { throw new IllegalStateException("Can not lock cluster state! Partition tables have different versions! " + "Expected version: " + partitionStateVersion + " Current version: " + thisPartitionStateVersion); } } public boolean rollbackClusterState(String txnId) { clusterServiceLock.lock(); try { final LockGuard currentLock = getStateLock(); if (!currentLock.allowsUnlock(txnId)) { return false; } logger.fine("Rolling back cluster state transaction: " + txnId); stateLockRef.set(LockGuard.NOT_LOCKED); // if state allows join after rollback, then remove all members which left during transaction. 
if (state.isJoinAllowed()) { node.getClusterService().getMembershipManager().removeAllMissingMembers(); } return true; } finally { clusterServiceLock.unlock(); } } // for tests only void commitClusterState(ClusterStateChange newState, Address initiator, String txnId) { commitClusterState(newState, initiator, txnId, false); } public void commitClusterState(ClusterStateChange stateChange, Address initiator, String txnId, boolean isTransient) { Preconditions.checkNotNull(stateChange); stateChange.validate(); clusterServiceLock.lock(); try { final LockGuard stateLock = getStateLock(); if (!stateLock.allowsUnlock(txnId)) { throw new TransactionException( "Cluster state change [" + state + " -> " + stateChange + "] failed for " + initiator + ", current state: " + stateToString()); } if (stateChange.isOfType(ClusterState.class)) { ClusterState newState = (ClusterState) stateChange.getNewState(); doSetClusterState(newState, isTransient); // if state is changed to allow joins, then remove all members which left while not active. 
if (newState.isJoinAllowed()) { node.getClusterService().getMembershipManager().removeAllMissingMembers(); } } else if (stateChange.isOfType(Version.class)) { // version is validated on cluster-state-lock, thus we can commit without checking compatibility Version newVersion = (Version) stateChange.getNewState(); logger.info("Cluster version set to " + newVersion); doSetClusterVersion(newVersion); } else { throw new IllegalArgumentException("Illegal ClusterStateChange of type " + stateChange.getType() + "."); } } finally { clusterServiceLock.unlock(); } } private void changeNodeState(ClusterState newState) { if (newState == ClusterState.PASSIVE) { node.changeNodeStateToPassive(); } else { node.changeNodeStateToActive(); } } void changeClusterState(ClusterStateChange stateChange, MemberMap memberMap, int partitionStateVersion, boolean isTransient) { changeClusterState(stateChange, memberMap, DEFAULT_TX_OPTIONS, partitionStateVersion, isTransient); } void changeClusterState(ClusterStateChange stateChange, MemberMap memberMap, TransactionOptions options, int partitionStateVersion, boolean isTransient) { checkParameters(stateChange, options); if (isCurrentStateEqualToRequestedOne(stateChange)) { return; } ClusterState oldState = getState(); ClusterState requestedState = stateChange.getClusterStateOrNull(); NodeEngineImpl nodeEngine = node.getNodeEngine(); TransactionManagerServiceImpl txManagerService = (TransactionManagerServiceImpl) nodeEngine.getTransactionManagerService(); Transaction tx = txManagerService.newAllowedDuringPassiveStateTransaction(options); notifyBeforeStateChange(oldState, requestedState, isTransient); tx.begin(); try { String txnId = tx.getTxnId(); Collection members = memberMap.getMembers(); int memberListVersion = memberMap.getVersion(); addTransactionRecords(stateChange, tx, members, memberListVersion, partitionStateVersion, isTransient); lockClusterStateOnAllMembers(stateChange, nodeEngine, options.getTimeoutMillis(), txnId, members, 
memberListVersion, partitionStateVersion); checkMemberListChange(memberListVersion); tx.prepare(); } catch (Throwable e) { tx.rollback(); notifyAfterStateChange(oldState, requestedState, isTransient); if (e instanceof TargetNotMemberException || e.getCause() instanceof MemberLeftException) { throw new IllegalStateException("Cluster members changed during state change!", e); } throw ExceptionUtil.rethrow(e); } try { tx.commit(); } catch (Throwable e) { if (e instanceof TargetNotMemberException || e.getCause() instanceof MemberLeftException) { // Member left while tx is being committed after prepare successful. // We cannot rollback tx after this point. Cluster state change is done // on other members. // Left members will be added to the members-removed-in-not-active-state-list // if new state is passive or frozen. They will be able to rejoin later. return; } throw ExceptionUtil.rethrow(e); } finally { notifyAfterStateChange(oldState, requestedState, isTransient); } } private void notifyBeforeStateChange(ClusterState oldState, ClusterState requestedState, boolean isTransient) { if (requestedState != null) { node.getNodeExtension().beforeClusterStateChange(oldState, requestedState, isTransient); } } private void notifyAfterStateChange(ClusterState oldState, ClusterState requestedState, boolean isTransient) { if (requestedState != null) { // on failure, the actual state is not equal to requestedState, that's why we pass getState() node.getNodeExtension().afterClusterStateChange(oldState, getState(), isTransient); } } private boolean isCurrentStateEqualToRequestedOne(ClusterStateChange change) { if (change.isOfType(ClusterState.class)) { return getState() == change.getNewState(); } else if (change.isOfType(Version.class)) { return clusterVersion != null && clusterVersion.equals(change.getNewState()); } return false; } private void lockClusterStateOnAllMembers(ClusterStateChange stateChange, NodeEngineImpl nodeEngine, long leaseTime, String txnId, Collection members, 
int memberListVersion, int partitionStateVersion) { Collection futures = new ArrayList(members.size()); final Address thisAddress = node.getThisAddress(); for (Member member : members) { Operation op = new LockClusterStateOp(stateChange, thisAddress, txnId, leaseTime, memberListVersion, partitionStateVersion); Future future = nodeEngine.getOperationService().invokeOnTarget(SERVICE_NAME, op, member.getAddress()); futures.add(future); } StateManagerExceptionHandler exceptionHandler = new StateManagerExceptionHandler(logger); waitWithDeadline(futures, leaseTime, TimeUnit.MILLISECONDS, exceptionHandler); exceptionHandler.rethrowIfFailed(); } private void addTransactionRecords(ClusterStateChange stateChange, Transaction tx, Collection members, int memberListVersion, int partitionStateVersion, boolean isTransient) { long leaseTime = Math.min(tx.getTimeoutMillis(), LOCK_LEASE_EXTENSION_MILLIS); for (Member member : members) { tx.add(new ClusterStateTransactionLogRecord(stateChange, node.getThisAddress(), member.getAddress(), tx.getTxnId(), leaseTime, memberListVersion, partitionStateVersion, isTransient)); } } private void checkMemberListChange(int initialMemberListVersion) { int currentMemberListVersion = node.getClusterService().getMembershipManager().getMemberListVersion(); if (initialMemberListVersion != currentMemberListVersion) { throw new IllegalStateException("Cluster members changed during state change! 
" + "Initial version: " + initialMemberListVersion + ", Current version: " + currentMemberListVersion); } } private void checkParameters(ClusterStateChange newState, TransactionOptions options) { Preconditions.checkNotNull(newState); Preconditions.checkNotNull(options); newState.validate(); if (options.getTransactionType() != TransactionType.TWO_PHASE) { throw new IllegalArgumentException("Changing cluster state requires 2PC transaction!"); } } public String stateToString() { return "ClusterState{state=" + state + ", lock=" + stateLockRef.get() + '}'; } @Override public String toString() { return "ClusterStateManager{stateLockRef=" + stateLockRef + ", state=" + state + '}'; } private static final class StateManagerExceptionHandler implements FutureUtil.ExceptionHandler { private final ILogger logger; // written and read by same/single thread private Throwable error; private StateManagerExceptionHandler(ILogger logger) { this.logger = logger; } @Override public void handleException(final Throwable throwable) { Throwable cause = throwable; if (throwable instanceof ExecutionException && throwable.getCause() != null) { cause = throwable.getCause(); } if (error == null) { error = cause; } log(cause); } private void log(Throwable cause) { if (logger.isFineEnabled()) { logger.fine("failure during cluster state change", cause); } } void rethrowIfFailed() { if (error != null) { throw ExceptionUtil.rethrow(error); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy