All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.hazelcast.internal.cluster.impl.ClusterServiceImpl Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.internal.cluster.impl;

import com.hazelcast.auditlog.AuditlogTypeIds;
import com.hazelcast.cluster.Address;
import com.hazelcast.cluster.ClusterState;
import com.hazelcast.cluster.InitialMembershipEvent;
import com.hazelcast.cluster.InitialMembershipListener;
import com.hazelcast.cluster.Member;
import com.hazelcast.cluster.MemberSelector;
import com.hazelcast.cluster.MembershipEvent;
import com.hazelcast.cluster.MembershipListener;
import com.hazelcast.cluster.impl.MemberImpl;
import com.hazelcast.hotrestart.HotRestartService;
import com.hazelcast.instance.EndpointQualifier;
import com.hazelcast.instance.impl.HazelcastInstanceImpl;
import com.hazelcast.instance.impl.LifecycleServiceImpl;
import com.hazelcast.instance.impl.Node;
import com.hazelcast.internal.cluster.ClusterService;
import com.hazelcast.internal.cluster.Versions;
import com.hazelcast.internal.cluster.impl.operations.DemoteDataMemberOp;
import com.hazelcast.internal.cluster.impl.operations.ExplicitSuspicionOp;
import com.hazelcast.internal.cluster.impl.operations.OnJoinOp;
import com.hazelcast.internal.cluster.impl.operations.PromoteLiteMemberOp;
import com.hazelcast.internal.cluster.impl.operations.ShutdownNodeOp;
import com.hazelcast.internal.cluster.impl.operations.TriggerExplicitSuspicionOp;
import com.hazelcast.internal.metrics.MetricsRegistry;
import com.hazelcast.internal.metrics.Probe;
import com.hazelcast.internal.namespace.NamespaceUtil;
import com.hazelcast.internal.nio.Connection;
import com.hazelcast.internal.nio.ConnectionListener;
import com.hazelcast.internal.services.ManagedService;
import com.hazelcast.internal.services.TransactionalService;
import com.hazelcast.internal.util.Timer;
import com.hazelcast.internal.util.UuidUtil;
import com.hazelcast.internal.util.executor.ExecutorType;
import com.hazelcast.logging.ILogger;
import com.hazelcast.persistence.PersistenceService;
import com.hazelcast.spi.exception.RetryableHazelcastException;
import com.hazelcast.spi.impl.NodeEngine;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.spi.impl.eventservice.EventPublishingService;
import com.hazelcast.spi.impl.eventservice.EventRegistration;
import com.hazelcast.spi.impl.eventservice.EventService;
import com.hazelcast.spi.impl.executionservice.ExecutionService;
import com.hazelcast.spi.impl.operationservice.Operation;
import com.hazelcast.spi.impl.operationservice.OperationService;
import com.hazelcast.spi.impl.operationservice.impl.InvocationFuture;
import com.hazelcast.spi.properties.ClusterProperty;
import com.hazelcast.transaction.TransactionOptions;
import com.hazelcast.transaction.TransactionalObject;
import com.hazelcast.transaction.impl.Transaction;
import com.hazelcast.version.Version;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;

import static com.hazelcast.cluster.impl.MemberImpl.NA_MEMBER_LIST_JOIN_VERSION;
import static com.hazelcast.cluster.memberselector.MemberSelectors.NON_LOCAL_MEMBER_SELECTOR;
import static com.hazelcast.instance.EndpointQualifier.MEMBER;
import static com.hazelcast.internal.metrics.MetricDescriptorConstants.CLUSTER_METRIC_CLUSTER_SERVICE_SIZE;
import static com.hazelcast.internal.metrics.MetricDescriptorConstants.CLUSTER_PREFIX;
import static com.hazelcast.internal.metrics.MetricDescriptorConstants.CLUSTER_PREFIX_CLOCK;
import static com.hazelcast.internal.metrics.MetricDescriptorConstants.CLUSTER_PREFIX_HEARTBEAT;
import static com.hazelcast.internal.util.Preconditions.checkFalse;
import static com.hazelcast.internal.util.Preconditions.checkNotNull;
import static com.hazelcast.internal.util.Preconditions.checkTrue;
import static java.lang.String.format;
import static java.util.concurrent.TimeUnit.SECONDS;

@SuppressWarnings({"checkstyle:methodcount", "checkstyle:classdataabstractioncoupling", "checkstyle:classfanoutcomplexity"})
public class ClusterServiceImpl implements ClusterService, ConnectionListener, ManagedService,
        EventPublishingService, TransactionalService {

    /** Name of this service; passed as both leading arguments when membership listeners are (de)registered. */
    public static final String SERVICE_NAME = "hz:core:clusterService";
    /** Executor registered by the constructor and used by {@code init} to schedule the split-brain handler. */
    public static final String SPLIT_BRAIN_HANDLER_EXECUTOR_NAME = "hz:cluster:splitbrain";

    /** Name of the general cluster executor registered by the constructor. */
    static final String CLUSTER_EXECUTOR_NAME = "hz:cluster";
    /** Name of the membership event executor; registered with a pool size of one to keep event order. */
    static final String MEMBERSHIP_EVENT_EXECUTOR_NAME = "hz:cluster:event";
    /** Name of the version auto-upgrade executor registered by the constructor. */
    static final String VERSION_AUTO_UPGRADE_EXECUTOR_NAME = "hz:cluster:version:auto:upgrade";

    /** Fallback passed to {@code getPositiveMillisOrDefault} for both merge run delays in {@code init}. */
    private static final int DEFAULT_MERGE_RUN_DELAY_MILLIS = 100;
    // NOTE(review): not referenced in the visible part of the file; presumably a sleep interval
    // used while shutting the cluster down - confirm against the usage site.
    private static final long CLUSTER_SHUTDOWN_SLEEP_DURATION_IN_MILLIS = 1000;
    /** Whether Java assertions are enabled for this class; gates the extra member-view consistency check. */
    private static final boolean ASSERTION_ENABLED = ClusterServiceImpl.class.desiredAssertionStatus();
    // Messages handed to checkNotNull(...) by the cluster state / version change methods.
    private static final String TRANSACTION_OPTIONS_MUST_NOT_BE_NULL = "Transaction options must not be null!";
    private static final String STATE_MUST_NOT_BE_NULL = "State must not be null!";
    private static final String VERSION_MUST_NOT_BE_NULL = "Version must not be null!";

    private final Node node;
    private final ILogger logger;
    private final NodeEngineImpl nodeEngine;
    private final ClusterClockImpl clusterClock;
    private final MembershipManager membershipManager;
    private final ClusterJoinManager clusterJoinManager;
    private final ClusterStateManager clusterStateManager;
    private final ClusterHeartbeatManager clusterHeartbeatManager;
    /** Lock shared with the membership, state, join and heartbeat managers. */
    private final ReentrantLock clusterServiceLock = new ReentrantLock();
    /**
     * Current join state. The holder is swapped for a new one by {@code setJoined} and
     * {@code setMasterAddress}; the latch of the holder being replaced is counted down.
     * The type argument is required: the lambdas and {@code isJoined()} read
     * {@code JoinHolder} members through this reference.
     */
    private final AtomicReference<JoinHolder> joined =
            new AtomicReference<>(new JoinHolder(false));
    /** Updated by {@code setJoined}; exposed through {@code isJoinedBefore()}. */
    private final AtomicBoolean joinedBefore = new AtomicBoolean();

    private volatile UUID clusterId;
    private volatile Address masterAddress;
    /** Replaced with a freshly built member when the local member UUID is reset. */
    private volatile MemberImpl localMember;

    /**
     * Immutable snapshot of the join flag paired with a one-shot latch.
     * <p>
     * A fresh holder is installed whenever the join state or the master address is set,
     * and the latch of the holder being replaced is counted down at that moment.
     */
    private static class JoinHolder {
        private final boolean isJoined;
        private final CountDownLatch latch;

        JoinHolder(boolean isJoined) {
            this.latch = new CountDownLatch(1);
            this.isJoined = isJoined;
        }
    }

    /**
     * Creates the cluster service of the given node.
     * <p>
     * Builds the cluster clock and the membership, state, join and heartbeat managers
     * (all four receive {@code clusterServiceLock}), subscribes this service to MEMBER
     * connection events, registers the cluster executors and finally the static metrics.
     *
     * @param node        the node this service belongs to
     * @param localMember the member representing this node
     */
    public ClusterServiceImpl(Node node, MemberImpl localMember) {
        this.node = node;
        this.localMember = localMember;
        this.nodeEngine = node.nodeEngine;

        this.logger = node.getLogger(ClusterService.class.getName());
        this.clusterClock = new ClusterClockImpl(this.logger);

        // The statement order is kept as-is: the managers are handed a reference to this
        // not yet fully constructed service.
        this.membershipManager = new MembershipManager(node, this, this.clusterServiceLock);
        this.clusterStateManager = new ClusterStateManager(node, this.clusterServiceLock);
        this.clusterJoinManager = new ClusterJoinManager(node, this, this.clusterServiceLock);
        this.clusterHeartbeatManager = new ClusterHeartbeatManager(node, this, this.clusterServiceLock);

        node.getServer().getConnectionManager(MEMBER).addConnectionListener(this);

        final ExecutionService executors = this.nodeEngine.getExecutionService();
        executors.register(CLUSTER_EXECUTOR_NAME, 2, Integer.MAX_VALUE, ExecutorType.CACHED);
        executors.register(SPLIT_BRAIN_HANDLER_EXECUTOR_NAME, 2, Integer.MAX_VALUE, ExecutorType.CACHED);
        // The membership event executor gets a single thread so that events are executed in the correct order.
        executors.register(MEMBERSHIP_EVENT_EXECUTOR_NAME, 1, Integer.MAX_VALUE, ExecutorType.CACHED);
        executors.register(VERSION_AUTO_UPGRADE_EXECUTOR_NAME, 1, Integer.MAX_VALUE, ExecutorType.CACHED);

        registerMetrics();
    }

    /** Registers the static metrics of the cluster clock, the heartbeat manager and this service. */
    private void registerMetrics() {
        final MetricsRegistry registry = node.nodeEngine.getMetricsRegistry();
        registry.registerStaticMetrics(clusterClock, CLUSTER_PREFIX_CLOCK);
        registry.registerStaticMetrics(clusterHeartbeatManager, CLUSTER_PREFIX_HEARTBEAT);
        registry.registerStaticMetrics(this, CLUSTER_PREFIX);
    }

    /**
     * Schedules the repeating {@link SplitBrainHandler} run on the split-brain executor and
     * initializes the membership and heartbeat managers.
     * <p>
     * Both delays are read from the node properties and fall back to
     * {@code DEFAULT_MERGE_RUN_DELAY_MILLIS}.
     *
     * @param nodeEngine the engine whose execution service is used for scheduling
     * @param properties not read by this method
     */
    @Override
    public void init(NodeEngine nodeEngine, Properties properties) {
        final long firstRunDelayMillis = node.getProperties().getPositiveMillisOrDefault(
                ClusterProperty.MERGE_FIRST_RUN_DELAY_SECONDS, DEFAULT_MERGE_RUN_DELAY_MILLIS);
        final long nextRunDelayMillis = node.getProperties().getPositiveMillisOrDefault(
                ClusterProperty.MERGE_NEXT_RUN_DELAY_SECONDS, DEFAULT_MERGE_RUN_DELAY_MILLIS);

        nodeEngine.getExecutionService().scheduleWithRepetition(SPLIT_BRAIN_HANDLER_EXECUTOR_NAME,
                new SplitBrainHandler(node), firstRunDelayMillis, nextRunDelayMillis, TimeUnit.MILLISECONDS);

        membershipManager.init();
        clusterHeartbeatManager.init();
    }

    /**
     * Asks the membership manager to send membership events for a transition from an
     * empty member set to a set containing only the local member.
     */
    public void sendLocalMembershipEvent() {
        final MemberImpl self = getLocalMember();
        membershipManager.sendMembershipEvents(Collections.emptySet(), Collections.singleton(self), false);
    }

    /**
     * Forwards an explicit suspicion to the membership manager.
     *
     * @param expectedMembersViewMetadata members view metadata passed through unchanged
     * @param suspectedAddress            address passed through unchanged
     */
    public void handleExplicitSuspicion(MembersViewMetadata expectedMembersViewMetadata, Address suspectedAddress) {
        this.membershipManager.handleExplicitSuspicion(expectedMembersViewMetadata, suspectedAddress);
    }

    /**
     * Forwards an explicit suspicion trigger to the membership manager.
     *
     * @param caller                       address of the caller, passed through unchanged
     * @param callerMemberListVersion      member list version reported by the caller
     * @param suspectedMembersViewMetadata members view metadata passed through unchanged
     */
    public void handleExplicitSuspicionTrigger(Address caller, int callerMemberListVersion,
                                               MembersViewMetadata suspectedMembersViewMetadata) {
        this.membershipManager.handleExplicitSuspicionTrigger(
                caller, callerMemberListVersion, suspectedMembersViewMetadata);
    }

    /**
     * Hands the given member over to the membership manager as a suspect.
     *
     * @param suspectedMember   the member to suspect; cast to {@code MemberImpl}
     * @param reason            reason passed through to the membership manager
     * @param destroyConnection flag passed through to the membership manager
     */
    public void suspectMember(Member suspectedMember, String reason, boolean destroyConnection) {
        final MemberImpl suspect = (MemberImpl) suspectedMember;
        membershipManager.suspectMember(suspect, reason, destroyConnection);
    }

    /**
     * Suspects the member at the given address unless it is unknown or still has a live
     * MEMBER connection. Runs entirely under the cluster service lock.
     *
     * @param address the address to check
     */
    public void suspectAddressIfNotConnected(Address address) {
        clusterServiceLock.lock();
        try {
            final MemberImpl candidate = getMember(address);
            if (candidate == null) {
                if (logger.isFineEnabled()) {
                    logger.fine("Cannot suspect " + address + ", since it's not a member.");
                }
            } else {
                final Connection connection = node.getServer().getConnectionManager(MEMBER).get(address);
                final boolean connected = connection != null && connection.isAlive();
                if (connected) {
                    if (logger.isFineEnabled()) {
                        logger.fine("Cannot suspect " + candidate + ", since there's a live connection -> "
                                + connection);
                    }
                } else {
                    suspectMember(candidate, "No connection", false);
                }
            }
        } finally {
            clusterServiceLock.unlock();
        }
    }

    /**
     * Sends an {@link ExplicitSuspicionOp} to the member address named in the metadata.
     * <p>
     * Nothing is sent when that address is this node's own address (a warning is logged)
     * or when this node has not joined yet.
     *
     * @param endpointMembersViewMetadata metadata carried by the operation; also supplies the target address
     */
    void sendExplicitSuspicion(MembersViewMetadata endpointMembersViewMetadata) {
        final Address target = endpointMembersViewMetadata.getMemberAddress();
        final boolean targetIsSelf = target.equals(node.getThisAddress());
        if (targetIsSelf) {
            logger.warning("Cannot send explicit suspicion for " + endpointMembersViewMetadata + " to itself.");
            return;
        }

        final boolean notJoined = !isJoined();
        if (notJoined) {
            if (logger.isFineEnabled()) {
                logger.fine("Cannot send explicit suspicion, not joined yet!");
            }
            return;
        }

        final Version currentVersion = getClusterVersion();
        assert !currentVersion.isUnknown() : "Cluster version should not be unknown after join!";

        final Operation suspicionOp = new ExplicitSuspicionOp(endpointMembersViewMetadata);
        nodeEngine.getOperationService().send(suspicionOp, target);
    }

    /**
     * Sends a {@link TriggerExplicitSuspicionOp}, stamped with the current member list
     * version, to {@code triggerTo}. Only logs a warning when {@code triggerTo} is this
     * node's own address.
     *
     * @param triggerTo                   the address the operation is sent to
     * @param endpointMembersViewMetadata metadata carried by the operation
     */
    void sendExplicitSuspicionTrigger(Address triggerTo, MembersViewMetadata endpointMembersViewMetadata) {
        final boolean targetIsSelf = triggerTo.equals(node.getThisAddress());
        if (targetIsSelf) {
            logger.warning("Cannot send explicit suspicion trigger for " + endpointMembersViewMetadata + " to itself.");
            return;
        }

        final int currentListVersion = membershipManager.getMemberListVersion();
        final Operation triggerOp = new TriggerExplicitSuspicionOp(currentListVersion, endpointMembersViewMetadata);
        nodeEngine.getOperationService().send(triggerOp, triggerTo);
    }

    /**
     * Handles a mastership claim made by another member.
     * <p>
     * Under the cluster service lock the claim is validated (this node is joined, is not
     * master itself, and the candidate is a known member), then accepted only if
     * {@code shouldAcceptMastership} agrees and the candidate's suspicion can be cleared.
     * On acceptance the candidate becomes the master address of this node.
     *
     * @param candidateAddress address of the claiming member
     * @param candidateUuid    UUID of the claiming member
     * @return the tail members view built from the current member map for the candidate
     * @throws RetryableHazelcastException if {@code shouldAcceptMastership} rejects the candidate
     * @throws IllegalStateException       if the candidate's suspicion could not be cleared
     */
    public MembersView handleMastershipClaim(@Nonnull Address candidateAddress,
                                             @Nonnull UUID candidateUuid) {
        checkNotNull(candidateAddress);
        checkNotNull(candidateUuid);
        checkFalse(getThisAddress().equals(candidateAddress), "cannot accept my own mastership claim!");

        clusterServiceLock.lock();
        try {
            checkTrue(isJoined(), candidateAddress + " claims mastership but this node is not joined!");
            checkFalse(isMaster(), candidateAddress + " claims mastership but this node is master!");

            final MemberImpl claimant = membershipManager.getMember(candidateAddress, candidateUuid);
            checkTrue(claimant != null, candidateAddress + " claims mastership but it is not a member!");

            final MemberMap currentMembers = membershipManager.getMemberMap();
            final boolean acceptable = shouldAcceptMastership(currentMembers, claimant);
            if (!acceptable) {
                final String refusal = "Cannot accept mastership claim of " + candidateAddress
                        + " at the moment. There are more suitable master candidates in the member list.";
                logger.fine(refusal);
                throw new RetryableHazelcastException(refusal);
            }

            final boolean suspicionCleared = membershipManager.clearMemberSuspicion(claimant, "Mastership claim");
            if (!suspicionCleared) {
                throw new IllegalStateException("Cannot accept mastership claim of " + candidateAddress + ". "
                        + getMasterAddress() + " is already master.");
            }

            setMasterAddress(claimant.getAddress());

            final MembersView tailView = currentMembers.toTailMembersView(claimant, true);
            logger.warning("Mastership of " + candidateAddress + " is accepted. Response: " + tailView);
            return tailView;
        } finally {
            clusterServiceLock.unlock();
        }
    }

    /**
     * Decides whether a mastership claim may be accepted: every member returned by
     * {@code memberMap.headMemberSet(candidate, false)} must currently be suspected.
     * Must be called while holding the cluster service lock.
     *
     * @param memberMap the member map to inspect
     * @param candidate the member claiming mastership
     * @return {@code true} if all of those members are suspected, {@code false} otherwise
     */
    private boolean shouldAcceptMastership(MemberMap memberMap, MemberImpl candidate) {
        assert clusterServiceLock.isHeldByCurrentThread() : "Called without holding cluster service lock!";
        for (MemberImpl predecessor : memberMap.headMemberSet(candidate, false)) {
            if (membershipManager.isMemberSuspected(predecessor)) {
                continue;
            }

            // First non-suspected predecessor: the claim has to be refused.
            if (logger.isFineEnabled()) {
                logger.fine("Should not accept mastership claim of " + candidate + ", because " + predecessor
                        + " is not suspected at the moment and is before than " + candidate + " in the member list.");
            }
            return false;
        }
        return true;
    }

    /**
     * Points the joiner at {@code newTargetAddress} and runs a {@link ClusterMergeTask}
     * under the lifecycle lock.
     *
     * @param newTargetAddress the address handed to the joiner as its target
     */
    public void merge(Address newTargetAddress) {
        node.getJoiner().setTargetAddress(newTargetAddress);
        node.hazelcastInstance.getLifecycleService().runUnderLifecycleLock(new ClusterMergeTask(node));
    }

    /**
     * Resets the join state, the local member UUID, the cluster ID and the internal
     * state, in that order, while holding the cluster service lock.
     */
    @Override
    public void reset() {
        final ReentrantLock lock = clusterServiceLock;
        lock.lock();
        try {
            resetJoinState();
            resetLocalMemberUuid();
            resetClusterId();
            clearInternalState();
        } finally {
            lock.unlock();
        }
    }

    /**
     * Assigns a newly generated UUID to this node and replaces {@code localMember} with a
     * copy that carries the new UUID.
     * <p>
     * The version, attributes, lite-member flag, member list join version and address map
     * of the previous local member are carried over. The new UUID is also pushed to the
     * node, and the new member to the logging service and the local address registry.
     * Must be called under the cluster service lock while not joined.
     */
    private void resetLocalMemberUuid() {
        assert clusterServiceLock.isHeldByCurrentThread() : "Called without holding cluster service lock!";
        assert !isJoined() : "Cannot reset local member UUID when joined.";

        // Typed map instead of a raw Map, so the builder call is checked by the compiler.
        Map<EndpointQualifier, Address> addressMap = localMember.getAddressMap();
        UUID newUuid = UuidUtil.newUnsecureUUID();

        logger.warning("Resetting local member UUID. Previous: " + localMember.getUuid() + ", new: " + newUuid);
        node.setThisUuid(newUuid);
        localMember = new MemberImpl.Builder(addressMap)
                .version(localMember.getVersion())
                .localMember(true)
                .uuid(newUuid)
                .attributes(localMember.getAttributes())
                .liteMember(localMember.isLiteMember())
                .memberListJoinVersion(localMember.getMemberListJoinVersion())
                .instance(node.hazelcastInstance)
                .build();
        node.loggingService.setThisMember(localMember);
        node.getLocalAddressRegistry().setLocalUuid(newUuid);
    }

    /** Clears the master address and marks this node as not joined, under the cluster service lock. */
    public void resetJoinState() {
        final ReentrantLock lock = clusterServiceLock;
        lock.lock();
        try {
            setMasterAddress(null);
            setJoined(false);
        } finally {
            lock.unlock();
        }
    }

    /**
     * Completes the join of this node with the data sent by the master.
     * <p>
     * The join is refused when the caller is not the known master (an explicit suspicion is
     * sent back) or when this node is already joined. Otherwise the initial cluster state
     * and version, cluster ID and clock values are applied, the optional pre-join operation
     * is run, the member list is updated, a heartbeat is triggered and the node is marked
     * as joined. If the cluster version cannot be handled, the node shuts itself down.
     *
     * @return {@code true} if the join was finalized, {@code false} otherwise
     * @throws IllegalArgumentException if the update is not addressed to, or does not contain, the local member
     */
    @SuppressWarnings("checkstyle:parameternumber")
    public boolean finalizeJoin(MembersView membersView, Address callerAddress, UUID callerUuid, UUID targetUuid,
                                UUID clusterId, ClusterState clusterState, Version clusterVersion, long clusterStartTime,
                                long masterTime, OnJoinOp preJoinOp) {
        clusterServiceLock.lock();
        try {
            final boolean callerIsMaster = checkValidMaster(callerAddress);
            if (!callerIsMaster) {
                if (logger.isFineEnabled()) {
                    logger.fine("Not finalizing join because caller: " + callerAddress + " is not known master: "
                            + getMasterAddress());
                }
                sendExplicitSuspicion(
                        new MembersViewMetadata(callerAddress, callerUuid, callerAddress, membersView.getVersion()));
                return false;
            }

            if (isJoined()) {
                if (logger.isFineEnabled()) {
                    logger.fine("Node is already joined... No need to finalize join...");
                }
                return false;
            }

            checkMemberUpdateContainsLocalMember(membersView, targetUuid);

            try {
                initialClusterState(clusterState, clusterVersion);
            } catch (VersionMismatchException e) {
                // The node cannot handle the cluster version, so it has to go down. Doing it
                // here is safe because no operations have been executed yet.
                logger.severe(format("This member will shutdown because it cannot join the cluster: %s", e.getMessage()));
                node.shutdown(true);
                return false;
            }

            setClusterId(clusterId);
            final ClusterClockImpl clock = getClusterClock();
            clock.setClusterStartTime(clusterStartTime);
            clock.setMasterTime(masterTime);

            // The pre-join op runs before the member list update, so operations other than
            // join ops will be refused by the operation service.
            if (preJoinOp != null) {
                nodeEngine.getOperationService().run(preJoinOp);
            }

            membershipManager.updateMembers(membersView);
            clusterHeartbeatManager.heartbeat();
            setJoined(true);

            node.getNodeExtension().getAuditlogService()
                    .eventBuilder(AuditlogTypeIds.CLUSTER_MEMBER_ADDED)
                    .message("Member joined")
                    .addParameter("membersView", membersView)
                    .addParameter("address", node.getThisAddress())
                    .log();
            return true;
        } finally {
            clusterServiceLock.unlock();
        }
    }

    /**
     * Applies a member list update received from the master.
     * <p>
     * The update is ignored when this node is not joined, when the caller is not the known
     * master (an explicit suspicion is sent back unless a mastership claim is in progress)
     * or when {@code shouldProcessMemberUpdate} rejects the view.
     *
     * @return {@code true} if the member list was updated, {@code false} otherwise
     * @throws IllegalArgumentException if the update is not addressed to, or does not contain, the local member
     */
    public boolean updateMembers(MembersView membersView, Address callerAddress, UUID callerUuid, UUID targetUuid) {
        clusterServiceLock.lock();
        try {
            if (!isJoined()) {
                logger.warning("Not updating members received from caller: " + callerAddress + " because node is not joined! ");
                return false;
            }

            final boolean callerIsMaster = checkValidMaster(callerAddress);
            if (!callerIsMaster) {
                logger.warning("Not updating members because caller: " + callerAddress + " is not known master: "
                        + getMasterAddress());
                final MembersViewMetadata callerMetadata = new MembersViewMetadata(callerAddress, callerUuid,
                        callerAddress, membersView.getVersion());
                final boolean claimInProgress = clusterJoinManager.isMastershipClaimInProgress();
                if (!claimInProgress) {
                    sendExplicitSuspicion(callerMetadata);
                }
                return false;
            }

            checkMemberUpdateContainsLocalMember(membersView, targetUuid);

            if (shouldProcessMemberUpdate(membersView)) {
                membershipManager.updateMembers(membersView);
                return true;
            }
            return false;
        } finally {
            clusterServiceLock.unlock();
        }
    }

    /**
     * Verifies that a member update is addressed to this node and lists the local member.
     *
     * @param membersView the received members view
     * @param targetUuid  the UUID the update was addressed to
     * @throws IllegalArgumentException if {@code targetUuid} differs from this node's UUID, or the
     *                                  view does not contain the local member's address and UUID
     */
    private void checkMemberUpdateContainsLocalMember(MembersView membersView, UUID targetUuid) {
        final boolean addressedToUs = getThisUuid().equals(targetUuid);
        if (!addressedToUs) {
            throw new IllegalArgumentException("Not applying member update because target uuid: " + targetUuid
                    + " is different! -> " + membersView + ", local member: " + localMember);
        }

        final Member self = getLocalMember();
        final boolean listed = membersView.containsMember(self.getAddress(), self.getUuid());
        if (!listed) {
            throw new IllegalArgumentException("Not applying member update because member list doesn't contain us! -> "
                    + membersView + ", local member: " + self);
        }
    }

    /**
     * Tells whether the caller is the master currently known to this node.
     *
     * @param callerAddress the caller's address, may be {@code null}
     * @return {@code true} only if {@code callerAddress} is non-null and equals the known master address
     */
    private boolean checkValidMaster(Address callerAddress) {
        if (callerAddress == null) {
            return false;
        }
        return callerAddress.equals(getMasterAddress());
    }

    private boolean shouldProcessMemberUpdate(MembersView membersView) {
        int memberListVersion = membershipManager.getMemberListVersion();
        if (memberListVersion > membersView.getVersion()) {
            if (logger.isFineEnabled()) {
                logger.fine("Received an older member update, ignoring... Current version: "
                        + memberListVersion + ", Received version: " + membersView.getVersion());
            }

            return false;
        }

        if (memberListVersion == membersView.getVersion()) {
            if (ASSERTION_ENABLED) {
                MemberMap memberMap = membershipManager.getMemberMap();
                Collection
currentAddresses = memberMap.getAddresses(); Collection
newAddresses = membersView.getAddresses(); assert currentAddresses.size() == newAddresses.size() && newAddresses.containsAll(currentAddresses) : "Member view versions are same but new member view doesn't match the current!" + " Current: " + memberMap.toMembersView() + ", New: " + membersView; } if (logger.isFineEnabled()) { logger.fine("Received a periodic member update, ignoring... Version: " + memberListVersion); } return false; } return true; } @Override public void connectionAdded(Connection connection) { } @Override public void connectionRemoved(Connection connection) { if (logger.isFineEnabled()) { logger.fine("Removed connection to " + connection.getRemoteAddress()); } if (!isJoined()) { Address masterAddress = getMasterAddress(); if (masterAddress != null && masterAddress.equals(connection.getRemoteAddress())) { setMasterAddressToJoin(null); } } } public NodeEngineImpl getNodeEngine() { return nodeEngine; } @Override public boolean isMissingMember(Address address, UUID uuid) { return membershipManager.isMissingMember(address, uuid); } public Collection getActiveAndMissingMembers() { return membershipManager.getActiveAndMissingMembers(); } public void notifyForRemovedMember(MemberImpl member) { clusterServiceLock.lock(); try { membershipManager.onMemberRemove(member); } finally { clusterServiceLock.unlock(); } } public void shrinkMissingMembers(Collection memberUuidsToRemove) { membershipManager.shrinkMissingMembers(memberUuidsToRemove); } @Override public MemberImpl getMember(Address address) { if (address == null) { return null; } return membershipManager.getMember(address); } @Override public MemberImpl getMember(UUID uuid) { if (uuid == null) { return null; } return membershipManager.getMember(uuid); } @Override public MemberImpl getMember(Address address, UUID uuid) { if (address == null || uuid == null) { return null; } return membershipManager.getMember(address, uuid); } @Override @Nonnull public Collection getMemberImpls() { return 
membershipManager.getMembers(); } public Collection
getMemberAddresses() { return membershipManager.getMemberMap().getAddresses(); } @Override @Nonnull public Set getMembers() { return membershipManager.getMemberSet(); } @Override public Collection getMembers(MemberSelector selector) { return (Collection) new MemberSelectingCollection(membershipManager.getMembers(), selector); } @Override public void shutdown(boolean terminate) { clearInternalState(); } private void clearInternalState() { clusterServiceLock.lock(); try { membershipManager.reset(); clusterHeartbeatManager.reset(); clusterStateManager.reset(); clusterJoinManager.reset(); resetJoinState(); } finally { clusterServiceLock.unlock(); } } public boolean setMasterAddressToJoin(final Address master) { clusterServiceLock.lock(); try { if (isJoined()) { Address currentMasterAddress = getMasterAddress(); if (!currentMasterAddress.equals(master)) { logger.warning("Cannot set master address to " + master + " because node is already joined! Current master: " + currentMasterAddress); } else if (logger.isFineEnabled()) { logger.fine("Master address is already set to " + master); } return false; } setMasterAddress(master); return true; } finally { clusterServiceLock.unlock(); } } // should be called under lock void setMasterAddress(Address master) { assert clusterServiceLock.isHeldByCurrentThread() : "Called without holding cluster service lock!"; if (logger.isFineEnabled()) { logger.fine("Setting master address to " + master); } masterAddress = master; joined.getAndUpdate(holder -> new JoinHolder(holder.isJoined)).latch.countDown(); } @Override public Address getMasterAddress() { return masterAddress; } @Override public boolean isMaster() { return node.getThisAddress().equals(masterAddress); } @Override @Nonnull public Address getThisAddress() { return node.getThisAddress(); } @Override @Nonnull public UUID getThisUuid() { return node.getThisUuid(); } @Override @Nonnull public MemberImpl getLocalMember() { return localMember; } // should be called under lock void 
setJoined(boolean val) { assert clusterServiceLock.isHeldByCurrentThread() : "Called without holding cluster service lock!"; joined.getAndUpdate(holder -> new JoinHolder(val)).latch.countDown(); joinedBefore.compareAndSet(false, val); if (!node.getNodeExtension().getInternalHotRestartService().isStartCompleted()) { // Hot restart can reset join state. We should allow it to reset joinBefore // because a member which didn't complete hot restart is more similar to a // member which never joined before. Because that member's nodeEngine can't // return true to nodeEngine.isStartCompleted() call. joinedBefore.set(val); } } @Override public boolean isJoined() { return joined.get().isJoined; } @Override public boolean isJoinedBefore() { return joinedBefore.get(); } @Probe(name = CLUSTER_METRIC_CLUSTER_SERVICE_SIZE) @Override public int getSize() { return membershipManager.getMemberMap().size(); } @Override public int getSize(MemberSelector selector) { int size = 0; for (MemberImpl member : membershipManager.getMembers()) { if (selector.select(member)) { size++; } } return size; } @Override @Nonnull public ClusterClockImpl getClusterClock() { return clusterClock; } @Override public long getClusterTime() { return clusterClock.getClusterTime(); } @Override public UUID getClusterId() { return clusterId; } // called under cluster service lock void setClusterId(UUID newClusterId) { assert clusterServiceLock.isHeldByCurrentThread() : "Called without holding cluster service lock!"; assert clusterId == null : "Cluster ID should be null: " + clusterId; clusterId = newClusterId; } // called under cluster service lock private void resetClusterId() { assert clusterServiceLock.isHeldByCurrentThread() : "Called without holding cluster service lock!"; clusterId = null; } @Nonnull public UUID addMembershipListener(@Nonnull MembershipListener listener) { checkNotNull(listener, "listener cannot be null"); EventService eventService = nodeEngine.getEventService(); EventRegistration 
registration; if (listener instanceof InitialMembershipListener membershipListener) { clusterServiceLock.lock(); try { membershipListener.init(new InitialMembershipEvent(this, getMembers())); registration = eventService.registerLocalListener(SERVICE_NAME, SERVICE_NAME, listener); } finally { clusterServiceLock.unlock(); } } else { registration = eventService.registerLocalListener(SERVICE_NAME, SERVICE_NAME, listener); } return registration.getId(); } public boolean removeMembershipListener(@Nonnull UUID registrationId) { checkNotNull(registrationId, "registrationId cannot be null"); EventService eventService = nodeEngine.getEventService(); return eventService.deregisterListener(SERVICE_NAME, SERVICE_NAME, registrationId); } @SuppressFBWarnings("BC_UNCONFIRMED_CAST") @Override public void dispatchEvent(MembershipEvent event, MembershipListener listener) { // Call with `null` namespace, which will fallback to a default Namespace if available NamespaceUtil.runWithNamespace(nodeEngine, null, () -> { switch (event.getEventType()) { case MembershipEvent.MEMBER_ADDED: listener.memberAdded(event); break; case MembershipEvent.MEMBER_REMOVED: listener.memberRemoved(event); break; default: throw new IllegalArgumentException("Unhandled event: " + event); } }); } public String getMemberListString() { return membershipManager.memberListString(); } void printMemberList() { logger.info(getMemberListString()); } @Nonnull @Override public ClusterState getClusterState() { return clusterStateManager.getState(); } @Override public T createTransactionalObject(String name, Transaction transaction) { throw new UnsupportedOperationException(SERVICE_NAME + " does not support TransactionalObjects!"); } @Override public void rollbackTransaction(UUID transactionId) { clusterStateManager.rollbackClusterState(transactionId); } @Override public void changeClusterState(@Nonnull ClusterState newState) { checkNotNull(newState, STATE_MUST_NOT_BE_NULL); changeClusterState(newState, false); } public 
void changeClusterState(ClusterState newState, boolean isTransient) {
        // The partition state stamp is read before the member map, then both are handed
        // to the cluster state manager together with the requested state.
        long stamp = getPartitionStateStamp();
        ClusterStateChange stateChange = ClusterStateChange.from(newState);
        clusterStateManager.changeClusterState(stateChange, membershipManager.getMemberMap(), stamp, isTransient);
    }

    /**
     * Changes the cluster state non-transiently using the given transaction options.
     *
     * @param newState the target state; must not be null
     * @param options  transaction options; must not be null
     */
    @Override
    public void changeClusterState(@Nonnull ClusterState newState, @Nonnull TransactionOptions options) {
        checkNotNull(newState, STATE_MUST_NOT_BE_NULL);
        checkNotNull(options, TRANSACTION_OPTIONS_MUST_NOT_BE_NULL);
        changeClusterState(newState, options, false);
    }

    /** Same as the public overload, with an explicit {@code isTransient} flag passed through. */
    private void changeClusterState(@Nonnull ClusterState newState, @Nonnull TransactionOptions options,
                                    boolean isTransient) {
        long stamp = getPartitionStateStamp();
        ClusterStateChange stateChange = ClusterStateChange.from(newState);
        clusterStateManager.changeClusterState(stateChange, membershipManager.getMemberMap(), options, stamp,
                isTransient);
    }

    /** Returns the cluster version held by the cluster state manager. */
    @Override
    @Nonnull
    public Version getClusterVersion() {
        return clusterStateManager.getClusterVersion();
    }

    /** Returns the hot restart service provided by the node extension. */
    @Override
    public HotRestartService getHotRestartService() {
        return node.getNodeExtension().getHotRestartService();
    }

    /** Returns the node extension's hot restart service, exposed as a persistence service. */
    @Override
    @Nonnull
    public PersistenceService getPersistenceService() {
        return node.getNodeExtension().getHotRestartService();
    }

    /**
     * Changes the cluster version against the current member map.
     *
     * @param version the target version; must not be null
     */
    @Override
    public void changeClusterVersion(@Nonnull Version version) {
        checkNotNull(version, VERSION_MUST_NOT_BE_NULL);
        changeClusterVersion(version, membershipManager.getMemberMap());
    }

    /** Changes the cluster version against the supplied member map (non-transient). */
    public void changeClusterVersion(@Nonnull Version version, @Nonnull MemberMap memberMap) {
        long stamp = getPartitionStateStamp();
        ClusterStateChange versionChange = ClusterStateChange.from(version);
        clusterStateManager.changeClusterState(versionChange, memberMap, stamp, false);
    }

    /**
     * Changes the cluster version using the given transaction options.
     *
     * @param version the target version; must not be null
     * @param options transaction options; must not be null
     */
    @Override
    public void changeClusterVersion(@Nonnull Version version, @Nonnull TransactionOptions options) {
        checkNotNull(version, VERSION_MUST_NOT_BE_NULL);
        checkNotNull(options, TRANSACTION_OPTIONS_MUST_NOT_BE_NULL);
        long partitionStateStamp = getPartitionStateStamp();
clusterStateManager.changeClusterState(ClusterStateChange.from(version), membershipManager.getMemberMap(), options, partitionStateStamp, false); } private long getPartitionStateStamp() { return node.getPartitionService().getPartitionStateStamp(); } @Override public int getMemberListJoinVersion() { clusterServiceLock.lock(); try { if (!isJoined()) { throw new IllegalStateException("Member list join version is not available when not joined"); } int joinVersion = localMember.getMemberListJoinVersion(); if (joinVersion == NA_MEMBER_LIST_JOIN_VERSION) { // This can happen when the cluster was just upgraded to 3.10, but this member did not yet learn // its node ID by an async call from master. throw new IllegalStateException("Member list join version is not yet available"); } return joinVersion; } finally { clusterServiceLock.unlock(); } } @Override public void shutdown() { shutdownCluster(null); } @Override public void shutdown(@Nullable TransactionOptions options) { shutdownCluster(options); } private void shutdownCluster(TransactionOptions options) { if (options == null) { changeClusterState(ClusterState.PASSIVE, true); } else { changeClusterState(ClusterState.PASSIVE, options, true); } node.getNodeExtension().getAuditlogService().eventBuilder(AuditlogTypeIds.CLUSTER_SHUTDOWN) .message("Shutting down the cluster") .log(); long timeoutNanos = node.getProperties().getNanos(ClusterProperty.CLUSTER_SHUTDOWN_TIMEOUT_SECONDS); long startNanos = Timer.nanos(); node.getNodeExtension().getInternalHotRestartService() .waitPartitionReplicaSyncOnCluster(timeoutNanos, TimeUnit.NANOSECONDS); timeoutNanos -= (Timer.nanosElapsed(startNanos)); if (node.config.getCPSubsystemConfig().getCPMemberCount() == 0) { shutdownNodesConcurrently(timeoutNanos); } else { shutdownNodesSerially(timeoutNanos); } } private void shutdownNodesConcurrently(final long timeoutNanos) { Operation op = new ShutdownNodeOp(); Collection members = getMembers(NON_LOCAL_MEMBER_SELECTOR); long startTimeNanos = 
Timer.nanos(); logger.info("Sending shut down operations to all members..."); while (Timer.nanosElapsed(startTimeNanos) < timeoutNanos && !members.isEmpty()) { for (Member member : members) { nodeEngine.getOperationService().send(op, member.getAddress()); } try { Thread.sleep(CLUSTER_SHUTDOWN_SLEEP_DURATION_IN_MILLIS); } catch (InterruptedException e) { Thread.currentThread().interrupt(); logger.warning("Shutdown sleep interrupted. ", e); break; } members = getMembers(NON_LOCAL_MEMBER_SELECTOR); } logger.info("Number of other members remaining: " + getSize(NON_LOCAL_MEMBER_SELECTOR) + ". Shutting down itself."); HazelcastInstanceImpl hazelcastInstance = node.hazelcastInstance; hazelcastInstance.getLifecycleService().shutdown(); } private void shutdownNodesSerially(final long timeoutNanos) { Operation op = new ShutdownNodeOp(); long startTimeNanos = Timer.nanos(); Collection members = getMembers(NON_LOCAL_MEMBER_SELECTOR); logger.info("Sending shut down operations to other members one by one..."); while (Timer.nanosElapsed(startTimeNanos) < timeoutNanos && !members.isEmpty()) { Member member = members.iterator().next(); nodeEngine.getOperationService().send(op, member.getAddress()); members = getMembers(NON_LOCAL_MEMBER_SELECTOR); try { Thread.sleep(CLUSTER_SHUTDOWN_SLEEP_DURATION_IN_MILLIS); } catch (InterruptedException e) { Thread.currentThread().interrupt(); logger.warning("Shutdown sleep interrupted. ", e); break; } } logger.info("Number of other members remaining: " + getSize(NON_LOCAL_MEMBER_SELECTOR) + ". Shutting down itself."); HazelcastInstanceImpl hazelcastInstance = node.hazelcastInstance; hazelcastInstance.getLifecycleService().shutdown(); } private void initialClusterState(ClusterState clusterState, Version version) { if (isJoined()) { throw new IllegalStateException("Cannot set initial state after node joined! 
-> " + clusterState); } clusterStateManager.initialClusterState(clusterState, version); } public MembershipManager getMembershipManager() { return membershipManager; } public ClusterStateManager getClusterStateManager() { return clusterStateManager; } public ClusterJoinManager getClusterJoinManager() { return clusterJoinManager; } public ClusterHeartbeatManager getClusterHeartbeatManager() { return clusterHeartbeatManager; } @Override public void promoteLocalLiteMember() { MemberImpl member = getLocalMember(); if (!member.isLiteMember()) { throw new IllegalStateException(member + " is not a lite member!"); } MemberImpl master = getMasterMember(); PromoteLiteMemberOp op = new PromoteLiteMemberOp(); op.setCallerUuid(member.getUuid()); InvocationFuture future = nodeEngine.getOperationService().invokeOnTarget(SERVICE_NAME, op, master.getAddress()); MembersView view = future.joinInternal(); clusterServiceLock.lock(); try { if (!member.getAddress().equals(master.getAddress())) { updateMembers(view, master.getAddress(), master.getUuid(), getThisUuid()); } MemberImpl localMemberInMemberList = membershipManager.getMember(member.getAddress()); boolean isStillLiteMember = localMemberInMemberList.isLiteMember(); node.getNodeExtension().getAuditlogService().eventBuilder(AuditlogTypeIds.CLUSTER_PROMOTE_MEMBER) .message("Promotion of the lite member") .addParameter("success", !isStillLiteMember) .addParameter("address", node.getThisAddress()) .log(); if (isStillLiteMember) { throw new IllegalStateException("Cannot promote to data member! 
Previous master was: " + master.getAddress() + ", Current master is: " + getMasterAddress()); } } finally { clusterServiceLock.unlock(); } } MemberImpl promoteAndGetLocalMember() { MemberImpl member = getLocalMember(); assert member.isLiteMember() : "Local member is not lite member!"; assert clusterServiceLock.isHeldByCurrentThread() : "Called without holding cluster service lock!"; localMember = new MemberImpl.Builder(member.getAddressMap()) .version(member.getVersion()) .localMember(true) .uuid(member.getUuid()) .attributes(member.getAttributes()) .memberListJoinVersion(member.getMemberListJoinVersion()) .instance(node.hazelcastInstance) .build(); node.loggingService.setThisMember(localMember); return localMember; } MemberImpl demoteAndGetLocalMember() { MemberImpl member = getLocalMember(); assert !member.isLiteMember() : "Local member is not data member!"; assert clusterServiceLock.isHeldByCurrentThread() : "Called without holding cluster service lock!"; localMember = new MemberImpl.Builder(member.getAddressMap()) .version(member.getVersion()) .localMember(true) .uuid(member.getUuid()) .attributes(member.getAttributes()) .memberListJoinVersion(member.getMemberListJoinVersion()) .instance(node.hazelcastInstance) .liteMember(true) .build(); node.loggingService.setThisMember(localMember); return localMember; } @Override public void demoteLocalDataMember() { if (getClusterVersion().isUnknownOrLessThan(Versions.V5_4)) { throw new UnsupportedOperationException("demoteLocalDataMember requires cluster version 5.4 or greater"); } MemberImpl member = getLocalMember(); if (member.isLiteMember()) { throw new IllegalStateException(member + " is not a data member!"); } MemberImpl master = getMasterMember(); long maxWaitSeconds = node.getProperties().getSeconds(ClusterProperty.DEMOTE_MAX_WAIT); if (!nodeEngine.getPartitionService().onDemote(maxWaitSeconds, SECONDS)) { throw new IllegalStateException("Cannot demote to lite member! 
Previous master was: " + master.getAddress() + ", Current master is: " + getMasterAddress() + ". Cluster state is " + getClusterState()); } DemoteDataMemberOp op = new DemoteDataMemberOp(); op.setCallerUuid(member.getUuid()); InvocationFuture future = nodeEngine.getOperationService().invokeOnMaster(SERVICE_NAME, op); MembersViewResponse response = future.joinInternal(); clusterServiceLock.lock(); try { if (!node.isMaster()) { updateMembers(response.getMembersView(), response.getMemberAddress(), response.getMemberUuid(), getThisUuid()); } MemberImpl localMemberInMemberList = membershipManager.getMember(member.getAddress()); boolean isNowLiteMember = localMemberInMemberList.isLiteMember(); node.getNodeExtension().getAuditlogService().eventBuilder(AuditlogTypeIds.CLUSTER_DEMOTE_MEMBER) .message("Demotion of the data member") .addParameter("success", isNowLiteMember) .addParameter("address", node.getThisAddress()) .log(); if (!isNowLiteMember) { throw new IllegalStateException("Cannot demote to lite member! Previous master was: " + master.getAddress() + ", Current master is: " + getMasterAddress()); } } finally { clusterServiceLock.unlock(); } } @Override public int getMemberListVersion() { return membershipManager.getMemberListVersion(); } private MemberImpl getMasterMember() { MemberImpl master; clusterServiceLock.lock(); try { Address masterAddress = getMasterAddress(); if (masterAddress == null) { throw new IllegalStateException("Master is not known yet!"); } master = getMember(masterAddress); } finally { clusterServiceLock.unlock(); } return master; } @Override public String toString() { return "ClusterService" + "{address=" + getThisAddress() + '}'; } /** * @param timeoutMillis the maximum time in millis to block on join * @return true is cluster has been joined, false if timed out * @throws InterruptedException */ public boolean blockOnJoin(long timeoutMillis) throws InterruptedException { return joined.get().latch.await(timeoutMillis, TimeUnit.MILLISECONDS); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy