All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.cluster.impl.ClusterJoinManager Maven / Gradle / Ivy

There is a newer version: 5.4.0
Show newest version
/*
 * Copyright (c) 2008-2016, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.cluster.impl;

import com.hazelcast.cluster.ClusterState;
import com.hazelcast.cluster.MemberInfo;
import com.hazelcast.cluster.impl.operations.AuthenticationFailureOperation;
import com.hazelcast.cluster.impl.operations.BeforeJoinCheckFailureOperation;
import com.hazelcast.cluster.impl.operations.ConfigMismatchOperation;
import com.hazelcast.cluster.impl.operations.FinalizeJoinOperation;
import com.hazelcast.cluster.impl.operations.GroupMismatchOperation;
import com.hazelcast.cluster.impl.operations.JoinRequestOperation;
import com.hazelcast.cluster.impl.operations.MasterDiscoveryOperation;
import com.hazelcast.cluster.impl.operations.MemberInfoUpdateOperation;
import com.hazelcast.cluster.impl.operations.PostJoinOperation;
import com.hazelcast.cluster.impl.operations.SetMasterOperation;
import com.hazelcast.instance.BuildInfo;
import com.hazelcast.instance.GroupProperty;
import com.hazelcast.instance.MemberImpl;
import com.hazelcast.instance.Node;
import com.hazelcast.logging.ILogger;
import com.hazelcast.nio.Address;
import com.hazelcast.nio.Connection;
import com.hazelcast.nio.Packet;
import com.hazelcast.partition.PartitionRuntimeState;
import com.hazelcast.security.Credentials;
import com.hazelcast.spi.Operation;
import com.hazelcast.spi.OperationService;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.util.Clock;
import com.hazelcast.util.FutureUtil;

import javax.security.auth.login.LoginContext;
import javax.security.auth.login.LoginException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.logging.Level;

import static com.hazelcast.cluster.impl.ClusterServiceImpl.createMemberInfoList;
import static com.hazelcast.cluster.impl.operations.FinalizeJoinOperation.FINALIZE_JOIN_MAX_TIMEOUT;
import static com.hazelcast.cluster.impl.operations.FinalizeJoinOperation.FINALIZE_JOIN_TIMEOUT_FACTOR;
import static com.hazelcast.util.FutureUtil.logAllExceptions;
import static com.hazelcast.util.FutureUtil.waitWithDeadline;
import static com.hazelcast.util.Preconditions.checkNotNull;
import static java.lang.String.format;

/**
 * ClusterJoinManager manages member join process.
 * 

* If this node is not master, * then it will answer with sending master node's address to a join request. *

* If this is master node, it will handle join request and notify all other members * about newly joined member. */ public class ClusterJoinManager { private static final int CLUSTER_OPERATION_RETRY_COUNT = 100; private final ILogger logger; private final Node node; private final NodeEngineImpl nodeEngine; private final ClusterServiceImpl clusterService; private final Lock clusterServiceLock; private final ClusterClockImpl clusterClock; private final ClusterStateManager clusterStateManager; private final Set setJoins = new LinkedHashSet(100); private final long maxWaitMillisBeforeJoin; private final long waitMillisBeforeJoin; private final FutureUtil.ExceptionHandler whileFinalizeJoinsExceptionHandler; private long firstJoinRequest; private long timeToStartJoin; private boolean joinInProgress; public ClusterJoinManager(Node node, ClusterServiceImpl clusterService, Lock clusterServiceLock) { this.node = node; this.clusterService = clusterService; this.clusterServiceLock = clusterServiceLock; this.nodeEngine = clusterService.getNodeEngine(); logger = node.getLogger(getClass()); clusterStateManager = clusterService.getClusterStateManager(); clusterClock = clusterService.getClusterClock(); maxWaitMillisBeforeJoin = node.groupProperties.getMillis(GroupProperty.MAX_WAIT_SECONDS_BEFORE_JOIN); waitMillisBeforeJoin = node.groupProperties.getMillis(GroupProperty.WAIT_SECONDS_BEFORE_JOIN); whileFinalizeJoinsExceptionHandler = logAllExceptions(logger, "While waiting finalize join calls...", Level.WARNING); } public boolean isJoinInProgress() { if (joinInProgress) { return true; } clusterServiceLock.lock(); try { return joinInProgress || !setJoins.isEmpty(); } finally { clusterServiceLock.unlock(); } } public void handleJoinRequest(JoinRequest joinRequest, Connection connection) { if (!ensureNodeIsReady()) { return; } if (!ensureValidConfiguration(joinRequest)) { return; } Address target = joinRequest.getAddress(); boolean isRequestFromCurrentMaster = target.equals(node.getMasterAddress()); // if the join request from current master, do not send a master answer, // because master can somehow dropped its connection and wants to join back if (!node.isMaster() && !isRequestFromCurrentMaster) { sendMasterAnswer(target); return; } if (joinInProgress) { if (logger.isFinestEnabled()) { logger.finest(format("Join is in progress, cannot handle join request from %s at the moment", target)); } return; } executeJoinRequest(joinRequest, connection, target); } private boolean ensureNodeIsReady() { if (node.joined() && node.isRunning()) { return true; } if (logger.isFinestEnabled()) { logger.finest("Node is not ready to process join request..."); } return false; } private boolean ensureValidConfiguration(JoinMessage joinMessage) { Address address = joinMessage.getAddress(); try { if (isValidJoinMessage(joinMessage)) { return true; } logger.warning(format("Received an invalid join request from %s, cause: clusters part of different cluster-groups", address)); nodeEngine.getOperationService().send(new GroupMismatchOperation(), address); } catch (ConfigMismatchException e) { logger.warning(format("Received an invalid join request from %s, cause: %s", address, e.getMessage())); OperationService operationService = nodeEngine.getOperationService(); operationService.send(new ConfigMismatchOperation(e.getMessage()), address); } return false; } private boolean isValidJoinMessage(JoinMessage joinMessage) { try { return validateJoinMessage(joinMessage); } catch (ConfigMismatchException e) { throw e; } catch (Exception e) { return false; } } public boolean validateJoinMessage(JoinMessage joinMessage) throws Exception { if (joinMessage.getPacketVersion() != Packet.VERSION) { return false; } try { ConfigCheck newMemberConfigCheck = joinMessage.getConfigCheck(); ConfigCheck clusterConfigCheck = node.createConfigCheck(); return clusterConfigCheck.isCompatible(newMemberConfigCheck); } catch (Exception e) { logger.warning(format("Invalid join request from %s, cause: %s", joinMessage.getAddress(), e.getMessage())); throw e; } } private void executeJoinRequest(JoinRequest joinRequest, Connection connection, Address target) { clusterServiceLock.lock(); try { if (checkIfJoinRequestFromAnExistingMember(joinRequest, connection)) { return; } if (checkClusterStateBeforeJoin(target)) { return; } long now = Clock.currentTimeMillis(); if (logger.isFinestEnabled()) { String timeToStart = (timeToStartJoin > 0 ? ", timeToStart: " + (timeToStartJoin - now) : ""); logger.finest(format("Handling join from %s, joinInProgress: %b%s", target, joinInProgress, timeToStart)); } MemberInfo memberInfo = getMemberInfo(joinRequest, target); if (memberInfo == null) { return; } if (!validateJoinRequest(target)) { return; } startJoinRequest(target, now, memberInfo); } finally { clusterServiceLock.unlock(); } } private boolean checkClusterStateBeforeJoin(Address target) { if (clusterStateManager.getState() == ClusterState.IN_TRANSITION) { String message = "Cluster state either is in transition process. Join is not allowed for now -> " + clusterStateManager.stateToString(); logger.warning(message); OperationService operationService = nodeEngine.getOperationService(); operationService.send(new BeforeJoinCheckFailureOperation(message), target); return true; } else if (clusterStateManager.getState() != ClusterState.ACTIVE) { if (!clusterService.isMemberRemovedWhileClusterIsNotActive(target)) { String message = "Cluster state either is locked or doesn't allow new members to join -> " + clusterStateManager.stateToString(); logger.warning(message); OperationService operationService = nodeEngine.getOperationService(); operationService.send(new BeforeJoinCheckFailureOperation(message), target); return true; } } return false; } private MemberInfo getMemberInfo(JoinRequest joinRequest, Address target) { MemberInfo memberInfo = joinRequest.toMemberInfo(); if (!setJoins.contains(memberInfo)) { try { checkSecureLogin(joinRequest, memberInfo); } catch (Exception e) { ILogger securityLogger = node.loggingService.getLogger("com.hazelcast.security"); nodeEngine.getOperationService().send(new AuthenticationFailureOperation(), target); securityLogger.severe(e); return null; } } return memberInfo; } private void checkSecureLogin(JoinRequest joinRequest, MemberInfo newMemberInfo) { if (node.securityContext != null && !setJoins.contains(newMemberInfo)) { Credentials credentials = joinRequest.getCredentials(); if (credentials == null) { throw new SecurityException("Expecting security credentials, but credentials could not be found in join request"); } try { LoginContext loginContext = node.securityContext.createMemberLoginContext(credentials); loginContext.login(); } catch (LoginException e) { throw new SecurityException(format("Authentication has failed for %s@%s, cause: %s", credentials.getPrincipal(), credentials.getEndpoint(), e.getMessage())); } } } private boolean validateJoinRequest(Address target) { if (node.isMaster()) { try { node.getNodeExtension().validateJoinRequest(); } catch (Exception e) { logger.warning(e.getMessage()); nodeEngine.getOperationService().send(new BeforeJoinCheckFailureOperation(e.getMessage()), target); return false; } } return true; } private void startJoinRequest(Address target, long now, MemberInfo memberInfo) { if (firstJoinRequest == 0) { firstJoinRequest = now; } if (setJoins.add(memberInfo)) { sendMasterAnswer(target); if (now - firstJoinRequest < maxWaitMillisBeforeJoin) { timeToStartJoin = now + waitMillisBeforeJoin; } } if (now >= timeToStartJoin) { startJoin(); } } public boolean sendJoinRequest(Address toAddress, boolean withCredentials) { if (toAddress == null) { toAddress = node.getMasterAddress(); } JoinRequestOperation joinRequest = new JoinRequestOperation(node.createJoinRequest(withCredentials)); return nodeEngine.getOperationService().send(joinRequest, toAddress); } public void handleMaster(Address masterAddress, Address callerAddress) { if (node.joined()) { if (logger.isFinestEnabled()) { logger.finest(format("Ignoring master response %s from %s, this node is already joined", masterAddress, callerAddress)); } return; } if (node.getThisAddress().equals(masterAddress)) { if (node.isMaster()) { logger.finest(format("Ignoring master response %s from %s, this node is already master", masterAddress, callerAddress)); } else { node.setAsMaster(); } return; } handleMasterResponse(masterAddress, callerAddress); } private void handleMasterResponse(Address masterAddress, Address callerAddress) { clusterServiceLock.lock(); try { if (logger.isFinestEnabled()) { logger.finest(format("Handling master response %s from %s", masterAddress, callerAddress)); } Address currentMaster = node.getMasterAddress(); if (currentMaster == null || currentMaster.equals(masterAddress)) { setMasterAndJoin(masterAddress); return; } if (currentMaster.equals(callerAddress)) { logger.info(format("Setting master to %s since %s says it is not master anymore", masterAddress, currentMaster)); setMasterAndJoin(masterAddress); return; } Connection conn = node.connectionManager.getConnection(currentMaster); if (conn != null && conn.isAlive()) { logger.info(format("Ignoring master response %s from %s since this node has an active master %s", masterAddress, callerAddress, currentMaster)); sendJoinRequest(currentMaster, true); } else { logger.warning(format("Ambiguous master response: This node has a master %s, but does not have a connection" + " to %s. Sent master response as %s. Master field will be unset now...", currentMaster, callerAddress, masterAddress)); node.setMasterAddress(null); } } finally { clusterServiceLock.unlock(); } } private void setMasterAndJoin(Address masterAddress) { node.setMasterAddress(masterAddress); node.connectionManager.getOrConnect(masterAddress); if (!sendJoinRequest(masterAddress, true)) { logger.warning("Could not create connection to possible master " + masterAddress); } } public boolean sendMasterQuestion(Address toAddress) { checkNotNull(toAddress, "No endpoint is specified!"); BuildInfo buildInfo = node.getBuildInfo(); final Address thisAddress = node.getThisAddress(); JoinMessage joinMessage = new JoinMessage(Packet.VERSION, buildInfo.getBuildNumber(), thisAddress, node.getLocalMember().getUuid(), node.isLiteMember(), node.createConfigCheck()); return nodeEngine.getOperationService().send(new MasterDiscoveryOperation(joinMessage), toAddress); } public void answerMasterQuestion(JoinMessage joinMessage, Connection connection) { if (!ensureValidConfiguration(joinMessage)) { return; } if (node.getMasterAddress() != null) { if (!checkIfJoinRequestFromAnExistingMember(joinMessage, connection)) { sendMasterAnswer(joinMessage.getAddress()); } } else { if (logger.isFinestEnabled()) { logger.finest(format("Received a master question from %s," + " but this node is not master itself or doesn't have a master yet!", joinMessage.getAddress())); } } } private void sendMasterAnswer(Address target) { Address masterAddress = node.getMasterAddress(); if (masterAddress == null) { logger.info(format("Cannot send master answer to %s since master node is not known yet", target)); return; } SetMasterOperation op = new SetMasterOperation(masterAddress); nodeEngine.getOperationService().send(op, target); } boolean checkIfJoinRequestFromAnExistingMember(JoinMessage joinMessage, Connection connection) { MemberImpl member = clusterService.getMember(joinMessage.getAddress()); if (member == null) { return false; } Address target = member.getAddress(); if (joinMessage.getUuid().equals(member.getUuid())) { if (node.isMaster()) { if (logger.isFinestEnabled()) { logger.finest(format("Ignoring join request, member already exists: %s", joinMessage)); } // send members update back to node trying to join again... Operation[] postJoinOps = nodeEngine.getPostJoinOperations(); boolean isPostJoinOperation = postJoinOps != null && postJoinOps.length > 0; PostJoinOperation postJoinOp = isPostJoinOperation ? new PostJoinOperation(postJoinOps) : null; Operation operation = new FinalizeJoinOperation(createMemberInfoList(clusterService.getMemberImpls()), postJoinOp, clusterClock.getClusterTime(), clusterStateManager.getState(), node.partitionService.createPartitionState(), false); nodeEngine.getOperationService().send(operation, target); } else { sendMasterAnswer(target); } return true; } // remove old member and process join request: // - if this node is master OR // - if requesting address is equal to master node's address, that means the master node somehow disconnected // and wants to join back, so drop old member and process join request if this node becomes master if (node.isMaster() || target.equals(node.getMasterAddress())) { logger.warning(format("New join request has been received from an existing endpoint %s." + " Removing old member and processing join request...", member)); clusterService.doRemoveAddress(target, false); Connection existing = node.connectionManager.getConnection(target); if (existing != connection) { node.connectionManager.destroyConnection(existing); node.connectionManager.registerConnection(target, connection); } return false; } return true; } private void startJoin() { logger.finest("Starting join..."); clusterServiceLock.lock(); try { try { joinInProgress = true; // pause migrations until join, member-update and post-join operations are completed node.getPartitionService().pauseMigration(); Collection members = clusterService.getMemberImpls(); Collection memberInfos = createMemberInfoList(members); for (MemberInfo memberJoining : setJoins) { memberInfos.add(memberJoining); } long time = clusterClock.getClusterTime(); // post join operations must be lock free, that means no locks at all: // no partition locks, no key-based locks, no service level locks! Operation[] postJoinOps = nodeEngine.getPostJoinOperations(); boolean createPostJoinOperation = (postJoinOps != null && postJoinOps.length > 0); PostJoinOperation postJoinOp = (createPostJoinOperation ? new PostJoinOperation(postJoinOps) : null); int count = members.size() - 1 + setJoins.size(); List calls = new ArrayList(count); PartitionRuntimeState partitionState = node.partitionService.createPartitionState(); for (MemberInfo member : setJoins) { long startTime = clusterClock.getClusterStartTime(); Operation joinOperation = new FinalizeJoinOperation(memberInfos, postJoinOp, time, clusterService.getClusterId(), startTime, clusterStateManager.getState(), partitionState); calls.add(invokeClusterOperation(joinOperation, member.getAddress())); } for (MemberImpl member : members) { if (!member.getAddress().equals(clusterService.getThisAddress())) { Operation infoUpdateOperation = new MemberInfoUpdateOperation(memberInfos, time, true); calls.add(invokeClusterOperation(infoUpdateOperation, member.getAddress())); } } clusterService.updateMembers(memberInfos); int timeout = Math.min(calls.size() * FINALIZE_JOIN_TIMEOUT_FACTOR, FINALIZE_JOIN_MAX_TIMEOUT); waitWithDeadline(calls, timeout, TimeUnit.SECONDS, whileFinalizeJoinsExceptionHandler); } finally { node.getPartitionService().resumeMigration(); } } finally { clusterServiceLock.unlock(); } } private Future invokeClusterOperation(Operation op, Address target) { return nodeEngine.getOperationService() .createInvocationBuilder(ClusterServiceImpl.SERVICE_NAME, op, target) .setTryCount(CLUSTER_OPERATION_RETRY_COUNT).invoke(); } void reset() { clusterServiceLock.lock(); try { joinInProgress = false; setJoins.clear(); timeToStartJoin = Clock.currentTimeMillis() + waitMillisBeforeJoin; firstJoinRequest = 0; } finally { clusterServiceLock.unlock(); } } void removeJoin(MemberInfo memberInfo) { setJoins.remove(memberInfo); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy