org.elasticsearch.cluster.coordination.PreVoteCollector Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.cluster.coordination;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.coordination.CoordinationState.VoteCollection;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.monitor.NodeHealthService;
import org.elasticsearch.monitor.StatusInfo;
import org.elasticsearch.threadpool.ThreadPool.Names;
import org.elasticsearch.transport.TransportException;
import org.elasticsearch.transport.TransportResponseHandler;
import org.elasticsearch.transport.TransportService;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.LongConsumer;
import java.util.stream.StreamSupport;
import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap;
import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;
/**
 * Handles the pre-voting phase that precedes an election.
 *
 * <p>On the receiving side it registers a transport handler for {@link #REQUEST_PRE_VOTE_ACTION_NAME} and answers
 * incoming {@link PreVoteRequest}s from the most recent state passed to {@link #update}. On the sending side,
 * {@link #start} broadcasts a pre-vote request to a set of nodes, gathers their responses and invokes the
 * {@code startElection} callback once the gathered pre-votes form an election quorum.
 */
public class PreVoteCollector {

    private static final Logger logger = LogManager.getLogger(PreVoteCollector.class);

    public static final String REQUEST_PRE_VOTE_ACTION_NAME = "internal:cluster/request_pre_vote";

    private final TransportService transportService;
    private final Runnable startElection;
    private final LongConsumer updateMaxTermSeen;
    private final ElectionStrategy electionStrategy;
    private final NodeHealthService nodeHealthService;

    // Tuple for simple atomic updates. null until the first call to `update()`.
    // DiscoveryNode component is null if there is currently no known leader.
    private volatile Tuple<DiscoveryNode, PreVoteResponse> state;

    /**
     * Creates the collector and registers the request handler for {@link #REQUEST_PRE_VOTE_ACTION_NAME} on the given
     * transport service.
     *
     * @param transportService  used both to receive pre-vote requests and to send them to other nodes
     * @param startElection     callback run (at most once per pre-voting round) when a round reaches an election quorum
     * @param updateMaxTermSeen callback fed with every term observed in an incoming request or response
     * @param electionStrategy  decides whether the collected pre-votes constitute an election quorum
     * @param nodeHealthService consulted on every incoming request; requests are rejected while this node is unhealthy
     */
    PreVoteCollector(
        final TransportService transportService,
        final Runnable startElection,
        final LongConsumer updateMaxTermSeen,
        final ElectionStrategy electionStrategy,
        final NodeHealthService nodeHealthService
    ) {
        this.transportService = transportService;
        this.startElection = startElection;
        this.updateMaxTermSeen = updateMaxTermSeen;
        this.electionStrategy = electionStrategy;
        this.nodeHealthService = nodeHealthService;

        transportService.registerRequestHandler(
            REQUEST_PRE_VOTE_ACTION_NAME,
            Names.GENERIC,
            false,
            false,
            PreVoteRequest::new,
            (request, channel, task) -> channel.sendResponse(handlePreVoteRequest(request))
        );
    }

    /**
     * Start a new pre-voting round.
     *
     * <p>The request sent to the other nodes carries the current term of the most recently recorded
     * {@link PreVoteResponse}, so {@link #update} must have been called at least once beforehand.
     *
     * @param clusterState   the last-accepted cluster state
     * @param broadcastNodes the nodes from whom to request pre-votes
     * @return the pre-voting round, which can be closed to end the round early.
     */
    public Releasable start(final ClusterState clusterState, final Iterable<DiscoveryNode> broadcastNodes) {
        // Read the volatile field once so the assertion and the dereference see the same snapshot.
        final Tuple<DiscoveryNode, PreVoteResponse> currentState = this.state;
        assert currentState != null : "started pre-voting round before fully initialised";
        final PreVotingRound preVotingRound = new PreVotingRound(clusterState, currentState.v2().getCurrentTerm());
        preVotingRound.start(broadcastNodes);
        return preVotingRound;
    }

    // only for testing
    PreVoteResponse getPreVoteResponse() {
        return state.v2();
    }

    // only for testing
    @Nullable
    DiscoveryNode getLeader() {
        return state.v1();
    }

    /**
     * Records the response this node should give to subsequent pre-vote requests, together with the currently known
     * leader. Both values are replaced together via a single volatile write.
     *
     * @param preVoteResponse the response to return to nodes whose pre-vote request is accepted
     * @param leader          the currently known leader, or {@code null} if there is none
     */
    public void update(final PreVoteResponse preVoteResponse, @Nullable final DiscoveryNode leader) {
        logger.trace("updating with preVoteResponse={}, leader={}", preVoteResponse, leader);
        state = new Tuple<>(leader, preVoteResponse);
    }

    /**
     * Answers a pre-vote request from another node.
     *
     * @param request the incoming request
     * @return the most recently recorded {@link PreVoteResponse} if there is no known leader, or if the requester is
     *         the known leader
     * @throws NodeHealthCheckFailureException    if this node currently reports itself as unhealthy
     * @throws CoordinationStateRejectedException if a leader is known and it is not the requesting node
     */
    private PreVoteResponse handlePreVoteRequest(final PreVoteRequest request) {
        updateMaxTermSeen.accept(request.getCurrentTerm());

        // Snapshot the volatile field so leader and response are taken from the same update.
        final Tuple<DiscoveryNode, PreVoteResponse> state = this.state;
        assert state != null : "received pre-vote request before fully initialised";

        final DiscoveryNode leader = state.v1();
        final PreVoteResponse response = state.v2();

        final StatusInfo statusInfo = nodeHealthService.getHealth();
        if (statusInfo.getStatus() == UNHEALTHY) {
            String message = "rejecting " + request + " on unhealthy node: [" + statusInfo.getInfo() + "]";
            logger.debug(message);
            throw new NodeHealthCheckFailureException(message);
        }

        if (leader == null) {
            return response;
        }

        if (leader.equals(request.getSourceNode())) {
            // This is a _rare_ case where our leader has detected a failure and stepped down, but we are still a follower. It's possible
            // that the leader lost its quorum, but while we're still a follower we will not offer joins to any other node so there is no
            // major drawback in offering a join to our old leader. The advantage of this is that it makes it slightly more likely that the
            // leader won't change, and also that its re-election will happen more quickly than if it had to wait for a quorum of followers
            // to also detect its failure.
            return response;
        }

        throw new CoordinationStateRejectedException("rejecting " + request + " as there is already a leader");
    }

    @Override
    public String toString() {
        return "PreVoteCollector{" + "state=" + state + '}';
    }

    /**
     * A single round of pre-voting: one broadcast of a {@link PreVoteRequest} plus the responses gathered for it.
     * Closing the round makes it ignore any responses that arrive afterwards.
     */
    private class PreVotingRound implements Releasable {
        // Responses may be handled concurrently, hence the concurrent map and the atomic flags.
        private final Map<DiscoveryNode, PreVoteResponse> preVotesReceived = newConcurrentMap();
        private final AtomicBoolean electionStarted = new AtomicBoolean();
        private final PreVoteRequest preVoteRequest;
        private final ClusterState clusterState;
        private final AtomicBoolean isClosed = new AtomicBoolean();

        /**
         * @param clusterState the last-accepted cluster state against which responses are compared
         * @param currentTerm  the term to put in the outgoing pre-vote request
         */
        PreVotingRound(final ClusterState clusterState, final long currentTerm) {
            this.clusterState = clusterState;
            preVoteRequest = new PreVoteRequest(transportService.getLocalNode(), currentTerm);
        }

        /**
         * Sends this round's pre-vote request to every given node; each successful response is passed to
         * {@link #handlePreVoteResponse}, and failures are only logged.
         *
         * @param broadcastNodes the nodes from whom to request pre-votes
         */
        void start(final Iterable<DiscoveryNode> broadcastNodes) {
            assert StreamSupport.stream(broadcastNodes.spliterator(), false).noneMatch(Coordinator::isZen1Node) : broadcastNodes;
            logger.debug("{} requesting pre-votes from {}", this, broadcastNodes);
            broadcastNodes.forEach(
                n -> transportService.sendRequest(
                    n,
                    REQUEST_PRE_VOTE_ACTION_NAME,
                    preVoteRequest,
                    new TransportResponseHandler<PreVoteResponse>() {
                        @Override
                        public PreVoteResponse read(StreamInput in) throws IOException {
                            return new PreVoteResponse(in);
                        }

                        @Override
                        public void handleResponse(PreVoteResponse response) {
                            handlePreVoteResponse(response, n);
                        }

                        @Override
                        public void handleException(TransportException exp) {
                            // `this` is the handler, so the message identifies the collector and the target node.
                            logger.debug(new ParameterizedMessage("{} failed", this), exp);
                        }

                        @Override
                        public String executor() {
                            return Names.GENERIC;
                        }

                        @Override
                        public String toString() {
                            return "TransportResponseHandler{" + PreVoteCollector.this + ", node=" + n + '}';
                        }
                    }
                )
            );
        }

        /**
         * Records a pre-vote response and, if the responses gathered so far form an election quorum, triggers the
         * election callback. The callback is run at most once per round.
         *
         * @param response the response received
         * @param sender   the node that sent it
         */
        private void handlePreVoteResponse(final PreVoteResponse response, final DiscoveryNode sender) {
            if (isClosed.get()) {
                logger.debug("{} is closed, ignoring {} from {}", this, response, sender);
                return;
            }

            updateMaxTermSeen.accept(response.getCurrentTerm());

            // A sender whose last-accepted state is newer than ours (by term, then by version) does not count as a pre-vote.
            if (response.getLastAcceptedTerm() > clusterState.term()
                || (response.getLastAcceptedTerm() == clusterState.term()
                    && response.getLastAcceptedVersion() > clusterState.getVersionOrMetadataVersion())) {
                logger.debug("{} ignoring {} from {} as it is fresher", this, response, sender);
                return;
            }

            preVotesReceived.put(sender, response);

            // create a fake VoteCollection based on the pre-votes and check if there is an election quorum
            final VoteCollection voteCollection = new VoteCollection();
            final DiscoveryNode localNode = clusterState.nodes().getLocalNode();
            final PreVoteResponse localPreVoteResponse = getPreVoteResponse();

            preVotesReceived.forEach(
                (node, preVoteResponse) -> voteCollection.addJoinVote(
                    new Join(
                        node,
                        localNode,
                        preVoteResponse.getCurrentTerm(),
                        preVoteResponse.getLastAcceptedTerm(),
                        preVoteResponse.getLastAcceptedVersion()
                    )
                )
            );

            if (electionStrategy.isElectionQuorum(
                localNode,
                localPreVoteResponse.getCurrentTerm(),
                localPreVoteResponse.getLastAcceptedTerm(),
                localPreVoteResponse.getLastAcceptedVersion(),
                clusterState.getLastCommittedConfiguration(),
                clusterState.getLastAcceptedConfiguration(),
                voteCollection
            ) == false) {
                logger.debug("{} added {} from {}, no quorum yet", this, response, sender);
                return;
            }

            // Only the first response that completes a quorum may start the election.
            if (electionStarted.compareAndSet(false, true) == false) {
                logger.debug("{} added {} from {} but election has already started", this, response, sender);
                return;
            }

            logger.debug("{} added {} from {}, starting election", this, response, sender);
            startElection.run();
        }

        @Override
        public String toString() {
            return "PreVotingRound{"
                + "preVotesReceived="
                + preVotesReceived
                + ", electionStarted="
                + electionStarted
                + ", preVoteRequest="
                + preVoteRequest
                + ", isClosed="
                + isClosed
                + '}';
        }

        /**
         * Ends the round; responses arriving afterwards are ignored. Asserts that the round was not already closed.
         */
        @Override
        public void close() {
            final boolean isNotAlreadyClosed = isClosed.compareAndSet(false, true);
            assert isNotAlreadyClosed;
        }
    }
}