All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.atomix.raft.impl.ReconfigurationHelper Maven / Gradle / Ivy

The newest version!
/*
 * Copyright Camunda Services GmbH and/or licensed to Camunda Services GmbH under
 * one or more contributor license agreements. See the NOTICE file distributed
 * with this work for additional information regarding copyright ownership.
 * Licensed under the Camunda License 1.0. You may not use this file
 * except in compliance with the Camunda License 1.0.
 */
package io.atomix.raft.impl;

import io.atomix.cluster.MemberId;
import io.atomix.cluster.messaging.MessagingException.NoRemoteHandler;
import io.atomix.cluster.messaging.MessagingException.NoSuchMemberException;
import io.atomix.raft.RaftError;
import io.atomix.raft.RaftException.ProtocolException;
import io.atomix.raft.RaftServer;
import io.atomix.raft.RaftServer.Role;
import io.atomix.raft.cluster.RaftMember;
import io.atomix.raft.cluster.RaftMember.Type;
import io.atomix.raft.cluster.impl.DefaultRaftMember;
import io.atomix.raft.impl.RaftContext.State;
import io.atomix.raft.protocol.ForceConfigureRequest;
import io.atomix.raft.protocol.JoinRequest;
import io.atomix.raft.protocol.LeaveRequest;
import io.atomix.raft.protocol.RaftResponse.Status;
import io.atomix.raft.protocol.TransferRequest;
import io.atomix.raft.storage.system.Configuration;
import io.atomix.raft.utils.ForceConfigureQuorum;
import io.atomix.utils.concurrent.ThreadContext;
import io.atomix.utils.logging.ContextualLoggerFactory;
import io.atomix.utils.logging.LoggerContext;
import java.net.ConnectException;
import java.time.Instant;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeoutException;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.slf4j.Logger;

public final class ReconfigurationHelper {

  private final ThreadContext threadContext;
  private final RaftContext raftContext;
  private final Logger logger;

  public ReconfigurationHelper(final RaftContext raftContext) {
    threadContext = raftContext.getThreadContext();
    this.raftContext = raftContext;
    logger =
        ContextualLoggerFactory.getLogger(
            getClass(),
            LoggerContext.builder(RaftServer.class).addValue(raftContext.getName()).build());
  }

  public CompletableFuture join(final Collection clusterMembers) {
    final var result = new CompletableFuture();
    threadContext.execute(
        () -> {
          final var joining =
              new DefaultRaftMember(
                  raftContext.getCluster().getLocalMember().memberId(), Type.ACTIVE, Instant.now());
          final var assistingMembers =
              clusterMembers.stream()
                  .filter(memberId -> !memberId.equals(joining.memberId()))
                  .collect(Collectors.toCollection(LinkedBlockingQueue::new));
          if (assistingMembers.isEmpty()) {
            result.completeExceptionally(
                new IllegalStateException(
                    "Cannot join cluster, because there are no other members in the cluster."));
            return;
          }
          threadContext.execute(() -> joinWithRetry(joining, assistingMembers, result));
        });
    return result;
  }

  /**
   * Repeatedly tries to join the cluster until it succeeds or there are no more members to try.
   * When sending a join request to an assisting member fails because the member is currently not
   * known, or it is known but not ready to receive join request, try again with a different
   * assisting member.
   *
   * 

Retrying helps in cases where the cluster is in flux and not all members are online and * ready. * * @param joining the new member joining * @param assistingMembers a queue of members that we will send a join request to. * @param result a future to complete when joining succeeds or fails */ private void joinWithRetry( final RaftMember joining, final Queue assistingMembers, final CompletableFuture result) { final var receiver = assistingMembers.poll(); if (receiver == null) { result.completeExceptionally( new IllegalStateException( "Sent join request to all known members, but all failed. No more members left.")); return; } raftContext .getProtocol() .join(receiver, JoinRequest.builder().withJoiningMember(joining).build()) .whenCompleteAsync( (response, error) -> { if (error != null) { final var cause = error.getCause(); if (cause instanceof NoSuchMemberException || cause instanceof NoRemoteHandler || cause instanceof TimeoutException || cause instanceof ConnectException) { logger.debug("Join request was not acknowledged, retrying", cause); threadContext.execute(() -> joinWithRetry(joining, assistingMembers, result)); } else { logger.error("Join request failed with an unexpected error, not retrying", error); result.completeExceptionally(error); } } else if (response.status() == Status.OK) { logger.debug("Join request accepted"); result.complete(null); } else if (response.error().type() == RaftError.Type.NO_LEADER || response.error().type() == RaftError.Type.UNAVAILABLE) { logger.debug("Join request failed, retrying", response.error().createException()); threadContext.execute(() -> joinWithRetry(joining, assistingMembers, result)); } else { final var errorAsException = response.error().createException(); logger.error("Join request rejected, not retrying", errorAsException); result.completeExceptionally(errorAsException); } }, threadContext); } public CompletableFuture leave() { final CompletableFuture future = new CompletableFuture<>(); threadContext.execute(() -> leaveInternal(future)); return future; } private void leaveInternal(final CompletableFuture future) { final var leaving = raftContext.getCluster().getLocalMember(); final var receiver = Optional.ofNullable(raftContext.getLeader()) .map(DefaultRaftMember::memberId) .or( () -> raftContext.getCluster().getVotingMembers().stream() .map(RaftMember::memberId) .findAny()) .orElseThrow(); raftContext .getProtocol() .leave(receiver, LeaveRequest.builder().withLeavingMember(leaving).build()) .whenCompleteAsync( (response, error) -> { if (error != null) { future.completeExceptionally(error); } else if (response.status() == Status.OK) { future.complete(null); raftContext.updateState(State.LEFT); } else { future.completeExceptionally(response.error().createException()); } }, threadContext); } /** * Force configuration works as follows. Assume current members are 0,1,3,4, and we want to force * remove 2 and 3. * *

   *
   *   External                        Raft 0 (follower)                     Raft 1 (follower)             Raft 2/3
   *      |                                 |                                     |                        (Members to be removed)
   *      |    forceConfigure([0,1])        |                                     |                                    |
   *      |-------------------------------->|                                     |                                    |
   *      |                                 |                                     |                                    |
   *      |               Configuration={   |                                     |                                    |
   *      |                newMembers=[0,1],|                                     |                                    |
   *      |                oldMembers=[]    |                                     |                                    |
   *      |                force=TRUE       |                                     |                                    |
   *      |               Commit new config |   ForceConfigureRequest(newMembers) |                                    |
   *      |                                 |------------------------------------>|                                    |
   *      |                                 |               OK                    |Commit new Configuration            |
   *      |         OK                      |<------------------------------------|                                    |
   *      |<--------------------------------|                                     |        Poll/Vote/Append            |
   *      |                                 |                                     |<-----------------------------------|
   *      |                        election |             poll/vote               |----------------------------------->|
   *      |                        timeout  ------------------------------------->|     Reject because Force==TRUE     |
   *      |                                 |               OK                    |                                    |
   *      |                                 |<------------------------------------|                                    |
   *      |                    Become leader|                                     |                                    |
   *      |                                 |                                     |                                    |
   *      |             Append InitialEntry |                                     |                                    |
   *      |       Append ConfigurationEntry |                                     |                                    |
   *      |               Configuration={   |           AppendEntry               |                                    |
   *      |                newMembers=[0,1] |------------------------------------>|                                    |
   *      |                force=FALSE      |<------------------------------------|                                    |
   *      |               }                 |                                     |                                    |
   *      |                                 |------------------------------------>|                                    |
   *      |                                 |<------------------------------------|                                    |
   *      |                                 |                                     |                                    |
   *      |                Commit new config|            AppendEntry              |On commitIndex update               |
   *      |                                 |------------------------------------>|Commit new config                   |
   *      |                                 |                                     |                                    |
   *      |                                 |                                     |      Poll/Vote                     |
   *      |                                 |                                     |<-----------------------------------|
   *      |                                 |                                     |  Reject because log not uptodate   |
   *      |                                 |                                     |----------------------------------->|
   *      |                                 |                                     |                                    |
   * 
*/ public CompletableFuture forceConfigure(final Map newMembersIds) { final CompletableFuture future = new CompletableFuture<>(); threadContext.execute(() -> triggerForceConfigure(newMembersIds, future)); return future; } private void triggerForceConfigure( final Map newMembersIds, final CompletableFuture future) { final var currentConfiguration = raftContext.getCluster().getConfiguration(); final Set newMembers = newMembersIds.entrySet().stream() .map( memberEntry -> new DefaultRaftMember( memberEntry.getKey(), memberEntry.getValue(), Instant.now())) .collect(Collectors.toSet()); if (currentConfiguration == null || !currentConfiguration.force()) { // No need to overwrite if it is already in force configure and this is a retry if (raftContext.getRaftRole().role() == Role.LEADER) { // Optimization: If the current configuration is already the same as new forced, we // can skip reconfiguring. It is most likely a retry of a previous force request, // which was interpreted as failure because of a request timeout. raftContext.transition(Role.FOLLOWER); } logger.info( "Current configuration is '{}'. Forcing configuration with members '{}'", currentConfiguration, newMembers); final var newConfiguration = new Configuration( raftContext.getCurrentConfigurationIndex() + 1, raftContext.getTerm(), Instant.now().toEpochMilli(), newMembers, Set.of(), true); raftContext.getCluster().configure(newConfiguration); } else if (!(currentConfiguration.allMembers().equals(newMembers))) { // This is not expected. When force configuration is retried, we expect that they are // retried with the same state. If this is not the case, it is likely that there are two // force configuration requested at the same time. // Reject the request. There is possibly no way out to recover from this. future.completeExceptionally( new IllegalStateException( String.format( "Expected to force configure with members '%s', but the member is already in force configuration with a different set of members '%s'", newMembers, currentConfiguration.allMembers()))); return; } sendForceConfigureRequestToAllMembers(future); } private void sendForceConfigureRequestToAllMembers(final CompletableFuture future) { final Configuration configuration = raftContext.getCluster().getConfiguration(); final var otherMembers = configuration.newMembers().stream() .map(RaftMember::memberId) .filter(m -> !m.equals(raftContext.getCluster().getLocalMember().memberId())) .collect(Collectors.toSet()); if (otherMembers.isEmpty()) { future.complete(null); return; } final var quorum = new ForceConfigureQuorum( success -> { if (Boolean.TRUE.equals(success)) { future.complete(null); } else { future.completeExceptionally( new ProtocolException( "Failed to force configure because not all members acknowledged the request.")); } }, otherMembers); final ForceConfigureRequest request = ForceConfigureRequest.builder() .withTerm(configuration.term()) .withIndex(configuration.index()) .withTime(configuration.time()) // Beware that using ImmutableCollections can break Kryo serialization .withNewMembers(new HashSet<>(configuration.newMembers())) .from(raftContext.getCluster().getLocalMember().memberId()) .build(); otherMembers.forEach(memberId -> sendForceConfigurationRequest(memberId, request, quorum)); } private void sendForceConfigurationRequest( final MemberId memberId, final ForceConfigureRequest request, final ForceConfigureQuorum quorum) { logger.trace("Sending '{}' request to member '{}'", request, memberId); raftContext .getProtocol() .forceConfigure(memberId, request) .whenCompleteAsync( (response, error) -> { if (error != null) { logger.warn( "Failed to send force configure request to member '{}'", memberId, error); quorum.fail(memberId); } else if (response.status() == Status.OK) { logger.debug("Successfully sent force configure request to member '{}'", memberId); quorum.succeed(memberId); } else { logger.warn( "Failed to send force configure request to member '{}': {}", memberId, response.error()); quorum.fail(memberId); } }, threadContext); } /** Attempts to become the leader. */ public CompletableFuture anoint() { if (raftContext.getRaftRole().role() == Role.LEADER) { return CompletableFuture.completedFuture(null); } final CompletableFuture future = new CompletableFuture<>(); threadContext.execute(() -> anointInternal(future)); return future; } private void anointInternal(final CompletableFuture future) { // Register a leader election listener to wait for the election of this node. final Consumer electionListener = new Consumer<>() { @Override public void accept(final RaftMember member) { if (member.memberId().equals(raftContext.getCluster().getLocalMember().memberId())) { future.complete(null); } else { future.completeExceptionally(new ProtocolException("Failed to transfer leadership")); } raftContext.removeLeaderElectionListener(this); } }; raftContext.addLeaderElectionListener(electionListener); // If a leader already exists, request a leadership transfer from it. Otherwise, // transition to the candidate // state and attempt to get elected. final RaftMember member = raftContext.getCluster().getLocalMember(); final RaftMember leader = raftContext.getLeader(); if (leader != null) { raftContext .getProtocol() .transfer( leader.memberId(), TransferRequest.builder().withMember(member.memberId()).build()) .whenCompleteAsync( (response, error) -> { if (error != null) { future.completeExceptionally(error); } else if (response.status() == Status.ERROR) { future.completeExceptionally(response.error().createException()); } else { raftContext.transition(Role.CANDIDATE); } }, threadContext); } else { raftContext.transition(Role.CANDIDATE); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy