// io.atomix.raft.RaftServer Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of zeebe-atomix-cluster Show documentation
// The newest version!
/*
 * Copyright 2015-present Open Networking Foundation
 * Copyright © 2020 camunda services GmbH (info@camunda.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.atomix.raft;

import static com.google.common.base.Preconditions.checkNotNull;
import static io.atomix.raft.RaftException.ConfigurationException;

import io.atomix.cluster.ClusterMembershipService;
import io.atomix.cluster.MemberId;
import io.atomix.raft.cluster.RaftCluster;
import io.atomix.raft.cluster.RaftMember;
import io.atomix.raft.cluster.RaftMember.Type;
import io.atomix.raft.impl.DefaultRaftServer;
import io.atomix.raft.impl.RaftContext;
import io.atomix.raft.partition.RaftElectionConfig;
import io.atomix.raft.partition.RaftPartitionConfig;
import io.atomix.raft.protocol.RaftServerProtocol;
import io.atomix.raft.storage.RaftStorage;
import io.atomix.raft.storage.log.RaftLog;
import io.atomix.raft.zeebe.EntryValidator;
import io.atomix.raft.zeebe.EntryValidator.NoopEntryValidator;
import io.camunda.zeebe.util.health.FailureListener;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.CompletableFuture;
import java.util.function.Supplier;

/**
 * Provides a standalone implementation of the Raft consensus
 * algorithm.
 *
 * <p>To create a new server, use the server {@link RaftServer.Builder}. Servers require cluster
 * membership information in order to perform communication. Each server must be provided a local
 * {@link MemberId} to which to bind the internal {@link io.atomix.raft.protocol.RaftServerProtocol}
 * and a set of addresses for other members in the cluster.
 *
 * <h2>State machines</h2>
 *
 * State machines are provided in a factory to allow servers to transition between stateful and
 * stateless states.
 *
 * <pre>{@code
 * Address address = new Address("123.456.789.0", 5000);
 * Collection<Address> members = Arrays.asList(new Address("123.456.789.1", 5000), new Address("123.456.789.2", 5000));
 *
 * RaftServer server = RaftServer.builder(address)
 *   .withStateMachine(MyStateMachine::new)
 *   .build();
 *
 * }</pre>
 *
 * Raft relies upon determinism to ensure consistency throughout the cluster, so it is
 * imperative that each server in a cluster have the same state machine with the same commands.
 * State machines are provided to the server as a {@link Supplier factory} to allow servers to
 * {@link RaftMember#promote(RaftMember.Type) transition} between stateful and stateless states.
 *
 * <h2>Storage</h2>
 *
 * By default, the log is stored on disk, but users can override the default {@link RaftStorage}
 * configuration via {@link RaftServer.Builder#withStorage(RaftStorage)}.
 *
 * <pre>{@code
 * RaftServer server = RaftServer.builder(address)
 *   .withStateMachine(MyStateMachine::new)
 *   .withStorage(Storage.builder()
 *     .withDirectory(new File("logs"))
 *     .build())
 *   .build();
 * }</pre>
 *
 * Servers use the {@code Storage} object to manage the storage of cluster configurations, voting
 * information, and state machine snapshots in addition to logs. See the {@link RaftStorage}
 * documentation for more information.
 *
 * <h2>Bootstrapping the cluster</h2>
 *
 * Once a server has been built, it must be {@link #bootstrap() bootstrapped} to form a new cluster.
 *
 * <pre>{@code
 * CompletableFuture future = server.bootstrap();
 * future.thenRun(() -> {
 *   System.out.println("Server bootstrapped!");
 * });
 *
 * }</pre>
 *
 * Alternatively, the bootstrapped cluster can include multiple servers by providing an initial
 * configuration to the {@link #bootstrap(MemberId...)} method on each server. When bootstrapping a
 * multi-node cluster, the bootstrap configuration must be identical on all servers for safety.
 *
 * <pre>{@code
 * List<Address> cluster = Arrays.asList(
 *   new Address("123.456.789.0", 5000),
 *   new Address("123.456.789.1", 5000),
 *   new Address("123.456.789.2", 5000)
 * );
 *
 * CompletableFuture future = server.bootstrap(cluster);
 * future.thenRun(() -> {
 *   System.out.println("Cluster bootstrapped");
 * });
 *
 * }</pre>
 *
 * @see RaftStorage
 */
public interface RaftServer {

  /**
   * Creates a server builder for a default local member.
   *
   * <p>The local {@link MemberId} is derived from the host name that {@link InetAddress} reports
   * for the wildcard address {@code 0.0.0.0}. No port is configured by this method.
   *
   * @return The server builder.
   * @throws ConfigurationException if the wildcard address cannot be resolved
   */
  static Builder builder() {
    final InetAddress wildcard;
    try {
      wildcard = InetAddress.getByName("0.0.0.0");
    } catch (final UnknownHostException e) {
      throw new ConfigurationException(e, "Cannot configure local node %s", e.getMessage());
    }
    final MemberId localMemberId = MemberId.from(wildcard.getHostName());
    return builder(localMemberId);
  }

  /**
   * Creates a server builder for the given local member.
   *
   * <p>The provided {@link MemberId} is the address to which to bind the server being constructed;
   * it is handed to a new {@link DefaultRaftServer.Builder}.
   *
   * @param localMemberId The local node identifier.
   * @return The server builder.
   */
  static Builder builder(final MemberId localMemberId) {
    final Builder serverBuilder = new DefaultRaftServer.Builder(localMemberId);
    return serverBuilder;
  }

  /**
   * Returns the server name.
   *
   * <p>The server name is provided to the server via the {@link Builder#withName(String) builder
   * configuration}. The name is used internally to manage the server's on-disk state. {@link
   * RaftLog Log}, {@code snapshot}, and {@link io.atomix.raft.storage.system.MetaStore
   * configuration} files stored on disk use the server name as the prefix.
   *
   * @return The server name.
   */
  String name();

  /**
   * Returns the server's cluster configuration.
   *
   * <p>The {@link RaftCluster} is representative of the server's current view of the cluster
   * configuration. The first time the server is {@link #bootstrap() started}, the cluster
   * configuration will be initialized using the {@link MemberId} list provided to the server {@link
   * #builder(MemberId) builder}.
   *
   * @return The server's cluster configuration.
   */
  RaftCluster cluster();

  /**
   * Adds a {@link RaftRoleChangeListener role change listener}.
   *
   * @param listener The role change listener that consumes the role and the raft term.
   */
  void addRoleChangeListener(RaftRoleChangeListener listener);

  /**
   * Removes a {@link RaftRoleChangeListener role change listener}.
   *
   * @param listener The role change listener to remove.
   */
  void removeRoleChangeListener(RaftRoleChangeListener listener);

  /**
   * Adds a failure listener.
   *
   * @param listener The {@link FailureListener} to add.
   */
  void addFailureListener(FailureListener listener);

  /**
   * Removes a failure listener.
   *
   * @param listener The {@link FailureListener} to remove.
   */
  void removeFailureListener(FailureListener listener);

  /**
   * Bootstraps a cluster without an explicit member list.
   *
   * <p>This is shorthand for calling {@link #bootstrap(Collection)} with an empty collection, which
   * according to that method's contract makes the local server form a single-node cluster to which
   * additional servers can be joined.
   *
   * <p>Bootstrap configurations may only contain {@link RaftMember.Type#ACTIVE} members; a local
   * server that is not initialized as an active member cannot be part of one.
   *
   * <p>After bootstrapping, the local server is transitioned into the active state and starts
   * participating in the Raft consensus algorithm. No leader exists right after the bootstrap; the
   * bootstrapped members elect one amongst themselves.
   *
   * <p>Every server of a bootstrap configuration must be started with exactly the same set of
   * members, otherwise split brain may result.
   *
   * @return A completable future that is completed once the cluster has been bootstrapped, a
   *     leader has been elected, and the leader has been notified of the local server's client
   *     configurations.
   */
  default CompletableFuture bootstrap() {
    final Collection emptyConfiguration = Collections.emptyList();
    return bootstrap(emptyConfiguration);
  }

  /**
   * Bootstraps the cluster using the provided cluster configuration.
   *
   * <p>Bootstrapping the cluster results in a new cluster being formed with the provided
   * configuration. The initial nodes in a cluster must always be bootstrapped. This is necessary to
   * prevent split brain. If the provided configuration is empty, the local server will form a
   * single-node cluster.
   *
   * <p>Only {@link RaftMember.Type#ACTIVE} members can be included in a bootstrap configuration. If
   * the local server is not initialized as an active member, it cannot be part of the bootstrap
   * configuration for the cluster.
   *
   * <p>When the cluster is bootstrapped, the local server will be transitioned into the active
   * state and begin participating in the Raft consensus algorithm. When the cluster is first
   * bootstrapped, no leader will exist. The bootstrapped members will elect a leader amongst
   * themselves.
   *
   * <p>It is critical that all servers in a bootstrap configuration be started with the same exact
   * set of members. Bootstrapping multiple servers with different configurations may result in
   * split brain.
   *
   * <p>The {@link CompletableFuture} returned by this method will be completed once the cluster has
   * been bootstrapped, a leader has been elected, and the leader has been notified of the local
   * server's client configurations.
   *
   * @param cluster The bootstrap cluster configuration; the {@link #bootstrap(MemberId...)}
   *     overload passes its {@link MemberId} arguments here as a list.
   * @return A completable future to be completed once the cluster has been bootstrapped.
   */
  CompletableFuture bootstrap(Collection cluster);

  /**
   * Bootstraps the cluster using the provided members as the cluster configuration.
   *
   * <p>The member ids are wrapped into a list and passed to {@link #bootstrap(Collection)}. A new
   * cluster is formed with that configuration. The initial nodes in a cluster must always be
   * bootstrapped, which is necessary to prevent split brain. If no members are given, the local
   * server will form a single-node cluster.
   *
   * <p>Bootstrap configurations may only contain {@link RaftMember.Type#ACTIVE} members; a local
   * server that is not initialized as an active member cannot be part of one.
   *
   * <p>After bootstrapping, the local server is transitioned into the active state and starts
   * participating in the Raft consensus algorithm. No leader exists right after the bootstrap; the
   * bootstrapped members elect one amongst themselves.
   *
   * <p>Every server of a bootstrap configuration must be started with exactly the same set of
   * members, otherwise split brain may result.
   *
   * @param members The bootstrap cluster configuration.
   * @return A completable future that is completed once the cluster has been bootstrapped, a
   *     leader has been elected, and the leader has been notified of the local server's client
   *     configurations.
   */
  default CompletableFuture bootstrap(final MemberId... members) {
    final Collection configuration = Arrays.asList(members);
    return bootstrap(configuration);
  }

  /**
   * Starts this raft server by joining an existing replication group. A {@link
   * io.atomix.raft.protocol.JoinRequest} is sent to an arbitrary member of the cluster.
   *
   * @param cluster a collection of member ids that are part of the cluster and assist in joining.
   * @return A completable future to be completed once the server has joined the cluster.
   */
  CompletableFuture join(Collection cluster);

  /**
   * Starts this raft server by joining an existing replication group.
   *
   * <p>The given member ids are wrapped into a list and passed to {@link #join(Collection)}. A
   * {@link io.atomix.raft.protocol.JoinRequest} is sent to an arbitrary member of the cluster.
   *
   * @param cluster the member ids that are part of the cluster and assist in joining.
   * @return A completable future to be completed once the server has joined the cluster.
   */
  default CompletableFuture join(final MemberId... cluster) {
    final Collection knownMembers = Arrays.asList(cluster);
    return join(knownMembers);
  }

  /**
   * Requests to leave the replication group.
   *
   * <p>A {@link io.atomix.raft.protocol.LeaveRequest} is sent to an arbitrary member of the
   * cluster, as provided by the {@link ClusterMembershipService}.
   *
   * @return A future to be completed successfully once the server has left the cluster.
   */
  CompletableFuture leave();

  /**
   * Promotes the server to leader if possible.
   *
   * @return A future to be completed once the server has been promoted.
   */
  CompletableFuture promote();

  /**
   * Force configure the partition to remove all members which are not part of the given
   * membersToRetain.
   *
   * <p>This method is typically called to remove a set of unreachable members when there is no
   * leader.
   *
   * @param membersToRetain The members to retain in the partition. NOTE(review): declared as a raw
   *     {@code Map}; its key and value types are not visible here - confirm against the
   *     implementation.
   * @return a future to be completed once the server has been force configured
   */
  CompletableFuture forceConfigure(Map membersToRetain);

  /**
   * Updates the priority of this server used for priority election.
   *
   * <p>If priority election is not enabled, this method has no effect. To get the desired result,
   * the priority of all replicas must be updated accordingly; this method only updates the local
   * server's priority.
   *
   * @param newPriority the priority to be set
   * @return a future to be completed when the new priority is applied
   */
  CompletableFuture reconfigurePriority(int newPriority);

  /**
   * Ensures that all records written to the log are flushed to disk.
   *
   * @return a future which will be completed after the log is flushed to disk
   */
  CompletableFuture flushLog();

  /**
   * Shuts down the server without leaving the Raft cluster.
   *
   * @return A completable future to be completed once the server has been shut down.
   */
  CompletableFuture shutdown();

  /**
   * Returns the current Raft context.
   *
   * @return the current {@link RaftContext}
   */
  RaftContext getContext();

  /**
   * Returns the server's term.
   *
   * @return the server's term as a {@code long}
   */
  long getTerm();

  /**
   * Tells whether this server currently has the {@link Role#FOLLOWER} role.
   *
   * @return {@code true} if {@link #getRole()} reports {@link Role#FOLLOWER}, {@code false}
   *     otherwise
   */
  default boolean isFollower() {
    final Role currentRole = getRole();
    return Role.FOLLOWER == currentRole;
  }

  /**
   * Returns the server role.
   *
   * <p>The initial state of a Raft server is {@link Role#INACTIVE}. Once the server is {@link
   * #bootstrap() started} and until it is explicitly shutdown, the server will be in one of the
   * active states - {@link Role#PASSIVE}, {@link Role#FOLLOWER}, {@link Role#CANDIDATE}, or {@link
   * Role#LEADER}.
   *
   * @return The server role.
   */
  Role getRole();

  /**
   * Tells whether this server currently has the {@link Role#LEADER} role.
   *
   * @return {@code true} if {@link #getRole()} reports {@link Role#LEADER}, {@code false}
   *     otherwise
   */
  default boolean isLeader() {
    final Role currentRole = getRole();
    return Role.LEADER == currentRole;
  }

  /**
   * Returns whether the server is running.
   *
   * @return {@code true} if the server is running, {@code false} otherwise
   */
  boolean isRunning();

  /**
   * Steps down from the current leadership, which means tries to transition directly to follower.
   *
   * @return a future for the step-down request. NOTE(review): when exactly it completes is not
   *     documented here - confirm against the implementation.
   */
  CompletableFuture stepDown();

  /**
   * Builds a single-use Raft server.
   *
   * <p>This builder should be used to programmatically configure and construct a new {@link
   * RaftServer} instance. The builder provides methods for configuring all aspects of a Raft
   * server. The {@code RaftServer.Builder} class cannot be instantiated directly. To create a new
   * builder, use one of the {@link RaftServer#builder(MemberId) server builder factory} methods.
   *
   * <pre>{@code
   * RaftServer.Builder builder = RaftServer.builder(address);
   * }</pre>
   *
   * <p>Once the server has been configured, use the {@link #build()} method to build the server
   * instance:
   *
   * <pre>{@code
   * RaftServer server = RaftServer.builder(address)
   *   ...
   *   .build();
   * }</pre>
   *
   * <p>The state machine is the component of the server that stores state and reacts to commands
   * and queries submitted by clients to the cluster. State machines are provided to the server in
   * the form of a state machine {@link Supplier factory} to allow the server to reconstruct its
   * state when necessary.
   *
   * <pre>{@code
   * RaftServer server = RaftServer.builder(address)
   *   .withStateMachine(MyStateMachine::new)
   *   .build();
   * }</pre>
   *
   * <p>NOTE(review): no {@code withStateMachine} method is declared on this class; confirm that the
   * state machine example above is still current.
   *
   * <p>Every {@code with*} method that accepts an object reference rejects {@code null} with a
   * {@link NullPointerException}, so that a misconfiguration fails at the call site.
   */
  abstract class Builder implements io.atomix.utils.Builder {

    /** Server name; unset until {@link #withName(String)} is called. */
    protected String name;

    /** Identifier of the local member, assigned in the constructor. */
    protected MemberId localMemberId;

    /** Cluster membership service; unset until {@link #withMembershipService} is called. */
    protected ClusterMembershipService membershipService;

    /** Server protocol; unset until {@link #withProtocol} is called. */
    protected RaftServerProtocol protocol;

    /** Storage module; unset until {@link #withStorage} is called. */
    protected RaftStorage storage;

    /** Thread context factory; unset until {@link #withThreadContextFactory} is called. */
    protected RaftThreadContextFactory threadContextFactory;

    /**
     * Random factory. No setter in this class assigns it. NOTE(review): presumably supplies {@link
     * Random} instances - confirm against subclasses.
     */
    protected Supplier randomFactory;

    /** Entry validator; defaults to a {@link NoopEntryValidator}. */
    protected EntryValidator entryValidator = new NoopEntryValidator();

    /** Election configuration; defaults to {@link RaftElectionConfig#ofDefaultElection()}. */
    protected RaftElectionConfig electionConfig = RaftElectionConfig.ofDefaultElection();

    /** Partition configuration; defaults to a freshly constructed {@link RaftPartitionConfig}. */
    protected RaftPartitionConfig partitionConfig = new RaftPartitionConfig();

    /** Partition id; stays at the {@code int} default of 0 unless {@link #withPartitionId} is used. */
    protected int partitionId;

    /**
     * Creates a builder for the given local member.
     *
     * @param localMemberId The local node identifier.
     * @throws NullPointerException if {@code localMemberId} is null
     */
    protected Builder(final MemberId localMemberId) {
      this.localMemberId = checkNotNull(localMemberId, "localMemberId cannot be null");
    }

    /**
     * Sets the server name.
     *
     * <p>The server name is used internally to manage the server's on-disk state, see {@link
     * RaftServer#name()}.
     *
     * @param name The server name.
     * @return The server builder.
     * @throws NullPointerException if {@code name} is null
     */
    public Builder withName(final String name) {
      this.name = checkNotNull(name, "name cannot be null");
      return this;
    }

    /**
     * Sets the cluster membership service.
     *
     * @param membershipService the cluster membership service
     * @return the server builder
     * @throws NullPointerException if {@code membershipService} is null
     */
    public Builder withMembershipService(final ClusterMembershipService membershipService) {
      this.membershipService = checkNotNull(membershipService, "membershipService cannot be null");
      return this;
    }

    /**
     * Sets the server protocol.
     *
     * @param protocol The server protocol.
     * @return The server builder.
     * @throws NullPointerException if {@code protocol} is null
     */
    public Builder withProtocol(final RaftServerProtocol protocol) {
      this.protocol = checkNotNull(protocol, "protocol cannot be null");
      return this;
    }

    /**
     * Sets the storage module.
     *
     * @param storage The storage module.
     * @return The Raft server builder.
     * @throws NullPointerException if {@code storage} is null
     */
    public Builder withStorage(final RaftStorage storage) {
      this.storage = checkNotNull(storage, "storage cannot be null");
      return this;
    }

    /**
     * Sets the threadContextFactory used to create raft threadContext.
     *
     * @param threadContextFactory The RaftThreadContextFactory
     * @return The Raft server builder.
     * @throws NullPointerException if {@code threadContextFactory} is null
     */
    public Builder withThreadContextFactory(final RaftThreadContextFactory threadContextFactory) {
      this.threadContextFactory =
          checkNotNull(threadContextFactory, "threadContextFactory cannot be null");
      return this;
    }

    /**
     * Sets the entry validator, replacing the default {@link NoopEntryValidator}.
     *
     * @param entryValidator The entry validator.
     * @return The Raft server builder.
     * @throws NullPointerException if {@code entryValidator} is null
     */
    public Builder withEntryValidator(final EntryValidator entryValidator) {
      // Fail fast like the other setters instead of silently discarding the non-null default.
      this.entryValidator = checkNotNull(entryValidator, "entryValidator cannot be null");
      return this;
    }

    /**
     * Sets the election configuration, replacing the default election configuration.
     *
     * @param electionConfig The election configuration.
     * @return The Raft server builder.
     * @throws NullPointerException if {@code electionConfig} is null
     */
    public Builder withElectionConfig(final RaftElectionConfig electionConfig) {
      this.electionConfig = checkNotNull(electionConfig, "electionConfig cannot be null");
      return this;
    }

    /**
     * Sets the partition configuration, replacing the default partition configuration.
     *
     * @param partitionConfig The partition configuration.
     * @return The Raft server builder.
     * @throws NullPointerException if {@code partitionConfig} is null
     */
    public Builder withPartitionConfig(final RaftPartitionConfig partitionConfig) {
      this.partitionConfig = checkNotNull(partitionConfig, "partitionConfig cannot be null");
      return this;
    }

    /**
     * Sets the partition id.
     *
     * @param partitionId The partition id.
     * @return The Raft server builder.
     */
    public Builder withPartitionId(final int partitionId) {
      this.partitionId = partitionId;
      return this;
    }
  }

  /**
   * Unchecked exception that carries only a message.
   *
   * <p>NOTE(review): judging by its name it signals that a bootstrap attempt was cancelled; no
   * usage is visible in this file, so confirm where it is thrown.
   */
  class CancelledBootstrapException extends RuntimeException {
    /**
     * Creates the exception with the given detail message.
     *
     * @param message the detail message passed on to {@link RuntimeException}
     */
    public CancelledBootstrapException(final String message) {
      super(message);
    }
  }

  /**
   * Raft server state types.
   *
   * <p>A state represents the context of the server's internal state machine. Over its lifetime a
   * server periodically moves from one state to another in reaction to requests, responses, and
   * timeouts.
   *
   * @author Jordan Halterman
   */
  enum Role {

    /**
     * State of an inactive server.
     *
     * <p>Every server starts out in this state.
     */
    INACTIVE(false),

    /**
     * State of a server that is still catching up its log.
     *
     * <p>After successfully joining an existing cluster, a server transitions to the passive state
     * and stays there until the leader determines that it has caught up enough to be promoted to a
     * full member.
     */
    PASSIVE(false),

    /** State of a server that is in the process of being promoted to an active voting member. */
    PROMOTABLE(false),

    /**
     * State of a server participating in normal log replication.
     *
     * <p>This is the standard Raft follower state, in which the server receives replicated log
     * entries from the leader.
     */
    FOLLOWER(true),

    /**
     * State of a server attempting to become the leader.
     *
     * <p>A follower that does not receive communication from a valid leader for some time period
     * transitions to the candidate state. While in this state, the candidate requests votes from
     * each of the other servers in the cluster; if it wins the election by receiving votes from a
     * majority of the cluster, it transitions to the leader state.
     */
    CANDIDATE(true),

    /**
     * State of a server which is actively coordinating and replicating logs with other servers.
     *
     * <p>Leaders are responsible for handling and replicating writes from clients. More than one
     * leader can exist at any given time, but Raft guarantees that no two leaders will exist for
     * the same term.
     */
    LEADER(true);

    /** Flag handed to the constructor by each constant and reported by {@link #active()}. */
    private final boolean votingMember;

    Role(final boolean votingMember) {
      this.votingMember = votingMember;
    }

    /**
     * Returns whether the role is a voting Raft member role.
     *
     * @return {@code true} for {@link #FOLLOWER}, {@link #CANDIDATE} and {@link #LEADER}, {@code
     *     false} for the other roles
     */
    public boolean active() {
      return votingMember;
    }
  }
}