All downloads are free. Search and download functionality uses the official Maven repository.

org.jsimpledb.kv.raft.Role Maven / Gradle / Ivy

Go to download

JSimpleDB distributed, linearizable, ACID-compliant key/value store implementation based on the Raft consensus algorithm.

There is a newer version: 3.6.1
Show newest version

/*
 * Copyright (C) 2015 Archie L. Cobbs. All rights reserved.
 */

package org.jsimpledb.kv.raft;

import com.google.common.collect.Iterables;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.jsimpledb.kv.KeyRange;
import org.jsimpledb.kv.RetryTransactionException;
import org.jsimpledb.kv.mvcc.Mutations;
import org.jsimpledb.kv.mvcc.Writes;
import org.jsimpledb.kv.raft.msg.AppendRequest;
import org.jsimpledb.kv.raft.msg.AppendResponse;
import org.jsimpledb.kv.raft.msg.CommitRequest;
import org.jsimpledb.kv.raft.msg.CommitResponse;
import org.jsimpledb.kv.raft.msg.GrantVote;
import org.jsimpledb.kv.raft.msg.InstallSnapshot;
import org.jsimpledb.kv.raft.msg.Message;
import org.jsimpledb.kv.raft.msg.PingRequest;
import org.jsimpledb.kv.raft.msg.PingResponse;
import org.jsimpledb.kv.raft.msg.RequestVote;
import org.jsimpledb.util.LongEncoder;
import org.slf4j.Logger;

/**
 * Common superclass for the three roles played by a Raft node:
 * {@linkplain LeaderRole leader}, {@linkplain FollowerRole follower}, and {@linkplain CandidateRole candidate}.
 *
 * <p>
 * Most non-public methods in this class assert that the current thread holds the monitor of the
 * associated {@link RaftKVDatabase}; callers are expected to synchronize on it before invoking them.
 */
public abstract class Role {

    // Logger used by this role; copied from the associated database in the constructor
    final Logger log;

    // The database on whose behalf this role operates
    final RaftKVDatabase raft;

    // Service whose run() delegates to checkReadyTransactions()
    final Service checkReadyTransactionsService = new Service(this, "check ready transactions") {
        @Override
        public void run() {
            Role.this.checkReadyTransactions();
        }
    };

    // Service whose run() delegates to checkWaitingTransactions()
    final Service checkWaitingTransactionsService = new Service(this, "check waiting transactions") {
        @Override
        public void run() {
            Role.this.checkWaitingTransactions();
        }
    };

    // Service whose run() delegates to applyCommittedLogEntries()
    final Service applyCommittedLogEntriesService = new Service(this, "apply committed logs") {
        @Override
        public void run() {
            Role.this.applyCommittedLogEntries();
        }
    };

    // Service whose run() delegates to triggerKeyWatches()
    final Service triggerKeyWatchesService = new Service(this, "trigger key watches") {
        @Override
        public void run() {
            Role.this.triggerKeyWatches();
        }
    };

// Constructors

    /**
     * Create a new role bound to the given database.
     *
     * <p>
     * The caller is expected to hold the monitor of {@code raft}; this is verified by an assertion.
     *
     * @param raft the database this role belongs to
     */
    Role(RaftKVDatabase raft) {
        this.raft = raft;
        this.log = raft.log;
        assert Thread.holdsLock(raft);
    }

// Status

    /**
     * Get the {@link RaftKVDatabase} with which this instance is associated.
     *
     * <p>
     * This is the same instance that was supplied to the constructor; it does not change
     * over the lifetime of this role.
     *
     * @return associated database
     */
    public RaftKVDatabase getKVDatabase() {
        return this.raft;
    }

// Lifecycle

    /**
     * Start up this role.
     *
     * <p>
     * Requests an initial run of the "check ready transactions", "check waiting transactions",
     * and "apply committed logs" services, in that order. The caller must hold the database lock.
     */
    void setup() {
        final RaftKVDatabase db = this.raft;
        assert Thread.holdsLock(db);

        // Request the initial service runs
        db.requestService(this.checkReadyTransactionsService);
        db.requestService(this.checkWaitingTransactionsService);
        db.requestService(this.applyCommittedLogEntriesService);
    }

    /**
     * Shut down this role.
     *
     * <p>
     * Gives every currently open transaction a chance to release role-specific state
     * via {@link #cleanupForTransaction}. The caller must hold the database lock.
     */
    void shutdown() {
        final RaftKVDatabase db = this.raft;
        assert Thread.holdsLock(db);
        for (RaftKVTransaction tx : db.openTransactions.values()) {
            this.cleanupForTransaction(tx);
        }
    }

// Service

    /**
     * Notification hook implemented by each concrete role.
     *
     * <p>
     * NOTE(review): judging by the name, this is presumably invoked when the outgoing message queue
     * for {@code address} has drained; the caller is not visible here, so confirm against {@link RaftKVDatabase}.
     *
     * @param address address associated with the queue (presumably a peer's network address; confirm)
     */
    abstract void outputQueueEmpty(String address);

    /**
     * Check transactions in the {@link TxState#COMMIT_READY} state to see if we can advance them.
     *
     * <p>
     * Runs a {@link CheckReadyTransactionService} for every open transaction. The loop iterates over
     * a copy of the open transaction collection, so changes made to the underlying map while
     * transactions are being processed do not disturb the iteration.
     */
    void checkReadyTransactions() {
        assert Thread.holdsLock(this.raft);

        // The element type must be explicit: a raw ArrayList yields Object elements, which cannot be bound to "tx"
        for (RaftKVTransaction tx : new ArrayList<RaftKVTransaction>(this.raft.openTransactions.values()))
            new CheckReadyTransactionService(this, tx).run();
    }

    /**
     * Check transactions in the {@link TxState#COMMIT_WAITING} state to see if they are committed yet.
     * We invoke this service method whenever our {@code commitIndex} advances.
     *
     * <p>
     * Runs a {@link CheckWaitingTransactionService} for every open transaction. The loop iterates over
     * a copy of the open transaction collection, so changes made to the underlying map while
     * transactions are being processed do not disturb the iteration.
     */
    void checkWaitingTransactions() {
        assert Thread.holdsLock(this.raft);

        // The element type must be explicit: a raw ArrayList yields Object elements, which cannot be bound to "tx"
        for (RaftKVTransaction tx : new ArrayList<RaftKVTransaction>(this.raft.openTransactions.values()))
            new CheckWaitingTransactionService(this, tx).run();
    }

    /**
     * Apply committed but unapplied log entries to the state machine.
     * We invoke this service method whenever log entries are added or our {@code commitIndex} advances.
     */
    void applyCommittedLogEntries() {
        assert Thread.holdsLock(this.raft);

        // Apply committed log entries to the state machine
        while (this.raft.lastAppliedIndex < this.raft.commitIndex) {

            // Grab the first unwritten log entry
            final LogEntry logEntry = this.raft.raftLog.get(0);
            assert logEntry.getIndex() == this.raft.lastAppliedIndex + 1;

            // Check with subclass
            if (!this.mayApplyLogEntry(logEntry))
                break;

            // Get the current config as of the log entry we're about to apply
            final HashMap logEntryConfig = new HashMap<>(this.raft.lastAppliedConfig);
            logEntry.applyConfigChange(logEntryConfig);

            // Prepare combined Mutations containing prefixed log entry changes plus my own
            final Writes logWrites = logEntry.getWrites();
            final Writes myWrites = new Writes();
            myWrites.getPuts().put(RaftKVDatabase.LAST_APPLIED_TERM_KEY, LongEncoder.encode(logEntry.getTerm()));
            myWrites.getPuts().put(RaftKVDatabase.LAST_APPLIED_INDEX_KEY, LongEncoder.encode(logEntry.getIndex()));
            myWrites.getPuts().put(RaftKVDatabase.LAST_APPLIED_CONFIG_KEY, this.raft.encodeConfig(logEntryConfig));
            final byte[] stateMachinePrefix = this.raft.getStateMachinePrefix();
            final Mutations mutations = new Mutations() {

                @Override
                public Iterable getRemoveRanges() {
                    return Iterables.transform(logWrites.getRemoveRanges(), new PrefixKeyRangeFunction(stateMachinePrefix));
                }

                @Override
                public Iterable> getPutPairs() {
                    return Iterables.concat(
                      Iterables.transform(logWrites.getPutPairs(), new PrefixPutFunction(stateMachinePrefix)),
                      myWrites.getPutPairs());
                }

                @Override
                public Iterable> getAdjustPairs() {
                    return Iterables.transform(logWrites.getAdjustPairs(), new PrefixAdjustFunction(stateMachinePrefix));
                }
            };

            // Apply updates to the key/value store (durably)
            if (this.log.isDebugEnabled())
                this.debug("applying committed log entry " + logEntry + " to key/value store");
            try {
                this.raft.kv.mutate(mutations, true);
            } catch (Exception e) {
                if (e instanceof RuntimeException && e.getCause() instanceof IOException)
                    e = (IOException)e.getCause();
                this.error("error applying log entry " + logEntry + " to key/value store", e);
                break;
            }

            // Update in-memory state
            this.raft.lastAppliedTerm = logEntry.getTerm();
            assert logEntry.getIndex() == this.raft.lastAppliedIndex + 1;
            this.raft.lastAppliedIndex = logEntry.getIndex();
            logEntry.applyConfigChange(this.raft.lastAppliedConfig);
            assert this.raft.currentConfig.equals(this.raft.buildCurrentConfig());

            // Delete the log entry
            this.raft.raftLog.remove(0);
            Util.delete(logEntry.getFile(), "applied log file");
        }
    }

    /**
     * Determine whether the given log entry may be applied to the state machine.
     *
     * <p>
     * Consulted by {@link #applyCommittedLogEntries} before each entry is applied; a {@code false}
     * return stops the apply loop. The implementation in {@link Role} always returns true.
     *
     * @param logEntry the committed log entry that is about to be applied
     * @return true if the entry may be applied now, false otherwise
     */
    boolean mayApplyLogEntry(LogEntry logEntry) {
        return true;
    }

    /**
     * Trigger any key watches for changes in log entries committed since the last time we checked.
     *
     * 

* This should be invoked: *

    *
  • After advancing the commitIndex
  • *
  • After resetting the state machine
  • *
  • After installing a snapshot
  • *
*/ void triggerKeyWatches() { // Sanity check assert Thread.holdsLock(this.raft); assert this.raft.commitIndex >= this.raft.lastAppliedIndex; assert this.raft.commitIndex <= this.raft.lastAppliedIndex + this.raft.raftLog.size(); assert this.raft.keyWatchIndex <= this.raft.commitIndex; // If nobody is watching, don't bother if (this.raft.keyWatchTracker == null) return; // If we have recevied a snapshot install, we may not be able to tell which keys have changed since last notification; // in that case, trigger all key watches; otherwise, trigger the keys affected by newly committed log entries if (this.raft.keyWatchIndex < this.raft.lastAppliedIndex) { this.raft.keyWatchTracker.triggerAll(); this.raft.keyWatchIndex = this.raft.commitIndex; } else { while (this.raft.keyWatchIndex < this.raft.commitIndex) this.raft.keyWatchTracker.trigger(this.raft.getLogEntryAtIndex(++this.raft.keyWatchIndex).getWrites()); } } // Transactions /** * Check a transaction that is ready to be committed (in the {@link TxState#COMMIT_READY} state). * *

* This should be invoked: *

    *
  • After changing roles
  • *
  • After a transaction has entered the {@link TxState#COMMIT_READY} state
  • *
  • After the leader is newly known (in {@link FollowerRole})
  • *
  • After the leader's output queue goes from non-empty to empty (in {@link FollowerRole})
  • *
  • After the leader's {@code commitIndex} has advanced, in case a config change transaction * is waiting on a previous config change transaction (in {@link LeaderRole})
  • *
* * @param tx the transaction * @throws KVTransactionException if an error occurs */ void checkReadyTransaction(RaftKVTransaction tx) { // Sanity check assert Thread.holdsLock(this.raft); assert tx.getState().equals(TxState.COMMIT_READY); // Get transaction mutations final Writes writes = tx.getMutableView().getWrites(); final String[] configChange = tx.getConfigChange(); // Determine whether transaction is truly read-only final boolean readOnly = tx.isReadOnly() || (writes.isEmpty() && configChange == null); // Check whether we can commit the transaction immediately if (readOnly && !tx.getConsistency().isWaitsForLogEntryToBeCommitted()) { // i.e., UNCOMMITTED if (this.log.isTraceEnabled()) this.trace("trivial commit for read-only, " + tx.getConsistency() + " " + tx); this.raft.succeed(tx); return; } // Check whether we don't need to bother talking to the leader if (readOnly && !tx.getConsistency().isGuaranteesUpToDateReads()) { // i.e., EVENTUAL, EVENTUAL_COMMITTED this.advanceReadyTransaction(tx, tx.getBaseTerm(), tx.getBaseIndex()); return; } // Requires leader communication - let subclass handle it this.checkReadyLeaderTransaction(tx, readOnly); } /** * Check a transaction that is ready to be committed (in the {@link TxState#COMMIT_READY} state) * and requires communication with the leader. * *

* This will not be invoked unless the transaction is read/write or the consistency level provides up-to-date reads. * * @param tx the transaction * @param readOnly if transaction is read-only * @throws KVTransactionException if an error occurs */ abstract void checkReadyLeaderTransaction(RaftKVTransaction tx, boolean readOnly); /** * Advance a transaction from the {@link TxState#COMMIT_READY} state to the {@link TxState#COMMIT_WAITING} state. * * @param tx the transaction * @param commitTerm term of log entry that must be committed before the transaction may succeed * @param commitIndex index of log entry that must be committed before the transaction may succeed */ void advanceReadyTransaction(RaftKVTransaction tx, long commitTerm, long commitIndex) { // Sanity check assert Thread.holdsLock(this.raft); assert tx.getState().equals(TxState.COMMIT_READY); // Set commit term & index and update state if (this.log.isDebugEnabled()) this.debug("advancing " + tx + " to " + TxState.COMMIT_WAITING + " with commit " + commitIndex + "t" + commitTerm); tx.setCommitTerm(commitTerm); tx.setCommitIndex(commitIndex); tx.setState(TxState.COMMIT_WAITING); // Discard information we no longer need tx.getMutableView().disableReadTracking(); // Check this transaction to see if it can be committed this.raft.requestService(this.checkWaitingTransactionsService); } /** * Check a transaction waiting for its log entry to be committed (in the {@link TxState#COMMIT_WAITING} state). * *

* This should be invoked: *

    *
  • After changing roles
  • *
  • After a transaction has entered the {@link TxState#COMMIT_WAITING} state
  • *
  • After advancing my {@code commitIndex} (as leader or follower)
  • *
  • After receiving an updated {@linkplain AppendResponse#getLeaderLeaseTimeout leader lease timeout} * (in {@link FollowerRole})
  • *
* * @param tx the transaction * @throws KVTransactionException if an error occurs */ void checkWaitingTransaction(RaftKVTransaction tx) { // Sanity check assert Thread.holdsLock(this.raft); // Handle the case the transaction's committed log index has already been applied to the state machine final long commitIndex = tx.getCommitIndex(); if (commitIndex < this.raft.lastAppliedIndex) { // This can happen if we lose contact and by the time we're back the log entry has // already been applied to the state machine on some leader and that leader sent // use an InstallSnapshot message. We don't know whether it actually got committed // or not, so the transaction must be retried. throw new RetryTransactionException(tx, "committed log entry was missed"); } // Has the transaction's log entry been received and committed yet? if (commitIndex > this.raft.commitIndex) return; // Verify the term of the committed log entry; if not what we expect, the log entry was overwritten by a new leader final long commitTerm = tx.getCommitTerm(); if (this.raft.getLogTermAtIndex(commitIndex) != commitTerm) throw new RetryTransactionException(tx, "leader was deposed during commit and transaction's log entry overwritten"); // Check with subclass if (!this.mayCommit(tx)) return; // Transaction is officially committed now if (this.log.isTraceEnabled()) this.trace("commit successful for " + tx + " (commit index " + this.raft.commitIndex + " >= " + commitIndex + ")"); this.raft.succeed(tx); } boolean mayCommit(RaftKVTransaction tx) { return true; } /** * Perform any role-specific transaction cleanups. * *

* Invoked either when transaction is closed or this role is being shutdown. * *

* The implementation in {@link Role} does nothing; subclasses should override if appropriate. * * @param tx the transaction */ void cleanupForTransaction(RaftKVTransaction tx) { assert Thread.holdsLock(this.raft); } // Messages // This is a package access version of "implements MessageSwitch" abstract void caseAppendRequest(AppendRequest msg); abstract void caseAppendResponse(AppendResponse msg); abstract void caseCommitRequest(CommitRequest msg); abstract void caseCommitResponse(CommitResponse msg); abstract void caseGrantVote(GrantVote msg); abstract void caseInstallSnapshot(InstallSnapshot msg); abstract void caseRequestVote(RequestVote msg); void casePingRequest(PingRequest msg) { assert Thread.holdsLock(this.raft); final int responseClusterId = this.raft.clusterId != 0 ? this.raft.clusterId : msg.getClusterId(); this.raft.sendMessage(new PingResponse(responseClusterId, this.raft.identity, msg.getSenderId(), this.raft.currentTerm, msg.getTimestamp())); } void casePingResponse(PingResponse msg) { assert Thread.holdsLock(this.raft); // ignore by default } boolean mayAdvanceCurrentTerm(Message msg) { return true; } void failUnexpectedMessage(Message msg) { this.warn("rec'd unexpected message " + msg + " while in role " + this + "; ignoring"); } // Debug boolean checkState() { return true; } // Logging void trace(String msg, Throwable t) { this.raft.trace(msg, t); } void trace(String msg) { this.raft.trace(msg); } void debug(String msg, Throwable t) { this.raft.debug(msg, t); } void debug(String msg) { this.raft.debug(msg); } void info(String msg, Throwable t) { this.raft.info(msg, t); } void info(String msg) { this.raft.info(msg); } void warn(String msg, Throwable t) { this.raft.warn(msg, t); } void warn(String msg) { this.raft.warn(msg); } void error(String msg, Throwable t) { this.raft.error(msg, t); } void error(String msg) { this.raft.error(msg); } // Object @Override public abstract String toString(); String toStringPrefix() { assert 
Thread.holdsLock(this.raft); return this.getClass().getSimpleName() + "[term=" + this.raft.currentTerm + ",applied=" + this.raft.lastAppliedIndex + "t" + this.raft.lastAppliedTerm + ",commit=" + this.raft.commitIndex + ",log=" + this.raft.raftLog + "]"; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy