com.hazelcast.cp.internal.raft.impl.persistence.RaftStateStore Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hazelcast-jdbc Show documentation
Hazelcast JDBC Driver
The newest version!
/*
 * Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.cp.internal.raft.impl.persistence;

import com.hazelcast.config.cp.RaftAlgorithmConfig;
import com.hazelcast.cp.internal.raft.impl.RaftEndpoint;
import com.hazelcast.cp.internal.raft.impl.log.LogEntry;
import com.hazelcast.cp.internal.raft.impl.log.SnapshotEntry;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;

/**
 * Defines the contract of the Raft storage.
 */
public interface RaftStateStore extends Closeable {

    /**
     * Initializes the store before starting to persist Raft state. This method
     * is called before any other method in this interface. After this method
     * returns, the state store must be ready to accept all other method calls.
     */
    void open() throws IOException;

    /**
     * Persists the given local Raft endpoint and initial Raft group members.
     * When this method returns, all the provided data has become durable.
     */
    void persistInitialMembers(
            @Nonnull RaftEndpoint localMember, @Nonnull Collection initialMembers
    ) throws IOException;

    /**
     * Persists the term and the Raft endpoint that the local node voted for in
     * the given term. When this method returns, all the provided data has
     * become durable.
     */
    void persistTerm(int term, @Nullable RaftEndpoint votedFor) throws IOException;

    /**
     * Persists the given log entry.
     * 
     * Log entries are appended to the Raft log with sequential log indices.
     * The first log index is 1.
     * 

     * A block of consecutive log entries has no gaps in the indices, but a
     * gap can appear between the snapshot entry and its preceding regular
     * entry. This happens in an edge case where a follower has fallen so far
     * behind that the missing entries are no longer available from the leader.
     * In that case the leader will send its snapshot entry instead.
     * 

     * In a rare failure scenario Raft must delete a range of the newest
     * entries, rolling back the index of the next persisted entry. Consider
     * the following case where Raft persists three log entries and then
     * deletes entries from index 2:
     * 

     *      persistEntry(1)
     *     
 persistEntry(2)
     *     
 persistEntry(3)
     *     
 deleteEntriesFrom(2)
     * 
     * After this call sequence log indices will remain sequential
     * and the next persistEntry() call will be for index=2.
     *
     * @see #flushLogs()
     * @see #persistSnapshot(SnapshotEntry)
     * @see #deleteEntriesFrom(long)
     * @see RaftAlgorithmConfig
     */
    void persistEntry(@Nonnull LogEntry entry) throws IOException;

    /**
     * Persists the given snapshot entry.
     * 
     * After a snapshot is persisted at index=i and {@link #flushLogs()}
     * is called, the log entry at index=i and all the preceding
     * entries are no longer needed and can be evicted from storage. Failing to
     * evict stale entries will not cause a consistency problem, but it will
     * increase the time to recover after a restart. Therefore eviction can be
     * done in a background task.
     * 

     * Raft takes snapshots at a predetermined interval, controlled by {@link
     * RaftAlgorithmConfig#getCommitIndexAdvanceCountToSnapshot()
     * commitIndexAdvanceCountToSnapshot}. For instance, if it is 100, snapshots
     * will occur at indices 100, 200, 300, and so on.
     * 

     * The snapshot index can lag behind the index of the newest log entry that
     * was already persisted, but there is an upper bound to this difference,
     * controlled by {@link RaftAlgorithmConfig#getUncommittedEntryCountToRejectNewAppends()
     * uncommittedEntryCountToRejectNewAppends}. For instance, if {@code
     * uncommittedEntryCountToRejectNewAppends} is 10, and a {@code
     * persistSnapshot()} call is made with snapshotIndex=100, the index of the
     * preceding {@code persistEntry()} call can be at most 110.
     * 

     * On the other hand, the snapshot index can also be ahead of the newest log
     * entry. This can happen when a Raft follower has fallen so far behind the
     * leader that the leader no longer holds the missing entries. In that case
     * the follower receives a snapshot from the leader. There is no upper bound
     * on the gap between the newest log entry and the index of the received
     * snapshot.
     * 
     *
     * @see #flushLogs()
     * @see #persistEntry(LogEntry)
     * @see RaftAlgorithmConfig
     */
    void persistSnapshot(@Nonnull SnapshotEntry entry) throws IOException;

    /**
     * Rolls back the log by deleting all entries starting with the given index.
     * A deleted log entry is no longer valid and must not be restored (or at
     * least must be ignored during the restore process).
     * 
     * There is a bound on the number of entries that can be deleted, specified
     * by {@link RaftAlgorithmConfig#getUncommittedEntryCountToRejectNewAppends()
     * uncommittedEntryCountToRejectNewAppends}. Say that it is 5 and the
     * highest persisted log entry index is 20. At most 5 newest entries can be
     * deleted, hence deletion can start at index=16 or higher.
     *
     * @see #flushLogs()
     * @see #persistEntry(LogEntry)
     * @see RaftAlgorithmConfig
     */
    void deleteEntriesFrom(long startIndexInclusive) throws IOException;

    /**
     * Forces all buffered (in any layer) Raft log changes to be written
     * to the storage layer and returns after those changes are written.
     *
     * @see #persistEntry(LogEntry)
     * @see #persistSnapshot(SnapshotEntry)
     * @see #deleteEntriesFrom(long)
     */
    void flushLogs() throws IOException;

}