All downloads are free. Search and download functionality uses the official Maven repository.

io.journalkeeper.core.server.AbstractServer Maven / Gradle / Ivy

There is a newer version: 0.1.11
Show newest version
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.journalkeeper.core.server;

import io.journalkeeper.base.ReplicableIterator;
import io.journalkeeper.base.Serializer;
import io.journalkeeper.core.api.ClusterConfiguration;
import io.journalkeeper.core.api.JournalEntry;
import io.journalkeeper.core.api.JournalEntryParser;
import io.journalkeeper.core.api.RaftServer;
import io.journalkeeper.core.api.StateFactory;
import io.journalkeeper.core.api.StateResult;
import io.journalkeeper.core.entry.internal.InternalEntriesSerializeSupport;
import io.journalkeeper.core.entry.internal.InternalEntryType;
import io.journalkeeper.core.entry.internal.LeaderAnnouncementEntry;
import io.journalkeeper.core.entry.internal.ReservedPartition;
import io.journalkeeper.core.entry.internal.ScalePartitionsEntry;
import io.journalkeeper.core.entry.internal.UpdateVotersS1Entry;
import io.journalkeeper.core.entry.internal.UpdateVotersS2Entry;
import io.journalkeeper.core.exception.JournalException;
import io.journalkeeper.core.exception.RecoverException;
import io.journalkeeper.core.journal.Journal;
import io.journalkeeper.core.journal.JournalSnapshot;
import io.journalkeeper.core.metric.DummyMetric;
import io.journalkeeper.core.state.ConfigState;
import io.journalkeeper.core.state.JournalKeeperState;
import io.journalkeeper.core.state.StateQueryResult;
import io.journalkeeper.core.strategy.DefaultJournalCompactionStrategy;
import io.journalkeeper.core.strategy.JournalCompactionStrategy;
import io.journalkeeper.exceptions.IndexOverflowException;
import io.journalkeeper.exceptions.IndexUnderflowException;
import io.journalkeeper.exceptions.NoSuchSnapshotException;
import io.journalkeeper.metric.JMetric;
import io.journalkeeper.metric.JMetricFactory;
import io.journalkeeper.metric.JMetricSupport;
import io.journalkeeper.persistence.BufferPool;
import io.journalkeeper.persistence.MetadataPersistence;
import io.journalkeeper.persistence.PersistenceFactory;
import io.journalkeeper.persistence.ServerMetadata;
import io.journalkeeper.rpc.client.AddPullWatchResponse;
import io.journalkeeper.rpc.client.ConvertRollRequest;
import io.journalkeeper.rpc.client.ConvertRollResponse;
import io.journalkeeper.rpc.client.GetServersResponse;
import io.journalkeeper.rpc.client.PullEventsRequest;
import io.journalkeeper.rpc.client.PullEventsResponse;
import io.journalkeeper.rpc.client.QueryStateRequest;
import io.journalkeeper.rpc.client.QueryStateResponse;
import io.journalkeeper.rpc.client.RemovePullWatchRequest;
import io.journalkeeper.rpc.client.RemovePullWatchResponse;
import io.journalkeeper.rpc.server.GetServerEntriesRequest;
import io.journalkeeper.rpc.server.GetServerEntriesResponse;
import io.journalkeeper.rpc.server.GetServerStateRequest;
import io.journalkeeper.rpc.server.GetServerStateResponse;
import io.journalkeeper.rpc.server.ServerRpc;
import io.journalkeeper.rpc.server.ServerRpcAccessPoint;
import io.journalkeeper.utils.ThreadSafeFormat;
import io.journalkeeper.utils.event.Event;
import io.journalkeeper.utils.event.EventBus;
import io.journalkeeper.utils.event.EventType;
import io.journalkeeper.utils.event.EventWatcher;
import io.journalkeeper.utils.spi.ServiceLoadException;
import io.journalkeeper.utils.spi.ServiceSupport;
import io.journalkeeper.utils.threads.AsyncLoopThread;
import io.journalkeeper.utils.threads.ThreadBuilder;
import io.journalkeeper.utils.threads.Threads;
import io.journalkeeper.utils.threads.ThreadsFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;

import static io.journalkeeper.core.api.RaftJournal.INTERNAL_PARTITION;
import static io.journalkeeper.core.api.RaftJournal.RESERVED_PARTITIONS_START;
import static io.journalkeeper.core.server.ThreadNames.FLUSH_JOURNAL_THREAD;
import static io.journalkeeper.core.server.ThreadNames.PRINT_METRIC_THREAD;
import static io.journalkeeper.core.server.ThreadNames.STATE_MACHINE_THREAD;
import static io.journalkeeper.core.transaction.JournalTransactionManager.TRANSACTION_PARTITION_COUNT;
import static io.journalkeeper.core.transaction.JournalTransactionManager.TRANSACTION_PARTITION_START;

// TODO: Add a UUID for each server instance,
//  so multiple server instances in one process
//  can be identified by UUID in logs and thread dumps.

/**
 * A Server is a node in the cluster. It holds the journal stored on the server, a set of snapshots (snapshots[]) and a state machine (stateMachine) instance.
 * @author LiYue
 * Date: 2019-03-14
 */
public abstract class AbstractServer
        implements ServerRpc, RaftServer, ServerRpcProvider, MetricProvider {
    private static final Logger logger = LoggerFactory.getLogger(AbstractServer.class);

    // Names of the directories/files that are resolved against the working directory.
    private static final String STATE_PATH = "state";
    private static final String SNAPSHOTS_PATH = "snapshots";
    private static final String METADATA_PATH = "metadata";
    private static final String METADATA_FILE = "metadata";
    private static final String PARTIAL_SNAPSHOT_PATH = "partial_snapshot";
    // Period (and upper bound of the random initial delay), in seconds, of the scheduled journal compaction task.
    private static final int COMPACT_PERIOD_SEC = 60;
    /**
     * Returns the URI of this server.
     *
     * @return the current value of the {@code uri} field
     */
    @Override
    public URI serverUri() {
        return this.uri;
    }

    /**
     * The latest state on this node, together with the index of the highest journal entry
     * that has been applied by the state machine (increasing from 0).
     */
    protected final JournalKeeperState state;

    protected final ScheduledExecutorService scheduledExecutor;

    protected final ExecutorService asyncExecutor;
    /**
     * Randomization range for randomized durations such as the heartbeat interval and the election timeout.
     */
    public final static float RAND_INTERVAL_RANGE = 0.5F;

    /**
     * URI of this server.
     */
    protected URI uri;
    /**
     * The journal (log) storage.
     */
    protected Journal journal;
    /**
     * Sparse array of all state snapshots kept on this node; the key is the journal index
     * the snapshot corresponds to.
     */
    protected final NavigableMap> snapshots = new ConcurrentSkipListMap<>();

    /**
     * URI of the current LEADER node.
     */
    protected URI leaderUri;

    /**
     * Observer nodes.
     */
    protected List observers;
    protected final PartialSnapshot partialSnapshot;
    // Open snapshot iterators keyed by the iterator id handed out in getServerState().
    private final Map snapshotIteratorMap = new ConcurrentHashMap<>();

    /**
     * Availability flag, toggled by enable()/disable().
     */
    private boolean available = false;

    /**
     * Entry point of the persistence implementation.
     */
    protected PersistenceFactory persistenceFactory;
    /**
     * Metadata persistence service.
     */
    protected MetadataPersistence metadataPersistence;


    /**
     * Marks this server as available.
     */
    protected void enable() {
        available = true;
    }

    /**
     * Marks this server as unavailable.
     */
    protected void disable() {
        available = false;
    }

    /**
     * Tells whether this server is currently flagged as available.
     *
     * @return {@code true} after {@code enable()}, {@code false} initially and after {@code disable()}
     */
    protected boolean isAvailable() {
        return this.available;
    }

    // Serializers for entries, entry results, queries and query results, supplied to the constructor.
    protected final Serializer entrySerializer;
    protected final Serializer entryResultSerializer;
    protected final Serializer querySerializer;
    protected final Serializer resultSerializer;

    protected final BufferPool bufferPool;

    protected ServerRpcAccessPoint serverRpcAccessPoint;

    // RPC agents for remote servers, created on demand in getServerRpc() and stopped in stop().
    protected final Map remoteServers = new HashMap<>();
    private Config config;

    // Lifecycle state of this server: CREATED -> STARTING -> RUNNING -> STOPPING -> STOPPED.
    private volatile ServerState serverState = ServerState.CREATED;
    /**
     * The metadata that was saved most recently.
     */
    private ServerMetadata lastSavedServerMetadata = null;

    protected final EventBus eventBus;

    protected final Threads threads = ThreadsFactory.create();

    // Both are null when metrics are disabled in the configuration (see constructor).
    private final JMetricFactory metricFactory;
    private final Map metricMap;
    private final static JMetric DUMMY_METRIC = new DummyMetric();

    private final static String METRIC_APPLY_ENTRIES = "APPLY_ENTRIES";

    // Metric started/ended around every entry applied in applyEntries().
    private final JMetric applyEntriesMetric;

    protected final Properties properties;
    protected final StateFactory stateFactory;
    protected final JournalEntryParser journalEntryParser;
    protected final VoterConfigManager voterConfigManager;
    // Source of ids for the iterators stored in snapshotIteratorMap.
    private final AtomicInteger nextSnapshotIteratorId = new AtomicInteger();
    // Handles of the periodic tasks scheduled in start() and cancelled in stop().
    private ScheduledFuture flushStateFuture;
    private ScheduledFuture compactJournalFuture;

    /**
     * Builds (without starting) the daemon loop thread that drives {@code applyEntries()}.
     *
     * @return the configured state machine thread
     */
    private AsyncLoopThread buildStateMachineThread() {
        final String threadName = STATE_MACHINE_THREAD;
        return ThreadBuilder.builder()
                .name(threadName)
                .doWork(this::applyEntries)
                .sleepTime(50, 100)
                .onException(e -> {
                    logger.warn("{} Exception: ", STATE_MACHINE_THREAD, e);
                })
                .daemon(true)
                .build();
    }


    /**
     * Builds (without starting) the daemon loop thread that drives {@code flushJournal()},
     * using the configured flush interval as both sleep-time arguments.
     *
     * @return the configured journal flushing thread
     */
    private AsyncLoopThread buildFlushJournalThread() {
        final String threadName = FLUSH_JOURNAL_THREAD;
        return ThreadBuilder.builder()
                .name(threadName)
                .doWork(this::flushJournal)
                .sleepTime(config.getFlushIntervalMs(), config.getFlushIntervalMs())
                .onException(e -> {
                    logger.warn("{} Exception: ", FLUSH_JOURNAL_THREAD, e);
                })
                .daemon(true)
                .build();
    }

    /**
     * Builds (without starting) the daemon loop thread that drives {@code printMetrics()}.
     * <p>
     * The configured interval is given in seconds and converted to milliseconds with
     * {@link TimeUnit#toMillis(long)}, so that large intervals cannot overflow the way
     * an {@code int * 1000} multiplication would.
     *
     * @return the configured metric printing thread
     */
    private AsyncLoopThread buildPrintMetricThread() {
        final long intervalMs = TimeUnit.SECONDS.toMillis(config.getPrintMetricIntervalSec());
        return ThreadBuilder.builder()
                .name(PRINT_METRIC_THREAD)
                .doWork(this::printMetrics)
                .sleepTime(intervalMs, intervalMs)
                .onException(e -> logger.warn("{} Exception: ", PRINT_METRIC_THREAD, e))
                .daemon(true)
                .build();
    }

    /**
     * Initializes a brand-new server: validates the user partitions, initializes the state
     * and the first snapshot, and persists the initial server metadata.
     *
     * @param uri             URI of this server
     * @param voters          voters handed to the state and the first snapshot
     * @param userPartitions  user partitions; the internal partition and the transaction
     *                        partitions are added on top of them
     * @param preferredLeader preferred leader handed to the state and the first snapshot
     * @throws IOException declared for failures of the underlying initialization/persistence calls
     */
    @Override
    public synchronized void init(URI uri, List voters, Set userPartitions, URI preferredLeader) throws IOException {
        ReservedPartition.validatePartitions(userPartitions);
        this.uri = uri;

        // User partitions + internal partition + the whole range of transaction partitions.
        Set partitions = new HashSet<>(userPartitions);
        partitions.add(INTERNAL_PARTITION);
        final int transactionPartitionsEnd = TRANSACTION_PARTITION_START + TRANSACTION_PARTITION_COUNT;
        for (int partition = TRANSACTION_PARTITION_START; partition < transactionPartitionsEnd; partition++) {
            partitions.add(partition);
        }

        state.init(statePath(), voters, partitions, preferredLeader);
        createFistSnapshot(voters, partitions, preferredLeader);

        lastSavedServerMetadata = createServerMetadata();
        metadataPersistence.save(metadataFile(), lastSavedServerMetadata);
    }

    // Journal compaction strategy; the constructor loads it through ServiceSupport and
    // falls back to DefaultJournalCompactionStrategy when no service can be loaded.
    private JournalCompactionStrategy journalCompactionStrategy;

    /**
     * Tells whether persisted metadata exists and is flagged as initialized.
     *
     * @return {@code true} only if the metadata file can be loaded and reports itself
     *         initialized; {@code false} if it is missing or loading fails for any reason
     */
    @Override
    public boolean isInitialized() {
        try {
            ServerMetadata loaded = metadataPersistence.load(metadataFile(), ServerMetadata.class);
            if (loaded == null) {
                return false;
            }
            return loaded.isInitialized();
        } catch (Exception ignored) {
            // Any failure to load the metadata is reported as "not initialized".
            return false;
        }
    }

    /**
     * Returns the term of the journal entry at {@code index}.
     * <p>
     * If the index has already been removed from the journal but is the last index covered
     * by the oldest snapshot (i.e. {@code index + 1} equals the first snapshot key), the term
     * recorded in that snapshot is returned instead.
     *
     * @param index journal index
     * @return the term at {@code index}
     * @throws IndexUnderflowException if the journal no longer holds the index and the oldest
     *                                 snapshot does not cover it (including when there is no snapshot at all)
     */
    int getTerm(long index) {
        try {
            return journal.getTerm(index);
        } catch (IndexUnderflowException e) {
            // firstKey() throws NoSuchElementException on an empty map, which would hide the
            // real cause; only consult the snapshots when there is at least one.
            if (!snapshots.isEmpty() && index + 1 == snapshots.firstKey()) {
                return snapshots.firstEntry().getValue().lastIncludedTerm();
            }
            throw e;
        }
    }

    /**
     * Wires up a server instance: parses the configuration, registers (but does not start)
     * the worker threads, loads the pluggable services and creates the journal and state.
     *
     * @param stateFactory          factory handed to every {@code JournalKeeperState} created by this server
     * @param entrySerializer       serializer for entries
     * @param entryResultSerializer serializer for entry results
     * @param querySerializer       serializer for queries
     * @param resultSerializer      serializer for query results
     * @param journalEntryParser    parser handed to the journal and the voter config manager
     * @param scheduledExecutor     executor used for the periodic tasks
     * @param asyncExecutor         executor used for the asynchronous RPC handlers
     * @param serverRpcAccessPoint  access point used to obtain RPC agents of remote servers
     * @param properties            configuration properties, see {@code toConfig}
     */
    protected AbstractServer(StateFactory stateFactory, Serializer entrySerializer,
                             Serializer entryResultSerializer, Serializer querySerializer,
                             Serializer resultSerializer,
                             JournalEntryParser journalEntryParser, ScheduledExecutorService scheduledExecutor,
                             ExecutorService asyncExecutor, ServerRpcAccessPoint serverRpcAccessPoint, Properties properties) {
        this.journalEntryParser = journalEntryParser;
        this.scheduledExecutor = scheduledExecutor;
        this.asyncExecutor = asyncExecutor;
        // config must be set before the thread builders below, which read intervals from it.
        this.config = toConfig(properties);
        // Threads are only registered here; they are started in start().
        this.threads.createThread(buildStateMachineThread());
        this.threads.createThread(buildFlushJournalThread());
        this.entrySerializer = entrySerializer;
        this.querySerializer = querySerializer;
        this.resultSerializer = resultSerializer;
        this.entryResultSerializer = entryResultSerializer;
        this.serverRpcAccessPoint = serverRpcAccessPoint;
        this.properties = properties;
        this.stateFactory = stateFactory;
        this.voterConfigManager = new VoterConfigManager(journalEntryParser);

        // Prefer a pluggable compaction strategy; fall back to the default one when none can be loaded.
        try {
            journalCompactionStrategy = ServiceSupport.load(JournalCompactionStrategy.class);
        } catch (ServiceLoadException ignored) {
            journalCompactionStrategy = new DefaultJournalCompactionStrategy(config.getJournalRetentionMin());
        }
        logger.info("Using JournalCompactionStrategy: {}.", journalCompactionStrategy.getClass().getCanonicalName());
        // Metrics: factory and map exist only when enabled; the print thread only for a positive interval.
        if (config.isEnableMetric()) {
            this.metricFactory = ServiceSupport.load(JMetricFactory.class);
            this.metricMap = new ConcurrentHashMap<>();
            if (config.getPrintMetricIntervalSec() > 0) {
                this.threads.createThread(buildPrintMetricThread());
            }
        } else {
            this.metricFactory = null;
            this.metricMap = null;
        }
        applyEntriesMetric = getMetric(METRIC_APPLY_ENTRIES);


        this.eventBus = new EventBus(config.getRpcTimeoutMs());
        persistenceFactory = ServiceSupport.load(PersistenceFactory.class);
        metadataPersistence = persistenceFactory.createMetadataPersistenceInstance();
        bufferPool = ServiceSupport.load(BufferPool.class);
        journal = new Journal(
                persistenceFactory,
                bufferPool, journalEntryParser);
        this.state = new JournalKeeperState<>(stateFactory, metadataPersistence);

        this.partialSnapshot = new PartialSnapshot(partialSnapshotPath());
        // Internal entry types handled by this class when the state applies such an entry.
        state.addInterceptor(InternalEntryType.TYPE_SCALE_PARTITIONS, this::scalePartitions);
        state.addInterceptor(InternalEntryType.TYPE_LEADER_ANNOUNCEMENT, this::announceLeader);
        state.addInterceptor(InternalEntryType.TYPE_CREATE_SNAPSHOT, this::createSnapShot);
    }

    /**
     * Returns the working directory taken from the configuration.
     *
     * @return the configured working directory
     */
    protected Path workingDir() {
        final Path configuredDir = config.getWorkingDir();
        return configuredDir;
    }

    /**
     * Returns the directory under the working directory that holds the snapshots.
     *
     * @return {@code workingDir()/snapshots}
     */
    protected Path snapshotsPath() {
        final Path base = workingDir();
        return base.resolve(SNAPSHOTS_PATH);
    }

    /**
     * Initializes the first snapshot in the snapshot directory named {@code 0}.
     *
     * @param voters          voters handed to the snapshot state
     * @param partitions      partitions handed to the snapshot state
     * @param preferredLeader preferred leader handed to the snapshot state
     * @throws IOException declared for failures of the snapshot initialization
     */
    private void createFistSnapshot(List voters, Set partitions, URI preferredLeader) throws IOException {
        JournalKeeperState firstSnapshot = new JournalKeeperState<>(stateFactory, metadataPersistence);
        final Path firstSnapshotPath = snapshotsPath().resolve(String.valueOf(0L));
        firstSnapshot.init(firstSnapshotPath, voters, partitions, preferredLeader);
    }

    /**
     * Returns the directory under the working directory that holds the current state.
     *
     * @return {@code workingDir()/state}
     */
    protected Path statePath() {
        final Path base = workingDir();
        return base.resolve(STATE_PATH);
    }
    /**
     * Returns the path of the metadata file under the working directory.
     *
     * @return {@code workingDir()/metadata/metadata}
     */
    protected Path metadataFile() {
        final Path metadataDir = workingDir().resolve(METADATA_PATH);
        return metadataDir.resolve(METADATA_FILE);
    }

    /**
     * Work item of the state machine thread: while the server is RUNNING and the state's
     * lastApplied is behind the journal's commitIndex, repeatedly
     *
     * 1. reads the journal entry at lastApplied and applies it to the state;
     * 2. passes the user result to {@code afterStateChanged};
     * 3. fires an ON_STATE_CHANGE event carrying the event data of the result plus
     *    the new "lastApplied" value.
     *
     * Each applied entry is measured by the APPLY_ENTRIES metric.
     */
    private void applyEntries()  {
        while (ServerState.RUNNING == this.serverState && state.lastApplied() < journal.commitIndex()) {
            applyEntriesMetric.start();

            JournalEntry entryToApply = journal.read(state.lastApplied());
            StateResult stateResult = state.applyEntry(entryToApply, entrySerializer, journal);
            afterStateChanged(stateResult.getUserResult());

            // One extra slot for the "lastApplied" parameter added below.
            Map parameters = new HashMap<>(stateResult.getEventData().size() + 1);
            parameters.putAll(stateResult.getEventData());
            parameters.put("lastApplied", String.valueOf(state.lastApplied()));
            fireEvent(EventType.ON_STATE_CHANGE, parameters);

            applyEntriesMetric.end(entryToApply.getLength());
        }
    }

    /**
     * Fires an ON_LEADER_CHANGE event carrying the current leader URI and the given term.
     *
     * @param term term to report in the event
     */
    private void fireOnLeaderChangeEvent(int term) {
        final String leader = String.valueOf(this.leaderUri);
        final String termText = String.valueOf(term);

        Map eventData = new HashMap<>();
        eventData.put("leader", leader);
        eventData.put("term", termText);
        fireEvent(EventType.ON_LEADER_CHANGE, eventData);
    }

    /**
     * Interceptor for leader announcement entries: parses the entry and fires a
     * leader change event with the announced term.
     *
     * @param type          type of the internal entry (not inspected here)
     * @param internalEntry serialized leader announcement entry
     */
    private void announceLeader(InternalEntryType type, byte [] internalEntry) {
        LeaderAnnouncementEntry announcement = InternalEntriesSerializeSupport.parse(internalEntry);
        final int announcedTerm = announcement.getTerm();
        fireOnLeaderChangeEvent(announcedTerm);
    }

    /**
     * Interceptor for scale-partitions entries: makes the journal's non-reserved partitions
     * match the partitions listed in the entry. Partitions at or above
     * {@code RESERVED_PARTITIONS_START} are never removed here.
     *
     * @param type          type of the internal entry (not inspected here)
     * @param internalEntry serialized scale-partitions entry
     * @throws JournalException wrapping any {@link IOException} raised while changing partitions
     */
    private void scalePartitions(InternalEntryType type, byte [] internalEntry) {
        ScalePartitionsEntry scalePartitionsEntry = InternalEntriesSerializeSupport.parse(internalEntry);
        Set<Integer> partitions = scalePartitionsEntry.getPartitions();
        try {
            // Filter on a private copy so that the set returned by the journal is never mutated,
            // whether or not it is a live view of the journal's own partitions.
            Set<Integer> currentPartitions = new HashSet<>(journal.getPartitions());
            currentPartitions.removeIf(p -> p >= RESERVED_PARTITIONS_START);

            // Add the partitions that are requested but not present yet.
            for (int partition : partitions) {
                if (!currentPartitions.contains(partition)) {
                    journal.addPartition(partition);
                }
            }

            // Remove the (non-reserved) partitions that are no longer requested.
            for (int partition : currentPartitions) {
                if (!partitions.contains(partition)) {
                    journal.removePartition(partition);
                }
            }

            logger.info("Journal repartitioned, partitions: {}, path: {}.",
                    journal.getPartitions(), journalPath().toAbsolutePath().toString());
        } catch (IOException e) {
            throw new JournalException(e);
        }
    }

    /**
     * Publishes an event on the event bus.
     *
     * @param eventType type of the event
     * @param eventData data attached to the event
     */
    protected void fireEvent(int eventType, Map eventData) {
        final Event event = new Event(eventType, eventData);
        eventBus.fireEvent(event);
    }

    /**
     * Hook invoked after an entry has been applied and the state has changed.
     * Does nothing by default.
     *
     * @param updateResult result produced by the state machine for the applied entry
     */
    protected void afterStateChanged(ER updateResult) {
        // No-op by default.
    }
    /**
     * Runs a query against the current state on {@code asyncExecutor}.
     *
     * @param request request carrying the serialized query
     * @return a future of the response holding the serialized result and the lastApplied
     *         value it was computed at; any failure is wrapped into the response
     *         instead of failing the future
     */
    @Override
    public CompletableFuture queryServerState(QueryStateRequest request) {
        return CompletableFuture.supplyAsync(() -> {
            try {
                StateQueryResult stateQueryResult = state.query(querySerializer.parse(request.getQuery()), journal);
                return new QueryStateResponse(
                        resultSerializer.serialize(stateQueryResult.getResult()),
                        stateQueryResult.getLastApplied());
            } catch (Throwable t) {
                return new QueryStateResponse(t);
            }
        }, asyncExecutor);
    }

    /**
     * Queries the state as of a given journal index.
     * <p>
     * If a snapshot exists at the requested index, the state is read directly from it.
     * Otherwise the nearest snapshot at or below the index is taken as input and the journal
     * entries from that snapshot up to the requested index are applied to a copy of it;
     * the resulting snapshot corresponds to the requested index and is queried.
     * <p>
     * Steps:
     * <ol>
     * <li>If the index is beyond lastApplied, fail with {@code IndexOverflowException};
     *     if no snapshot at or below the index exists, fail with {@code IndexUnderflowException}.</li>
     * <li>If the index equals lastApplied, try to answer from the live state.</li>
     * <li>If snapshots[index] exists, answer from it.</li>
     * <li>Otherwise dump the nearest lower snapshot to a new directory named after the index,
     *     recover a state from it, apply journal entries until it reaches the index,
     *     flush it and register it in {@code snapshots}.</li>
     * </ol>
     * The work runs on {@code asyncExecutor}; any failure is wrapped into the response.
     *
     * @param request request carrying the serialized query and the target index
     * @return a future of the query response
     */
    @Override
    public CompletableFuture querySnapshot(QueryStateRequest request) {
        return CompletableFuture.supplyAsync(() -> {

            try {
                if (request.getIndex() > state.lastApplied()) {
                    throw new IndexOverflowException();
                }

                if (request.getIndex() == state.lastApplied()) {
                    StateQueryResult queryResult = state.query(querySerializer.parse(request.getQuery()), journal);
                    // The state may have advanced in the meantime; only use the live result if it still matches.
                    if(queryResult.getLastApplied() == request.getIndex()) {
                        return new QueryStateResponse(resultSerializer.serialize(queryResult.getResult()), queryResult.getLastApplied());
                    }
                }

                JournalKeeperState snapshot;
                Map.Entry> nearestSnapshot = snapshots.floorEntry(request.getIndex());
                if (null == nearestSnapshot) {
                    throw new IndexUnderflowException();
                }

                if(request.getIndex() == nearestSnapshot.getKey()) {
                    snapshot = nearestSnapshot.getValue();
                } else {
                    snapshot = new JournalKeeperState<>(stateFactory, metadataPersistence);
                    Path tempSnapshotPath = snapshotsPath().resolve(String.valueOf(request.getIndex()));
                    // An existing directory is taken as a sign that another request is building this snapshot.
                    // NOTE(review): nothing here removes tempSnapshotPath if dump/recover/replay throws,
                    // so a failed attempt would presumably keep later requests for this index failing — confirm.
                    if (Files.exists(tempSnapshotPath)) {
                        throw new ConcurrentModificationException(String.format("A snapshot of position %d is creating, please retry later.", request.getIndex()));
                    }
                    nearestSnapshot.getValue().dump(tempSnapshotPath);
                    snapshot.recover(tempSnapshotPath, properties);

                    // Replay journal entries on the copy until it reaches the requested index.
                    while (snapshot.lastApplied() < request.getIndex()) {
                        snapshot.applyEntry(journal.read(snapshot.lastApplied()), entrySerializer, journal);
                    }
                    snapshot.flush();

                    snapshots.putIfAbsent(request.getIndex(), snapshot);
                }
                return new QueryStateResponse(resultSerializer.serialize(snapshot.query(querySerializer.parse(request.getQuery()), journal).getResult()));
            } catch (Throwable throwable) {
                return new QueryStateResponse(throwable);
            }
        }, asyncExecutor);
    }

    /**
     * Interceptor for create-snapshot entries: creates a snapshot of the current state.
     * Entries of any other type are ignored.
     *
     * @param type          type of the internal entry
     * @param internalEntry serialized internal entry (not inspected here)
     */
    private void createSnapShot(InternalEntryType type, byte [] internalEntry) {
        if (type != InternalEntryType.TYPE_CREATE_SNAPSHOT) {
            return;
        }
        createSnapshot();
    }

    /**
     * Builds a {@code Config} from the given properties; every key that is absent falls back
     * to the corresponding default.
     *
     * @param properties source properties
     * @return the populated configuration
     * @throws NumberFormatException if a numeric property cannot be parsed
     */
    private Config toConfig(Properties properties) {
        final Config cfg = new Config();

        final String snapshotIntervalSec = properties.getProperty(
                Config.SNAPSHOT_INTERVAL_SEC_KEY, String.valueOf(Config.DEFAULT_SNAPSHOT_INTERVAL_SEC));
        cfg.setSnapshotIntervalSec(Integer.parseInt(snapshotIntervalSec));

        final String journalRetentionMin = properties.getProperty(
                Config.JOURNAL_RETENTION_MIN_KEY, String.valueOf(Config.DEFAULT_JOURNAL_RETENTION_MIN));
        cfg.setJournalRetentionMin(Integer.parseInt(journalRetentionMin));

        final String rpcTimeoutMs = properties.getProperty(
                Config.RPC_TIMEOUT_MS_KEY, String.valueOf(Config.DEFAULT_RPC_TIMEOUT_MS));
        cfg.setRpcTimeoutMs(Long.parseLong(rpcTimeoutMs));

        final String flushIntervalMs = properties.getProperty(
                Config.FLUSH_INTERVAL_MS_KEY, String.valueOf(Config.DEFAULT_FLUSH_INTERVAL_MS));
        cfg.setFlushIntervalMs(Long.parseLong(flushIntervalMs));

        // The default working directory is whatever a fresh Config reports, normalized.
        final String defaultWorkingDir = cfg.getWorkingDir().normalize().toString();
        cfg.setWorkingDir(Paths.get(properties.getProperty(Config.WORKING_DIR_KEY, defaultWorkingDir)));

        final String getStateBatchSize = properties.getProperty(
                Config.GET_STATE_BATCH_SIZE_KEY, String.valueOf(Config.DEFAULT_GET_STATE_BATCH_SIZE));
        cfg.setGetStateBatchSize(Integer.parseInt(getStateBatchSize));

        final String enableMetric = properties.getProperty(
                Config.ENABLE_METRIC_KEY, String.valueOf(Config.DEFAULT_ENABLE_METRIC));
        cfg.setEnableMetric(Boolean.parseBoolean(enableMetric));

        final String printMetricIntervalSec = properties.getProperty(
                Config.PRINT_METRIC_INTERVAL_SEC_KEY, String.valueOf(Config.DEFAULT_PRINT_METRIC_INTERVAL_SEC));
        cfg.setPrintMetricIntervalSec(Integer.parseInt(printMetricIntervalSec));

        return cfg;
    }


    /**
     * Registers a watcher on the event bus.
     *
     * @param eventWatcher watcher to register
     */
    @Override
    public void watch(EventWatcher eventWatcher) {
        eventBus.watch(eventWatcher);
    }

    /**
     * Unregisters a watcher from the event bus.
     *
     * @param eventWatcher watcher to unregister
     */
    @Override
    public void unWatch(EventWatcher eventWatcher) {
        eventBus.unWatch(eventWatcher);
    }

    /**
     * Registers a pull watch on the event bus, on {@code asyncExecutor}.
     *
     * @return a future of the response carrying the new pull watch id and the pull interval
     */
    @Override
    public CompletableFuture addPullWatch() {
        return CompletableFuture.supplyAsync(() -> {
            return new AddPullWatchResponse(eventBus.addPullWatch(), eventBus.pullIntervalMs());
        }, asyncExecutor);
    }

    /**
     * Removes a pull watch from the event bus, on {@code asyncExecutor}.
     *
     * @param request request carrying the id of the pull watch to remove
     * @return a future of an empty response, completed once the watch has been removed
     */
    @Override
    public CompletableFuture removePullWatch(RemovePullWatchRequest request) {
        final CompletableFuture<Void> removal = CompletableFuture.runAsync(() -> {
            eventBus.removePullWatch(request.getPullWatchId());
        }, asyncExecutor);
        return removal.thenApply(ignored -> new RemovePullWatchResponse());
    }

    /**
     * Acknowledges previously pulled events (when the request carries a non-negative ack
     * sequence) and then pulls the pending events of the watch, on {@code asyncExecutor}.
     *
     * @param request request carrying the pull watch id and the ack sequence
     * @return a future of the response carrying the pulled events
     */
    @Override
    public CompletableFuture pullEvents(PullEventsRequest request) {
        return CompletableFuture.supplyAsync(() -> {
            final boolean hasAck = request.getAckSequence() >= 0;
            if (hasAck) {
                eventBus.ackPullEvents(request.getPullWatchId(), request.getAckSequence());
            }
            return new PullEventsResponse(eventBus.pullEvents(request.getPullWatchId()));
        }, asyncExecutor);
    }

    /**
     * Reports the cluster configuration as seen by this server, on {@code asyncExecutor}.
     *
     * @return a future of the response built from the current leader URI, the voters of the
     *         state and the observers
     */
    @Override
    public CompletableFuture getServers() {
        return CompletableFuture.supplyAsync(() -> {
            ClusterConfiguration clusterConfiguration =
                    new ClusterConfiguration(leaderUri, state.voters(), observers);
            return new GetServersResponse(clusterConfiguration);
        }, asyncExecutor);
    }

    /**
     * Returns the directory under the working directory used for the partial snapshot.
     *
     * @return {@code workingDir()/partial_snapshot}
     */
    protected Path partialSnapshotPath() {
        final Path base = workingDir();
        return base.resolve(PARTIAL_SNAPSHOT_PATH);
    }

    /**
     * Creates a snapshot of the current state: dumps the state into a directory named after
     * the current lastApplied, recovers a separate state object from that dump and registers
     * it in {@code snapshots}. An {@link IOException} is logged and otherwise ignored.
     */
    private void createSnapshot() {
        final long appliedIndex = state.lastApplied();
        logger.info("Creating snapshot at index: {}...", appliedIndex);
        final Path targetPath = snapshotsPath().resolve(String.valueOf(appliedIndex));
        try {
            state.dump(targetPath);

            JournalKeeperState created = new JournalKeeperState<>(stateFactory, metadataPersistence);
            created.recover(targetPath, properties);
            created.createSnapshot(journal);

            // Keyed by the snapshot's own lastApplied, not by the value read at the top.
            snapshots.put(created.lastApplied(), created);
            logger.info("Snapshot at index: {} created, {}.", appliedIndex, created);
        } catch (IOException e) {
            logger.warn("Create snapshot exception! Snapshot: {}.", targetPath, e);
        }
    }

    /**
     * Serves one trunk of a snapshot to a remote server, on {@code asyncExecutor}.
     * <p>
     * A request with a non-negative iterator id continues an existing iteration. Otherwise a
     * new iterator is opened on the snapshot at {@code lastIncludedIndex + 1}, registered
     * under a fresh id and scheduled for removal from the registry after one minute.
     *
     * @param request request carrying either an iterator id or the last included index
     * @return a future of the response with the next trunk; failures (including an unknown
     *         snapshot or iterator) are logged and wrapped into the response
     */
    @Override
    public CompletableFuture getServerState(GetServerStateRequest request) {
        return CompletableFuture.supplyAsync(() -> {
            try {
                int iteratorId;
                if (request.getIteratorId() < 0) {
                    long snapshotIndex = request.getLastIncludedIndex() + 1;
                    JournalKeeperState snapshot = snapshots.get(snapshotIndex);
                    if (null == snapshot) {
                        throw new NoSuchSnapshotException();
                    }
                    ReplicableIterator newIterator = snapshot.iterator();
                    iteratorId = nextSnapshotIteratorId.getAndIncrement();
                    snapshotIteratorMap.put(iteratorId, newIterator);
                    scheduledExecutor.schedule(() -> snapshotIteratorMap.remove(iteratorId), 1, TimeUnit.MINUTES);
                } else {
                    iteratorId = request.getIteratorId();
                }

                ReplicableIterator iterator = snapshotIteratorMap.get(iteratorId);
                if (null == iterator) {
                    throw new NoSuchSnapshotException();
                }
                return new GetServerStateResponse(
                        iterator.lastIncludedIndex(), iterator.lastIncludedTerm(),
                        iterator.offset(), iterator.nextTrunk(), iterator.hasMoreTrunks(), iteratorId
                );
            } catch (Throwable t) {
                logger.warn("GetServerState exception!", t);
                return new GetServerStateResponse(t);
            }
        }, asyncExecutor).exceptionally(GetServerStateResponse::new);
    }

    /**
     * Starts the server: runs the subclass hook, starts the worker threads and schedules the
     * periodic state flush and journal compaction, each with a random initial delay.
     *
     * @throws IllegalStateException if the server is not in the CREATED state
     */
    @Override
    public final void start() {
        if (ServerState.CREATED != this.serverState) {
            throw new IllegalStateException("AbstractServer can only start once!");
        }
        this.serverState = ServerState.STARTING;
        doStart();

        threads.startThread(STATE_MACHINE_THREAD);
        threads.startThread(FLUSH_JOURNAL_THREAD);
        // The metric thread only exists when metrics are enabled with a positive print interval.
        if (threads.exists(PRINT_METRIC_THREAD)) {
            threads.startThread(PRINT_METRIC_THREAD);
        }

        final long flushInitialDelayMs = ThreadLocalRandom.current().nextLong(10L, 50L);
        flushStateFuture = scheduledExecutor.scheduleAtFixedRate(
                this::flushState, flushInitialDelayMs, config.getFlushIntervalMs(), TimeUnit.MILLISECONDS);

        final long compactInitialDelaySec = ThreadLocalRandom.current().nextLong(0, COMPACT_PERIOD_SEC);
        compactJournalFuture = scheduledExecutor.scheduleAtFixedRate(
                this::compactJournalPeriodically, compactInitialDelaySec, COMPACT_PERIOD_SEC, TimeUnit.SECONDS);

        this.serverState = ServerState.RUNNING;
    }

    /**
     * Subclass hook called by {@code start()} after the state has moved to STARTING and
     * before the worker threads are started.
     */
    protected abstract void doStart();
    /**
     * Flushes everything to storage, in this order:
     * 1. the journal;
     * 2. the state;
     * 3. the metadata (as part of {@code flushState()}).
     */
    private void flushAll() {
        this.journal.flush();
        this.flushState();
    }

    /**
     * Work item of the flush thread: flushes the journal and then notifies
     * {@code onJournalFlushed()}.
     */
    private void flushJournal() {
        journal.flush();
        this.onJournalFlushed();
    }

    /**
     * Hook called by {@code flushJournal()} after every journal flush. Does nothing by default.
     */
    protected void onJournalFlushed() {
        // No-op by default.
    }

    /**
     * Flushes the state (when it is {@link Flushable}) and saves the server metadata if it
     * differs from the metadata saved last. Any failure is logged and otherwise ignored.
     */
    private void flushState() {
        try {
            if (state instanceof Flushable) {
                Flushable flushableState = (Flushable) state;
                flushableState.flush();
            }

            ServerMetadata current = createServerMetadata();
            if (current.equals(lastSavedServerMetadata)) {
                // Nothing changed since the last save.
                return;
            }
            metadataPersistence.save(metadataFile(), current);
            lastSavedServerMetadata = current;
        } catch (Throwable e) {
            logger.warn("Flush exception, commitIndex: {}, lastApplied: {}, server: {}: ",
                    journal.commitIndex(), state.lastApplied(), uri, e);
        }
    }

    /**
     * Returns the RPC agent for a remote server, creating it through the access point and
     * remembering it in {@code remoteServers} on first use.
     *
     * @param uri URI of the remote server
     * @return an already completed future holding the RPC agent
     */
    public CompletableFuture getServerRpc(URI uri) {
        return CompletableFuture.completedFuture(
                remoteServers.computeIfAbsent(
                        uri,
                        remoteUri -> serverRpcAccessPoint.getServerRpcAgent(remoteUri)));
    }

    /**
     * Stops the server: runs the subclass hook, stops the remote RPC agents, the worker
     * threads and the scheduled tasks, then flushes and closes the journal.
     * <p>
     * Failures are logged and not propagated; in that case the state is left at STOPPING.
     * If the stopping thread is interrupted, its interrupt flag is restored.
     */
    @Override
    public final void stop() {
        try {
            this.serverState = ServerState.STOPPING;
            doStop();
            remoteServers.values().forEach(ServerRpc::stop);
            waitJournalApplied();
            threads.stopThread(STATE_MACHINE_THREAD);
            threads.stopThread(FLUSH_JOURNAL_THREAD);
            if(threads.exists(PRINT_METRIC_THREAD)) {
                threads.stopThread(PRINT_METRIC_THREAD);
            }

            stopAndWaitScheduledFeature(compactJournalFuture, 1000L);
            stopAndWaitScheduledFeature(flushStateFuture, 1000L);
            // NOTE(review): the persistence factory is closed before the final flush and
            // journal.close() below — confirm this order is intended.
            if(persistenceFactory instanceof Closeable) {
                ((Closeable) persistenceFactory).close();
            }
            flushAll();
            journal.close();
            this.serverState = ServerState.STOPPED;
        } catch (Throwable t) {
            if (t instanceof InterruptedException) {
                // Do not swallow the interruption: let the caller observe it.
                Thread.currentThread().interrupt();
            }
            // The logger already records the stack trace; no printStackTrace() to stderr.
            logger.warn("Exception: ", t);
        }
    }

    /**
     * Sleeps in 50 ms steps for as long as the journal's commitIndex is lower than the
     * state's lastApplied.
     * <p>
     * NOTE(review): {@code applyEntries()} advances lastApplied towards commitIndex, so this
     * condition looks inverted with respect to the method name. It cannot simply be flipped,
     * though: {@code stop()} sets the state to STOPPING before calling this method and
     * {@code applyEntries()} only works while RUNNING, so the wait could then never end.
     *
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    private void waitJournalApplied() throws InterruptedException {
        while (true) {
            if (journal.commitIndex() >= state.lastApplied()) {
                return;
            }
            Thread.sleep(50L);
        }
    }

    /**
     * Subclass hook called by {@code stop()} right after the state has moved to STOPPING and
     * before anything else is shut down.
     */
    protected abstract void doStop();
    /**
     * Cancels the given scheduled job (interrupting it if it is running) and polls every
     * 50 ms until the future reports that it is done.
     * <p>
     * An {@link InterruptedException} raised while sleeping is only logged; the loop keeps going.
     *
     * @param scheduledFuture the job to cancel; {@code null} is ignored
     * @param timeout maximum time to keep polling, in milliseconds
     * @throws TimeoutException if the future is still not done after {@code timeout} milliseconds
     */
    protected void stopAndWaitScheduledFeature(ScheduledFuture scheduledFuture, long timeout) throws TimeoutException {
        if (scheduledFuture == null) {
            return;
        }

        final long startedAt = System.currentTimeMillis();
        while (!scheduledFuture.isDone()) {
            long elapsed = System.currentTimeMillis() - startedAt;
            if (elapsed > timeout) {
                throw new TimeoutException("Wait for async job timeout!");
            }
            scheduledFuture.cancel(true);
            try {
                Thread.sleep(50);
            } catch (InterruptedException e) {
                logger.warn("Exception: ", e);
            }
        }
    }

    /**
     * Recovers the server from persistent storage, in this order:
     * <ol>
     *     <li>metadata</li>
     *     <li>state and snapshots</li>
     *     <li>journal</li>
     * </ol>
     * NOTE(review): the journal is recovered from the first entry of {@code snapshots};
     * this looks like it fails with a {@link NullPointerException} when no snapshot
     * could be recovered — confirm that at least one snapshot always exists.
     *
     * @throws RecoverException if the metadata is missing or not marked as initialized
     * @throws IOException if reading from storage fails
     */
    @Override
    public synchronized void recover() throws IOException {
        lastSavedServerMetadata = metadataPersistence.load(metadataFile(), ServerMetadata.class);

        boolean metadataUsable = lastSavedServerMetadata != null && lastSavedServerMetadata.isInitialized();
        if (!metadataUsable) {
            String reason = String.format(
                    "Recover failed! Cause: metadata is not initialized. Metadata path: %s.",
                    metadataFile().toString());
            throw new RecoverException(reason);
        }

        onMetadataRecovered(lastSavedServerMetadata);
        state.recover(statePath(), properties);
        recoverSnapshots();
        recoverJournal(
                state.getPartitions(),
                snapshots.firstEntry().getValue().getJournalSnapshot(),
                lastSavedServerMetadata.getCommitIndex());
        onJournalRecovered(journal);
    }

    /**
     * Callback invoked by {@link #recover()} once the journal has been recovered.
     * This implementation restores the voter configuration from the journal.
     *
     * @param journal the recovered journal (not used by this implementation)
     */
    protected void onJournalRecovered(Journal journal) {
        this.recoverVoterConfig();
    }



    /**
     * Scans the internal partition from the newest entry down to the oldest one and applies
     * the first UpdateVoters entry (stage 1 or stage 2) it finds to the current config state.
     * If no such entry exists the config state is left as it is.
     */
    private void recoverVoterConfig() {
        boolean recovered = false;
        long cursor = journal.maxIndex(INTERNAL_PARTITION) - 1;

        while (!recovered && cursor >= journal.minIndex(INTERNAL_PARTITION)) {
            JournalEntry candidate = journal.readByPartition(INTERNAL_PARTITION, cursor);
            InternalEntryType entryType =
                    InternalEntriesSerializeSupport.parseEntryType(candidate.getPayload().getBytes());

            if (entryType == InternalEntryType.TYPE_UPDATE_VOTERS_S1) {
                // Stage 1: joint configuration made of the old and the new voters.
                UpdateVotersS1Entry stageOne = InternalEntriesSerializeSupport.parse(candidate.getPayload().getBytes());
                state.setConfigState(new ConfigState(stageOne.getConfigOld(), stageOne.getConfigNew()));
                recovered = true;
            } else if (entryType == InternalEntryType.TYPE_UPDATE_VOTERS_S2) {
                // Stage 2: only the new voters remain.
                UpdateVotersS2Entry stageTwo = InternalEntriesSerializeSupport.parse(candidate.getPayload().getBytes());
                state.setConfigState(new ConfigState(stageTwo.getConfigNew()));
                recovered = true;
            }
            cursor--;
        }

        logger.info(recovered
                ? "Voters config is recovered from journal."
                : "No voters config entry found in journal, Using config in the metadata.");
        logger.info(state.getConfigState().toString());
    }


    /**
     * Recovers the journal from {@link #journalPath()} and then re-partitions it.
     *
     * @param partitions partitions handed to {@code journal.rePartition}
     * @param journalSnapshot journal snapshot handed to {@code journal.recover}
     * @param commitIndex commit index handed to {@code journal.recover}
     * @throws IOException if the journal cannot be recovered
     */
    private void recoverJournal(Set partitions, JournalSnapshot journalSnapshot, long commitIndex) throws IOException {
        Path journalDir = journalPath();
        journal.recover(journalDir, commitIndex, journalSnapshot, properties);
        journal.rePartition(partitions);
    }

    /**
     * @return the directory the journal is recovered from; this is the working directory itself
     */
    private Path journalPath() {
        return this.workingDir();
    }
    /**
     * Loads every snapshot found under {@code snapshotsPath()} into {@code snapshots}.
     * <p>
     * The snapshots directory is created when it does not exist yet. Only entries whose
     * name consists of digits are considered; a candidate is registered only when that number
     * equals the {@code lastApplied} of the recovered snapshot, otherwise it is closed and skipped.
     * <p>
     * The directory stream is opened in a try-with-resources block so the underlying
     * directory handle is released even when recovering a snapshot fails.
     *
     * @throws IOException if the directory cannot be created or listed
     */
    private void recoverSnapshots() throws IOException {
        if (!Files.isDirectory(snapshotsPath())) {
            Files.createDirectories(snapshotsPath());
        }
        try (java.nio.file.DirectoryStream<Path> candidates = Files.newDirectoryStream(
                snapshotsPath(),
                entry -> entry.getFileName().toString().matches("\\d+"))) {
            for (Path path : candidates) {
                JournalKeeperState snapshot = new JournalKeeperState<>(stateFactory, metadataPersistence);
                snapshot.recover(path, properties);
                if (Long.parseLong(path.getFileName().toString()) == snapshot.lastApplied()) {
                    snapshots.put(snapshot.lastApplied(), snapshot);
                } else {
                    // Directory name and snapshot content disagree: ignore this snapshot.
                    snapshot.close();
                }
            }
        }
    }

    /**
     * Callback invoked by {@link #recover()} after the server metadata has been loaded and
     * validated. Only the URI of this server is taken from the metadata here.
     *
     * @param metadata the metadata loaded from persistent storage
     */
    protected void onMetadataRecovered(ServerMetadata metadata) {
        this.uri = metadata.getThisServer();
    }



    /**
     * Builds the metadata describing the current server: marked as initialized and carrying
     * this server's URI and the journal's current commit index.
     *
     * @return a newly created metadata object
     */
    protected ServerMetadata createServerMetadata() {
        final ServerMetadata current = new ServerMetadata();
        current.setInitialized(true);
        current.setThisServer(this.uri);
        current.setCommitIndex(this.journal.commitIndex());
        return current;
    }

    /**
     * Adds a random jitter to the given interval. The jitter is the interval multiplied by a
     * random ratio drawn from {@code [-RAND_INTERVAL_RANGE, RAND_INTERVAL_RANGE)}, rounded to
     * the nearest long.
     *
     * @param interval the base interval
     * @return the base interval plus the random jitter
     */
    protected long randomInterval(long interval) {
        double jitterRatio = ThreadLocalRandom.current()
                .nextDouble(-1 * RAND_INTERVAL_RANGE, RAND_INTERVAL_RANGE);
        long jitter = Math.round(jitterRatio * interval);
        return interval + jitter;
    }

    /**
     * Asynchronously reads raw journal entries starting at the requested index.
     * <p>
     * The number of entries is limited to the smaller of the requested maximum size and the
     * distance between the requested index and {@code state.lastApplied()}. When reading
     * fails, the response carries the failure instead (unwrapped from a
     * {@link CompletionException} when it was wrapped in one).
     *
     * @param request the index to start from and the maximum number of entries
     * @return a future for the response, computed on {@code asyncExecutor}
     */
    @Override
    public CompletableFuture getServerEntries(GetServerEntriesRequest request) {
        return CompletableFuture.supplyAsync(() -> {
            int entryCount = (int) Math.min(request.getMaxSize(), state.lastApplied() - request.getIndex());
            return new GetServerEntriesResponse(
                    journal.readRaw(request.getIndex(), entryCount),
                    journal.minIndex(), state.lastApplied());
        }, asyncExecutor).exceptionally(e -> {
            Throwable failure = (e instanceof CompletionException) ? e.getCause() : e;
            return new GetServerEntriesResponse(failure, journal.minIndex(), state.lastApplied());
        });
    }

    /**
     * Not supported by this class.
     *
     * @param request ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public CompletableFuture convertRoll(ConvertRollRequest request) {
        throw new UnsupportedOperationException();
    }

    /**
     * @return the current lifecycle state of this server
     */
    @Override
    public ServerState serverState() {
        return serverState;
    }

    /**
     * @return the RPC timeout taken from the server configuration, in milliseconds
     */
    protected long getRpcTimeoutMs() {
        return this.config.getRpcTimeoutMs();
    }

    /**
     * Compacts the journal up to the newest snapshot whose index is less than or equal to
     * the requested index. Nothing happens (apart from a warning) when no such snapshot exists.
     *
     * @param indexExclusive the requested compaction index
     */
    public void compact(long indexExclusive) {
        Long floorIndex = snapshots.floorKey(indexExclusive);
        if (floorIndex == null) {
            logger.warn("Request compact journal to {}, no snapshot found which less than or equal {}, " +
                            "nothing to compact!",
                    indexExclusive, indexExclusive);
            return;
        }

        logger.info("Request compact journal to {}, found a floor snapshot at index: {}.", indexExclusive, floorIndex);
        compactJournalToSnapshot(floorIndex);
    }

    /**
     * Asks the journal compaction strategy for a compaction index, handing it a sorted map
     * from snapshot index to snapshot timestamp, and compacts the journal to that index when
     * it lies beyond the oldest snapshot.
     */
    private void compactJournalPeriodically() {
        long compactionIndex = journalCompactionStrategy.calculateCompactionIndex(
                snapshots.entrySet().stream().collect(
                        Collectors.toMap(
                                Map.Entry::getKey,
                                snapshotEntry -> snapshotEntry.getValue().timestamp(),
                                (first, second) -> {
                                    throw new RuntimeException(String.format("Duplicate key for values %s and %s", first, second));
                                },
                                TreeMap::new)),
                journal);

        boolean beyondOldestSnapshot = compactionIndex > snapshots.firstKey();
        if (beyondOldestSnapshot) {
            compactJournalToSnapshot(compactionIndex);
        }
    }

    /**
     * Compacts the journal to the snapshot stored at exactly the given index and then
     * discards (closes and clears) every snapshot with a smaller index.
     * <p>
     * A warning is logged when no snapshot exists at that index. Any exception is logged
     * and not propagated.
     *
     * @param index key of the snapshot in {@code snapshots} to compact the journal to
     */
    private void compactJournalToSnapshot(long index) {
        logger.info("Compact journal to index: {}...", index);
        try {
            JournalKeeperState snapshot = snapshots.get(index);
            if (null != snapshot) {
                JournalSnapshot journalSnapshot = snapshot.getJournalSnapshot();
                logger.info("Compact journal entries, journal snapshot: {}, journal: {}...", journalSnapshot, journal);
                journal.compact(journalSnapshot);
                logger.info("Compact journal finished, journal: {}.", journal);

                // View of all snapshots strictly older than the one just used. The wildcard over
                // the raw state type keeps this declaration valid whatever type parameters
                // JournalKeeperState is declared with.
                NavigableMap<Long, ? extends JournalKeeperState> headMap = snapshots.headMap(index, false);
                while (!headMap.isEmpty()) {
                    // Removing through the view also removes the entry from snapshots.
                    snapshot = headMap.remove(headMap.firstKey());
                    logger.info("Discard snapshot: {}.", snapshot.getPath());
                    snapshot.close();
                    snapshot.clear();
                }
            } else {
                logger.warn("Compact journal failed! Cause no snapshot at index: {}.", index);
            }
        } catch (Throwable e) {
            logger.warn("Compact journal exception!", e);
        }
    }

    /**
     * @return the state object held by this server
     */
    public JournalKeeperState getState() {
        return this.state;
    }

    /**
     * Looks up the metric registered under the given name, creating it through the metric
     * factory when it does not exist yet. When metrics are disabled in the configuration
     * the shared dummy metric is returned instead and nothing is registered.
     *
     * @param name name of the metric
     * @return the metric instance of the given name, or the dummy metric when metrics are disabled
     */
    @Override
    public JMetric getMetric(String name) {
        if (!config.isEnableMetric()) {
            return DUMMY_METRIC;
        }
        return metricMap.computeIfAbsent(name, metricFactory::create);
    }
    /**
     * @return {@code true} when metrics are enabled in the server configuration
     */
    @Override
    public boolean isMetricEnabled() {
        return this.config.isEnableMetric();
    }
    /**
     * Removes the metric registered under the given name. Does nothing when metrics are
     * disabled in the configuration.
     *
     * @param name name of the metric to remove
     */
    @Override
    public void removeMetric(String name) {
        if (!config.isEnableMetric()) {
            return;
        }
        metricMap.remove(name);
    }

    /**
     * Logs a formatted report for every registered metric, resetting each metric as its
     * report is taken, and then calls {@link #onPrintMetric()}.
     */
    private void printMetrics() {
        for (JMetric metric : metricMap.values()) {
            logger.info(JMetricSupport.formatNs(metric.getAndReset()));
        }
        onPrintMetric();
    }


    // for monitor only

    /**
     * @return the leader URI currently known to this server
     */
    URI getLeaderUri() {
        return this.leaderUri;
    }

    /**
     * @return the journal used by this server
     */
    Journal getJournal() {
        return this.journal;
    }

    /**
     * Installs one chunk of a snapshot and, once the last chunk has arrived, switches the
     * server over to the installed snapshot.
     * <p>
     * Every call writes {@code data} at {@code offset} into the partial snapshot located at
     * {@code snapshotsPath()/(lastIncludedIndex + 1)}. When {@code isDone} is {@code true}
     * the method additionally:
     * <ol>
     *     <li>discards every existing snapshot with the same or a smaller index;</li>
     *     <li>finishes the partial snapshot and registers it in {@code snapshots};</li>
     *     <li>compacts the journal to the snapshot when the journal still holds the entry at
     *     {@code lastIncludedIndex} with the term {@code lastIncludedTerm}, otherwise clears
     *     the whole journal (the journal flush thread is stopped for the duration);</li>
     *     <li>replaces the server state with the content of the snapshot (the state machine
     *     thread is stopped for the duration).</li>
     * </ol>
     * The whole method runs while holding the monitor of {@code partialSnapshot}.
     *
     * @param offset position of {@code data} within the snapshot being transferred
     * @param lastIncludedIndex index of the last journal entry covered by the snapshot
     * @param lastIncludedTerm term of the entry at {@code lastIncludedIndex}
     * @param data the chunk content
     * @param isDone {@code true} when this is the last chunk
     * @throws IOException if storing or recovering the snapshot fails
     */
    void installSnapshot(long offset, long lastIncludedIndex, int lastIncludedTerm, byte[] data, boolean isDone) throws IOException {
        synchronized (partialSnapshot) {
            logger.info("Install snapshot, offset: {}, lastIncludedIndex: {}, lastIncludedTerm: {}, data length: {}, isDone: {}... " +
                            "journal minIndex: {}, maxIndex: {}, commitIndex: {}...",
                    ThreadSafeFormat.formatWithComma(offset),
                    ThreadSafeFormat.formatWithComma(lastIncludedIndex),
                    lastIncludedTerm,
                    data.length,
                    isDone,
                    ThreadSafeFormat.formatWithComma(journal.minIndex()),
                    ThreadSafeFormat.formatWithComma(journal.maxIndex()),
                    ThreadSafeFormat.formatWithComma(journal.commitIndex())
            );

            JournalKeeperState snapshot;
            long lastApplied = lastIncludedIndex + 1;
            Path snapshotPath = snapshotsPath().resolve(String.valueOf(lastApplied));
            partialSnapshot.installTrunk(offset, data, snapshotPath);

            if (isDone) {
                logger.info("All snapshot files received, discard any existing snapshot with a same or smaller index...");
                // Discard any existing snapshot with the same or a smaller index. The wildcard over
                // the raw state type keeps this declaration valid whatever type parameters
                // JournalKeeperState is declared with.
                NavigableMap<Long, ? extends JournalKeeperState> headMap = snapshots.headMap(lastApplied, true);
                while (!headMap.isEmpty()) {
                    // Removing through the view also removes the entry from snapshots.
                    snapshot = headMap.remove(headMap.firstKey());
                    logger.info("Discard snapshot: {}.", snapshot.getPath());
                    snapshot.close();
                    snapshot.clear();
                }
                logger.info("add the installed snapshot to snapshots: {}...", snapshotPath);
                partialSnapshot.finish();
                // Add the installed snapshot to snapshots.
                snapshot = new JournalKeeperState<>(stateFactory, metadataPersistence);
                snapshot.recover(snapshotPath, properties);
                snapshots.put(lastApplied, snapshot);

                logger.info("New installed snapshot: {}.", snapshot.getJournalSnapshot());
                // If an existing log entry has the same index and term as the snapshot's
                // last included entry, retain the log entries following it.
                // Otherwise discard the entire log.

                logger.info("Compact journal entries, journal: {}...", journal);
                threads.stopThread(ThreadNames.FLUSH_JOURNAL_THREAD);
                // The entry at lastIncludedIndex exists only when minIndex <= lastIncludedIndex < maxIndex;
                // the lower bound was previously checked the wrong way round (>=).
                if (journal.minIndex() <= lastIncludedIndex &&
                        lastIncludedIndex < journal.maxIndex() &&
                        journal.getTerm(lastIncludedIndex) == lastIncludedTerm) {
                    journal.compact(snapshot.getJournalSnapshot());

                } else {
                    journal.clear(snapshot.getJournalSnapshot());
                }
                threads.startThread(ThreadNames.FLUSH_JOURNAL_THREAD);
                logger.info("Compact journal finished, journal: {}.", journal);

                // Reset the state machine using the snapshot contents (and load
                // the snapshot's cluster configuration).

                logger.info("Use the new installed snapshot as server's state...");
                threads.stopThread(ThreadNames.STATE_MACHINE_THREAD);
                state.close();
                state.clear();
                snapshot.dump(statePath());
                state.recover(statePath(), properties);
                threads.startThread(ThreadNames.STATE_MACHINE_THREAD);
                logger.info("Install snapshot successfully!");
            }
        }
    }

    /**
     * Hook invoked by {@code printMetrics()} after the reports of all registered metrics
     * have been logged. The default implementation does nothing.
     */
    protected void onPrintMetric() {
        // Intentionally empty.
    }
    /**
     * Mutable holder of the server settings. Every setting starts at its {@code DEFAULT_*}
     * value; the {@code *_KEY} constants are the property names of the settings.
     */
    public static class Config {
        // Default values.
        public static final int DEFAULT_SNAPSHOT_INTERVAL_SEC = 0;
        public static final long DEFAULT_RPC_TIMEOUT_MS = 1000L;
        public static final long DEFAULT_FLUSH_INTERVAL_MS = 50L;
        public static final int DEFAULT_GET_STATE_BATCH_SIZE = 1024 * 1024;
        public static final boolean DEFAULT_ENABLE_METRIC = false;
        public static final int DEFAULT_PRINT_METRIC_INTERVAL_SEC = 0;
        public static final int DEFAULT_JOURNAL_RETENTION_MIN = 0;

        // Property keys.
        public static final String SNAPSHOT_INTERVAL_SEC_KEY = "snapshot_interval_sec";
        public static final String RPC_TIMEOUT_MS_KEY = "rpc_timeout_ms";
        public static final String FLUSH_INTERVAL_MS_KEY = "flush_interval_ms";
        public static final String WORKING_DIR_KEY = "working_dir";
        public static final String GET_STATE_BATCH_SIZE_KEY = "get_state_batch_size";
        public static final String ENABLE_METRIC_KEY = "enable_metric";
        public static final String PRINT_METRIC_INTERVAL_SEC_KEY = "print_metric_interval_sec";
        public static final String JOURNAL_RETENTION_MIN_KEY = "journal_retention_min";

        private int snapshotIntervalSec = DEFAULT_SNAPSHOT_INTERVAL_SEC;
        private long rpcTimeoutMs = DEFAULT_RPC_TIMEOUT_MS;
        private long flushIntervalMs = DEFAULT_FLUSH_INTERVAL_MS;
        // Defaults to the "journalkeeper" directory below the JVM's current working directory.
        private Path workingDir = Paths.get(System.getProperty("user.dir")).resolve("journalkeeper");
        private int getStateBatchSize = DEFAULT_GET_STATE_BATCH_SIZE;
        private boolean enableMetric = DEFAULT_ENABLE_METRIC;
        private int printMetricIntervalSec = DEFAULT_PRINT_METRIC_INTERVAL_SEC;
        private int journalRetentionMin = DEFAULT_JOURNAL_RETENTION_MIN;

        /** @return the snapshot interval in seconds */
        int getSnapshotIntervalSec() {
            return this.snapshotIntervalSec;
        }

        /** @param snapshotIntervalSec the snapshot interval in seconds */
        void setSnapshotIntervalSec(int snapshotIntervalSec) {
            this.snapshotIntervalSec = snapshotIntervalSec;
        }

        /** @return the RPC timeout in milliseconds */
        long getRpcTimeoutMs() {
            return this.rpcTimeoutMs;
        }

        /** @param rpcTimeoutMs the RPC timeout in milliseconds */
        void setRpcTimeoutMs(long rpcTimeoutMs) {
            this.rpcTimeoutMs = rpcTimeoutMs;
        }

        /** @return the flush interval in milliseconds */
        public long getFlushIntervalMs() {
            return this.flushIntervalMs;
        }

        /** @param flushIntervalMs the flush interval in milliseconds */
        public void setFlushIntervalMs(long flushIntervalMs) {
            this.flushIntervalMs = flushIntervalMs;
        }

        /** @return the working directory of the server */
        public Path getWorkingDir() {
            return this.workingDir;
        }

        /** @param workingDir the working directory of the server */
        public void setWorkingDir(Path workingDir) {
            this.workingDir = workingDir;
        }

        /** @return the batch size used when getting state */
        public int getGetStateBatchSize() {
            return this.getStateBatchSize;
        }

        /** @param getStateBatchSize the batch size used when getting state */
        public void setGetStateBatchSize(int getStateBatchSize) {
            this.getStateBatchSize = getStateBatchSize;
        }

        /** @return {@code true} when metrics are enabled */
        public boolean isEnableMetric() {
            return this.enableMetric;
        }

        /** @param enableMetric {@code true} to enable metrics */
        public void setEnableMetric(boolean enableMetric) {
            this.enableMetric = enableMetric;
        }

        /** @return the interval between metric printouts in seconds */
        public int getPrintMetricIntervalSec() {
            return this.printMetricIntervalSec;
        }

        /** @param printMetricIntervalSec the interval between metric printouts in seconds */
        public void setPrintMetricIntervalSec(int printMetricIntervalSec) {
            this.printMetricIntervalSec = printMetricIntervalSec;
        }

        /** @return the journal retention in minutes */
        public int getJournalRetentionMin() {
            return this.journalRetentionMin;
        }

        /** @param journalRetentionMin the journal retention in minutes */
        public void setJournalRetentionMin(int journalRetentionMin) {
            this.journalRetentionMin = journalRetentionMin;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy