// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Copyright Camunda Services GmbH and/or licensed to Camunda Services GmbH under
* one or more contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright ownership.
* Licensed under the Camunda License 1.0. You may not use this file
* except in compliance with the Camunda License 1.0.
*/
package io.camunda.zeebe.broker.system.partitions.impl;
import io.atomix.raft.RaftApplicationEntryCommittedPositionListener;
import io.camunda.zeebe.broker.system.partitions.NoEntryAtSnapshotPosition;
import io.camunda.zeebe.broker.system.partitions.StateController;
import io.camunda.zeebe.logstreams.impl.Loggers;
import io.camunda.zeebe.scheduler.Actor;
import io.camunda.zeebe.scheduler.future.ActorFuture;
import io.camunda.zeebe.scheduler.future.CompletableActorFuture;
import io.camunda.zeebe.snapshots.PersistedSnapshot;
import io.camunda.zeebe.snapshots.SnapshotException;
import io.camunda.zeebe.snapshots.SnapshotException.SnapshotNotFoundException;
import io.camunda.zeebe.snapshots.TransientSnapshot;
import io.camunda.zeebe.stream.impl.StreamProcessor;
import io.camunda.zeebe.stream.impl.StreamProcessorMode;
import io.camunda.zeebe.util.health.FailureListener;
import io.camunda.zeebe.util.health.HealthMonitorable;
import io.camunda.zeebe.util.health.HealthReport;
import java.time.Duration;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletableFuture;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import org.slf4j.Logger;
public final class AsyncSnapshotDirector extends Actor
implements RaftApplicationEntryCommittedPositionListener, HealthMonitorable {
public static final Duration MINIMUM_SNAPSHOT_PERIOD = Duration.ofMinutes(1);
private static final Logger LOG = Loggers.SNAPSHOT_LOGGER;
private static final String LOG_MSG_WAIT_UNTIL_COMMITTED =
"Finished taking temporary snapshot, need to wait until last written event position {} is committed, current commit position is {}. After that snapshot will be committed.";
private static final String ERROR_MSG_ON_RESOLVE_PROCESSED_POS =
"Unexpected error in resolving last processed position.";
private static final String ERROR_MSG_ON_RESOLVE_WRITTEN_POS =
"Unexpected error in resolving last written position.";
private static final String ERROR_MSG_MOVE_SNAPSHOT =
"Unexpected exception occurred on moving valid snapshot.";
private final StateController stateController;
private final Duration snapshotRate;
private final String processorName;
private final StreamProcessor streamProcessor;
private final String actorName;
private final StreamProcessorMode streamProcessorMode;
private final Callable> flushLog;
private final Set listeners = new HashSet<>();
private final int partitionId;
private final TreeMap> commitAwaiters = new TreeMap<>();
private CompletableActorFuture ongoingSnapshotFuture;
@SuppressWarnings("java:S3077") // allow volatile here, health is immutable
private volatile HealthReport healthReport = HealthReport.healthy(this);
private long commitPosition;
private AsyncSnapshotDirector(
final int partitionId,
final StreamProcessor streamProcessor,
final StateController stateController,
final Duration snapshotRate,
final StreamProcessorMode streamProcessorMode,
final Callable> flushLog) {
this.streamProcessor = streamProcessor;
this.stateController = stateController;
processorName = streamProcessor.getName();
this.snapshotRate = snapshotRate;
this.partitionId = partitionId;
actorName = buildActorName("SnapshotDirector", this.partitionId);
this.streamProcessorMode = streamProcessorMode;
this.flushLog = flushLog;
}
@Override
protected Map createContext() {
final var context = super.createContext();
context.put(ACTOR_PROP_PARTITION_ID, Integer.toString(partitionId));
return context;
}
@Override
public String getName() {
return actorName;
}
@Override
protected void onActorStarting() {
final var firstSnapshotTime =
RandomDuration.getRandomDurationMinuteBased(MINIMUM_SNAPSHOT_PERIOD, snapshotRate);
actor.schedule(firstSnapshotTime, this::scheduleSnapshotOnRate);
}
@Override
public ActorFuture closeAsync() {
if (actor.isClosed()) {
return CompletableActorFuture.completed(null);
}
return super.closeAsync();
}
@Override
protected void handleFailure(final Throwable failure) {
LOG.error(
"No snapshot was taken due to failure in '{}'. Will try to take snapshot after snapshot period {}.",
actorName,
snapshotRate,
failure);
resetStateOnFailure(failure);
healthReport = HealthReport.unhealthy(this).withIssue(failure);
for (final var listener : listeners) {
listener.onFailure(healthReport);
}
}
/**
* Create an AsyncSnapshotDirector that can take snapshot when the StreamProcessor is in
* continuous replay mode.
*
* @param nodeId id of this broker
* @param partitionId partition id
* @param streamProcessor stream processor for the partition
* @param stateController state controller that manages state
* @param snapshotRate rate at which the snapshot is taken
* @return snapshot director
*/
public static AsyncSnapshotDirector ofReplayMode(
final int nodeId,
final int partitionId,
final StreamProcessor streamProcessor,
final StateController stateController,
final Duration snapshotRate,
final Callable> flushLog) {
return new AsyncSnapshotDirector(
partitionId,
streamProcessor,
stateController,
snapshotRate,
StreamProcessorMode.REPLAY,
flushLog);
}
/**
* Create an AsyncSnapshotDirector that can take snapshot when the StreamProcessor is in
* processing mode
*
* @param nodeId id of this broker
* @param partitionId partition id
* @param streamProcessor stream processor for the partition
* @param stateController state controller that manages state
* @param snapshotRate rate at which the snapshot is taken
* @return snapshot director
*/
public static AsyncSnapshotDirector ofProcessingMode(
final int nodeId,
final int partitionId,
final StreamProcessor streamProcessor,
final StateController stateController,
final Duration snapshotRate,
final Callable> flushLog) {
return new AsyncSnapshotDirector(
partitionId,
streamProcessor,
stateController,
snapshotRate,
StreamProcessorMode.PROCESSING,
flushLog);
}
private void scheduleSnapshotOnRate() {
actor.runAtFixedRate(snapshotRate, this::trySnapshot);
trySnapshot();
}
/**
* Directly take a snapshot, independently of the scheduled snapshots.
*
* @return A future that is completed successfully when the snapshot was taken. If the snapshot
* was skipped, the future is also completed successfully but with a null.
*/
public CompletableActorFuture forceSnapshot() {
final var newSnapshotFuture = new CompletableActorFuture();
actor.call(() -> trySnapshot().onComplete(newSnapshotFuture));
return newSnapshotFuture;
}
@Override
public HealthReport getHealthReport() {
return healthReport;
}
@Override
public void addFailureListener(final FailureListener listener) {
actor.run(() -> listeners.add(listener));
}
@Override
public void removeFailureListener(final FailureListener failureListener) {
actor.run(() -> listeners.remove(failureListener));
}
// Try taking a snapshot. Snapshot may be skipped if there is already an ongoing snapshot or if
// there is nothing to snapshot. Future is completed with null if the snapshot is skipped.
// Otherwise, future is completed with the committed snapshot, or completed exceptionally if
// snapshotting fails.
private ActorFuture trySnapshot() {
if (ongoingSnapshotFuture != null) {
LOG.debug("Already taking snapshot, skipping this request for a new snapshot");
return CompletableActorFuture.completed(null);
}
final CompletableActorFuture snapshotFuture = new CompletableActorFuture<>();
ongoingSnapshotFuture = snapshotFuture;
final InProgressSnapshot inProgressSnapshot = new InProgressSnapshot();
streamProcessor
.getLastProcessedPositionAsync()
.onComplete(
(position, error) -> {
if (error != null) {
LOG.error(ERROR_MSG_ON_RESOLVE_PROCESSED_POS, error);
snapshotFuture.completeExceptionally(error);
} else if (position == StreamProcessor.UNSET_POSITION) {
LOG.debug(
"We will skip taking this snapshot, because we haven't processed anything yet.");
snapshotFuture.complete(null);
} else {
inProgressSnapshot.lowerBoundSnapshotPosition = position;
snapshot(inProgressSnapshot).onComplete(snapshotFuture);
}
});
snapshotFuture.onComplete(
(snapshot, snapshotError) -> {
if (snapshotError != null && inProgressSnapshot.pendingSnapshot != null) {
inProgressSnapshot.pendingSnapshot.abort();
}
// We allow only one ongoing snapshot. Reset the future to indicate there is
// no ongoing snapshot.
ongoingSnapshotFuture = null;
});
return snapshotFuture;
}
private ActorFuture snapshot(final InProgressSnapshot inProgressSnapshot) {
final ActorFuture takeTransientSnapshotFuture = actor.createFuture();
final ActorFuture getLastWrittenPositionFuture = actor.createFuture();
final ActorFuture lastWrittenPositionCommittedFuture = actor.createFuture();
final ActorFuture journalFlushFuture = actor.createFuture();
final ActorFuture snapshotPersistedFuture = actor.createFuture();
takeTransientSnapshot(inProgressSnapshot).onComplete(takeTransientSnapshotFuture);
takeTransientSnapshotFuture.onComplete(
proceed(
getLastWrittenPositionFuture::completeExceptionally,
() ->
getLastWrittenPosition(inProgressSnapshot)
.onComplete(getLastWrittenPositionFuture)));
getLastWrittenPositionFuture.onComplete(
proceed(
lastWrittenPositionCommittedFuture::completeExceptionally,
() ->
waitUntilLastWrittenPositionIsCommitted(inProgressSnapshot)
.onComplete(lastWrittenPositionCommittedFuture)));
lastWrittenPositionCommittedFuture.onComplete(
proceed(
journalFlushFuture::completeExceptionally,
() -> flushJournal().onComplete(journalFlushFuture)));
journalFlushFuture.onComplete(
proceed(
snapshotPersistedFuture::completeExceptionally,
() -> persistSnapshot(inProgressSnapshot).onComplete(snapshotPersistedFuture)));
return snapshotPersistedFuture;
}
private ActorFuture flushJournal() {
final CompletableActorFuture future = new CompletableActorFuture<>();
try {
flushLog
.call()
.whenComplete(
(ignore, error) -> {
if (error != null) {
LOG.warn("Failed to flush journal before committing snapshot", error);
future.completeExceptionally(error);
} else {
future.complete(null);
}
});
} catch (final Exception e) {
LOG.warn("Failed to flush journal before committing snapshot", e);
future.completeExceptionally(e);
}
return future;
}
private ActorFuture persistSnapshot(
final InProgressSnapshot inProgressSnapshot) {
final var snapshotPersisted =
inProgressSnapshot
.pendingSnapshot
.withLastFollowupEventPosition(inProgressSnapshot.lastWrittenPosition)
.persist();
snapshotPersisted.onComplete(
(snapshot, persistError) -> {
if (persistError != null) {
if (persistError instanceof SnapshotNotFoundException) {
LOG.warn(
"Failed to persist transient snapshot {}. Nothing to worry if a newer snapshot exists.",
inProgressSnapshot.pendingSnapshot,
persistError);
} else {
LOG.error(ERROR_MSG_MOVE_SNAPSHOT, persistError);
}
}
});
return snapshotPersisted;
}
private ActorFuture getLastWrittenPosition(final InProgressSnapshot inProgressSnapshot) {
final ActorFuture lastWrittenPositionReceived = new CompletableActorFuture<>();
streamProcessor
.getLastWrittenPositionAsync()
.onComplete(
(position, error) -> {
if (error != null) {
LOG.error(ERROR_MSG_ON_RESOLVE_WRITTEN_POS, error);
lastWrittenPositionReceived.completeExceptionally(error);
} else {
inProgressSnapshot.lastWrittenPosition = position;
lastWrittenPositionReceived.complete(null);
}
});
return lastWrittenPositionReceived;
}
private ActorFuture waitUntilLastWrittenPositionIsCommitted(
final InProgressSnapshot inProgressSnapshot) {
if (streamProcessorMode == StreamProcessorMode.REPLAY
|| commitPosition >= inProgressSnapshot.lastWrittenPosition) {
return CompletableActorFuture.completed(null);
} else {
LOG.info(
LOG_MSG_WAIT_UNTIL_COMMITTED, inProgressSnapshot.lastWrittenPosition, commitPosition);
return commitAwaiters.computeIfAbsent(
inProgressSnapshot.lastWrittenPosition, k -> new CompletableActorFuture<>());
}
}
private ActorFuture takeTransientSnapshot(final InProgressSnapshot inProgressSnapshot) {
final ActorFuture snapshotTaken = new CompletableActorFuture<>();
stateController
.takeTransientSnapshot(inProgressSnapshot.lowerBoundSnapshotPosition)
.onComplete(
(snapshot, error) -> {
if (error != null) {
logSnapshotTakenError(error);
snapshotTaken.completeExceptionally(error);
} else {
inProgressSnapshot.pendingSnapshot = snapshot;
snapshotTaken.complete(null);
onRecovered();
}
});
return snapshotTaken;
}
void logSnapshotTakenError(final Throwable snapshotTakenError) {
if (snapshotTakenError instanceof SnapshotException.SnapshotAlreadyExistsException) {
LOG.debug("Did not take a snapshot. {}", snapshotTakenError.getMessage());
} else if (snapshotTakenError instanceof NoEntryAtSnapshotPosition
&& streamProcessorMode == StreamProcessorMode.REPLAY) {
LOG.debug(
"Did not take a snapshot: {}. Most likely this partition has not received the entry yet. Will retry in {}",
snapshotTakenError.getMessage(),
snapshotRate);
} else {
LOG.error("Failed to take a snapshot for {}", processorName, snapshotTakenError);
}
}
private void onRecovered() {
if (!healthReport.isHealthy()) {
healthReport = HealthReport.healthy(this);
listeners.forEach(FailureListener::onRecovered);
}
}
@Override
public void onCommit(final long committedPosition) {
newPositionCommitted(committedPosition);
}
public void newPositionCommitted(final long currentCommitPosition) {
actor.run(
() -> {
commitPosition = currentCommitPosition;
final var futuresToComplete = commitAwaiters.headMap(commitPosition, true);
futuresToComplete.forEach((k, f) -> f.complete(null));
futuresToComplete.clear();
});
}
private void resetStateOnFailure(final Throwable failure) {
if (ongoingSnapshotFuture != null && !ongoingSnapshotFuture.isDone()) {
ongoingSnapshotFuture.completeExceptionally(failure);
}
ongoingSnapshotFuture = null;
}
private BiConsumer proceed(
final Consumer onError, final Runnable nextStep) {
return (ignore, error) -> {
if (error != null) {
onError.accept(error);
} else {
nextStep.run();
}
};
}
private static final class InProgressSnapshot {
private long lastWrittenPosition;
private TransientSnapshot pendingSnapshot;
private long lowerBoundSnapshotPosition;
}
}