// org.apache.flink.runtime.checkpoint.PendingCheckpoint (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.checkpoint;
import org.apache.flink.api.common.JobID;
import org.apache.flink.runtime.checkpoint.savepoint.Savepoint;
import org.apache.flink.runtime.checkpoint.savepoint.SavepointStore;
import org.apache.flink.runtime.checkpoint.savepoint.SavepointV2;
import org.apache.flink.runtime.concurrent.Future;
import org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.runtime.state.ChainedStateHandle;
import org.apache.flink.runtime.state.KeyedStateHandle;
import org.apache.flink.runtime.state.OperatorStateHandle;
import org.apache.flink.runtime.state.StateUtil;
import org.apache.flink.runtime.state.StreamStateHandle;
import org.apache.flink.runtime.state.filesystem.FileStateHandle;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledFuture;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;
/**
* A pending checkpoint is a checkpoint that has been started, but has not been
* acknowledged by all tasks that need to acknowledge it. Once all tasks have
* acknowledged it, it becomes a {@link CompletedCheckpoint}.
*
* <p>Note that the pending checkpoint, as well as the successful checkpoint, always keep the
* state handles as serialized values, never as actual values.
*/
public class PendingCheckpoint {
/**
* Result of the {@link PendingCheckpoint#acknowledgedTasks} method.
*/
public enum TaskAcknowledgeResult {
SUCCESS, // successful acknowledge of the task
DUPLICATE, // acknowledge message is a duplicate
UNKNOWN, // unknown task acknowledged
DISCARDED // pending checkpoint has been discarded
}
// ------------------------------------------------------------------------
/** The PendingCheckpoint logs to the same logger as the CheckpointCoordinator */
private static final Logger LOG = LoggerFactory.getLogger(CheckpointCoordinator.class);
private final Object lock = new Object();
private final JobID jobId;
private final long checkpointId;
private final long checkpointTimestamp;
private final Map operatorStates;
private final Map notYetAcknowledgedTasks;
private final List masterState;
/** Set of acknowledged tasks */
private final Set acknowledgedTasks;
/** The checkpoint properties. If the checkpoint should be persisted
* externally, it happens in {@link #finalizeCheckpointExternalized()}. */
private final CheckpointProperties props;
/** Target directory to potentially persist checkpoint to; null
if none configured. */
private final String targetDirectory;
/** The promise to fulfill once the checkpoint has been completed. */
private final FlinkCompletableFuture onCompletionPromise;
/** The executor for potentially blocking I/O operations, like state disposal */
private final Executor executor;
private int numAcknowledgedTasks;
private boolean discarded;
/** Optional stats tracker callback. */
@Nullable
private PendingCheckpointStats statsCallback;
private volatile ScheduledFuture> cancellerHandle;
// --------------------------------------------------------------------------------------------
public PendingCheckpoint(
JobID jobId,
long checkpointId,
long checkpointTimestamp,
Map verticesToConfirm,
CheckpointProperties props,
String targetDirectory,
Executor executor) {
// Sanity check
if (props.externalizeCheckpoint() && targetDirectory == null) {
throw new NullPointerException("No target directory specified to persist checkpoint to.");
}
checkArgument(verticesToConfirm.size() > 0,
"Checkpoint needs at least one vertex that commits the checkpoint");
this.jobId = checkNotNull(jobId);
this.checkpointId = checkpointId;
this.checkpointTimestamp = checkpointTimestamp;
this.notYetAcknowledgedTasks = checkNotNull(verticesToConfirm);
this.props = checkNotNull(props);
this.targetDirectory = targetDirectory;
this.executor = Preconditions.checkNotNull(executor);
this.operatorStates = new HashMap<>();
this.masterState = new ArrayList<>();
this.acknowledgedTasks = new HashSet<>(verticesToConfirm.size());
this.onCompletionPromise = new FlinkCompletableFuture<>();
}
// --------------------------------------------------------------------------------------------
// ------------------------------------------------------------------------
// Properties
// ------------------------------------------------------------------------
public JobID getJobId() {
return jobId;
}
public long getCheckpointId() {
return checkpointId;
}
public long getCheckpointTimestamp() {
return checkpointTimestamp;
}
public int getNumberOfNonAcknowledgedTasks() {
return notYetAcknowledgedTasks.size();
}
public int getNumberOfAcknowledgedTasks() {
return numAcknowledgedTasks;
}
public Map getOperatorStates() {
return operatorStates;
}
public boolean isFullyAcknowledged() {
return this.notYetAcknowledgedTasks.isEmpty() && !discarded;
}
public boolean isDiscarded() {
return discarded;
}
/**
* Checks whether this checkpoint can be subsumed or whether it should always continue, regardless
* of newer checkpoints in progress.
*
* @return True if the checkpoint can be subsumed, false otherwise.
*/
public boolean canBeSubsumed() {
// If the checkpoint is forced, it cannot be subsumed.
return !props.forceCheckpoint();
}
CheckpointProperties getProps() {
return props;
}
String getTargetDirectory() {
return targetDirectory;
}
/**
* Sets the callback for tracking this pending checkpoint.
*
* @param trackerCallback Callback for collecting subtask stats.
*/
void setStatsCallback(@Nullable PendingCheckpointStats trackerCallback) {
this.statsCallback = trackerCallback;
}
/**
* Sets the handle for the canceller to this pending checkpoint. This method fails
* with an exception if a handle has already been set.
*
* @return true, if the handle was set, false, if the checkpoint is already disposed;
*/
public boolean setCancellerHandle(ScheduledFuture> cancellerHandle) {
synchronized (lock) {
if (this.cancellerHandle == null) {
if (!discarded) {
this.cancellerHandle = cancellerHandle;
return true;
} else {
return false;
}
}
else {
throw new IllegalStateException("A canceller handle was already set");
}
}
}
// ------------------------------------------------------------------------
// Progress and Completion
// ------------------------------------------------------------------------
/**
* Returns the completion future.
*
* @return A future to the completed checkpoint
*/
public Future getCompletionFuture() {
return onCompletionPromise;
}
public CompletedCheckpoint finalizeCheckpointExternalized() throws IOException {
synchronized (lock) {
checkState(isFullyAcknowledged(), "Pending checkpoint has not been fully acknowledged yet.");
// make sure we fulfill the promise with an exception if something fails
try {
// externalize the metadata
final Savepoint savepoint = new SavepointV2(checkpointId, operatorStates.values(), masterState);
// TEMP FIX - The savepoint store is strictly typed to file systems currently
// but the checkpoints think more generic. we need to work with file handles
// here until the savepoint serializer accepts a generic stream factory
// We have this branch here, because savepoints and externalized checkpoints
// currently behave differently.
// Savepoints:
// - Metadata file in unique directory
// - External pointer can be the directory
// Externalized checkpoints:
// - Multiple metadata files per directory possible (need to be unique)
// - External pointer needs to be the file itself
//
// This should be unified as part of the JobManager metadata stream factories.
if (props.isSavepoint()) {
final FileStateHandle metadataHandle = SavepointStore.storeSavepointToHandle(targetDirectory, savepoint);
final String externalPointer = metadataHandle.getFilePath().getParent().toString();
return finalizeInternal(metadataHandle, externalPointer);
} else {
final FileStateHandle metadataHandle = SavepointStore.storeExternalizedCheckpointToHandle(targetDirectory, savepoint);
final String externalPointer = metadataHandle.getFilePath().toString();
return finalizeInternal(metadataHandle, externalPointer);
}
}
catch (Throwable t) {
onCompletionPromise.completeExceptionally(t);
ExceptionUtils.rethrowIOException(t);
return null; // silence the compiler
}
}
}
public CompletedCheckpoint finalizeCheckpointNonExternalized() {
synchronized (lock) {
checkState(isFullyAcknowledged(), "Pending checkpoint has not been fully acknowledged yet.");
// make sure we fulfill the promise with an exception if something fails
try {
// finalize without external metadata
return finalizeInternal(null, null);
}
catch (Throwable t) {
onCompletionPromise.completeExceptionally(t);
ExceptionUtils.rethrow(t);
return null; // silence the compiler
}
}
}
@GuardedBy("lock")
private CompletedCheckpoint finalizeInternal(
@Nullable StreamStateHandle externalMetadata,
@Nullable String externalPointer) {
assert(Thread.holdsLock(lock));
CompletedCheckpoint completed = new CompletedCheckpoint(
jobId,
checkpointId,
checkpointTimestamp,
System.currentTimeMillis(),
operatorStates,
masterState,
props,
externalMetadata,
externalPointer);
onCompletionPromise.complete(completed);
// to prevent null-pointers from concurrent modification, copy reference onto stack
PendingCheckpointStats statsCallback = this.statsCallback;
if (statsCallback != null) {
// Finalize the statsCallback and give the completed checkpoint a
// callback for discards.
CompletedCheckpointStats.DiscardCallback discardCallback =
statsCallback.reportCompletedCheckpoint(externalPointer);
completed.setDiscardCallback(discardCallback);
}
// mark this pending checkpoint as disposed, but do NOT drop the state
dispose(false);
return completed;
}
/**
* Acknowledges the task with the given execution attempt id and the given subtask state.
*
* @param executionAttemptId of the acknowledged task
* @param subtaskState of the acknowledged task
* @param metrics Checkpoint metrics for the stats
* @return TaskAcknowledgeResult of the operation
*/
public TaskAcknowledgeResult acknowledgeTask(
ExecutionAttemptID executionAttemptId,
SubtaskState subtaskState,
CheckpointMetrics metrics) {
synchronized (lock) {
if (discarded) {
return TaskAcknowledgeResult.DISCARDED;
}
final ExecutionVertex vertex = notYetAcknowledgedTasks.remove(executionAttemptId);
if (vertex == null) {
if (acknowledgedTasks.contains(executionAttemptId)) {
return TaskAcknowledgeResult.DUPLICATE;
} else {
return TaskAcknowledgeResult.UNKNOWN;
}
} else {
acknowledgedTasks.add(executionAttemptId);
}
List operatorIDs = vertex.getJobVertex().getOperatorIDs();
int subtaskIndex = vertex.getParallelSubtaskIndex();
long ackTimestamp = System.currentTimeMillis();
long stateSize = 0;
if (subtaskState != null) {
stateSize = subtaskState.getStateSize();
@SuppressWarnings("deprecation")
ChainedStateHandle nonPartitionedState =
subtaskState.getLegacyOperatorState();
ChainedStateHandle partitioneableState =
subtaskState.getManagedOperatorState();
ChainedStateHandle rawOperatorState =
subtaskState.getRawOperatorState();
// break task state apart into separate operator states
for (int x = 0; x < operatorIDs.size(); x++) {
OperatorID operatorID = operatorIDs.get(x);
OperatorState operatorState = operatorStates.get(operatorID);
if (operatorState == null) {
operatorState = new OperatorState(
operatorID,
vertex.getTotalNumberOfParallelSubtasks(),
vertex.getMaxParallelism());
operatorStates.put(operatorID, operatorState);
}
KeyedStateHandle managedKeyedState = null;
KeyedStateHandle rawKeyedState = null;
// only the head operator retains the keyed state
if (x == operatorIDs.size() - 1) {
managedKeyedState = subtaskState.getManagedKeyedState();
rawKeyedState = subtaskState.getRawKeyedState();
}
OperatorSubtaskState operatorSubtaskState = new OperatorSubtaskState(
nonPartitionedState != null ? nonPartitionedState.get(x) : null,
partitioneableState != null ? partitioneableState.get(x) : null,
rawOperatorState != null ? rawOperatorState.get(x) : null,
managedKeyedState,
rawKeyedState);
operatorState.putState(subtaskIndex, operatorSubtaskState);
}
}
++numAcknowledgedTasks;
// publish the checkpoint statistics
// to prevent null-pointers from concurrent modification, copy reference onto stack
final PendingCheckpointStats statsCallback = this.statsCallback;
if (statsCallback != null) {
// Do this in millis because the web frontend works with them
long alignmentDurationMillis = metrics.getAlignmentDurationNanos() / 1_000_000;
SubtaskStateStats subtaskStateStats = new SubtaskStateStats(
subtaskIndex,
ackTimestamp,
stateSize,
metrics.getSyncDurationMillis(),
metrics.getAsyncDurationMillis(),
metrics.getBytesBufferedInAlignment(),
alignmentDurationMillis);
statsCallback.reportSubtaskStats(vertex.getJobvertexId(), subtaskStateStats);
}
return TaskAcknowledgeResult.SUCCESS;
}
}
/**
* Adds a master state (state generated on the checkpoint coordinator) to
* the pending checkpoint.
*
* @param state The state to add
*/
public void addMasterState(MasterState state) {
checkNotNull(state);
synchronized (lock) {
if (!discarded) {
masterState.add(state);
}
}
}
// ------------------------------------------------------------------------
// Cancellation
// ------------------------------------------------------------------------
/**
* Aborts a checkpoint because it expired (took too long).
*/
public void abortExpired() {
try {
Exception cause = new Exception("Checkpoint expired before completing");
onCompletionPromise.completeExceptionally(cause);
reportFailedCheckpoint(cause);
} finally {
dispose(true);
}
}
/**
* Aborts the pending checkpoint because a newer completed checkpoint subsumed it.
*/
public void abortSubsumed() {
try {
Exception cause = new Exception("Checkpoints has been subsumed");
onCompletionPromise.completeExceptionally(cause);
reportFailedCheckpoint(cause);
if (props.forceCheckpoint()) {
throw new IllegalStateException("Bug: forced checkpoints must never be subsumed");
}
} finally {
dispose(true);
}
}
public void abortDeclined() {
try {
Exception cause = new Exception("Checkpoint was declined (tasks not ready)");
onCompletionPromise.completeExceptionally(cause);
reportFailedCheckpoint(cause);
} finally {
dispose(true);
}
}
/**
* Aborts the pending checkpoint due to an error.
* @param cause The error's exception.
*/
public void abortError(Throwable cause) {
try {
Exception failure = new Exception("Checkpoint failed: " + cause.getMessage(), cause);
onCompletionPromise.completeExceptionally(failure);
reportFailedCheckpoint(failure);
} finally {
dispose(true);
}
}
private void dispose(boolean releaseState) {
synchronized (lock) {
try {
numAcknowledgedTasks = -1;
if (!discarded && releaseState) {
executor.execute(new Runnable() {
@Override
public void run() {
// discard the private states.
// unregistered shared states are still considered private at this point.
try {
StateUtil.bestEffortDiscardAllStateObjects(operatorStates.values());
} catch (Throwable t) {
LOG.warn("Could not properly dispose the private states in the pending checkpoint {} of job {}.",
checkpointId, jobId, t);
} finally {
operatorStates.clear();
}
}
});
}
} finally {
discarded = true;
notYetAcknowledgedTasks.clear();
acknowledgedTasks.clear();
cancelCanceller();
}
}
}
private void cancelCanceller() {
try {
final ScheduledFuture> canceller = this.cancellerHandle;
if (canceller != null) {
canceller.cancel(false);
}
}
catch (Exception e) {
// this code should not throw exceptions
LOG.warn("Error while cancelling checkpoint timeout task", e);
}
}
/**
* Reports a failed checkpoint with the given optional cause.
*
* @param cause The failure cause or null
.
*/
private void reportFailedCheckpoint(Exception cause) {
// to prevent null-pointers from concurrent modification, copy reference onto stack
final PendingCheckpointStats statsCallback = this.statsCallback;
if (statsCallback != null) {
long failureTimestamp = System.currentTimeMillis();
statsCallback.reportFailedCheckpoint(failureTimestamp, cause);
}
}
// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
@Override
public String toString() {
return String.format("Pending Checkpoint %d @ %d - confirmed=%d, pending=%d",
checkpointId, checkpointTimestamp, getNumberOfAcknowledgedTasks(), getNumberOfNonAcknowledgedTasks());
}
}