org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of flink-runtime Show documentation
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.checkpoint;

import org.apache.flink.core.io.SimpleVersionedSerializer;

import javax.annotation.Nullable;

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;

/**
 * The interface for hooks that can be called by the checkpoint coordinator when triggering or
 * restoring a checkpoint. Such a hook is useful for example when preparing external systems for
 * taking or restoring checkpoints.
 *
 * <p>The {@link #triggerCheckpoint(long, long, Executor)} method (called when triggering a
 * checkpoint) can return a result (via a future) that will be stored as part of the checkpoint
 * metadata. When restoring a checkpoint, that stored result will be given to the {@link
 * #restoreCheckpoint(long, Object)} method. The hook's {@link #getIdentifier() identifier} is used
 * to map data to hook in the presence of multiple hooks, and when resuming a savepoint that was
 * potentially created by a different job. The identifier has a similar role as for example the
 * operator UID in the streaming API.
 *
 * <p>It is possible that a job fails (and is subsequently restarted) before any checkpoints were
 * successful. In that situation, the checkpoint coordinator calls {@link #reset()} to give the
 * hook an opportunity to, for example, reset an external system to initial conditions.
 *
 * <p>The MasterTriggerRestoreHook is defined when creating the streaming dataflow graph. It is
 * attached to the job graph, which gets sent to the cluster for execution. To avoid having to make
 * the hook itself serializable, these hooks are attached to the job graph via a {@link
 * MasterTriggerRestoreHook.Factory}.
 *
 * @param <T> The type of the data produced by the hook and stored as part of the checkpoint
 *     metadata. If the hook never stores any data, this can be typed to {@code Void}.
 */
public interface MasterTriggerRestoreHook<T> {

    /**
     * Gets the identifier of this hook. The identifier is used to identify a specific hook in the
     * presence of multiple hooks and to give it the correct checkpointed data upon checkpoint
     * restoration.
     *
     * <p>The identifier should be unique between different hooks of a job, but
     * deterministic/constant so that upon resuming a savepoint, the hook will get the correct data.
     * For example, if the hook calls into another storage system and persists namespace/schema
     * specific information, then the name of the storage system, together with the namespace/schema
     * name could be an appropriate identifier.
     *
     * <p>When multiple hooks of the same name are created and attached to a job graph, only the
     * first one is actually used. This can be exploited to deduplicate hooks that would do the same
     * thing.
     *
     * @return The identifier of the hook.
     */
    String getIdentifier();

    /**
     * This method is called by the checkpoint coordinator to reset the hook when execution is
     * restarted in the absence of any checkpoint state.
     *
     * <p>The default implementation does nothing.
     *
     * @throws Exception Exceptions encountered when calling the hook will cause execution to fail.
     */
    default void reset() throws Exception {}

    /**
     * Tear-down method for the hook.
     *
     * <p>The default implementation does nothing.
     *
     * @throws Exception Exceptions encountered when calling close will be logged.
     */
    default void close() throws Exception {}

    /**
     * This method is called by the checkpoint coordinator when triggering a checkpoint, prior to
     * sending the "trigger checkpoint" messages to the source tasks.
     *
     * <p>If the hook implementation wants to store data as part of the checkpoint, it may return
     * that data via a future, otherwise it should return null. The data is stored as part of the
     * checkpoint metadata under the hook's identifier (see {@link #getIdentifier()}).
     *
     * <p>If the action by this hook needs to be executed synchronously, then this method should
     * directly execute the action synchronously. The returned future (if any) would typically be a
     * completed future.
     *
     * <p>If the action should be executed asynchronously and only needs to complete before the
     * checkpoint is considered completed, then the method may use the given executor to execute the
     * actual action and would signal its completion by completing the future. For hooks that do not
     * need to store data, the future would be completed with null.
     *
     * <p>Please note that this method should be non-blocking. Any heavy operation like IO operation
     * should be executed asynchronously with the given executor.
     *
     * @param checkpointId The ID (logical timestamp, monotonically increasing) of the checkpoint
     * @param timestamp The wall clock timestamp when the checkpoint was triggered, for info/logging
     *     purposes.
     * @param executor The executor for asynchronous actions
     * @return Optionally, a future that signals when the hook has completed and that contains data
     *     to be stored with the checkpoint.
     * @throws Exception Exceptions encountered when calling the hook will cause the checkpoint to
     *     abort.
     */
    @Nullable
    CompletableFuture<T> triggerCheckpoint(long checkpointId, long timestamp, Executor executor)
            throws Exception;

    /**
     * This method is called by the checkpoint coordinator prior to restoring the state of a
     * checkpoint. If the checkpoint did store data from this hook, that data will be passed to this
     * method.
     *
     * @param checkpointId The ID (logical timestamp) of the restored checkpoint
     * @param checkpointData The data originally stored in the checkpoint by this hook, possibly
     *     null.
     * @throws Exception Exceptions thrown while restoring the checkpoint will cause the restore
     *     operation to fail and to possibly fall back to another checkpoint.
     */
    void restoreCheckpoint(long checkpointId, @Nullable T checkpointData) throws Exception;

    /**
     * Creates the serializer to (de)serialize the data stored by this hook. The serializer
     * serializes the result of the Future returned by the {@link #triggerCheckpoint(long, long,
     * Executor)} method, and deserializes the data stored in the checkpoint into the object passed
     * to the {@link #restoreCheckpoint(long, Object)} method.
     *
     * <p>If the hook never returns any data to be stored, then this method may return null as the
     * serializer.
     *
     * @return The serializer to (de)serialize the data stored by this hook
     */
    @Nullable
    SimpleVersionedSerializer<T> createCheckpointDataSerializer();

    // ------------------------------------------------------------------------
    //  factory
    // ------------------------------------------------------------------------

    /**
     * A factory to instantiate a {@code MasterTriggerRestoreHook}.
     *
     * <p>The hooks are defined when creating the streaming dataflow graph and are attached to the
     * job graph, which gets sent to the cluster for execution. To avoid having to make the hook
     * implementation serializable, a serializable hook factory is actually attached to the job
     * graph instead of the hook implementation itself.
     */
    interface Factory extends java.io.Serializable {

        /**
         * Instantiates the {@code MasterTriggerRestoreHook}.
         *
         * @param <V> The type of the data produced by the created hook.
         * @return The newly created hook.
         */
        <V> MasterTriggerRestoreHook<V> create();
    }
}