All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.checkpoint.CheckpointCoordinator Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.checkpoint;

import akka.actor.ActorSystem;
import akka.actor.PoisonPill;
import akka.actor.Props;
import org.apache.flink.api.common.JobID;
import org.apache.flink.runtime.checkpoint.stats.CheckpointStatsTracker;
import org.apache.flink.runtime.checkpoint.stats.DisabledCheckpointStatsTracker;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.executiongraph.Execution;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.executiongraph.ExecutionJobVertex;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.instance.ActorGateway;
import org.apache.flink.runtime.instance.AkkaActorGateway;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobmanager.RecoveryMode;
import org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint;
import org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint;
import org.apache.flink.runtime.messages.checkpoint.NotifyCheckpointComplete;
import org.apache.flink.runtime.messages.checkpoint.TriggerCheckpoint;
import org.apache.flink.runtime.state.StateHandle;
import org.apache.flink.util.SerializedValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.UUID;

import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * The checkpoint coordinator coordinates the distributed snapshots of operators and state.
 * It triggers the checkpoint by sending the messages to the relevant tasks and collects the
 * checkpoint acknowledgements. It also collects and maintains the overview of the state handles
 * reported by the tasks that acknowledge the checkpoint.
 *
 * 

Depending on the configured {@link RecoveryMode}, the behaviour of the {@link * CompletedCheckpointStore} and {@link CheckpointIDCounter} change. The default standalone * implementations don't support any recovery. */ public class CheckpointCoordinator { static final Logger LOG = LoggerFactory.getLogger(CheckpointCoordinator.class); /** The number of recent checkpoints whose IDs are remembered */ private static final int NUM_GHOST_CHECKPOINT_IDS = 16; /** Coordinator-wide lock to safeguard the checkpoint updates */ protected final Object lock = new Object(); /** The job whose checkpoint this coordinator coordinates */ private final JobID job; /** Tasks who need to be sent a message when a checkpoint is started */ private final ExecutionVertex[] tasksToTrigger; /** Tasks who need to acknowledge a checkpoint before it succeeds */ private final ExecutionVertex[] tasksToWaitFor; /** Tasks who need to be sent a message when a checkpoint is confirmed */ private final ExecutionVertex[] tasksToCommitTo; /** Map from checkpoint ID to the pending checkpoint */ private final Map pendingCheckpoints; /** Completed checkpoints. Implementations can be blocking. Make sure calls to methods * accessing this don't block the job manager actor and run asynchronously. */ private final CompletedCheckpointStore completedCheckpointStore; /** A list of recent checkpoint IDs, to identify late messages (vs invalid ones) */ private final ArrayDeque recentPendingCheckpoints; /** Checkpoint ID counter to ensure ascending IDs. In case of job manager failures, these * need to be ascending across job managers. */ protected final CheckpointIDCounter checkpointIdCounter; /** Class loader used to deserialize the state handles (as they may be user-defined) */ private final ClassLoader userClassLoader; /** The base checkpoint interval. Actual trigger time may be affected by the * max concurrent checkpoints and minimum-pause values */ private final long baseInterval; /** The max time (in ms) that a checkpoint may take */ private final long checkpointTimeout; /** The min time(in ms) to delay after a checkpoint could be triggered. Allows to * enforce minimum processing time between checkpoint attempts */ private final long minPauseBetweenCheckpoints; /** The maximum number of checkpoints that may be in progress at the same time */ private final int maxConcurrentCheckpointAttempts; /** The timer that handles the checkpoint timeouts and triggers periodic checkpoints */ private final Timer timer; /** Actor that receives status updates from the execution graph this coordinator works for */ private ActorGateway jobStatusListener; /** The number of consecutive failed trigger attempts */ private int numUnsuccessfulCheckpointsTriggers; private ScheduledTrigger currentPeriodicTrigger; private long lastTriggeredCheckpoint; /** Flag whether a triggered checkpoint should immediately schedule the next checkpoint. * Non-volatile, because only accessed in synchronized scope */ private boolean periodicScheduling; /** Flag whether a trigger request could not be handled immediately. Non-volatile, because only * accessed in synchronized scope */ private boolean triggerRequestQueued; /** Flag marking the coordinator as shut down (not accepting any messages any more) */ private volatile boolean shutdown; /** Shutdown hook thread to clean up state handles. */ private final Thread shutdownHook; /** Helper for tracking checkpoint statistics */ private final CheckpointStatsTracker statsTracker; protected final int numberKeyGroups; // -------------------------------------------------------------------------------------------- public CheckpointCoordinator( JobID job, long baseInterval, long checkpointTimeout, int numberKeyGroups, ExecutionVertex[] tasksToTrigger, ExecutionVertex[] tasksToWaitFor, ExecutionVertex[] tasksToCommitTo, ClassLoader userClassLoader, CheckpointIDCounter checkpointIDCounter, CompletedCheckpointStore completedCheckpointStore, RecoveryMode recoveryMode) throws Exception { this(job, baseInterval, checkpointTimeout, 0L, Integer.MAX_VALUE, numberKeyGroups, tasksToTrigger, tasksToWaitFor, tasksToCommitTo, userClassLoader, checkpointIDCounter, completedCheckpointStore, recoveryMode, new DisabledCheckpointStatsTracker()); } public CheckpointCoordinator( JobID job, long baseInterval, long checkpointTimeout, long minPauseBetweenCheckpoints, int maxConcurrentCheckpointAttempts, int numberKeyGroups, ExecutionVertex[] tasksToTrigger, ExecutionVertex[] tasksToWaitFor, ExecutionVertex[] tasksToCommitTo, ClassLoader userClassLoader, CheckpointIDCounter checkpointIDCounter, CompletedCheckpointStore completedCheckpointStore, RecoveryMode recoveryMode, CheckpointStatsTracker statsTracker) throws Exception { // Sanity check checkArgument(baseInterval > 0, "Checkpoint timeout must be larger than zero"); checkArgument(checkpointTimeout >= 1, "Checkpoint timeout must be larger than zero"); checkArgument(minPauseBetweenCheckpoints >= 0, "minPauseBetweenCheckpoints must be >= 0"); checkArgument(maxConcurrentCheckpointAttempts >= 1, "maxConcurrentCheckpointAttempts must be >= 1"); this.job = checkNotNull(job); this.baseInterval = baseInterval; this.checkpointTimeout = checkpointTimeout; this.minPauseBetweenCheckpoints = minPauseBetweenCheckpoints; this.maxConcurrentCheckpointAttempts = maxConcurrentCheckpointAttempts; this.tasksToTrigger = checkNotNull(tasksToTrigger); this.tasksToWaitFor = checkNotNull(tasksToWaitFor); this.tasksToCommitTo = checkNotNull(tasksToCommitTo); this.pendingCheckpoints = new LinkedHashMap(); this.completedCheckpointStore = checkNotNull(completedCheckpointStore); this.recentPendingCheckpoints = new ArrayDeque(NUM_GHOST_CHECKPOINT_IDS); this.userClassLoader = userClassLoader; // Started with the periodic scheduler this.checkpointIdCounter = checkNotNull(checkpointIDCounter); this.timer = new Timer("Checkpoint Timer", true); this.statsTracker = checkNotNull(statsTracker); if (recoveryMode == RecoveryMode.STANDALONE) { // Add shutdown hook to clean up state handles when no checkpoint recovery is // possible. In case of another configured recovery mode, the checkpoints need to be // available for the standby job managers. this.shutdownHook = new Thread(new Runnable() { @Override public void run() { try { CheckpointCoordinator.this.shutdown(); } catch (Throwable t) { LOG.error("Error during shutdown of checkpoint coordinator via " + "JVM shutdown hook: " + t.getMessage(), t); } } }); try { // Add JVM shutdown hook to call shutdown of service Runtime.getRuntime().addShutdownHook(shutdownHook); } catch (IllegalStateException ignored) { // JVM is already shutting down. No need to do anything. } catch (Throwable t) { LOG.error("Cannot register checkpoint coordinator shutdown hook.", t); } } else { this.shutdownHook = null; } this.numberKeyGroups = numberKeyGroups; } // -------------------------------------------------------------------------------------------- // Callbacks // -------------------------------------------------------------------------------------------- /** * Callback on shutdown of the coordinator. Called in lock scope. */ protected void onShutdown() { } /** * Callback on cancellation of a checkpoint. Called in lock scope. */ protected void onCancelCheckpoint(long canceledCheckpointId) { } /** * Callback on full acknowledgement of a checkpoint. Called in lock scope. */ protected void onFullyAcknowledgedCheckpoint(CompletedCheckpoint checkpoint) { } // -------------------------------------------------------------------------------------------- // Clean shutdown // -------------------------------------------------------------------------------------------- /** * Shuts down the checkpoint coordinator. * *

After this method has been called, the coordinator does not accept * and further messages and cannot trigger any further checkpoints. All * checkpoint state is discarded. */ public void shutdown() throws Exception { shutdown(true); } /** * Suspends the checkpoint coordinator. * *

After this method has been called, the coordinator does not accept * and further messages and cannot trigger any further checkpoints. * *

The difference to shutdown is that checkpoint state in the store * and counter is kept around if possible to recover later. */ public void suspend() throws Exception { shutdown(false); } /** * Shuts down the checkpoint coordinator. * * @param shutdownStoreAndCounter Depending on this flag the checkpoint * state services are shut down or suspended. */ private void shutdown(boolean shutdownStoreAndCounter) throws Exception { synchronized (lock) { try { if (!shutdown) { shutdown = true; LOG.info("Stopping checkpoint coordinator for job " + job); periodicScheduling = false; triggerRequestQueued = false; // shut down the thread that handles the timeouts and pending triggers timer.cancel(); // make sure that the actor does not linger if (jobStatusListener != null) { jobStatusListener.tell(PoisonPill.getInstance()); jobStatusListener = null; } // clear and discard all pending checkpoints for (PendingCheckpoint pending : pendingCheckpoints.values()) { pending.discard(userClassLoader); } pendingCheckpoints.clear(); if (shutdownStoreAndCounter) { completedCheckpointStore.shutdown(); checkpointIdCounter.shutdown(); } else { completedCheckpointStore.suspend(); checkpointIdCounter.suspend(); } onShutdown(); } } finally { // Remove shutdown hook to prevent resource leaks, unless this is invoked by the // shutdown hook itself. if (shutdownHook != null && shutdownHook != Thread.currentThread()) { try { Runtime.getRuntime().removeShutdownHook(shutdownHook); } catch (IllegalStateException ignored) { // race, JVM is in shutdown already, we can safely ignore this } catch (Throwable t) { LOG.warn("Error unregistering checkpoint coordinator shutdown hook.", t); } } } } } public boolean isShutdown() { return shutdown; } // -------------------------------------------------------------------------------------------- // Handling checkpoints and messages // -------------------------------------------------------------------------------------------- /** * Triggers a new checkpoint and uses the given timestamp as the checkpoint * timestamp. * * @param timestamp The timestamp for the checkpoint. */ public boolean triggerCheckpoint(long timestamp) throws Exception { return triggerCheckpoint(timestamp, -1); } /** * Triggers a new checkpoint and uses the given timestamp as the checkpoint * timestamp. * * @param timestamp The timestamp for the checkpoint. * @param nextCheckpointId The checkpoint ID to use for this checkpoint or -1 if * the checkpoint ID counter should be queried. */ public boolean triggerCheckpoint(long timestamp, long nextCheckpointId) throws Exception { // make some eager pre-checks synchronized (lock) { // abort if the coordinator has been shutdown in the meantime if (shutdown) { return false; } // sanity check: there should never be more than one trigger request queued if (triggerRequestQueued) { LOG.warn("Trying to trigger another checkpoint while one was queued already"); return false; } // if too many checkpoints are currently in progress, we need to mark that a request is queued if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) { triggerRequestQueued = true; if (currentPeriodicTrigger != null) { currentPeriodicTrigger.cancel(); currentPeriodicTrigger = null; } return false; } //make sure the minimum interval between checkpoints has passed if (lastTriggeredCheckpoint + minPauseBetweenCheckpoints > timestamp) { if (currentPeriodicTrigger != null) { currentPeriodicTrigger.cancel(); currentPeriodicTrigger = null; } ScheduledTrigger trigger = new ScheduledTrigger(); timer.scheduleAtFixedRate(trigger, minPauseBetweenCheckpoints, baseInterval); return false; } } // first check if all tasks that we need to trigger are running. // if not, abort the checkpoint ExecutionAttemptID[] triggerIDs = new ExecutionAttemptID[tasksToTrigger.length]; for (int i = 0; i < tasksToTrigger.length; i++) { Execution ee = tasksToTrigger[i].getCurrentExecutionAttempt(); if (ee != null && ee.getState() == ExecutionState.RUNNING) { triggerIDs[i] = ee.getAttemptId(); } else { LOG.info("Checkpoint triggering task {} is not being executed at the moment. Aborting checkpoint.", tasksToTrigger[i].getSimpleName()); return false; } } // next, check if all tasks that need to acknowledge the checkpoint are running. // if not, abort the checkpoint Map ackTasks = new HashMap<>(tasksToWaitFor.length); for (ExecutionVertex ev : tasksToWaitFor) { Execution ee = ev.getCurrentExecutionAttempt(); if (ee != null) { ackTasks.put(ee.getAttemptId(), ev); } else { LOG.info("Checkpoint acknowledging task {} is not being executed at the moment. Aborting checkpoint.", ev.getSimpleName()); return false; } } // we will actually trigger this checkpoint! lastTriggeredCheckpoint = timestamp; final long checkpointID; if (nextCheckpointId < 0) { try { // this must happen outside the locked scope, because it communicates // with external services (in HA mode) and may block for a while. checkpointID = checkpointIdCounter.getAndIncrement(); } catch (Throwable t) { int numUnsuccessful = ++numUnsuccessfulCheckpointsTriggers; LOG.warn("Failed to trigger checkpoint (" + numUnsuccessful + " consecutive failed attempts so far)", t); return false; } } else { checkpointID = nextCheckpointId; } LOG.info("Triggering checkpoint " + checkpointID + " @ " + timestamp); final PendingCheckpoint checkpoint = new PendingCheckpoint(job, checkpointID, timestamp, ackTasks); // schedule the timer that will clean up the expired checkpoints TimerTask canceller = new TimerTask() { @Override public void run() { try { synchronized (lock) { // only do the work if the checkpoint is not discarded anyways // note that checkpoint completion discards the pending checkpoint object if (!checkpoint.isDiscarded()) { LOG.info("Checkpoint " + checkpointID + " expired before completing."); checkpoint.discard(userClassLoader); pendingCheckpoints.remove(checkpointID); rememberRecentCheckpointId(checkpointID); onCancelCheckpoint(checkpointID); triggerQueuedRequests(); } } } catch (Throwable t) { LOG.error("Exception while handling checkpoint timeout", t); } } }; try { // re-acquire the lock synchronized (lock) { // since we released the lock in the meantime, we need to re-check // that the conditions still hold. this is clumsy, but it allows us to // release the lock in the meantime while calls to external services are // blocking progress, and still gives us early checks that skip work // if no checkpoint can happen anyways if (shutdown) { return false; } else if (triggerRequestQueued) { LOG.warn("Trying to trigger another checkpoint while one was queued already"); return false; } else if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) { triggerRequestQueued = true; if (currentPeriodicTrigger != null) { currentPeriodicTrigger.cancel(); currentPeriodicTrigger = null; } return false; } pendingCheckpoints.put(checkpointID, checkpoint); timer.schedule(canceller, checkpointTimeout); } // end of lock scope // send the messages to the tasks that trigger their checkpoint for (int i = 0; i < tasksToTrigger.length; i++) { ExecutionAttemptID id = triggerIDs[i]; TriggerCheckpoint message = new TriggerCheckpoint(job, id, checkpointID, timestamp); tasksToTrigger[i].sendMessageToCurrentExecution(message, id); } numUnsuccessfulCheckpointsTriggers = 0; return true; } catch (Throwable t) { // guard the map against concurrent modifications synchronized (lock) { pendingCheckpoints.remove(checkpointID); } int numUnsuccessful = ++numUnsuccessfulCheckpointsTriggers; LOG.warn("Failed to trigger checkpoint (" + numUnsuccessful + " consecutive failed attempts so far)", t); if (!checkpoint.isDiscarded()) { checkpoint.discard(userClassLoader); } return false; } } /** * Receives a {@link DeclineCheckpoint} message and returns whether the * message was associated with a pending checkpoint. * * @param message Checkpoint decline from the task manager * * @return Flag indicating whether the declined checkpoint was associated * with a pending checkpoint. */ public boolean receiveDeclineMessage(DeclineCheckpoint message) throws Exception { if (shutdown || message == null) { return false; } if (!job.equals(message.getJob())) { LOG.error("Received DeclineCheckpoint message for wrong job: {}", message); return false; } final long checkpointId = message.getCheckpointId(); PendingCheckpoint checkpoint; // Flag indicating whether the ack message was for a known pending // checkpoint. boolean isPendingCheckpoint; synchronized (lock) { // we need to check inside the lock for being shutdown as well, otherwise we // get races and invalid error log messages if (shutdown) { return false; } checkpoint = pendingCheckpoints.get(checkpointId); if (checkpoint != null && !checkpoint.isDiscarded()) { isPendingCheckpoint = true; LOG.info("Discarding checkpoint " + checkpointId + " because of checkpoint decline from task " + message.getTaskExecutionId()); pendingCheckpoints.remove(checkpointId); checkpoint.discard(userClassLoader); rememberRecentCheckpointId(checkpointId); onCancelCheckpoint(checkpointId); boolean haveMoreRecentPending = false; Iterator> entries = pendingCheckpoints.entrySet().iterator(); while (entries.hasNext()) { PendingCheckpoint p = entries.next().getValue(); if (!p.isDiscarded() && p.getCheckpointTimestamp() >= checkpoint.getCheckpointTimestamp()) { haveMoreRecentPending = true; break; } } if (!haveMoreRecentPending && !triggerRequestQueued) { LOG.info("Triggering new checkpoint because of discarded checkpoint " + checkpointId); triggerCheckpoint(System.currentTimeMillis()); } else if (!haveMoreRecentPending) { LOG.info("Promoting queued checkpoint request because of discarded checkpoint " + checkpointId); triggerQueuedRequests(); } } else if (checkpoint != null) { // this should not happen throw new IllegalStateException( "Received message for discarded but non-removed checkpoint " + checkpointId); } else { // message is for an unknown checkpoint, or comes too late (checkpoint disposed) if (recentPendingCheckpoints.contains(checkpointId)) { isPendingCheckpoint = true; LOG.info("Received another decline checkpoint message for now expired checkpoint attempt " + checkpointId); } else { isPendingCheckpoint = false; } } } return isPendingCheckpoint; } /** * Receives an AcknowledgeCheckpoint message and returns whether the * message was associated with a pending checkpoint. * * @param message Checkpoint ack from the task manager * * @return Flag indicating whether the ack'd checkpoint was associated * with a pending checkpoint. * * @throws Exception If the checkpoint cannot be added to the completed checkpoint store. */ public boolean receiveAcknowledgeMessage(AcknowledgeCheckpoint message) throws Exception { if (shutdown || message == null) { return false; } if (!job.equals(message.getJob())) { LOG.error("Received AcknowledgeCheckpoint message for wrong job: {}", message); return false; } final long checkpointId = message.getCheckpointId(); CompletedCheckpoint completed = null; PendingCheckpoint checkpoint; // Flag indicating whether the ack message was for a known pending // checkpoint. boolean isPendingCheckpoint; synchronized (lock) { // we need to check inside the lock for being shutdown as well, otherwise we // get races and invalid error log messages if (shutdown) { return false; } checkpoint = pendingCheckpoints.get(checkpointId); if (checkpoint != null && !checkpoint.isDiscarded()) { isPendingCheckpoint = true; if (checkpoint.acknowledgeTask( message.getTaskExecutionId(), message.getState(), message.getStateSize(), null)) { // TODO: Give KV-state to the acknowledgeTask method if (checkpoint.isFullyAcknowledged()) { completed = checkpoint.finalizeCheckpoint(); completedCheckpointStore.addCheckpoint(completed); LOG.info("Completed checkpoint " + checkpointId + " (in " + completed.getDuration() + " ms)"); if (LOG.isDebugEnabled()) { StringBuilder builder = new StringBuilder(); for (Map.Entry entry: completed.getTaskStates().entrySet()) { builder.append("JobVertexID: ").append(entry.getKey()).append(" {").append(entry.getValue()).append("}"); } LOG.debug(builder.toString()); } pendingCheckpoints.remove(checkpointId); rememberRecentCheckpointId(checkpointId); dropSubsumedCheckpoints(completed.getTimestamp()); onFullyAcknowledgedCheckpoint(completed); triggerQueuedRequests(); } } else { // checkpoint did not accept message LOG.error("Received duplicate or invalid acknowledge message for checkpoint " + checkpointId + " , task " + message.getTaskExecutionId()); } } else if (checkpoint != null) { // this should not happen throw new IllegalStateException( "Received message for discarded but non-removed checkpoint " + checkpointId); } else { // message is for an unknown checkpoint, or comes too late (checkpoint disposed) if (recentPendingCheckpoints.contains(checkpointId)) { isPendingCheckpoint = true; LOG.warn("Received late message for now expired checkpoint attempt " + checkpointId); } else { isPendingCheckpoint = false; } } } // send the confirmation messages to the necessary targets. we do this here // to be outside the lock scope if (completed != null) { final long timestamp = completed.getTimestamp(); for (ExecutionVertex ev : tasksToCommitTo) { Execution ee = ev.getCurrentExecutionAttempt(); if (ee != null) { ExecutionAttemptID attemptId = ee.getAttemptId(); NotifyCheckpointComplete notifyMessage = new NotifyCheckpointComplete(job, attemptId, checkpointId, timestamp); ev.sendMessageToCurrentExecution(notifyMessage, ee.getAttemptId()); } } statsTracker.onCompletedCheckpoint(completed); } return isPendingCheckpoint; } private void rememberRecentCheckpointId(long id) { if (recentPendingCheckpoints.size() >= NUM_GHOST_CHECKPOINT_IDS) { recentPendingCheckpoints.removeFirst(); } recentPendingCheckpoints.addLast(id); } private void dropSubsumedCheckpoints(long timestamp) throws Exception { Iterator> entries = pendingCheckpoints.entrySet().iterator(); while (entries.hasNext()) { PendingCheckpoint p = entries.next().getValue(); if (p.getCheckpointTimestamp() < timestamp) { rememberRecentCheckpointId(p.getCheckpointId()); p.discard(userClassLoader); onCancelCheckpoint(p.getCheckpointId()); entries.remove(); } } } /** * Triggers the queued request, if there is one. * *

NOTE: The caller of this method must hold the lock when invoking the method! */ private void triggerQueuedRequests() throws Exception { if (triggerRequestQueued) { triggerRequestQueued = false; // trigger the checkpoint from the trigger timer, to finish the work of this thread before // starting with the next checkpoint ScheduledTrigger trigger = new ScheduledTrigger(); if (periodicScheduling) { if (currentPeriodicTrigger != null) { currentPeriodicTrigger.cancel(); } currentPeriodicTrigger = trigger; timer.scheduleAtFixedRate(trigger, 0L, baseInterval); } else { timer.schedule(trigger, 0L); } } } // -------------------------------------------------------------------------------------------- // Checkpoint State Restoring // -------------------------------------------------------------------------------------------- public boolean restoreLatestCheckpointedState( Map tasks, boolean errorIfNoCheckpoint, boolean allOrNothingState) throws Exception { synchronized (lock) { if (shutdown) { throw new IllegalStateException("CheckpointCoordinator is shut down"); } // Recover the checkpoints completedCheckpointStore.recover(); // restore from the latest checkpoint CompletedCheckpoint latest = completedCheckpointStore.getLatestCheckpoint(); if (latest == null) { if (errorIfNoCheckpoint) { throw new IllegalStateException("No completed checkpoint available"); } else { return false; } } for (Map.Entry taskGroupStateEntry: latest.getTaskStates().entrySet()) { TaskState taskState = taskGroupStateEntry.getValue(); ExecutionJobVertex executionJobVertex = tasks.get(taskGroupStateEntry.getKey()); if (executionJobVertex != null) { // check that we only restore the state if the parallelism has not been changed if (taskState.getParallelism() != executionJobVertex.getParallelism()) { throw new RuntimeException("Cannot restore the latest checkpoint because " + "the parallelism changed. The operator" + executionJobVertex.getJobVertexId() + " has parallelism " + executionJobVertex.getParallelism() + " whereas the corresponding" + "state object has a parallelism of " + taskState.getParallelism()); } int counter = 0; List> keyGroupPartitions = createKeyGroupPartitions(numberKeyGroups, executionJobVertex.getParallelism()); for (int i = 0; i < executionJobVertex.getParallelism(); i++) { SubtaskState subtaskState = taskState.getState(i); SerializedValue> state = null; if (subtaskState != null) { // count the number of executions for which we set a state counter++; state = subtaskState.getState(); } Map>> kvStateForTaskMap = taskState.getUnwrappedKvStates(keyGroupPartitions.get(i)); Execution currentExecutionAttempt = executionJobVertex.getTaskVertices()[i].getCurrentExecutionAttempt(); currentExecutionAttempt.setInitialState(state, kvStateForTaskMap); } if (allOrNothingState && counter > 0 && counter < executionJobVertex.getParallelism()) { throw new IllegalStateException("The checkpoint contained state only for " + "a subset of tasks for vertex " + executionJobVertex); } } else { throw new IllegalStateException("There is no execution job vertex for the job" + " vertex ID " + taskGroupStateEntry.getKey()); } } return true; } } /** * Groups the available set of key groups into key group partitions. A key group partition is * the set of key groups which is assigned to the same task. Each set of the returned list * constitutes a key group partition. * * @param numberKeyGroups Number of available key groups (indexed from 0 to numberKeyGroups - 1) * @param parallelism Parallelism to generate the key group partitioning for * @return List of key group partitions */ protected List> createKeyGroupPartitions(int numberKeyGroups, int parallelism) { ArrayList> result = new ArrayList<>(parallelism); for (int p = 0; p < parallelism; p++) { HashSet keyGroupPartition = new HashSet<>(); for (int k = p; k < numberKeyGroups; k += parallelism) { keyGroupPartition.add(k); } result.add(keyGroupPartition); } return result; } // -------------------------------------------------------------------------------------------- // Accessors // -------------------------------------------------------------------------------------------- public int getNumberOfPendingCheckpoints() { return this.pendingCheckpoints.size(); } public int getNumberOfRetainedSuccessfulCheckpoints() { synchronized (lock) { return completedCheckpointStore.getNumberOfRetainedCheckpoints(); } } public Map getPendingCheckpoints() { synchronized (lock) { return new HashMap(this.pendingCheckpoints); } } public List getSuccessfulCheckpoints() throws Exception { synchronized (lock) { return completedCheckpointStore.getAllCheckpoints(); } } protected long getAndIncrementCheckpointId() { try { // this must happen outside the locked scope, because it communicates // with external services (in HA mode) and may block for a while. return checkpointIdCounter.getAndIncrement(); } catch (Throwable t) { int numUnsuccessful = ++numUnsuccessfulCheckpointsTriggers; LOG.warn("Failed to trigger checkpoint (" + numUnsuccessful + " consecutive failed attempts so far)", t); return -1; } } protected ActorGateway getJobStatusListener() { return jobStatusListener; } protected void setJobStatusListener(ActorGateway jobStatusListener) { this.jobStatusListener = jobStatusListener; } // -------------------------------------------------------------------------------------------- // Periodic scheduling of checkpoints // -------------------------------------------------------------------------------------------- public void startCheckpointScheduler() throws Exception { synchronized (lock) { if (shutdown) { throw new IllegalArgumentException("Checkpoint coordinator is shut down"); } // make sure all prior timers are cancelled stopCheckpointScheduler(); try { // Multiple start calls are OK checkpointIdCounter.start(); } catch (Exception e) { String msg = "Failed to start checkpoint ID counter: " + e.getMessage(); throw new RuntimeException(msg, e); } periodicScheduling = true; currentPeriodicTrigger = new ScheduledTrigger(); timer.scheduleAtFixedRate(currentPeriodicTrigger, baseInterval, baseInterval); } } public void stopCheckpointScheduler() throws Exception { synchronized (lock) { triggerRequestQueued = false; periodicScheduling = false; if (currentPeriodicTrigger != null) { currentPeriodicTrigger.cancel(); currentPeriodicTrigger = null; } for (PendingCheckpoint p : pendingCheckpoints.values()) { p.discard(userClassLoader); } pendingCheckpoints.clear(); numUnsuccessfulCheckpointsTriggers = 0; } } // ------------------------------------------------------------------------ // job status listener that schedules / cancels periodic checkpoints // ------------------------------------------------------------------------ public ActorGateway createActivatorDeactivator(ActorSystem actorSystem, UUID leaderSessionID) { synchronized (lock) { if (shutdown) { throw new IllegalArgumentException("Checkpoint coordinator is shut down"); } if (jobStatusListener == null) { Props props = Props.create(CheckpointCoordinatorDeActivator.class, this, leaderSessionID); // wrap the ActorRef in a AkkaActorGateway to support message decoration jobStatusListener = new AkkaActorGateway(actorSystem.actorOf(props), leaderSessionID); } return jobStatusListener; } } // ------------------------------------------------------------------------ private class ScheduledTrigger extends TimerTask { @Override public void run() { try { triggerCheckpoint(System.currentTimeMillis()); } catch (Exception e) { LOG.error("Exception while triggering checkpoint", e); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy