/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.executiongraph;
import akka.actor.ActorSystem;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.accumulators.AccumulatorHelper;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Gauge;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.metrics.groups.UnregisteredMetricsGroup;
import org.apache.flink.runtime.JobException;
import org.apache.flink.runtime.StoppingException;
import org.apache.flink.runtime.accumulators.AccumulatorRegistry;
import org.apache.flink.runtime.accumulators.AccumulatorSnapshot;
import org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult;
import org.apache.flink.runtime.blob.BlobKey;
import org.apache.flink.runtime.checkpoint.CheckpointCoordinator;
import org.apache.flink.runtime.checkpoint.CheckpointIDCounter;
import org.apache.flink.runtime.checkpoint.CompletedCheckpointStore;
import org.apache.flink.runtime.checkpoint.savepoint.SavepointCoordinator;
import org.apache.flink.runtime.checkpoint.savepoint.SavepointStore;
import org.apache.flink.runtime.checkpoint.stats.CheckpointStatsTracker;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.execution.SuppressRestartsException;
import org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader;
import org.apache.flink.runtime.executiongraph.archive.ExecutionConfigSummary;
import org.apache.flink.runtime.executiongraph.restart.RestartStrategy;
import org.apache.flink.runtime.instance.ActorGateway;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.ScheduleMode;
import org.apache.flink.runtime.jobmanager.RecoveryMode;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup;
import org.apache.flink.runtime.jobmanager.scheduler.Scheduler;
import org.apache.flink.runtime.messages.ExecutionGraphMessages;
import org.apache.flink.runtime.taskmanager.TaskExecutionState;
import org.apache.flink.runtime.util.SerializableObject;
import org.apache.flink.runtime.util.SerializedThrowable;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.SerializedValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.concurrent.ExecutionContext;
import scala.concurrent.ExecutionContext$;
import scala.concurrent.duration.FiniteDuration;
import java.io.IOException;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* The execution graph is the central data structure that coordinates the distributed
* execution of a data flow. It keeps representations of each parallel task, each
* intermediate result, and the communication between them.
*
* The execution graph consists of the following constructs:
*
* - The {@link ExecutionJobVertex} represents one vertex from the JobGraph (usually one operation like
* "map" or "join") during execution. It holds the aggregated state of all parallel subtasks.
* The ExecutionJobVertex is identified inside the graph by the {@link JobVertexID}, which it takes
* from the JobGraph's corresponding JobVertex.
* - The {@link ExecutionVertex} represents one parallel subtask. For each ExecutionJobVertex, there are
* as many ExecutionVertices as the parallelism. The ExecutionVertex is identified by
* the ExecutionJobVertex and the number of the parallel subtask
* - The {@link Execution} is one attempt to execute an ExecutionVertex. There may be multiple Executions
* for the ExecutionVertex, in case of a failure, or in the case where some data needs to be recomputed
* because it is no longer available when requested by later operations. An Execution is always
* identified by an {@link ExecutionAttemptID}. All messages between the JobManager and the TaskManager
* about deployment of tasks and updates in the task status always use the ExecutionAttemptID to
* address the message receiver.
*
*
* The ExecutionGraph implements {@link java.io.Serializable}, because it can be archived by
* sending it to an archive actor via an actor message. The execution graph does contain some
* non-serializable fields. These fields are not required in the archived form and are cleared
* in the {@link #prepareForArchiving()} method.
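*
* A minimal sketch of how these three levels are typically traversed (illustration only;
* the {@code graph} reference is assumed to be an already constructed ExecutionGraph):
* <pre>{@code
* for (ExecutionJobVertex ejv : graph.getVerticesTopologically()) {
*     for (ExecutionVertex ev : ejv.getTaskVertices()) {
*         Execution attempt = ev.getCurrentExecutionAttempt();
*         ExecutionAttemptID id = attempt.getAttemptId();
*         // the attempt id is what JobManager/TaskManager messages use to address this attempt
*     }
* }
* }</pre>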
*/
public class ExecutionGraph implements Serializable {
private static final long serialVersionUID = 42L;
private static final AtomicReferenceFieldUpdater<ExecutionGraph, JobStatus> STATE_UPDATER =
AtomicReferenceFieldUpdater.newUpdater(ExecutionGraph.class, JobStatus.class, "state");
/** The log object used for debugging. */
static final Logger LOG = LoggerFactory.getLogger(ExecutionGraph.class);
static final String RESTARTING_TIME_METRIC_NAME = "restartingTime";
// --------------------------------------------------------------------------------------------
/** The lock used to secure all access to mutable fields, especially the tracking of progress
* within the job. */
private final SerializableObject progressLock = new SerializableObject();
/** Job specific information like the job id, job name, job configuration, etc. */
private final JobInformation jobInformation;
/** Serialized version of the job specific information. This is done to avoid multiple
* serializations of the same data when creating a TaskDeploymentDescriptor.
*/
private final SerializedValue<JobInformation> serializedJobInformation;
/** {@code true} if all source tasks are stoppable. */
private boolean isStoppable = true;
/** All job vertices that are part of this graph */
private final ConcurrentHashMap<JobVertexID, ExecutionJobVertex> tasks;
/** All vertices, in the order in which they were created **/
private final List<ExecutionJobVertex> verticesInCreationOrder;
/** All intermediate results that are part of this graph */
private final ConcurrentHashMap<IntermediateDataSetID, IntermediateResult> intermediateResults;
/** The currently executed tasks, for callbacks */
private final ConcurrentHashMap<ExecutionAttemptID, Execution> currentExecutions;
/** Listeners that receive messages when the entire job switches it status (such as from
* RUNNING to FINISHED) */
private final List<ActorGateway> jobStatusListenerActors;
/** Listeners that receive messages whenever a single task execution changes its status */
private final List<ActorGateway> executionListenerActors;
/** Timestamps (in milliseconds as returned by {@code System.currentTimeMillis()}) when
* the execution graph transitioned into a certain state. The index into this array is the
* ordinal of the enum value, i.e. the timestamp when the graph went into state "RUNNING" is
* at {@code stateTimestamps[RUNNING.ordinal()]}. */
private final long[] stateTimestamps;
/** The timeout for all messages that require a response/acknowledgement */
private final FiniteDuration timeout;
// ------ Configuration of the Execution -------
/** Flag to indicate whether the scheduler may queue tasks for execution, or needs to be able
* to deploy them immediately. */
private boolean allowQueuedScheduling = false;
/** The mode of scheduling. Decides how to select the initial set of tasks to be deployed.
* May indicate to deploy all sources, or to deploy everything, or to deploy via backtracking
* from results that need to be materialized. */
private ScheduleMode scheduleMode = ScheduleMode.FROM_SOURCES;
/** Flag to indicate whether the Graph has been archived */
private boolean isArchived = false;
// ------ Execution status and progress. These values are volatile, and accessed under the lock -------
/** Current status of the job execution */
private volatile JobStatus state = JobStatus.CREATED;
/** The exception that caused the job to fail. This is set to the first root exception
* that was not recoverable and triggered job failure */
private volatile Throwable failureCause;
/** The number of job vertices that have reached a terminal state */
private volatile int numFinishedJobVertices;
// ------ Fields that are relevant to the execution and need to be cleared before archiving -------
/** The scheduler to use for scheduling new tasks as they are needed */
@SuppressWarnings("NonSerializableFieldInSerializableClass")
private Scheduler scheduler;
/** Strategy to use for restarts */
@SuppressWarnings("NonSerializableFieldInSerializableClass")
private RestartStrategy restartStrategy;
/** The classloader for the user code. Needed for calls into user code classes */
@SuppressWarnings("NonSerializableFieldInSerializableClass")
private ClassLoader userClassLoader;
/** The coordinator for checkpoints, if snapshot checkpoints are enabled */
@SuppressWarnings("NonSerializableFieldInSerializableClass")
private CheckpointCoordinator checkpointCoordinator;
/** The coordinator for savepoints, if snapshot checkpoints are enabled */
private transient SavepointCoordinator savepointCoordinator;
/** Checkpoint stats tracker, kept separate from the coordinator in order to be
* available after archiving. */
@SuppressWarnings("NonSerializableFieldInSerializableClass")
private CheckpointStatsTracker checkpointStatsTracker;
/** The execution context which is used to execute futures. */
private final transient Executor futureExecutor;
private final transient ExecutionContext futureExecutionContext;
/** The executor which is used to execute blocking I/O operations */
private final transient Executor ioExecutor;
// ------ Fields that are only relevant for archived execution graphs ------------
private String jsonPlan;
/** Serializable summary of all job config values, e.g. for web interface */
private ExecutionConfigSummary executionConfigSummary;
// --------------------------------------------------------------------------------------------
// Constructors
// --------------------------------------------------------------------------------------------
/**
* This constructor is for tests only, because it does not include class loading information.
*/
ExecutionGraph(
Executor futureExecutor,
Executor ioExecutor,
JobID jobId,
String jobName,
Configuration jobConfig,
SerializedValue<ExecutionConfig> serializedConfig,
FiniteDuration timeout,
RestartStrategy restartStrategy,
Scheduler scheduler) throws IOException {
this(
futureExecutor,
ioExecutor,
jobId,
jobName,
jobConfig,
serializedConfig,
timeout,
restartStrategy,
new ArrayList<BlobKey>(),
new ArrayList<URL>(),
scheduler,
ExecutionGraph.class.getClassLoader(),
new UnregisteredMetricsGroup()
);
}
public ExecutionGraph(
Executor futureExecutor,
Executor ioExecutor,
JobID jobId,
String jobName,
Configuration jobConfig,
SerializedValue<ExecutionConfig> serializedConfig,
FiniteDuration timeout,
RestartStrategy restartStrategy,
List<BlobKey> requiredJarFiles,
List<URL> requiredClasspaths,
Scheduler scheduler,
ClassLoader userClassLoader,
MetricGroup metricGroup) throws IOException {
checkNotNull(jobId);
checkNotNull(jobName);
checkNotNull(jobConfig);
this.jobInformation = new JobInformation(
jobId,
jobName,
serializedConfig,
jobConfig,
requiredJarFiles,
requiredClasspaths);
// serialize the job information to do the serialization work only once
this.serializedJobInformation = new SerializedValue<>(jobInformation);
this.futureExecutor = Preconditions.checkNotNull(futureExecutor);
this.futureExecutionContext = ExecutionContext$.MODULE$.fromExecutor(futureExecutor);
this.ioExecutor = Preconditions.checkNotNull(ioExecutor);
this.scheduler = Preconditions.checkNotNull(scheduler, "scheduler");
this.userClassLoader = Preconditions.checkNotNull(userClassLoader, "userClassLoader");
this.tasks = new ConcurrentHashMap<>();
this.intermediateResults = new ConcurrentHashMap<>();
this.verticesInCreationOrder = new ArrayList<>();
this.currentExecutions = new ConcurrentHashMap<>();
this.jobStatusListenerActors = new CopyOnWriteArrayList<>();
this.executionListenerActors = new CopyOnWriteArrayList<>();
this.stateTimestamps = new long[JobStatus.values().length];
this.stateTimestamps[JobStatus.CREATED.ordinal()] = System.currentTimeMillis();
this.timeout = timeout;
this.restartStrategy = restartStrategy;
metricGroup.gauge(RESTARTING_TIME_METRIC_NAME, new RestartTimeGauge());
// create a summary of all relevant data accessed in the web interface's JobConfigHandler
try {
ExecutionConfig executionConfig = serializedConfig.deserializeValue(userClassLoader);
if (executionConfig != null) {
this.executionConfigSummary = new ExecutionConfigSummary(executionConfig);
}
} catch (IOException | ClassNotFoundException e) {
LOG.error("Couldn't create ExecutionConfigSummary for job {} ", getJobID(), e);
}
}
// --------------------------------------------------------------------------------------------
// Configuration of Data-flow wide execution settings
// --------------------------------------------------------------------------------------------
/**
* Gets the number of job vertices currently held by this execution graph.
* @return The current number of job vertices.
*/
public int getNumberOfExecutionJobVertices() {
return this.verticesInCreationOrder.size();
}
public boolean isQueuedSchedulingAllowed() {
return this.allowQueuedScheduling;
}
public void setQueuedSchedulingAllowed(boolean allowed) {
this.allowQueuedScheduling = allowed;
}
public void setScheduleMode(ScheduleMode scheduleMode) {
this.scheduleMode = scheduleMode;
}
public ScheduleMode getScheduleMode() {
return scheduleMode;
}
public boolean isArchived() {
return isArchived;
}
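/**
* Enables snapshot checkpointing for this execution graph. May only be called while the job
* is still in the {@code CREATED} state. An {@code interval} of {@code Long.MAX_VALUE} skips
* the creation of the periodic {@link CheckpointCoordinator} and only registers the
* {@link SavepointCoordinator}.
*/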
public void enableSnapshotCheckpointing(
long interval,
long checkpointTimeout,
long minPauseBetweenCheckpoints,
int maxConcurrentCheckpoints,
int numberKeyGroups,
List<ExecutionJobVertex> verticesToTrigger,
List<ExecutionJobVertex> verticesToWaitFor,
List<ExecutionJobVertex> verticesToCommitTo,
ActorSystem actorSystem,
UUID leaderSessionID,
CheckpointIDCounter checkpointIDCounter,
CompletedCheckpointStore checkpointStore,
RecoveryMode recoveryMode,
SavepointStore savepointStore,
CheckpointStatsTracker statsTracker) throws Exception {
// simple sanity checks
if (interval < 10 || checkpointTimeout < 10) {
throw new IllegalArgumentException();
}
if (state != JobStatus.CREATED) {
throw new IllegalStateException("Job must be in CREATED state");
}
ExecutionVertex[] tasksToTrigger = collectExecutionVertices(verticesToTrigger);
ExecutionVertex[] tasksToWaitFor = collectExecutionVertices(verticesToWaitFor);
ExecutionVertex[] tasksToCommitTo = collectExecutionVertices(verticesToCommitTo);
// disable to make sure existing checkpoint coordinators are cleared
disableSnaphotCheckpointing();
checkpointStatsTracker = Objects.requireNonNull(statsTracker, "Checkpoint stats tracker");
// an interval of Long.MAX_VALUE indicates that periodic checkpointing is disabled;
// the CheckpointCoordinator should be created only if the interval is not the max value
if (interval != Long.MAX_VALUE) {
// create the coordinator that triggers and commits checkpoints and holds the state
checkpointCoordinator = new CheckpointCoordinator(
jobInformation.getJobId(),
interval,
checkpointTimeout,
minPauseBetweenCheckpoints,
maxConcurrentCheckpoints,
numberKeyGroups,
tasksToTrigger,
tasksToWaitFor,
tasksToCommitTo,
userClassLoader,
checkpointIDCounter,
checkpointStore,
recoveryMode,
checkpointStatsTracker,
ioExecutor);
// the periodic checkpoint scheduler is activated and deactivated as a result of
// job status changes (running -> on, all other states -> off)
registerJobStatusListener(
checkpointCoordinator.createActivatorDeactivator(actorSystem, leaderSessionID));
}
// Savepoint Coordinator
savepointCoordinator = new SavepointCoordinator(
jobInformation.getJobId(),
interval,
checkpointTimeout,
numberKeyGroups,
tasksToTrigger,
tasksToWaitFor,
tasksToCommitTo,
userClassLoader,
// Important: this counter needs to be shared with the periodic
// checkpoint coordinator.
checkpointIDCounter,
savepointStore,
checkpointStatsTracker,
ioExecutor);
registerJobStatusListener(savepointCoordinator
.createActivatorDeactivator(actorSystem, leaderSessionID));
}
/**
* Disables checkpointing.
*
* The shutdown of the checkpoint coordinator might block. Make sure that calls to this
* method are executed asynchronously, so that they do not block the JobManager actor.
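*
* A hedged sketch of invoking this off the actor thread (the {@code graph} and
* {@code executor} references are assumptions made for illustration, not part of this class):
* <pre>{@code
* executor.execute(new Runnable() {
*     public void run() {
*         try {
*             graph.disableSnaphotCheckpointing();
*         } catch (Exception e) {
*             LOG.warn("Could not disable snapshot checkpointing", e);
*         }
*     }
* });
* }</pre>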
*/
public void disableSnaphotCheckpointing() throws Exception {
if (state != JobStatus.CREATED) {
throw new IllegalStateException("Job must be in CREATED state");
}
if (checkpointCoordinator != null) {
checkpointCoordinator.shutdown();
checkpointCoordinator = null;
checkpointStatsTracker = null;
}
if (savepointCoordinator != null) {
savepointCoordinator.shutdown();
savepointCoordinator = null;
}
}
public CheckpointCoordinator getCheckpointCoordinator() {
return checkpointCoordinator;
}
public SavepointCoordinator getSavepointCoordinator() {
return savepointCoordinator;
}
public RestartStrategy getRestartStrategy() {
return restartStrategy;
}
public CheckpointStatsTracker getCheckpointStatsTracker() {
return checkpointStatsTracker;
}
private ExecutionVertex[] collectExecutionVertices(List<ExecutionJobVertex> jobVertices) {
if (jobVertices.size() == 1) {
ExecutionJobVertex jv = jobVertices.get(0);
if (jv.getGraph() != this) {
throw new IllegalArgumentException("Can only use ExecutionJobVertices of this ExecutionGraph");
}
return jv.getTaskVertices();
}
else {
ArrayList<ExecutionVertex> all = new ArrayList<>();
for (ExecutionJobVertex jv : jobVertices) {
if (jv.getGraph() != this) {
throw new IllegalArgumentException("Can only use ExecutionJobVertices of this ExecutionGraph");
}
all.addAll(Arrays.asList(jv.getTaskVertices()));
}
return all.toArray(new ExecutionVertex[all.size()]);
}
}
// --------------------------------------------------------------------------------------------
// Properties and Status of the Execution Graph
// --------------------------------------------------------------------------------------------
/**
* Returns a list of BLOB keys referring to the JAR files required to run this job
* @return list of BLOB keys referring to the JAR files required to run this job
*/
public Collection<BlobKey> getRequiredJarFiles() {
return jobInformation.getRequiredJarFileBlobKeys();
}
/**
* Returns a list of classpaths referring to the directories/JAR files required to run this job
* @return list of classpaths referring to the directories/JAR files required to run this job
*/
public Collection<URL> getRequiredClasspaths() {
return jobInformation.getRequiredClasspathURLs();
}
// --------------------------------------------------------------------------------------------
public void setJsonPlan(String jsonPlan) {
this.jsonPlan = jsonPlan;
}
public String getJsonPlan() {
return jsonPlan;
}
public Scheduler getScheduler() {
return scheduler;
}
public SerializedValue<JobInformation> getSerializedJobInformation() {
return serializedJobInformation;
}
public JobID getJobID() {
return jobInformation.getJobId();
}
public String getJobName() {
return jobInformation.getJobName();
}
public boolean isStoppable() {
return this.isStoppable;
}
public Configuration getJobConfiguration() {
return jobInformation.getJobConfiguration();
}
public ClassLoader getUserClassLoader() {
return this.userClassLoader;
}
public JobStatus getState() {
return state;
}
public Throwable getFailureCause() {
return failureCause;
}
public ExecutionJobVertex getJobVertex(JobVertexID id) {
return this.tasks.get(id);
}
public Map<JobVertexID, ExecutionJobVertex> getAllVertices() {
return Collections.unmodifiableMap(this.tasks);
}
public Iterable<ExecutionJobVertex> getVerticesTopologically() {
// we return a specific iterator that does not fail with concurrent modifications
// the list is append only, so it is safe for that
final int numElements = this.verticesInCreationOrder.size();
return new Iterable<ExecutionJobVertex>() {
@Override
public Iterator<ExecutionJobVertex> iterator() {
return new Iterator<ExecutionJobVertex>() {
private int pos = 0;
@Override
public boolean hasNext() {
return pos < numElements;
}
@Override
public ExecutionJobVertex next() {
if (hasNext()) {
return verticesInCreationOrder.get(pos++);
} else {
throw new NoSuchElementException();
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
};
}
public Map<IntermediateDataSetID, IntermediateResult> getAllIntermediateResults() {
return Collections.unmodifiableMap(this.intermediateResults);
}
public Iterable<ExecutionVertex> getAllExecutionVertices() {
return new Iterable<ExecutionVertex>() {
@Override
public Iterator<ExecutionVertex> iterator() {
return new AllVerticesIterator(getVerticesTopologically().iterator());
}
};
}
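/**
* Returns the timestamp (in milliseconds, as by {@code System.currentTimeMillis()}) at which
* this graph transitioned into the given status, or {@code 0} if that status has not been
* reached yet.
*
* <p>For example, {@code getStatusTimestamp(JobStatus.RUNNING)} yields the time at which the
* job switched to RUNNING.
*/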
public long getStatusTimestamp(JobStatus status) {
return this.stateTimestamps[status.ordinal()];
}
/**
* Returns the {@link Executor} that is used to execute futures for this ExecutionGraph.
*
* @return Executor used to execute futures for this ExecutionGraph
*/
public Executor getFutureExecutor() {
return futureExecutor;
}
public ExecutionContext getFutureExecutionContext() {
return futureExecutionContext;
}
/**
* Gets the internal Flink accumulators, keyed by execution attempt. Each entry maps the
* accumulator metric type to the accumulator of the vertex's current execution attempt.
* @return A map of accumulators for every executed task.
*/
public Map<ExecutionAttemptID, Map<AccumulatorRegistry.Metric, Accumulator<?, ?>>> getFlinkAccumulators() {
Map<ExecutionAttemptID, Map<AccumulatorRegistry.Metric, Accumulator<?, ?>>> flinkAccumulators =
new HashMap<ExecutionAttemptID, Map<AccumulatorRegistry.Metric, Accumulator<?, ?>>>();
for (ExecutionVertex vertex : getAllExecutionVertices()) {
Map<AccumulatorRegistry.Metric, Accumulator<?, ?>> taskAccs = vertex.getCurrentExecutionAttempt().getFlinkAccumulators();
flinkAccumulators.put(vertex.getCurrentExecutionAttempt().getAttemptId(), taskAccs);
}
return flinkAccumulators;
}
/**
* Merges all accumulator results from the tasks previously executed in the Executions.
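*
* <p>A usage sketch (the {@code graph} reference and the accumulator name {@code "numRecords"}
* are illustrative assumptions, not part of this class):
* <pre>{@code
* Map<String, Accumulator<?, ?>> merged = graph.aggregateUserAccumulators();
* Accumulator<?, ?> acc = merged.get("numRecords");
* Object value = (acc == null) ? null : acc.getLocalValue();
* }</pre>
*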
* @return The accumulator map
*/
public Map<String, Accumulator<?, ?>> aggregateUserAccumulators() {
Map<String, Accumulator<?, ?>> userAccumulators = new HashMap<String, Accumulator<?, ?>>();
for (ExecutionVertex vertex : getAllExecutionVertices()) {
Map<String, Accumulator<?, ?>> next = vertex.getCurrentExecutionAttempt().getUserAccumulators();
if (next != null) {
AccumulatorHelper.mergeInto(userAccumulators, next);
}
}
return userAccumulators;
}
/**
* Gets a serialized accumulator map.
* @return The accumulator map with serialized accumulator values.
* @throws IOException Thrown if the serialization of an accumulator value fails.
*/
public Map<String, SerializedValue<Object>> getAccumulatorsSerialized() throws IOException {
Map<String, Accumulator<?, ?>> accumulatorMap = aggregateUserAccumulators();
Map<String, SerializedValue<Object>> result = new HashMap<>();
for (Map.Entry<String, Accumulator<?, ?>> entry : accumulatorMap.entrySet()) {
result.put(entry.getKey(), new SerializedValue