org.apache.flink.runtime.executiongraph.ExecutionGraph Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of flink-runtime_2.10 Show documentation
There is a newer version: 1.3.3
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.executiongraph;

import akka.actor.ActorSystem;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.accumulators.AccumulatorHelper;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Gauge;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.metrics.groups.UnregisteredMetricsGroup;
import org.apache.flink.runtime.JobException;
import org.apache.flink.runtime.StoppingException;
import org.apache.flink.runtime.accumulators.AccumulatorRegistry;
import org.apache.flink.runtime.accumulators.AccumulatorSnapshot;
import org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult;
import org.apache.flink.runtime.blob.BlobKey;
import org.apache.flink.runtime.checkpoint.CheckpointCoordinator;
import org.apache.flink.runtime.checkpoint.CheckpointIDCounter;
import org.apache.flink.runtime.checkpoint.CompletedCheckpointStore;
import org.apache.flink.runtime.checkpoint.savepoint.SavepointCoordinator;
import org.apache.flink.runtime.checkpoint.savepoint.SavepointStore;
import org.apache.flink.runtime.checkpoint.stats.CheckpointStatsTracker;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.execution.SuppressRestartsException;
import org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader;
import org.apache.flink.runtime.executiongraph.archive.ExecutionConfigSummary;
import org.apache.flink.runtime.executiongraph.restart.RestartStrategy;
import org.apache.flink.runtime.instance.ActorGateway;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.ScheduleMode;
import org.apache.flink.runtime.jobmanager.RecoveryMode;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup;
import org.apache.flink.runtime.jobmanager.scheduler.Scheduler;
import org.apache.flink.runtime.messages.ExecutionGraphMessages;
import org.apache.flink.runtime.taskmanager.TaskExecutionState;
import org.apache.flink.runtime.util.SerializableObject;
import org.apache.flink.runtime.util.SerializedThrowable;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.SerializedValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.concurrent.ExecutionContext;
import scala.concurrent.duration.FiniteDuration;

import java.io.IOException;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;

import static org.apache.flink.util.Preconditions.checkNotNull;
/**
 * The execution graph is the central data structure that coordinates the distributed
 * execution of a data flow. It keeps representations of each parallel task, each
 * intermediate result, and the communication between them.
 *
 * The execution graph consists of the following constructs:
 * 
 *     The {@link ExecutionJobVertex} represents one vertex from the JobGraph (usually one operation like
 *         "map" or "join") during execution. It holds the aggregated state of all parallel subtasks.
 *         The ExecutionJobVertex is identified inside the graph by the {@link JobVertexID}, which it takes
 *         from the JobGraph's corresponding JobVertex.
 *     The {@link ExecutionVertex} represents one parallel subtask. For each ExecutionJobVertex, there are
 *         as many ExecutionVertices as the parallelism. The ExecutionVertex is identified by
 *         the ExecutionJobVertex and the number of the parallel subtask
 *     The {@link Execution} is one attempt to execute a ExecutionVertex. There may be multiple Executions
 *         for the ExecutionVertex, in case of a failure, or in the case where some data needs to be recomputed
 *         because it is no longer available when requested by later operations. An Execution is always
 *         identified by an {@link ExecutionAttemptID}. All messages between the JobManager and the TaskManager
 *         about deployment of tasks and updates in the task status always use the ExecutionAttemptID to
 *         address the message receiver.
 * 
 * 
 * The ExecutionGraph implements {@link java.io.Serializable}, because it can be archived by
 * sending it to an archive actor via an actor message. The execution graph does contain some
 * non-serializable fields. These fields are not required in the archived form and are cleared
 * in the {@link #prepareForArchiving()} method.
 */
public class ExecutionGraph implements Serializable {

	private static final long serialVersionUID = 42L;

	private static final AtomicReferenceFieldUpdater STATE_UPDATER =
			AtomicReferenceFieldUpdater.newUpdater(ExecutionGraph.class, JobStatus.class, "state");

	/** The log object used for debugging. */
	static final Logger LOG = LoggerFactory.getLogger(ExecutionGraph.class);

	static final String RESTARTING_TIME_METRIC_NAME = "restartingTime";

	// --------------------------------------------------------------------------------------------

	/** The lock used to secure all access to mutable fields, especially the tracking of progress
	 * within the job. */
	private final SerializableObject progressLock = new SerializableObject();

	/** The ID of the job this graph has been built for. */
	private final JobID jobID;

	/** The name of the original job graph. */
	private final String jobName;

	/** The job configuration that was originally attached to the JobGraph. */
	private final Configuration jobConfiguration;

	/** {@code true} if all source tasks are stoppable. */
	private boolean isStoppable = true;

	/** All job vertices that are part of this graph */
	private final ConcurrentHashMap tasks;

	/** All vertices, in the order in which they were created **/
	private final List verticesInCreationOrder;

	/** All intermediate results that are part of this graph */
	private final ConcurrentHashMap intermediateResults;

	/** The currently executed tasks, for callbacks */
	private final ConcurrentHashMap currentExecutions;

	/** A list of all libraries required during the job execution. Libraries have to be stored
	 * inside the BlobService and are referenced via the BLOB keys. */
	private final List requiredJarFiles;

	/** A list of all classpaths required during the job execution. Classpaths have to be
	 * accessible on all nodes in the cluster. */
	private final List requiredClasspaths;

	/** Listeners that receive messages when the entire job switches it status (such as from
	 * RUNNING to FINISHED) */
	private final List jobStatusListenerActors;

	/** Listeners that receive messages whenever a single task execution changes its status */
	private final List executionListenerActors;

	/** Timestamps (in milliseconds as returned by {@code System.currentTimeMillis()} when
	 * the execution graph transitioned into a certain state. The index into this array is the
	 * ordinal of the enum value, i.e. the timestamp when the graph went into state "RUNNING" is
	 * at {@code stateTimestamps[RUNNING.ordinal()]}. */
	private final long[] stateTimestamps;

	/** The timeout for all messages that require a response/acknowledgement */
	private final FiniteDuration timeout;

	// ------ Configuration of the Execution -------

	/** The execution configuration (see {@link ExecutionConfig}) related to this specific job. */
	private SerializedValue serializedExecutionConfig;

	/** Flag to indicate whether the scheduler may queue tasks for execution, or needs to be able
	 * to deploy them immediately. */
	private boolean allowQueuedScheduling = false;

	/** The mode of scheduling. Decides how to select the initial set of tasks to be deployed.
	 * May indicate to deploy all sources, or to deploy everything, or to deploy via backtracking
	 * from results than need to be materialized. */
	private ScheduleMode scheduleMode = ScheduleMode.FROM_SOURCES;

	/** Flag to indicate whether the Graph has been archived */
	private boolean isArchived = false;

	// ------ Execution status and progress. These values are volatile, and accessed under the lock -------

	/** Current status of the job execution */
	private volatile JobStatus state = JobStatus.CREATED;

	/** The exception that caused the job to fail. This is set to the first root exception
	 * that was not recoverable and triggered job failure */
	private volatile Throwable failureCause;

	/** The number of job vertices that have reached a terminal state */
	private volatile int numFinishedJobVertices;

	// ------ Fields that are relevant to the execution and need to be cleared before archiving  -------

	/** The scheduler to use for scheduling new tasks as they are needed */
	@SuppressWarnings("NonSerializableFieldInSerializableClass")
	private Scheduler scheduler;

	/** Strategy to use for restarts */
	@SuppressWarnings("NonSerializableFieldInSerializableClass")
	private RestartStrategy restartStrategy;

	/** The classloader for the user code. Needed for calls into user code classes */
	@SuppressWarnings("NonSerializableFieldInSerializableClass")
	private ClassLoader userClassLoader;

	/** The coordinator for checkpoints, if snapshot checkpoints are enabled */
	@SuppressWarnings("NonSerializableFieldInSerializableClass")
	private CheckpointCoordinator checkpointCoordinator;

	/** The coordinator for savepoints, if snapshot checkpoints are enabled */
	private transient SavepointCoordinator savepointCoordinator;

	/** Checkpoint stats tracker seperate from the coordinator in order to be
	 * available after archiving. */
	private CheckpointStatsTracker checkpointStatsTracker;

	/** The execution context which is used to execute futures. */
	@SuppressWarnings("NonSerializableFieldInSerializableClass")
	private ExecutionContext executionContext;

	// ------ Fields that are only relevant for archived execution graphs ------------
	private String jsonPlan;

	/** Serializable summary of all job config values, e.g. for web interface */
	private ExecutionConfigSummary executionConfigSummary;

	// --------------------------------------------------------------------------------------------
	//   Constructors
	// --------------------------------------------------------------------------------------------

	/**
	 * This constructor is for tests only, because it does not include class loading information.
	 */
	ExecutionGraph(
			ExecutionContext executionContext,
			JobID jobId,
			String jobName,
			Configuration jobConfig,
			SerializedValue serializedConfig,
			FiniteDuration timeout,
			RestartStrategy restartStrategy) {
		this(
			executionContext,
			jobId,
			jobName,
			jobConfig,
			serializedConfig,
			timeout,
			restartStrategy,
			new ArrayList(),
			new ArrayList(),
			ExecutionGraph.class.getClassLoader(),
			new UnregisteredMetricsGroup()
		);
	}

	public ExecutionGraph(
			ExecutionContext executionContext,
			JobID jobId,
			String jobName,
			Configuration jobConfig,
			SerializedValue serializedConfig,
			FiniteDuration timeout,
			RestartStrategy restartStrategy,
			List requiredJarFiles,
			List requiredClasspaths,
			ClassLoader userClassLoader,
			MetricGroup metricGroup) {

		checkNotNull(executionContext);
		checkNotNull(jobId);
		checkNotNull(jobName);
		checkNotNull(jobConfig);
		checkNotNull(userClassLoader);

		this.executionContext = executionContext;

		this.jobID = jobId;
		this.jobName = jobName;
		this.jobConfiguration = jobConfig;
		this.userClassLoader = userClassLoader;

		this.tasks = new ConcurrentHashMap();
		this.intermediateResults = new ConcurrentHashMap();
		this.verticesInCreationOrder = new ArrayList();
		this.currentExecutions = new ConcurrentHashMap();

		this.jobStatusListenerActors  = new CopyOnWriteArrayList();
		this.executionListenerActors = new CopyOnWriteArrayList();

		this.stateTimestamps = new long[JobStatus.values().length];
		this.stateTimestamps[JobStatus.CREATED.ordinal()] = System.currentTimeMillis();

		this.requiredJarFiles = requiredJarFiles;
		this.requiredClasspaths = requiredClasspaths;

		this.serializedExecutionConfig = checkNotNull(serializedConfig);

		this.timeout = timeout;

		this.restartStrategy = restartStrategy;

		metricGroup.gauge(RESTARTING_TIME_METRIC_NAME, new RestartTimeGauge());

		// create a summary of all relevant data accessed in the web interface's JobConfigHandler
		try {
			ExecutionConfig executionConfig = serializedConfig.deserializeValue(userClassLoader);
			if (executionConfig != null) {
				this.executionConfigSummary = new ExecutionConfigSummary(executionConfig);
			}
		} catch (IOException | ClassNotFoundException e) {
			LOG.error("Couldn't create ExecutionConfigSummary for job {} ", jobID, e);
		}
	}

	// --------------------------------------------------------------------------------------------
	//  Configuration of Data-flow wide execution settings
	// --------------------------------------------------------------------------------------------

	/**
	 * Gets the number of job vertices currently held by this execution graph.
	 * @return The current number of job vertices.
	 */
	public int getNumberOfExecutionJobVertices() {
		return this.verticesInCreationOrder.size();
	}

	public boolean isQueuedSchedulingAllowed() {
		return this.allowQueuedScheduling;
	}

	public void setQueuedSchedulingAllowed(boolean allowed) {
		this.allowQueuedScheduling = allowed;
	}

	public void setScheduleMode(ScheduleMode scheduleMode) {
		this.scheduleMode = scheduleMode;
	}

	public ScheduleMode getScheduleMode() {
		return scheduleMode;
	}

	public boolean isArchived() {
		return isArchived;
	}

	public void enableSnapshotCheckpointing(
			long interval,
			long checkpointTimeout,
			long minPauseBetweenCheckpoints,
			int maxConcurrentCheckpoints,
			int numberKeyGroups,
			List verticesToTrigger,
			List verticesToWaitFor,
			List verticesToCommitTo,
			ActorSystem actorSystem,
			UUID leaderSessionID,
			CheckpointIDCounter checkpointIDCounter,
			CompletedCheckpointStore checkpointStore,
			RecoveryMode recoveryMode,
			SavepointStore savepointStore,
			CheckpointStatsTracker statsTracker) throws Exception {

		// simple sanity checks
		if (interval < 10 || checkpointTimeout < 10) {
			throw new IllegalArgumentException();
		}
		if (state != JobStatus.CREATED) {
			throw new IllegalStateException("Job must be in CREATED state");
		}

		ExecutionVertex[] tasksToTrigger = collectExecutionVertices(verticesToTrigger);
		ExecutionVertex[] tasksToWaitFor = collectExecutionVertices(verticesToWaitFor);
		ExecutionVertex[] tasksToCommitTo = collectExecutionVertices(verticesToCommitTo);

		// disable to make sure existing checkpoint coordinators are cleared
		disableSnaphotCheckpointing();

		checkpointStatsTracker = Objects.requireNonNull(statsTracker, "Checkpoint stats tracker");

		// create the coordinator that triggers and commits checkpoints and holds the state
		checkpointCoordinator = new CheckpointCoordinator(
				jobID,
				interval,
				checkpointTimeout,
				minPauseBetweenCheckpoints,
				maxConcurrentCheckpoints,
				numberKeyGroups,
				tasksToTrigger,
				tasksToWaitFor,
				tasksToCommitTo,
				userClassLoader,
				checkpointIDCounter,
				checkpointStore,
				recoveryMode,
				checkpointStatsTracker);

		// the periodic checkpoint scheduler is activated and deactivated as a result of
		// job status changes (running -> on, all other states -> off)
		registerJobStatusListener(
				checkpointCoordinator.createActivatorDeactivator(actorSystem, leaderSessionID));

		// Savepoint Coordinator
		savepointCoordinator = new SavepointCoordinator(
				jobID,
				interval,
				checkpointTimeout,
				numberKeyGroups,
				tasksToTrigger,
				tasksToWaitFor,
				tasksToCommitTo,
				userClassLoader,
				// Important: this counter needs to be shared with the periodic
				// checkpoint coordinator.
				checkpointIDCounter,
				savepointStore,
				checkpointStatsTracker);

		registerJobStatusListener(savepointCoordinator
				.createActivatorDeactivator(actorSystem, leaderSessionID));
	}

	/**
	 * Disables checkpointing.
	 *
	 * The shutdown of the checkpoint coordinator might block. Make sure that calls to this
	 * method don't block the job manager actor and run asynchronously.
	 */
	public void disableSnaphotCheckpointing() throws Exception {
		if (state != JobStatus.CREATED) {
			throw new IllegalStateException("Job must be in CREATED state");
		}

		if (checkpointCoordinator != null) {
			checkpointCoordinator.shutdown();
			checkpointCoordinator = null;
			checkpointStatsTracker = null;
		}

		if (savepointCoordinator != null) {
			savepointCoordinator.shutdown();
			savepointCoordinator = null;
		}
	}

	public CheckpointCoordinator getCheckpointCoordinator() {
		return checkpointCoordinator;
	}

	public SavepointCoordinator getSavepointCoordinator() {
		return savepointCoordinator;
	}

	public RestartStrategy getRestartStrategy() {
		return restartStrategy;
	}

	public CheckpointStatsTracker getCheckpointStatsTracker() {
		return checkpointStatsTracker;
	}

	private ExecutionVertex[] collectExecutionVertices(List jobVertices) {
		if (jobVertices.size() == 1) {
			ExecutionJobVertex jv = jobVertices.get(0);
			if (jv.getGraph() != this) {
				throw new IllegalArgumentException("Can only use ExecutionJobVertices of this ExecutionGraph");
			}
			return jv.getTaskVertices();
		}
		else {
			ArrayList all = new ArrayList();
			for (ExecutionJobVertex jv : jobVertices) {
				if (jv.getGraph() != this) {
					throw new IllegalArgumentException("Can only use ExecutionJobVertices of this ExecutionGraph");
				}
				all.addAll(Arrays.asList(jv.getTaskVertices()));
			}
			return all.toArray(new ExecutionVertex[all.size()]);
		}
	}

	// --------------------------------------------------------------------------------------------
	//  Properties and Status of the Execution Graph
	// --------------------------------------------------------------------------------------------

	/**
	 * Returns a list of BLOB keys referring to the JAR files required to run this job
	 * @return list of BLOB keys referring to the JAR files required to run this job
	 */
	public List getRequiredJarFiles() {
		return this.requiredJarFiles;
	}

	/**
	 * Returns a list of classpaths referring to the directories/JAR files required to run this job
	 * @return list of classpaths referring to the directories/JAR files required to run this job
	 */
	public List getRequiredClasspaths() {
		return this.requiredClasspaths;
	}

	// --------------------------------------------------------------------------------------------

	public void setJsonPlan(String jsonPlan) {
		this.jsonPlan = jsonPlan;
	}

	public String getJsonPlan() {
		return jsonPlan;
	}

	public Scheduler getScheduler() {
		return scheduler;
	}

	public JobID getJobID() {
		return jobID;
	}

	public String getJobName() {
		return jobName;
	}

	public boolean isStoppable() {
		return this.isStoppable;
	}

	public Configuration getJobConfiguration() {
		return jobConfiguration;
	}

	public ClassLoader getUserClassLoader() {
		return this.userClassLoader;
	}

	public JobStatus getState() {
		return state;
	}

	public Throwable getFailureCause() {
		return failureCause;
	}

	public ExecutionJobVertex getJobVertex(JobVertexID id) {
		return this.tasks.get(id);
	}

	public Map getAllVertices() {
		return Collections.unmodifiableMap(this.tasks);
	}

	public Iterable getVerticesTopologically() {
		// we return a specific iterator that does not fail with concurrent modifications
		// the list is append only, so it is safe for that
		final int numElements = this.verticesInCreationOrder.size();

		return new Iterable() {
			@Override
			public Iterator iterator() {
				return new Iterator() {
					private int pos = 0;

					@Override
					public boolean hasNext() {
						return pos < numElements;
					}

					@Override
					public ExecutionJobVertex next() {
						if (hasNext()) {
							return verticesInCreationOrder.get(pos++);
						} else {
							throw new NoSuchElementException();
						}
					}

					@Override
					public void remove() {
						throw new UnsupportedOperationException();
					}
				};
			}
		};
	}

	public Map getAllIntermediateResults() {
		return Collections.unmodifiableMap(this.intermediateResults);
	}

	public Iterable getAllExecutionVertices() {
		return new Iterable() {
			@Override
			public Iterator iterator() {
				return new AllVerticesIterator(getVerticesTopologically().iterator());
			}
		};
	}

	public long getStatusTimestamp(JobStatus status) {
		return this.stateTimestamps[status.ordinal()];
	}

	/**
	 * Returns the ExecutionContext associated with this ExecutionGraph.
	 *
	 * @return ExecutionContext associated with this ExecutionGraph
	 */
	public ExecutionContext getExecutionContext() {
		return executionContext;
	}

	/**
	 * Gets the internal flink accumulator map of maps which contains some metrics.
	 * @return A map of accumulators for every executed task.
	 */
	public Map>> getFlinkAccumulators() {
		Map>> flinkAccumulators =
				new HashMap>>();

		for (ExecutionVertex vertex : getAllExecutionVertices()) {
			Map> taskAccs = vertex.getCurrentExecutionAttempt().getFlinkAccumulators();
			flinkAccumulators.put(vertex.getCurrentExecutionAttempt().getAttemptId(), taskAccs);
		}

		return flinkAccumulators;
	}

	/**
	 * Merges all accumulator results from the tasks previously executed in the Executions.
	 * @return The accumulator map
	 */
	public Map> aggregateUserAccumulators() {

		Map> userAccumulators = new HashMap>();

		for (ExecutionVertex vertex : getAllExecutionVertices()) {
			Map> next = vertex.getCurrentExecutionAttempt().getUserAccumulators();
			if (next != null) {
				AccumulatorHelper.mergeInto(userAccumulators, next);
			}
		}

		return userAccumulators;
	}

	/**
	 * Gets a serialized accumulator map.
	 * @return The accumulator map with serialized accumulator values.
	 * @throws IOException
	 */
	public Map> getAccumulatorsSerialized() throws IOException {

		Map> accumulatorMap = aggregateUserAccumulators();

		Map> result = new HashMap>();
		for (Map.Entry> entry : accumulatorMap.entrySet()) {
			result.put(entry.getKey(), new SerializedValue