/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.executiongraph;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.ArchivedExecutionConfig;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.JobStatus;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.accumulators.AccumulatorHelper;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Counter;
import org.apache.flink.metrics.SimpleCounter;
import org.apache.flink.runtime.JobException;
import org.apache.flink.runtime.accumulators.AccumulatorSnapshot;
import org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult;
import org.apache.flink.runtime.blob.BlobWriter;
import org.apache.flink.runtime.blob.PermanentBlobKey;
import org.apache.flink.runtime.checkpoint.CheckpointCoordinator;
import org.apache.flink.runtime.checkpoint.CheckpointFailureManager;
import org.apache.flink.runtime.checkpoint.CheckpointIDCounter;
import org.apache.flink.runtime.checkpoint.CheckpointPlanCalculator;
import org.apache.flink.runtime.checkpoint.CheckpointStatsSnapshot;
import org.apache.flink.runtime.checkpoint.CheckpointStatsTracker;
import org.apache.flink.runtime.checkpoint.CheckpointsCleaner;
import org.apache.flink.runtime.checkpoint.CompletedCheckpointStore;
import org.apache.flink.runtime.checkpoint.DefaultCheckpointPlanCalculator;
import org.apache.flink.runtime.checkpoint.ExecutionAttemptMappingProvider;
import org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook;
import org.apache.flink.runtime.checkpoint.OperatorCoordinatorCheckpointContext;
import org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor;
import org.apache.flink.runtime.concurrent.FutureUtils;
import org.apache.flink.runtime.concurrent.FutureUtils.ConjunctFuture;
import org.apache.flink.runtime.concurrent.ScheduledExecutorServiceAdapter;
import org.apache.flink.runtime.deployment.TaskDeploymentDescriptorFactory;
import org.apache.flink.runtime.entrypoint.ClusterEntryPointExceptionUtils;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.executiongraph.failover.flip1.ResultPartitionAvailabilityChecker;
import org.apache.flink.runtime.executiongraph.failover.flip1.partitionrelease.PartitionReleaseStrategy;
import org.apache.flink.runtime.io.network.partition.JobMasterPartitionTracker;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration;
import org.apache.flink.runtime.query.KvStateLocationRegistry;
import org.apache.flink.runtime.scheduler.InternalFailuresListener;
import org.apache.flink.runtime.scheduler.VertexParallelismInformation;
import org.apache.flink.runtime.scheduler.VertexParallelismStore;
import org.apache.flink.runtime.scheduler.adapter.DefaultExecutionTopology;
import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
import org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex;
import org.apache.flink.runtime.scheduler.strategy.SchedulingResultPartition;
import org.apache.flink.runtime.scheduler.strategy.SchedulingTopology;
import org.apache.flink.runtime.shuffle.ShuffleMaster;
import org.apache.flink.runtime.state.CheckpointStorage;
import org.apache.flink.runtime.state.SharedStateRegistry;
import org.apache.flink.runtime.state.StateBackend;
import org.apache.flink.runtime.taskmanager.DispatcherThreadFactory;
import org.apache.flink.types.Either;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkException;
import org.apache.flink.util.OptionalFailure;
import org.apache.flink.util.SerializedThrowable;
import org.apache.flink.util.SerializedValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.stream.Collectors;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;
/** Default implementation of the {@link ExecutionGraph}. */
public class DefaultExecutionGraph implements ExecutionGraph, InternalExecutionGraphAccessor {
/** The log object used for debugging. */
static final Logger LOG = LoggerFactory.getLogger(ExecutionGraph.class);
// --------------------------------------------------------------------------------------------
/** Job specific information like the job id, job name, job configuration, etc. */
private final JobInformation jobInformation;
/** Serialized job information or a blob key pointing to the offloaded job information. */
    private final Either<SerializedValue<JobInformation>, PermanentBlobKey>
            jobInformationOrBlobKey;
/** The executor which is used to execute futures. */
private final ScheduledExecutorService futureExecutor;
/** The executor which is used to execute blocking io operations. */
private final Executor ioExecutor;
/** Executor that runs tasks in the job manager's main thread. */
@Nonnull private ComponentMainThreadExecutor jobMasterMainThreadExecutor;
/** {@code true} if all source tasks are stoppable. */
private boolean isStoppable = true;
/** All job vertices that are part of this graph. */
    private final Map<JobVertexID, ExecutionJobVertex> tasks;
    /** All vertices, in the order in which they were created. */
    private final List<ExecutionJobVertex> verticesInCreationOrder;
    /** All intermediate results that are part of this graph. */
    private final Map<IntermediateDataSetID, IntermediateResult> intermediateResults;
    /** The currently executed tasks, for callbacks. */
    private final Map<ExecutionAttemptID, Execution> currentExecutions;
    /**
     * Listeners that receive messages when the entire job switches its status (such as from
     * RUNNING to FINISHED).
     */
    private final List<JobStatusListener> jobStatusListeners;
/**
     * Timestamps (in milliseconds as returned by {@code System.currentTimeMillis()}) when the
* execution graph transitioned into a certain state. The index into this array is the ordinal
* of the enum value, i.e. the timestamp when the graph went into state "RUNNING" is at {@code
* stateTimestamps[RUNNING.ordinal()]}.
*/
private final long[] stateTimestamps;
/** The timeout for all messages that require a response/acknowledgement. */
private final Time rpcTimeout;
/** The classloader for the user code. Needed for calls into user code classes. */
private final ClassLoader userClassLoader;
/** Registered KvState instances reported by the TaskManagers. */
private final KvStateLocationRegistry kvStateLocationRegistry;
/** Blob writer used to offload RPC messages. */
private final BlobWriter blobWriter;
/** The total number of vertices currently in the execution graph. */
private int numVerticesTotal;
private final PartitionReleaseStrategy.Factory partitionReleaseStrategyFactory;
private PartitionReleaseStrategy partitionReleaseStrategy;
private DefaultExecutionTopology executionTopology;
@Nullable private InternalFailuresListener internalTaskFailuresListener;
/** Counts all restarts. Used by other Gauges/Meters and does not register to metric group. */
private final Counter numberOfRestartsCounter = new SimpleCounter();
// ------ Configuration of the Execution -------
private final TaskDeploymentDescriptorFactory.PartitionLocationConstraint
partitionLocationConstraint;
/** The maximum number of prior execution attempts kept in history. */
private final int maxPriorAttemptsHistoryLength;
// ------ Execution status and progress. These values are volatile, and accessed under the lock
// -------
private int numFinishedVertices;
/** Current status of the job execution. */
private volatile JobStatus state = JobStatus.CREATED;
/** A future that completes once the job has reached a terminal state. */
    private final CompletableFuture<JobStatus> terminationFuture = new CompletableFuture<>();
/**
* The exception that caused the job to fail. This is set to the first root exception that was
* not recoverable and triggered job failure.
*/
private Throwable failureCause;
/**
* The extended failure cause information for the job. This exists in addition to
* 'failureCause', to let 'failureCause' be a strong reference to the exception, while this info
* holds no strong reference to any user-defined classes.
*/
private ErrorInfo failureInfo;
private final JobMasterPartitionTracker partitionTracker;
private final ResultPartitionAvailabilityChecker resultPartitionAvailabilityChecker;
/** Future for an ongoing or completed scheduling action. */
    @Nullable private CompletableFuture<Void> schedulingFuture;
private final VertexAttemptNumberStore initialAttemptCounts;
private final VertexParallelismStore parallelismStore;
// ------ Fields that are relevant to the execution and need to be cleared before archiving
// -------
/** The coordinator for checkpoints, if snapshot checkpoints are enabled. */
@Nullable private CheckpointCoordinator checkpointCoordinator;
/** TODO, replace it with main thread executor. */
@Nullable private ScheduledExecutorService checkpointCoordinatorTimer;
/**
* Checkpoint stats tracker separate from the coordinator in order to be available after
* archiving.
*/
private CheckpointStatsTracker checkpointStatsTracker;
// ------ Fields that are only relevant for archived execution graphs ------------
@Nullable private String stateBackendName;
@Nullable private String checkpointStorageName;
private String jsonPlan;
/** Shuffle master to register partitions for task deployment. */
    private final ShuffleMaster<?> shuffleMaster;
private final ExecutionDeploymentListener executionDeploymentListener;
private final ExecutionStateUpdateListener executionStateUpdateListener;
private final EdgeManager edgeManager;
    private final Map<ExecutionVertexID, ExecutionVertex> executionVerticesById;
    private final Map<IntermediateResultPartitionID, IntermediateResultPartition>
            resultPartitionsById;
// --------------------------------------------------------------------------------------------
// Constructors
// --------------------------------------------------------------------------------------------
public DefaultExecutionGraph(
JobInformation jobInformation,
ScheduledExecutorService futureExecutor,
Executor ioExecutor,
Time rpcTimeout,
int maxPriorAttemptsHistoryLength,
ClassLoader userClassLoader,
BlobWriter blobWriter,
PartitionReleaseStrategy.Factory partitionReleaseStrategyFactory,
            ShuffleMaster<?> shuffleMaster,
JobMasterPartitionTracker partitionTracker,
TaskDeploymentDescriptorFactory.PartitionLocationConstraint partitionLocationConstraint,
ExecutionDeploymentListener executionDeploymentListener,
ExecutionStateUpdateListener executionStateUpdateListener,
long initializationTimestamp,
VertexAttemptNumberStore initialAttemptCounts,
VertexParallelismStore vertexParallelismStore)
throws IOException {
this.jobInformation = checkNotNull(jobInformation);
this.blobWriter = checkNotNull(blobWriter);
this.partitionLocationConstraint = checkNotNull(partitionLocationConstraint);
this.jobInformationOrBlobKey =
BlobWriter.serializeAndTryOffload(
jobInformation, jobInformation.getJobId(), blobWriter);
this.futureExecutor = checkNotNull(futureExecutor);
this.ioExecutor = checkNotNull(ioExecutor);
this.userClassLoader = checkNotNull(userClassLoader, "userClassLoader");
this.tasks = new HashMap<>(16);
this.intermediateResults = new HashMap<>(16);
this.verticesInCreationOrder = new ArrayList<>(16);
this.currentExecutions = new HashMap<>(16);
this.jobStatusListeners = new ArrayList<>();
this.stateTimestamps = new long[JobStatus.values().length];
this.stateTimestamps[JobStatus.INITIALIZING.ordinal()] = initializationTimestamp;
this.stateTimestamps[JobStatus.CREATED.ordinal()] = System.currentTimeMillis();
this.rpcTimeout = checkNotNull(rpcTimeout);
this.partitionReleaseStrategyFactory = checkNotNull(partitionReleaseStrategyFactory);
this.kvStateLocationRegistry =
new KvStateLocationRegistry(jobInformation.getJobId(), getAllVertices());
this.maxPriorAttemptsHistoryLength = maxPriorAttemptsHistoryLength;
this.schedulingFuture = null;
this.jobMasterMainThreadExecutor =
new ComponentMainThreadExecutor.DummyComponentMainThreadExecutor(
"ExecutionGraph is not initialized with proper main thread executor. "
+ "Call to ExecutionGraph.start(...) required.");
this.shuffleMaster = checkNotNull(shuffleMaster);
this.partitionTracker = checkNotNull(partitionTracker);
this.resultPartitionAvailabilityChecker =
new ExecutionGraphResultPartitionAvailabilityChecker(
this::createResultPartitionId, partitionTracker);
this.executionDeploymentListener = executionDeploymentListener;
this.executionStateUpdateListener = executionStateUpdateListener;
this.initialAttemptCounts = initialAttemptCounts;
this.parallelismStore = vertexParallelismStore;
this.edgeManager = new EdgeManager();
this.executionVerticesById = new HashMap<>();
this.resultPartitionsById = new HashMap<>();
}
@Override
public void start(@Nonnull ComponentMainThreadExecutor jobMasterMainThreadExecutor) {
this.jobMasterMainThreadExecutor = jobMasterMainThreadExecutor;
}
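    // Illustrative lifecycle sketch (hedged; 'buildGraph' and 'mainThreadExecutor' are
    // hypothetical stand-ins for scheduler-provided components): the graph must be started
    // with the job master's main thread executor before any state-mutating call, since most
    // methods guard themselves via assertRunningInJobMasterMainThread().
    //
    //   DefaultExecutionGraph executionGraph = buildGraph(jobInformation, ...);
    //   executionGraph.start(mainThreadExecutor);         // replaces the dummy executor
    //   executionGraph.attachJobGraph(sortedJobVertices); // topologically sorted JobVertex list
    //   executionGraph.transitionToRunning();             // CREATED -> RUNNING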
// --------------------------------------------------------------------------------------------
// Configuration of Data-flow wide execution settings
// --------------------------------------------------------------------------------------------
@Override
public SchedulingTopology getSchedulingTopology() {
return executionTopology;
}
@Override
public TaskDeploymentDescriptorFactory.PartitionLocationConstraint
getPartitionLocationConstraint() {
return partitionLocationConstraint;
}
@Override
@Nonnull
public ComponentMainThreadExecutor getJobMasterMainThreadExecutor() {
return jobMasterMainThreadExecutor;
}
@Override
    public Optional<String> getStateBackendName() {
return Optional.ofNullable(stateBackendName);
}
@Override
    public Optional<String> getCheckpointStorageName() {
return Optional.ofNullable(checkpointStorageName);
}
@Override
public void enableCheckpointing(
CheckpointCoordinatorConfiguration chkConfig,
            List<MasterTriggerRestoreHook<?>> masterHooks,
CheckpointIDCounter checkpointIDCounter,
CompletedCheckpointStore checkpointStore,
StateBackend checkpointStateBackend,
CheckpointStorage checkpointStorage,
CheckpointStatsTracker statsTracker,
CheckpointsCleaner checkpointsCleaner) {
checkState(state == JobStatus.CREATED, "Job must be in CREATED state");
checkState(checkpointCoordinator == null, "checkpointing already enabled");
        final Collection<OperatorCoordinatorCheckpointContext> operatorCoordinators =
buildOpCoordinatorCheckpointContexts();
checkpointStatsTracker = checkNotNull(statsTracker, "CheckpointStatsTracker");
CheckpointFailureManager failureManager =
new CheckpointFailureManager(
chkConfig.getTolerableCheckpointFailureNumber(),
new CheckpointFailureManager.FailJobCallback() {
@Override
public void failJob(Throwable cause) {
getJobMasterMainThreadExecutor().execute(() -> failGlobal(cause));
}
@Override
public void failJobDueToTaskFailure(
Throwable cause, ExecutionAttemptID failingTask) {
getJobMasterMainThreadExecutor()
.execute(
() ->
failGlobalIfExecutionIsStillRunning(
cause, failingTask));
}
});
checkState(checkpointCoordinatorTimer == null);
checkpointCoordinatorTimer =
Executors.newSingleThreadScheduledExecutor(
new DispatcherThreadFactory(
Thread.currentThread().getThreadGroup(), "Checkpoint Timer"));
// create the coordinator that triggers and commits checkpoints and holds the state
checkpointCoordinator =
new CheckpointCoordinator(
jobInformation.getJobId(),
chkConfig,
operatorCoordinators,
checkpointIDCounter,
checkpointStore,
checkpointStorage,
ioExecutor,
checkpointsCleaner,
new ScheduledExecutorServiceAdapter(checkpointCoordinatorTimer),
SharedStateRegistry.DEFAULT_FACTORY,
failureManager,
createCheckpointPlanCalculator(),
new ExecutionAttemptMappingProvider(getAllExecutionVertices()));
// register the master hooks on the checkpoint coordinator
        for (MasterTriggerRestoreHook<?> hook : masterHooks) {
if (!checkpointCoordinator.addMasterHook(hook)) {
LOG.warn(
"Trying to register multiple checkpoint hooks with the name: {}",
hook.getIdentifier());
}
}
checkpointCoordinator.setCheckpointStatsTracker(checkpointStatsTracker);
if (checkpointCoordinator.isPeriodicCheckpointingConfigured()) {
// the periodic checkpoint scheduler is activated and deactivated as a result of
// job status changes (running -> on, all other states -> off)
registerJobStatusListener(checkpointCoordinator.createActivatorDeactivator());
}
this.stateBackendName = checkpointStateBackend.getClass().getSimpleName();
this.checkpointStorageName = checkpointStorage.getClass().getSimpleName();
}
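    // Hedged usage sketch: enableCheckpointing(...) may be called at most once and only while
    // getState() == JobStatus.CREATED; the arguments below are assumed to be supplied by the
    // scheduler that owns this graph.
    //
    //   executionGraph.enableCheckpointing(
    //           chkConfig, masterHooks, checkpointIDCounter, checkpointStore,
    //           stateBackend, checkpointStorage, statsTracker, checkpointsCleaner);
    //   // afterwards getCheckpointCoordinator() != null; if periodic checkpointing is
    //   // configured, the coordinator is started/stopped by the registered job status listener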
private CheckpointPlanCalculator createCheckpointPlanCalculator() {
return new DefaultCheckpointPlanCalculator(
getJobID(),
new ExecutionGraphCheckpointPlanCalculatorContext(this),
getVerticesTopologically());
}
@Override
@Nullable
public CheckpointCoordinator getCheckpointCoordinator() {
return checkpointCoordinator;
}
@Override
public KvStateLocationRegistry getKvStateLocationRegistry() {
return kvStateLocationRegistry;
}
@Override
public CheckpointCoordinatorConfiguration getCheckpointCoordinatorConfiguration() {
if (checkpointStatsTracker != null) {
return checkpointStatsTracker.getJobCheckpointingConfiguration();
} else {
return null;
}
}
@Override
public CheckpointStatsSnapshot getCheckpointStatsSnapshot() {
if (checkpointStatsTracker != null) {
return checkpointStatsTracker.createSnapshot();
} else {
return null;
}
}
    private Collection<OperatorCoordinatorCheckpointContext>
            buildOpCoordinatorCheckpointContexts() {
        final ArrayList<OperatorCoordinatorCheckpointContext> contexts = new ArrayList<>();
for (final ExecutionJobVertex vertex : verticesInCreationOrder) {
contexts.addAll(vertex.getOperatorCoordinators());
}
contexts.trimToSize();
return contexts;
}
// --------------------------------------------------------------------------------------------
// Properties and Status of the Execution Graph
// --------------------------------------------------------------------------------------------
@Override
public void setJsonPlan(String jsonPlan) {
this.jsonPlan = jsonPlan;
}
@Override
public String getJsonPlan() {
return jsonPlan;
}
@Override
    public Either<SerializedValue<JobInformation>, PermanentBlobKey>
            getJobInformationOrBlobKey() {
return jobInformationOrBlobKey;
}
@Override
public JobID getJobID() {
return jobInformation.getJobId();
}
@Override
public String getJobName() {
return jobInformation.getJobName();
}
@Override
public boolean isStoppable() {
return this.isStoppable;
}
@Override
public Configuration getJobConfiguration() {
return jobInformation.getJobConfiguration();
}
@Override
public ClassLoader getUserClassLoader() {
return this.userClassLoader;
}
@Override
public JobStatus getState() {
return state;
}
@Override
public Throwable getFailureCause() {
return failureCause;
}
public ErrorInfo getFailureInfo() {
return failureInfo;
}
@Override
public long getNumberOfRestarts() {
return numberOfRestartsCounter.getCount();
}
@Override
public int getNumFinishedVertices() {
return numFinishedVertices;
}
@Override
public ExecutionJobVertex getJobVertex(JobVertexID id) {
return this.tasks.get(id);
}
@Override
    public Map<JobVertexID, ExecutionJobVertex> getAllVertices() {
return Collections.unmodifiableMap(this.tasks);
}
@Override
    public Iterable<ExecutionJobVertex> getVerticesTopologically() {
// we return a specific iterator that does not fail with concurrent modifications
// the list is append only, so it is safe for that
final int numElements = this.verticesInCreationOrder.size();
        return new Iterable<ExecutionJobVertex>() {
            @Override
            public Iterator<ExecutionJobVertex> iterator() {
                return new Iterator<ExecutionJobVertex>() {
private int pos = 0;
@Override
public boolean hasNext() {
return pos < numElements;
}
@Override
public ExecutionJobVertex next() {
if (hasNext()) {
return verticesInCreationOrder.get(pos++);
} else {
throw new NoSuchElementException();
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
};
}
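    // Example of the snapshot semantics (illustrative): the iterator is bounded by the vertex
    // count captured when getVerticesTopologically() is called, so vertices appended by a
    // later attachJobGraph(...) are simply not visited instead of provoking a
    // ConcurrentModificationException.
    //
    //   Iterable<ExecutionJobVertex> snapshot = executionGraph.getVerticesTopologically();
    //   for (ExecutionJobVertex ejv : snapshot) {
    //       // safe even if more vertices are attached while iterating
    //   }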
@Override
public int getTotalNumberOfVertices() {
return numVerticesTotal;
}
@Override
    public Map<IntermediateDataSetID, IntermediateResult> getAllIntermediateResults() {
return Collections.unmodifiableMap(this.intermediateResults);
}
@Override
    public Iterable<ExecutionVertex> getAllExecutionVertices() {
        return new Iterable<ExecutionVertex>() {
            @Override
            public Iterator<ExecutionVertex> iterator() {
                return new AllVerticesIterator(getVerticesTopologically().iterator());
}
};
}
@Override
public EdgeManager getEdgeManager() {
return edgeManager;
}
@Override
public ExecutionVertex getExecutionVertexOrThrow(ExecutionVertexID id) {
return checkNotNull(executionVerticesById.get(id));
}
@Override
public IntermediateResultPartition getResultPartitionOrThrow(
final IntermediateResultPartitionID id) {
return checkNotNull(resultPartitionsById.get(id));
}
@Override
public long getStatusTimestamp(JobStatus status) {
return this.stateTimestamps[status.ordinal()];
}
@Override
public final BlobWriter getBlobWriter() {
return blobWriter;
}
@Override
public Executor getFutureExecutor() {
return futureExecutor;
}
@Override
    public Map<String, OptionalFailure<Accumulator<?, ?>>> aggregateUserAccumulators() {
        Map<String, OptionalFailure<Accumulator<?, ?>>> userAccumulators = new HashMap<>();
for (ExecutionVertex vertex : getAllExecutionVertices()) {
            Map<String, Accumulator<?, ?>> next =
vertex.getCurrentExecutionAttempt().getUserAccumulators();
if (next != null) {
AccumulatorHelper.mergeInto(userAccumulators, next);
}
}
return userAccumulators;
}
/**
* Gets a serialized accumulator map.
*
* @return The accumulator map with serialized accumulator values.
*/
@Override
    public Map<String, SerializedValue<OptionalFailure<Object>>> getAccumulatorsSerialized() {
return aggregateUserAccumulators().entrySet().stream()
.collect(
Collectors.toMap(
Map.Entry::getKey,
entry -> serializeAccumulator(entry.getKey(), entry.getValue())));
}
    private static SerializedValue<OptionalFailure<Object>> serializeAccumulator(
            String name, OptionalFailure<Accumulator<?, ?>> accumulator) {
try {
if (accumulator.isFailure()) {
return new SerializedValue<>(
OptionalFailure.ofFailure(accumulator.getFailureCause()));
}
return new SerializedValue<>(
OptionalFailure.of(accumulator.getUnchecked().getLocalValue()));
} catch (IOException ioe) {
LOG.error("Could not serialize accumulator " + name + '.', ioe);
try {
return new SerializedValue<>(OptionalFailure.ofFailure(ioe));
} catch (IOException e) {
throw new RuntimeException(
"It should never happen that we cannot serialize the accumulator serialization exception.",
e);
}
}
}
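    // Consumer-side sketch (hedged; "my-accumulator" is a hypothetical name): values returned
    // by getAccumulatorsSerialized() must be deserialized with the user class loader because
    // accumulator result types may be defined in user code.
    //
    //   Map<String, SerializedValue<OptionalFailure<Object>>> serialized =
    //           executionGraph.getAccumulatorsSerialized();
    //   OptionalFailure<Object> result =
    //           serialized.get("my-accumulator").deserializeValue(userClassLoader);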
/**
     * Returns a stringified version of the user-defined accumulators.
*
* @return an Array containing the StringifiedAccumulatorResult objects
*/
@Override
public StringifiedAccumulatorResult[] getAccumulatorResultsStringified() {
        Map<String, OptionalFailure<Accumulator<?, ?>>> accumulatorMap =
aggregateUserAccumulators();
return StringifiedAccumulatorResult.stringifyAccumulatorResults(accumulatorMap);
}
@Override
public void setInternalTaskFailuresListener(
final InternalFailuresListener internalTaskFailuresListener) {
checkNotNull(internalTaskFailuresListener);
checkState(
this.internalTaskFailuresListener == null,
"internalTaskFailuresListener can be only set once");
this.internalTaskFailuresListener = internalTaskFailuresListener;
}
// --------------------------------------------------------------------------------------------
// Actions
// --------------------------------------------------------------------------------------------
@Override
    public void attachJobGraph(List<JobVertex> topologicallySorted) throws JobException {
        assertRunningInJobMasterMainThread();
        LOG.debug(
                "Attaching {} topologically sorted vertices to existing job graph with {} "
                        + "vertices and {} intermediate results.",
                topologicallySorted.size(),
                tasks.size(),
                intermediateResults.size());
        final ArrayList<ExecutionJobVertex> newExecJobVertices =
                new ArrayList<>(topologicallySorted.size());
        final long createTimestamp = System.currentTimeMillis();
        for (JobVertex jobVertex : topologicallySorted) {
if (jobVertex.isInputVertex() && !jobVertex.isStoppable()) {
this.isStoppable = false;
}
VertexParallelismInformation parallelismInfo =
parallelismStore.getParallelismInfo(jobVertex.getID());
// create the execution job vertex and attach it to the graph
ExecutionJobVertex ejv =
new ExecutionJobVertex(
this,
jobVertex,
maxPriorAttemptsHistoryLength,
rpcTimeout,
createTimestamp,
parallelismInfo,
initialAttemptCounts.getAttemptCounts(jobVertex.getID()));
ejv.connectToPredecessors(this.intermediateResults);
ExecutionJobVertex previousTask = this.tasks.putIfAbsent(jobVertex.getID(), ejv);
if (previousTask != null) {
throw new JobException(
String.format(
"Encountered two job vertices with ID %s : previous=[%s] / new=[%s]",
                                jobVertex.getID(), previousTask, ejv));
}
for (IntermediateResult res : ejv.getProducedDataSets()) {
IntermediateResult previousDataSet =
this.intermediateResults.putIfAbsent(res.getId(), res);
if (previousDataSet != null) {
throw new JobException(
                        String.format(
                                "Encountered two intermediate data sets with ID %s : previous=[%s] / new=[%s]",
                                res.getId(), previousDataSet, res));
}
}
this.verticesInCreationOrder.add(ejv);
this.numVerticesTotal += ejv.getParallelism();
newExecJobVertices.add(ejv);
}
registerExecutionVerticesAndResultPartitions(this.verticesInCreationOrder);
// the topology assigning should happen before notifying new vertices to failoverStrategy
executionTopology = DefaultExecutionTopology.fromExecutionGraph(this);
partitionReleaseStrategy =
partitionReleaseStrategyFactory.createInstance(getSchedulingTopology());
}
@Override
public void transitionToRunning() {
if (!transitionState(JobStatus.CREATED, JobStatus.RUNNING)) {
throw new IllegalStateException(
"Job may only be scheduled from state " + JobStatus.CREATED);
}
}
@Override
public void cancel() {
assertRunningInJobMasterMainThread();
while (true) {
JobStatus current = state;
if (current == JobStatus.RUNNING
|| current == JobStatus.CREATED
|| current == JobStatus.RESTARTING) {
if (transitionState(current, JobStatus.CANCELLING)) {
incrementRestarts();
                    final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;
// cancel ongoing scheduling action
if (ongoingSchedulingFuture != null) {
ongoingSchedulingFuture.cancel(false);
}
                    final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
allTerminal.whenComplete(
(Void value, Throwable throwable) -> {
if (throwable != null) {
transitionState(
JobStatus.CANCELLING,
JobStatus.FAILED,
new FlinkException(
"Could not cancel job "
+ getJobName()
+ " because not all execution job vertices could be cancelled.",
throwable));
} else {
                                    // cancellations may currently be overridden by failures
                                    // which trigger restarts, so we need to pass a proper
                                    // restart global version here
allVerticesInTerminalState();
}
});
return;
}
}
// Executions are being canceled. Go into cancelling and wait for
// all vertices to be in their final state.
else if (current == JobStatus.FAILING) {
if (transitionState(current, JobStatus.CANCELLING)) {
return;
}
} else {
// no need to treat other states
return;
}
}
}
@VisibleForTesting
    protected ConjunctFuture<Void> cancelVerticesAsync() {
        final ArrayList<CompletableFuture<?>> futures =
                new ArrayList<>(verticesInCreationOrder.size());
// cancel all tasks (that still need cancelling)
for (ExecutionJobVertex ejv : verticesInCreationOrder) {
futures.add(ejv.cancelWithFuture());
}
// we build a future that is complete once all vertices have reached a terminal state
return FutureUtils.waitForAll(futures);
}
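    // Usage sketch: the returned ConjunctFuture completes only after every ExecutionJobVertex
    // future has completed, so callers chain the terminal job status transition onto it, as
    // cancel() above and failJob(...) below do:
    //
    //   cancelVerticesAsync()
    //           .whenComplete((ignored, throwable) -> { /* transition to a terminal state */ });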
@Override
public void suspend(Throwable suspensionCause) {
assertRunningInJobMasterMainThread();
if (state.isTerminalState()) {
// stay in a terminal state
return;
} else if (transitionState(state, JobStatus.SUSPENDED, suspensionCause)) {
initFailureCause(suspensionCause, System.currentTimeMillis());
incrementRestarts();
// cancel ongoing scheduling action
if (schedulingFuture != null) {
schedulingFuture.cancel(false);
}
            final ArrayList<CompletableFuture<Void>> executionJobVertexTerminationFutures =
                    new ArrayList<>(verticesInCreationOrder.size());
for (ExecutionJobVertex ejv : verticesInCreationOrder) {
executionJobVertexTerminationFutures.add(ejv.suspend());
}
            final ConjunctFuture<Void> jobVerticesTerminationFuture =
FutureUtils.waitForAll(executionJobVertexTerminationFutures);
checkState(jobVerticesTerminationFuture.isDone(), "Suspend needs to happen atomically");
jobVerticesTerminationFuture.whenComplete(
(Void ignored, Throwable throwable) -> {
if (throwable != null) {
LOG.debug("Could not properly suspend the execution graph.", throwable);
}
onTerminalState(state);
LOG.info("Job {} has been suspended.", getJobID());
});
} else {
throw new IllegalStateException(
String.format(
"Could not suspend because transition from %s to %s failed.",
state, JobStatus.SUSPENDED));
}
}
void failGlobalIfExecutionIsStillRunning(Throwable cause, ExecutionAttemptID failingAttempt) {
final Execution failedExecution = currentExecutions.get(failingAttempt);
if (failedExecution != null
&& (failedExecution.getState() == ExecutionState.RUNNING
|| failedExecution.getState() == ExecutionState.INITIALIZING)) {
failGlobal(cause);
} else {
LOG.debug(
"The failing attempt {} belongs to an already not"
+ " running task thus won't fail the job",
failingAttempt);
}
}
@Override
public void failGlobal(Throwable t) {
checkState(internalTaskFailuresListener != null);
internalTaskFailuresListener.notifyGlobalFailure(t);
}
/**
* Returns the serializable {@link ArchivedExecutionConfig}.
*
* @return ArchivedExecutionConfig which may be null in case of errors
*/
@Override
public ArchivedExecutionConfig getArchivedExecutionConfig() {
// create a summary of all relevant data accessed in the web interface's JobConfigHandler
try {
ExecutionConfig executionConfig =
jobInformation.getSerializedExecutionConfig().deserializeValue(userClassLoader);
if (executionConfig != null) {
return executionConfig.archive();
}
} catch (IOException | ClassNotFoundException e) {
LOG.error("Couldn't create ArchivedExecutionConfig for job {} ", getJobID(), e);
}
return null;
}
@Override
    public CompletableFuture<JobStatus> getTerminationFuture() {
return terminationFuture;
}
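    // Illustrative: the termination future is completed in onTerminalState(...) with the final
    // JobStatus, so shutdown logic can be chained instead of polling getState():
    //
    //   executionGraph.getTerminationFuture()
    //           .thenAccept(status -> LOG.info("Job reached terminal state {}", status));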
@Override
@VisibleForTesting
public JobStatus waitUntilTerminal() throws InterruptedException {
try {
return terminationFuture.get();
} catch (ExecutionException e) {
// this should never happen
// it would be a bug, so we don't expect this to be handled and throw
// an unchecked exception here
throw new RuntimeException(e);
}
}
// ------------------------------------------------------------------------
// State Transitions
// ------------------------------------------------------------------------
@Override
public boolean transitionState(JobStatus current, JobStatus newState) {
return transitionState(current, newState, null);
}
private void transitionState(JobStatus newState, Throwable error) {
transitionState(state, newState, error);
}
private boolean transitionState(JobStatus current, JobStatus newState, Throwable error) {
assertRunningInJobMasterMainThread();
// consistency check
if (current.isTerminalState()) {
String message = "Job is trying to leave terminal state " + current;
LOG.error(message);
throw new IllegalStateException(message);
}
// now do the actual state transition
if (state == current) {
state = newState;
LOG.info(
"Job {} ({}) switched from state {} to {}.",
getJobName(),
getJobID(),
current,
newState,
error);
stateTimestamps[newState.ordinal()] = System.currentTimeMillis();
notifyJobStatusChange(newState, error);
return true;
} else {
return false;
}
}
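    // The three-argument overload acts like a compare-and-set on the job status: the
    // transition is applied only if the graph is still in the expected 'current' state.
    // Callers that may observe a stale status therefore retry in a loop; a simplified sketch
    // mirroring cancel() above:
    //
    //   while (true) {
    //       JobStatus current = getState();
    //       if (current.isTerminalState() || transitionState(current, JobStatus.CANCELLING)) {
    //           break; // nothing to do, or the transition succeeded
    //       }
    //   }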
@Override
public void incrementRestarts() {
numberOfRestartsCounter.inc();
}
@Override
public void initFailureCause(Throwable t, long timestamp) {
this.failureCause = t;
this.failureInfo = new ErrorInfo(t, timestamp);
}
// ------------------------------------------------------------------------
// Job Status Progress
// ------------------------------------------------------------------------
/**
* Called whenever a vertex reaches state FINISHED (completed successfully). Once all vertices
* are in the FINISHED state, the program is successfully done.
*/
@Override
public void vertexFinished() {
assertRunningInJobMasterMainThread();
final int numFinished = ++numFinishedVertices;
if (numFinished == numVerticesTotal) {
// done :-)
// check whether we are still in "RUNNING" and trigger the final cleanup
if (state == JobStatus.RUNNING) {
// we do the final cleanup in the I/O executor, because it may involve
// some heavier work
try {
for (ExecutionJobVertex ejv : verticesInCreationOrder) {
ejv.getJobVertex().finalizeOnMaster(getUserClassLoader());
}
} catch (Throwable t) {
ExceptionUtils.rethrowIfFatalError(t);
ClusterEntryPointExceptionUtils.tryEnrichClusterEntryPointError(t);
failGlobal(new Exception("Failed to finalize execution on master", t));
return;
}
// if we do not make this state transition, then a concurrent
// cancellation or failure happened
if (transitionState(JobStatus.RUNNING, JobStatus.FINISHED)) {
onTerminalState(JobStatus.FINISHED);
}
}
}
}
@Override
public void vertexUnFinished() {
assertRunningInJobMasterMainThread();
numFinishedVertices--;
}
/**
* This method is a callback during cancellation/failover and called when all tasks have reached
* a terminal state (cancelled/failed/finished).
*/
private void allVerticesInTerminalState() {
assertRunningInJobMasterMainThread();
// we are done, transition to the final state
JobStatus current;
while (true) {
current = this.state;
if (current == JobStatus.RUNNING) {
failGlobal(
new Exception(
"ExecutionGraph went into allVerticesInTerminalState() from RUNNING"));
} else if (current == JobStatus.CANCELLING) {
if (transitionState(current, JobStatus.CANCELED)) {
onTerminalState(JobStatus.CANCELED);
break;
}
} else if (current == JobStatus.FAILING) {
break;
} else if (current.isGloballyTerminalState()) {
LOG.warn(
"Job has entered globally terminal state without waiting for all "
+ "job vertices to reach final state.");
break;
} else {
failGlobal(
new Exception(
"ExecutionGraph went into final state from state " + current));
break;
}
}
// done transitioning the state
}
@Override
public void failJob(Throwable cause, long timestamp) {
if (state == JobStatus.FAILING || state.isTerminalState()) {
return;
}
transitionState(JobStatus.FAILING, cause);
initFailureCause(cause, timestamp);
FutureUtils.assertNoException(
cancelVerticesAsync()
.whenComplete(
(aVoid, throwable) -> {
if (transitionState(
JobStatus.FAILING, JobStatus.FAILED, cause)) {
onTerminalState(JobStatus.FAILED);
} else if (state == JobStatus.CANCELLING) {
transitionState(JobStatus.CANCELLING, JobStatus.CANCELED);
onTerminalState(JobStatus.CANCELED);
} else if (!state.isTerminalState()) {
throw new IllegalStateException(
"Cannot complete job failing from an unexpected state: "
+ state);
}
}));
}
private void onTerminalState(JobStatus status) {
LOG.debug("ExecutionGraph {} reached terminal state {}.", getJobID(), status);
try {
CheckpointCoordinator coord = this.checkpointCoordinator;
this.checkpointCoordinator = null;
if (coord != null) {
coord.shutdown();
}
if (checkpointCoordinatorTimer != null) {
checkpointCoordinatorTimer.shutdownNow();
checkpointCoordinatorTimer = null;
}
} catch (Exception e) {
LOG.error("Error while cleaning up after execution", e);
} finally {
terminationFuture.complete(status);
}
}
// --------------------------------------------------------------------------------------------
// Callbacks and Callback Utilities
// --------------------------------------------------------------------------------------------
@Override
public boolean updateState(TaskExecutionStateTransition state) {
assertRunningInJobMasterMainThread();
final Execution attempt = currentExecutions.get(state.getID());
if (attempt != null) {
try {
final boolean stateUpdated = updateStateInternal(state, attempt);
maybeReleasePartitions(attempt);
return stateUpdated;
} catch (Throwable t) {
ExceptionUtils.rethrowIfFatalErrorOrOOM(t);
// failures during updates leave the ExecutionGraph inconsistent
failGlobal(t);
return false;
}
} else {
return false;
}
}
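    // Illustrative call path (hedged): the JobMaster RPC endpoint forwards task state changes
    // reported by TaskManagers to this method on the main thread; a 'false' return value means
    // the attempt is unknown (or the update failed), which typically causes the reporting
    // TaskManager to discard the task.
    //
    //   boolean accepted = executionGraph.updateState(taskExecutionStateTransition);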
private boolean updateStateInternal(
final TaskExecutionStateTransition state, final Execution attempt) {
        Map<String, Accumulator<?, ?>> accumulators;
switch (state.getExecutionState()) {
case INITIALIZING:
return attempt.switchToRecovering();
case RUNNING:
return attempt.switchToRunning();
case FINISHED:
// this deserialization is exception-free
accumulators = deserializeAccumulators(state);
attempt.markFinished(accumulators, state.getIOMetrics());
return true;
case CANCELED:
// this deserialization is exception-free
accumulators = deserializeAccumulators(state);
attempt.completeCancelling(accumulators, state.getIOMetrics(), false);
return true;
case FAILED:
// this deserialization is exception-free
accumulators = deserializeAccumulators(state);
attempt.markFailed(
state.getError(userClassLoader),
state.getCancelTask(),
accumulators,
state.getIOMetrics(),
state.getReleasePartitions(),
true);
return true;
default:
// we mark as failed and return false, which triggers the TaskManager
// to remove the task
attempt.fail(
new Exception(
"TaskManager sent illegal state update: "
+ state.getExecutionState()));
return false;
}
}
private void maybeReleasePartitions(final Execution attempt) {
final ExecutionVertexID finishedExecutionVertex = attempt.getVertex().getID();
if (attempt.getState() == ExecutionState.FINISHED) {
            final List<IntermediateResultPartitionID> releasablePartitions =
partitionReleaseStrategy.vertexFinished(finishedExecutionVertex);
releasePartitions(releasablePartitions);
} else {
partitionReleaseStrategy.vertexUnfinished(finishedExecutionVertex);
}
}
    private void releasePartitions(final List<IntermediateResultPartitionID> releasablePartitions) {
if (releasablePartitions.size() > 0) {
            final List<ResultPartitionID> partitionIds =
releasablePartitions.stream()
.map(this::createResultPartitionId)
.collect(Collectors.toList());
partitionTracker.stopTrackingAndReleasePartitions(partitionIds);
}
}
ResultPartitionID createResultPartitionId(
final IntermediateResultPartitionID resultPartitionId) {
final SchedulingResultPartition schedulingResultPartition =
getSchedulingTopology().getResultPartition(resultPartitionId);
final SchedulingExecutionVertex producer = schedulingResultPartition.getProducer();
final ExecutionVertexID producerId = producer.getId();
final JobVertexID jobVertexId = producerId.getJobVertexId();
final ExecutionJobVertex jobVertex = getJobVertex(jobVertexId);
checkNotNull(jobVertex, "Unknown job vertex %s", jobVertexId);
final ExecutionVertex[] taskVertices = jobVertex.getTaskVertices();
final int subtaskIndex = producerId.getSubtaskIndex();
        checkState(
                subtaskIndex < taskVertices.length,
                "Invalid subtask index %s for job vertex %s",
                subtaskIndex,
                jobVertexId);
final ExecutionVertex taskVertex = taskVertices[subtaskIndex];
final Execution execution = taskVertex.getCurrentExecutionAttempt();
return new ResultPartitionID(resultPartitionId, execution.getAttemptId());
}
/**
* Deserializes accumulators from a task state update.
*
     * <p>This method never throws an exception!
*
* @param state The task execution state from which to deserialize the accumulators.
     * @return The deserialized accumulators, or null, if there are no accumulators or an error
     *     occurred.
*/
    private Map<String, Accumulator<?, ?>> deserializeAccumulators(
            TaskExecutionStateTransition state) {
AccumulatorSnapshot serializedAccumulators = state.getAccumulators();
if (serializedAccumulators != null) {
try {
return serializedAccumulators.deserializeUserAccumulators(userClassLoader);
} catch (Throwable t) {
                // we catch Throwable here to include all forms of linking errors that may
                // occur if user classes are missing in the classpath
LOG.error("Failed to deserialize final accumulator results.", t);
}
}
return null;
}
@Override
public void notifyPartitionDataAvailable(ResultPartitionID partitionId) {
assertRunningInJobMasterMainThread();
final Execution execution = currentExecutions.get(partitionId.getProducerId());
        checkState(
                execution != null,
                "Cannot find execution for execution Id " + partitionId.getProducerId() + ".");
execution.getVertex().notifyPartitionDataAvailable(partitionId);
}
@Override
    public Map<ExecutionAttemptID, Execution> getRegisteredExecutions() {
return Collections.unmodifiableMap(currentExecutions);
}
@Override
public void registerExecution(Execution exec) {
assertRunningInJobMasterMainThread();
Execution previous = currentExecutions.putIfAbsent(exec.getAttemptId(), exec);
if (previous != null) {
failGlobal(
new Exception(
"Trying to register execution "
+ exec
+ " for already used ID "
+ exec.getAttemptId()));
}
}
@Override
public void deregisterExecution(Execution exec) {
assertRunningInJobMasterMainThread();
Execution contained = currentExecutions.remove(exec.getAttemptId());
if (contained != null && contained != exec) {
failGlobal(
new Exception(
"De-registering execution "
+ exec
+ " failed. Found for same ID execution "
+ contained));
}
}
    private void registerExecutionVerticesAndResultPartitions(
            List<ExecutionJobVertex> executionJobVertices) {
for (ExecutionJobVertex executionJobVertex : executionJobVertices) {
for (ExecutionVertex executionVertex : executionJobVertex.getTaskVertices()) {
executionVerticesById.put(executionVertex.getID(), executionVertex);
resultPartitionsById.putAll(executionVertex.getProducedPartitions());
}
}
}
@Override
public void updateAccumulators(AccumulatorSnapshot accumulatorSnapshot) {
        Map<String, Accumulator<?, ?>> userAccumulators;
try {
userAccumulators = accumulatorSnapshot.deserializeUserAccumulators(userClassLoader);
ExecutionAttemptID execID = accumulatorSnapshot.getExecutionAttemptID();
Execution execution = currentExecutions.get(execID);
if (execution != null) {
execution.setAccumulators(userAccumulators);
} else {
LOG.debug("Received accumulator result for unknown execution {}.", execID);
}
} catch (Exception e) {
LOG.error("Cannot update accumulators for job {}.", getJobID(), e);
}
}
// --------------------------------------------------------------------------------------------
// Listeners & Observers
// --------------------------------------------------------------------------------------------
@Override
public void registerJobStatusListener(JobStatusListener listener) {
if (listener != null) {
jobStatusListeners.add(listener);
}
}
private void notifyJobStatusChange(JobStatus newState, Throwable error) {
if (jobStatusListeners.size() > 0) {
final long timestamp = System.currentTimeMillis();
final Throwable serializedError = error == null ? null : new SerializedThrowable(error);
for (JobStatusListener listener : jobStatusListeners) {
try {
listener.jobStatusChanges(getJobID(), newState, timestamp, serializedError);
} catch (Throwable t) {
LOG.warn("Error while notifying JobStatusListener", t);
}
}
}
}
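    // Listener sketch (illustrative; assumes JobStatusListener can be implemented as a
    // lambda): registered listeners observe every successful transition, with errors wrapped
    // as SerializedThrowable so that no strong reference to user-code classes is retained.
    //
    //   executionGraph.registerJobStatusListener(
    //           (jobId, newStatus, timestamp, error) ->
    //                   LOG.info("Job {} switched to {}", jobId, newStatus));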
@Override
public void notifyExecutionChange(
final Execution execution, final ExecutionState newExecutionState) {
executionStateUpdateListener.onStateUpdate(execution.getAttemptId(), newExecutionState);
}
private void assertRunningInJobMasterMainThread() {
if (!(jobMasterMainThreadExecutor
instanceof ComponentMainThreadExecutor.DummyComponentMainThreadExecutor)) {
jobMasterMainThreadExecutor.assertRunningInMainThread();
}
}
@Override
public void notifySchedulerNgAboutInternalTaskFailure(
final ExecutionAttemptID attemptId,
final Throwable t,
final boolean cancelTask,
final boolean releasePartitions) {
checkState(internalTaskFailuresListener != null);
internalTaskFailuresListener.notifyTaskFailure(attemptId, t, cancelTask, releasePartitions);
}
@Override
    public ShuffleMaster<?> getShuffleMaster() {
return shuffleMaster;
}
@Override
public JobMasterPartitionTracker getPartitionTracker() {
return partitionTracker;
}
@Override
public ResultPartitionAvailabilityChecker getResultPartitionAvailabilityChecker() {
return resultPartitionAvailabilityChecker;
}
@Override
public PartitionReleaseStrategy getPartitionReleaseStrategy() {
return partitionReleaseStrategy;
}
@Override
public ExecutionDeploymentListener getExecutionDeploymentListener() {
return executionDeploymentListener;
}
}