org.apache.flink.runtime.executiongraph.Execution Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of flink-runtime_2.10 Show documentation
There is a newer version: 1.3.3
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.executiongraph;

import akka.dispatch.OnComplete;
import akka.dispatch.OnFailure;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.runtime.JobException;
import org.apache.flink.runtime.accumulators.AccumulatorRegistry;
import org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult;
import org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor;
import org.apache.flink.runtime.deployment.PartialInputChannelDeploymentDescriptor;
import org.apache.flink.runtime.deployment.ResultPartitionLocation;
import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.instance.Instance;
import org.apache.flink.runtime.instance.InstanceConnectionInfo;
import org.apache.flink.runtime.instance.ActorGateway;
import org.apache.flink.runtime.instance.SimpleSlot;
import org.apache.flink.runtime.io.network.ConnectionID;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint;
import org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException;
import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit;
import org.apache.flink.runtime.jobmanager.scheduler.Scheduler;
import org.apache.flink.runtime.jobmanager.scheduler.SlotAllocationFuture;
import org.apache.flink.runtime.jobmanager.scheduler.SlotAllocationFutureAction;
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
import org.apache.flink.runtime.messages.Messages;
import org.apache.flink.runtime.messages.TaskMessages.TaskOperationResult;
import org.apache.flink.runtime.state.StateHandle;
import org.apache.flink.runtime.util.SerializableObject;
import org.apache.flink.util.SerializedValue;
import org.apache.flink.util.ExceptionUtils;

import org.slf4j.Logger;

import scala.concurrent.ExecutionContext;
import scala.concurrent.Future;
import scala.concurrent.duration.FiniteDuration;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;

import static akka.dispatch.Futures.future;
import static org.apache.flink.runtime.execution.ExecutionState.CANCELED;
import static org.apache.flink.runtime.execution.ExecutionState.CANCELING;
import static org.apache.flink.runtime.execution.ExecutionState.CREATED;
import static org.apache.flink.runtime.execution.ExecutionState.DEPLOYING;
import static org.apache.flink.runtime.execution.ExecutionState.FAILED;
import static org.apache.flink.runtime.execution.ExecutionState.FINISHED;
import static org.apache.flink.runtime.execution.ExecutionState.RUNNING;
import static org.apache.flink.runtime.execution.ExecutionState.SCHEDULED;
import static org.apache.flink.runtime.messages.TaskMessages.CancelTask;
import static org.apache.flink.runtime.messages.TaskMessages.FailIntermediateResultPartitions;
import static org.apache.flink.runtime.messages.TaskMessages.StopTask;
import static org.apache.flink.runtime.messages.TaskMessages.SubmitTask;
import static org.apache.flink.runtime.messages.TaskMessages.UpdatePartitionInfo;
import static org.apache.flink.runtime.messages.TaskMessages.UpdateTaskSinglePartitionInfo;
import static org.apache.flink.runtime.messages.TaskMessages.createUpdateTaskMultiplePartitionInfos;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * A single execution of a vertex. While an {@link ExecutionVertex} can be executed multiple times (for recovery,
 * or other re-computation), this class tracks the state of a single execution of that vertex and the resources.
 * 
 * NOTE ABOUT THE DESIGN RATIONAL:
 * 
 * In several points of the code, we need to deal with possible concurrent state changes and actions.
 * For example, while the call to deploy a task (send it to the TaskManager) happens, the task gets cancelled.
 * 
 * We could lock the entire portion of the code (decision to deploy, deploy, set state to running) such that
 * it is guaranteed that any "cancel command" will only pick up after deployment is done and that the "cancel
 * command" call will never overtake the deploying call.
 * 
 * This blocks the threads big time, because the remote calls may take long. Depending of their locking behavior, it
 * may even result in distributed deadlocks (unless carefully avoided). We therefore use atomic state updates and
 * occasional double-checking to ensure that the state after a completed call is as expected, and trigger correcting
 * actions if it is not. Many actions are also idempotent (like canceling).
 */
public class Execution implements Serializable {

	private static final long serialVersionUID = 42L;

	private static final AtomicReferenceFieldUpdater STATE_UPDATER =
			AtomicReferenceFieldUpdater.newUpdater(Execution.class, ExecutionState.class, "state");

	private static final Logger LOG = ExecutionGraph.LOG;

	private static final int NUM_CANCEL_CALL_TRIES = 3;

	private static final int NUM_STOP_CALL_TRIES = 3;

	// --------------------------------------------------------------------------------------------

	private final ExecutionVertex vertex;

	private final ExecutionAttemptID attemptId;

	private final long[] stateTimestamps;

	private final int attemptNumber;

	private final FiniteDuration timeout;

	private ConcurrentLinkedQueue partialInputChannelDeploymentDescriptors;

	private volatile ExecutionState state = CREATED;

	private volatile SimpleSlot assignedResource;     // once assigned, never changes until the execution is archived

	private volatile Throwable failureCause;          // once assigned, never changes

	private volatile InstanceConnectionInfo assignedResourceLocation; // for the archived execution

	private SerializedValue> operatorState;

	private Map>> operatorKvState;

	/** The execution context which is used to execute futures. */
	@SuppressWarnings("NonSerializableFieldInSerializableClass")
	private ExecutionContext executionContext;

	/* Lock for updating the accumulators atomically. */
	private final SerializableObject accumulatorLock = new SerializableObject();

	/* Continuously updated map of user-defined accumulators */
	private volatile Map> userAccumulators;

	/* Continuously updated map of internal accumulators */
	private volatile Map> flinkAccumulators;

	// --------------------------------------------------------------------------------------------
	
	public Execution(
			ExecutionContext executionContext,
			ExecutionVertex vertex,
			int attemptNumber,
			long startTimestamp,
			FiniteDuration timeout) {
		this.executionContext = checkNotNull(executionContext);

		this.vertex = checkNotNull(vertex);
		this.attemptId = new ExecutionAttemptID();

		this.attemptNumber = attemptNumber;

		this.stateTimestamps = new long[ExecutionState.values().length];
		markTimestamp(ExecutionState.CREATED, startTimestamp);

		this.timeout = timeout;

		this.partialInputChannelDeploymentDescriptors = new ConcurrentLinkedQueue();
	}

	// --------------------------------------------------------------------------------------------
	//   Properties
	// --------------------------------------------------------------------------------------------

	public ExecutionVertex getVertex() {
		return vertex;
	}

	public ExecutionAttemptID getAttemptId() {
		return attemptId;
	}

	public int getAttemptNumber() {
		return attemptNumber;
	}

	public ExecutionState getState() {
		return state;
	}

	public SimpleSlot getAssignedResource() {
		return assignedResource;
	}

	public InstanceConnectionInfo getAssignedResourceLocation() {
		return assignedResourceLocation;
	}

	public Throwable getFailureCause() {
		return failureCause;
	}

	public long[] getStateTimestamps() {
		return stateTimestamps;
	}

	public long getStateTimestamp(ExecutionState state) {
		return this.stateTimestamps[state.ordinal()];
	}

	public boolean isFinished() {
		return state == FINISHED || state == FAILED || state == CANCELED;
	}

	/**
	 * This method cleans fields that are irrelevant for the archived execution attempt.
	 */
	public void prepareForArchiving() {
		if (assignedResource != null && assignedResource.isAlive()) {
			throw new IllegalStateException("Cannot archive Execution while the assigned resource is still running.");
		}
		assignedResource = null;

		executionContext = null;

		partialInputChannelDeploymentDescriptors.clear();
		partialInputChannelDeploymentDescriptors = null;
	}

	public void setInitialState(
		SerializedValue> initialState,
		Map>> initialKvState) {

		if (state != ExecutionState.CREATED) {
			throw new IllegalArgumentException("Can only assign operator state when execution attempt is in CREATED");
		}
		this.operatorState = initialState;
		this.operatorKvState = initialKvState;
	}

	// --------------------------------------------------------------------------------------------
	//  Actions
	// --------------------------------------------------------------------------------------------

	/**
	 * NOTE: This method only throws exceptions if it is in an illegal state to be scheduled, or if the tasks needs
	 *       to be scheduled immediately and no resource is available. If the task is accepted by the schedule, any
	 *       error sets the vertex state to failed and triggers the recovery logic.
	 * 
	 * @param scheduler The scheduler to use to schedule this execution attempt.
	 * @param queued Flag to indicate whether the scheduler may queue this task if it cannot
	 *               immediately deploy it.
	 * 
	 * @throws IllegalStateException Thrown, if the vertex is not in CREATED state, which is the only state that permits scheduling.
	 * @throws NoResourceAvailableException Thrown is no queued scheduling is allowed and no resources are currently available.
	 */
	public boolean scheduleForExecution(Scheduler scheduler, boolean queued) throws NoResourceAvailableException {
		if (scheduler == null) {
			throw new IllegalArgumentException("Cannot send null Scheduler when scheduling execution.");
		}

		final SlotSharingGroup sharingGroup = vertex.getJobVertex().getSlotSharingGroup();
		final CoLocationConstraint locationConstraint = vertex.getLocationConstraint();

		// sanity check
		if (locationConstraint != null && sharingGroup == null) {
			throw new RuntimeException("Trying to schedule with co-location constraint but without slot sharing allowed.");
		}

		if (transitionState(CREATED, SCHEDULED)) {

			ScheduledUnit toSchedule = locationConstraint == null ?
				new ScheduledUnit(this, sharingGroup) :
				new ScheduledUnit(this, sharingGroup, locationConstraint);

			// IMPORTANT: To prevent leaks of cluster resources, we need to make sure that slots are returned
			//     in all cases where the deployment failed. we use many try {} finally {} clauses to assure that
			if (queued) {
				SlotAllocationFuture future = scheduler.scheduleQueued(toSchedule);

				future.setFutureAction(new SlotAllocationFutureAction() {
					@Override
					public void slotAllocated(SimpleSlot slot) {
						try {
							deployToSlot(slot);
						}
						catch (Throwable t) {
							try {
								slot.releaseSlot();
							} finally {
								markFailed(t);
							}
						}
					}
				});
			}
			else {
				SimpleSlot slot = scheduler.scheduleImmediately(toSchedule);
				try {
					deployToSlot(slot);
				}
				catch (Throwable t) {
					try {
						slot.releaseSlot();
					} finally {
						markFailed(t);
					}
				}
			}

			return true;
		}
		else {
			// call race, already deployed, or already done
			return false;
		}
	}

	public void deployToSlot(final SimpleSlot slot) throws JobException {
		// sanity checks
		if (slot == null) {
			throw new NullPointerException();
		}
		if (!slot.isAlive()) {
			throw new JobException("Target slot for deployment is not alive.");
		}

		// make sure exactly one deployment call happens from the correct state
		// note: the transition from CREATED to DEPLOYING is for testing purposes only
		ExecutionState previous = this.state;
		if (previous == SCHEDULED || previous == CREATED) {
			if (!transitionState(previous, DEPLOYING)) {
				// race condition, someone else beat us to the deploying call.
				// this should actually not happen and indicates a race somewhere else
				throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
			}
		}
		else {
			// vertex may have been cancelled, or it was already scheduled
			throw new IllegalStateException("The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous);
		}

		try {
			// good, we are allowed to deploy
			if (!slot.setExecutedVertex(this)) {
				throw new JobException("Could not assign the ExecutionVertex to the slot " + slot);
			}
			this.assignedResource = slot;
			this.assignedResourceLocation = slot.getInstance().getInstanceConnectionInfo();

			// race double check, did we fail/cancel and do we need to release the slot?
			if (this.state != DEPLOYING) {
				slot.releaseSlot();
				return;
			}

			if (LOG.isInfoEnabled()) {
				LOG.info(String.format("Deploying %s (attempt #%d) to %s", vertex.getSimpleName(),
						attemptNumber, slot.getInstance().getInstanceConnectionInfo().getHostname()));
			}

			final TaskDeploymentDescriptor deployment = vertex.createDeploymentDescriptor(
				attemptId,
				slot,
				operatorState,
				operatorKvState,
				attemptNumber);

			// register this execution at the execution graph, to receive call backs
			vertex.getExecutionGraph().registerExecution(this);

			final Instance instance = slot.getInstance();
			final ActorGateway gateway = instance.getActorGateway();

			final Future