All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.executiongraph.ExecutionVertex Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.executiongraph;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.Archiveable;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.operators.ResourceSpec;
import org.apache.flink.api.common.resources.CommonExtendedResource;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.runtime.JobException;
import org.apache.flink.runtime.blob.PermanentBlobKey;
import org.apache.flink.runtime.checkpoint.JobManagerTaskRestore;
import org.apache.flink.runtime.clusterframework.types.AllocationID;
import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
import org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor;
import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor;
import org.apache.flink.runtime.deployment.PartialInputChannelDeploymentDescriptor;
import org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor;
import org.apache.flink.runtime.deployment.ResultPartitionLocationTrackerProxy;
import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.instance.SimpleSlot;
import org.apache.flink.runtime.io.network.partition.BlockingShuffleType;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.io.network.partition.ResultPartitionType;
import org.apache.flink.runtime.jobgraph.ExecutionVertexID;
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup;
import org.apache.flink.runtime.jobmanager.scheduler.LocationPreferenceConstraint;
import org.apache.flink.runtime.jobmaster.LogicalSlot;
import org.apache.flink.runtime.jobmaster.TaskNetworkMemoryUtil;
import org.apache.flink.runtime.jobmaster.failover.ResultDescriptor;
import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider;
import org.apache.flink.runtime.schedule.ExecutionVertexStatus;
import org.apache.flink.runtime.state.KeyGroupRangeAssignment;
import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
import org.apache.flink.runtime.util.EvictingBoundedList;
import org.apache.flink.types.Either;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.SerializedValue;

import org.slf4j.Logger;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;

import static org.apache.flink.runtime.execution.ExecutionState.FINISHED;
import static org.apache.flink.util.Preconditions.checkState;

/**
 * The ExecutionVertex is a parallel subtask of the execution. It may be executed once, or several times, each of
 * which time it spawns an {@link Execution}.
 */
public class ExecutionVertex implements AccessExecutionVertex, Archiveable {

	private static final Logger LOG = ExecutionGraph.LOG;

	private static final int MAX_DISTINCT_LOCATIONS_TO_CONSIDER = 8;

	// --------------------------------------------------------------------------------------------

	private final ExecutionJobVertex jobVertex;

	private Map resultPartitions;

	private final ExecutionEdge[][] inputEdges;

	private final int subTaskIndex;

	private final ExecutionVertexID executionVertexID;

	private final EvictingBoundedList priorExecutions;

	private final Time timeout;

	/** The name in the format "myTask (2/7)", cached to avoid frequent string concatenations. */
	private final String taskNameWithSubtask;

	private volatile CoLocationConstraint locationConstraint;

	/** The current or latest execution attempt of this vertex's task. */
	private volatile Execution currentExecution;	// this field must never be null

	/** The create timestamp of execution. */
	private long createTimestamp;

	/** The location of the last execution. */
	private TaskManagerLocation latestPriorLocation = null;

	private final Map> assignedInputSplitsMap = new HashMap<>();

	private final Map inputSplitIndexMap = new HashMap<>();

	// --------------------------------------------------------------------------------------------

	/**
	 * Convenience constructor for tests. Sets various fields to default values.
	 */
	@VisibleForTesting
	ExecutionVertex(
			ExecutionJobVertex jobVertex,
			int subTaskIndex,
			IntermediateResult[] producedDataSets,
			Time timeout) {

		this(
				jobVertex,
				subTaskIndex,
				producedDataSets,
				timeout,
				1L,
				System.currentTimeMillis(),
				JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE.defaultValue());
	}

	/**
	 *
	 * @param timeout
	 *            The RPC timeout to use for deploy / cancel calls
	 * @param initialGlobalModVersion
	 *            The global modification version to initialize the first Execution with.
	 * @param createTimestamp
	 *            The timestamp for the vertex creation, used to initialize the first Execution with.
	 * @param maxPriorExecutionHistoryLength
	 *            The number of prior Executions (= execution attempts) to keep.
	 */
	public ExecutionVertex(
			ExecutionJobVertex jobVertex,
			int subTaskIndex,
			IntermediateResult[] producedDataSets,
			Time timeout,
			long initialGlobalModVersion,
			long createTimestamp,
			int maxPriorExecutionHistoryLength) {

		this.jobVertex = jobVertex;
		this.subTaskIndex = subTaskIndex;
		this.executionVertexID = new ExecutionVertexID(jobVertex.getJobVertexId(), subTaskIndex);
		this.taskNameWithSubtask = String.format("%s (%d/%d)",
				jobVertex.getJobVertex().getName(), subTaskIndex + 1, jobVertex.getParallelism());

		this.resultPartitions = new LinkedHashMap<>(producedDataSets.length, 1);

		for (IntermediateResult result : producedDataSets) {
			IntermediateResultPartition irp = new IntermediateResultPartition(result, this, subTaskIndex);
			result.setPartition(subTaskIndex, irp);

			resultPartitions.put(irp.getPartitionId(), irp);
		}

		this.inputEdges = new ExecutionEdge[jobVertex.getJobVertex().getInputs().size()][];

		this.priorExecutions = new EvictingBoundedList<>(maxPriorExecutionHistoryLength);

		this.createTimestamp = createTimestamp;

		this.currentExecution = new Execution(
			getExecutionGraph().getFutureExecutor(),
			this,
			0,
			initialGlobalModVersion,
			createTimestamp,
			timeout);

		// create a co-location scheduling hint, if necessary
		final CoLocationGroup clg = jobVertex.getCoLocationGroup();
		if (clg != null) {
			synchronized (clg) {
				this.locationConstraint = clg.getLocationConstraint(subTaskIndex);
			}
		}
		else {
			this.locationConstraint = null;
		}

		getExecutionGraph().registerExecution(currentExecution);

		this.timeout = timeout;
	}


	// --------------------------------------------------------------------------------------------
	//  Properties
	// --------------------------------------------------------------------------------------------

	public JobID getJobId() {
		return this.jobVertex.getJobId();
	}

	public ExecutionJobVertex getJobVertex() {
		return jobVertex;
	}

	public JobVertexID getJobvertexId() {
		return this.jobVertex.getJobVertexId();
	}

	public ExecutionVertexID getExecutionVertexID() {
		return executionVertexID;
	}

	public String getTaskName() {
		return this.jobVertex.getJobVertex().getName();
	}

	/**
	 * Creates a simple name representation in the style 'taskname (x/y)', where
	 * 'taskname' is the name as returned by {@link #getTaskName()}, 'x' is the parallel
	 * subtask index as returned by {@link #getParallelSubtaskIndex()}{@code + 1}, and 'y' is the total
	 * number of tasks, as returned by {@link #getTotalNumberOfParallelSubtasks()}.
	 *
	 * @return A simple name representation in the form 'myTask (2/7)'
	 */
	@Override
	public String getTaskNameWithSubtaskIndex() {
		return this.taskNameWithSubtask;
	}

	public int getTotalNumberOfParallelSubtasks() {
		return this.jobVertex.getParallelism();
	}

	public int getMaxParallelism() {
		return this.jobVertex.getMaxParallelism();
	}

	@Override
	public int getParallelSubtaskIndex() {
		return this.subTaskIndex;
	}

	public int getNumberOfInputs() {
		return this.inputEdges.length;
	}

	public ExecutionEdge[] getInputEdges(int input) {
		if (input < 0 || input >= this.inputEdges.length) {
			throw new IllegalArgumentException(String.format("Input %d is out of range [0..%d)", input, this.inputEdges.length));
		}
		return inputEdges[input];
	}

	public CoLocationConstraint getLocationConstraint() {
		return locationConstraint;
	}

	@Override
	public Execution getCurrentExecutionAttempt() {
		return currentExecution;
	}

	@Override
	public ExecutionState getExecutionState() {
		return currentExecution.getState();
	}

	@Override
	public long getStateTimestamp(ExecutionState state) {
		return currentExecution.getStateTimestamp(state);
	}

	@Override
	public String getFailureCauseAsString() {
		return ExceptionUtils.stringifyException(getFailureCause());
	}

	public Throwable getFailureCause() {
		return currentExecution.getFailureCause();
	}

	public CompletableFuture getCurrentTaskManagerLocationFuture() {
		return currentExecution.getTaskManagerLocationFuture();
	}

	public LogicalSlot getCurrentAssignedResource() {
		return currentExecution.getAssignedResource();
	}

	@Override
	public TaskManagerLocation getCurrentAssignedResourceLocation() {
		return currentExecution.getAssignedResourceLocation();
	}

	@Override
	public Execution getPriorExecutionAttempt(int attemptNumber) {
		synchronized (priorExecutions) {
			if (attemptNumber >= 0 && attemptNumber < priorExecutions.size()) {
				return priorExecutions.get(attemptNumber);
			} else {
				throw new IllegalArgumentException("attempt does not exist");
			}
		}
	}

	public Execution getLatestPriorExecution() {
		synchronized (priorExecutions) {
			final int size = priorExecutions.size();
			if (size > 0) {
				return priorExecutions.get(size - 1);
			}
			else {
				return null;
			}
		}
	}

	/**
	 * Gets the location where the latest completed/canceled/failed execution of the vertex's
	 * task happened.
	 *
	 * @return The latest prior execution location, or null, if there is none, yet.
	 */
	public TaskManagerLocation getLatestPriorLocation() {
		return latestPriorLocation;
	}

	public void setLatestPriorLocation(TaskManagerLocation location) {
		this.latestPriorLocation = location;
	}

	public AllocationID getLatestPriorAllocation() {
		Execution latestPriorExecution = getLatestPriorExecution();
		return latestPriorExecution != null ? latestPriorExecution.getAssignedAllocationID() : null;
	}

	EvictingBoundedList getCopyOfPriorExecutionsList() {
		synchronized (priorExecutions) {
			return new EvictingBoundedList<>(priorExecutions);
		}
	}

	public ExecutionGraph getExecutionGraph() {
		return this.jobVertex.getGraph();
	}

	public Map getProducedPartitions() {
		return resultPartitions;
	}

	public ExecutionVertexStatus getCurrentStatus() {
		return new ExecutionVertexStatus(executionVertexID, getExecutionState());
	}

	// --------------------------------------------------------------------------------------------
	//  Graph building
	// --------------------------------------------------------------------------------------------

	public void setInputExecutionEdges(ExecutionEdge[] edges, int inputNumber) {
		this.inputEdges[inputNumber] = edges;
	}

	/**
	 * Gets the overall preferred execution location for this vertex's current execution.
	 * The preference is determined as follows:
	 *
	 * 
    *
  1. If the task execution has state to load (from a checkpoint), then the location preference * is the location of the previous execution (if there is a previous execution attempt). *
  2. If the task execution has no state or no previous location, then the location preference * is based on the task's inputs. *
* * These rules should result in the following behavior: * *
    *
  • Stateless tasks are always scheduled based on co-location with inputs. *
  • Stateful tasks are on their initial attempt executed based on co-location with inputs. *
  • Repeated executions of stateful tasks try to co-locate the execution with its state. *
* * @return The preferred execution locations for the execution attempt. * * @see #getPreferredLocationsBasedOnState() * @see #getPreferredLocationsBasedOnInputs() */ public Collection> getPreferredLocations() { Collection> basedOnState = getPreferredLocationsBasedOnState(); return basedOnState != null ? basedOnState : getPreferredLocationsBasedOnInputs(); } /** * Gets the preferred location to execute the current task execution attempt, based on the state * that the execution attempt will resume. * * @return A size-one collection with the location preference, or null, if there is no * location preference based on the state. */ public Collection> getPreferredLocationsBasedOnState() { TaskManagerLocation priorLocation; if (currentExecution.getTaskRestore() != null && (priorLocation = getLatestPriorLocation()) != null) { return Collections.singleton(CompletableFuture.completedFuture(priorLocation)); } else { return null; } } /** * Gets the location preferences of the vertex's current task execution, as determined by the locations * of the predecessors from which it receives input data. * If there are more than MAX_DISTINCT_LOCATIONS_TO_CONSIDER different locations of source data, this * method returns {@code null} to indicate no location preference. * * @return The preferred locations based in input streams, or an empty iterable, * if there is no input-based preference. */ public Collection> getPreferredLocationsBasedOnInputs() { // otherwise, base the preferred locations on the input connections if (inputEdges == null) { return Collections.emptySet(); } else { Set> locations = new HashSet<>(getTotalNumberOfParallelSubtasks()); Set> inputLocations = new HashSet<>(getTotalNumberOfParallelSubtasks()); // go over all inputs for (int i = 0; i < inputEdges.length; i++) { inputLocations.clear(); ExecutionEdge[] sources = inputEdges[i]; if (sources != null) { // go over all input sources for (int k = 0; k < sources.length; k++) { // look-up assigned slot of input source CompletableFuture locationFuture = sources[k].getSource().getProducer().getCurrentTaskManagerLocationFuture(); // add input location inputLocations.add(locationFuture); // inputs which have too many distinct sources are not considered if (inputLocations.size() > MAX_DISTINCT_LOCATIONS_TO_CONSIDER) { inputLocations.clear(); break; } } } // keep the locations of the input with the least preferred locations if (locations.isEmpty() || // nothing assigned yet (!inputLocations.isEmpty() && inputLocations.size() < locations.size())) { // current input has fewer preferred locations locations.clear(); locations.addAll(inputLocations); } } return locations.isEmpty() ? Collections.emptyList() : locations; } } // -------------------------------------------------------------------------------------------- // Resources // -------------------------------------------------------------------------------------------- public ResourceProfile calculateResourceProfile() { if (jobVertex.getJobVertex().getMinResources().equals(ResourceSpec.DEFAULT)) { return ResourceProfile.UNKNOWN; } else { int networkMemory = calculateTaskNetworkMemory(); int additionalManagedMemory = calculateTaskExtraManagedMemory(); ResourceSpec additionalResourceSpec = ResourceSpec.newBuilder().addExtendedResource( new CommonExtendedResource(ResourceSpec.MANAGED_MEMORY_NAME, additionalManagedMemory)) .build(); return ResourceProfile.fromResourceSpec( getJobVertex().getJobVertex().getMinResources() .merge(additionalResourceSpec), networkMemory); } } @VisibleForTesting int calculateTaskNetworkMemory() { Configuration config = jobVertex.getGraph().getJobManagerConfiguration(); BlockingShuffleType shuffleType = BlockingShuffleType.getBlockingShuffleTypeFromConfiguration(config, LOG); int numInternalSubpartitions = 0; int numInternalResultPartitions = 0; for (IntermediateResultPartition irp : getProducedPartitions().values()) { if (!(shuffleType == BlockingShuffleType.YARN && irp.getIntermediateResult().getResultType().isBlocking())) { for (List consumer : irp.getConsumers()) { numInternalSubpartitions += consumer.size(); } ++numInternalResultPartitions; } } final int maxBlockingRequestsInFlight = config.getInteger(TaskManagerOptions.TASK_EXTERNAL_SHUFFLE_MAX_CONCURRENT_REQUESTS); int numPipelineChannels = 0; int numPipelineGates = 0; int numExternalBlockingChannels = 0; int numExternalBlockingGates = 0; for (int j = 0; j < getNumberOfInputs(); ++j) { ExecutionEdge[] edges = getInputEdges(j); checkState(edges.length > 0, "There should be at least on edge for each input"); // Check the result type by viewing the first edge boolean isExternalBlocking = edges[0].getSource().getIntermediateResult().getResultType().isBlocking() && shuffleType == BlockingShuffleType.YARN; if (isExternalBlocking) { numExternalBlockingChannels += edges.length; numExternalBlockingGates++; } else { numPipelineChannels += edges.length; numPipelineGates++; } } if (maxBlockingRequestsInFlight > 0) { numExternalBlockingChannels = Math.min(numExternalBlockingChannels, maxBlockingRequestsInFlight); // each blocking input gate should monopolize at least one piece of resource to // support input selection by operator numExternalBlockingChannels = Math.max(numExternalBlockingChannels, numExternalBlockingGates); } return TaskNetworkMemoryUtil.calculateTaskNetworkMemory(config, numInternalSubpartitions, numInternalResultPartitions, numPipelineChannels, numPipelineGates, numExternalBlockingChannels, numExternalBlockingGates); } private int calculateTaskExtraManagedMemory() { Configuration config = getJobVertex().getGraph().getJobManagerConfiguration(); // Calculates managed memory for external result partition. BlockingShuffleType shuffleType = BlockingShuffleType.getBlockingShuffleTypeFromConfiguration(config, LOG); int numExternalResultPartitions = 0; for (IntermediateResultPartition irp : getProducedPartitions().values()) { if (shuffleType == BlockingShuffleType.YARN && irp.getIntermediateResult().getResultType().isBlocking()) { numExternalResultPartitions++; } } int mapOutputMemoryInMB = config.getInteger(TaskManagerOptions.TASK_MANAGER_OUTPUT_MEMORY_MB); return mapOutputMemoryInMB * numExternalResultPartitions; } // -------------------------------------------------------------------------------------------- // Actions // -------------------------------------------------------------------------------------------- /** * Archives the current Execution and creates a new Execution for this vertex. * *

This method atomically checks if the ExecutionGraph is still of an expected * global mod. version and replaces the execution if that is the case. If the ExecutionGraph * has increased its global mod. version in the meantime, this operation fails. * *

This mechanism can be used to prevent conflicts between various concurrent recovery and * reconfiguration actions in a similar way as "optimistic concurrency control". * * @param timestamp * The creation timestamp for the new Execution * @param originatingGlobalModVersion * The * @return Returns the new created Execution. * * @throws GlobalModVersionMismatch Thrown, if the execution graph has a new global mod * version than the one passed to this message. */ public Execution resetForNewExecution(final long timestamp, final long originatingGlobalModVersion) throws GlobalModVersionMismatch { LOG.debug("Resetting execution vertex {} for new execution.", getTaskNameWithSubtaskIndex()); synchronized (priorExecutions) { // check if another global modification has been triggered since the // action that originally caused this reset/restart happened final long actualModVersion = getExecutionGraph().getGlobalModVersion(); if (actualModVersion > originatingGlobalModVersion) { // global change happened since, reject this action throw new GlobalModVersionMismatch(originatingGlobalModVersion, actualModVersion); } final Execution oldExecution = currentExecution; final ExecutionState oldState = oldExecution.getState(); if (oldState.isTerminal() || getExecutionGraph().getGraphManager().isReplaying()) { priorExecutions.add(oldExecution); latestPriorLocation = oldExecution.getAssignedResourceLocation(); final Execution newExecution = new Execution( getExecutionGraph().getFutureExecutor(), this, oldExecution.getAttemptNumber() + 1, originatingGlobalModVersion, timestamp, timeout); this.currentExecution = newExecution; CoLocationGroup grp = jobVertex.getCoLocationGroup(); if (grp != null) { this.locationConstraint = grp.getLocationConstraint(subTaskIndex); } // register this execution at the execution graph, to receive call backs getExecutionGraph().registerExecution(newExecution); // if the execution was 'FINISHED' before, tell the ExecutionGraph that // we take one step back on the road to reaching global FINISHED if (oldState == FINISHED) { getExecutionGraph().vertexUnFinished(); } //TODO: set this index according to checkpoint when batch support checkpoint. inputSplitIndexMap.clear(); // Reset intermediate results for (IntermediateResultPartition resultPartition : resultPartitions.values()) { resultPartition.resetForNewExecution(); } return newExecution; } else { throw new IllegalStateException("Cannot reset a vertex that is in non-terminal state " + oldState); } } } /** * Schedules the current execution of this ExecutionVertex. * * @param slotProvider to allocate the slots from * @param queued if the allocation can be queued * @param locationPreferenceConstraint constraint for the location preferences * @return Future which is completed once the execution is deployed. The future * can also completed exceptionally. */ public CompletableFuture scheduleForExecution( SlotProvider slotProvider, boolean queued, LocationPreferenceConstraint locationPreferenceConstraint) { return this.currentExecution.scheduleForExecution( slotProvider, queued, locationPreferenceConstraint); } @VisibleForTesting public void deployToSlot(SimpleSlot slot) throws JobException { if (this.currentExecution.tryAssignResource(slot)) { this.currentExecution.deploy(); } else { throw new IllegalStateException("Could not assign resource " + slot + " to current execution " + currentExecution + '.'); } } /** * * @return A future that completes once the execution has reached its final state. */ public CompletableFuture cancel() { // to avoid any case of mixup in the presence of concurrent calls, // we copy a reference to the stack to make sure both calls go to the same Execution. final Execution exec = this.currentExecution; exec.cancel(); return exec.getReleaseFuture(); } public void stop() { this.currentExecution.stop(); } public void fail(Throwable t) { this.currentExecution.fail(t); } /** * Schedules or updates the consumer tasks of the result partition with the given ID. */ void scheduleOrUpdateConsumers(ResultPartitionID partitionId) { final Execution execution = currentExecution; // Abort this request if there was a concurrent reset if (!partitionId.getProducerId().equals(execution.getAttemptId())) { return; } final IntermediateResultPartition partition = resultPartitions.get(partitionId.getPartitionId()); if (partition == null) { throw new IllegalStateException("Unknown partition " + partitionId + "."); } if (partition.getIntermediateResult().getResultType().isPipelined()) { // Schedule or update receivers of this partition partition.markDataProduced(); // Notify the scheduler to handle the consumable partition notifyAndUpdateConsumers(partition); } else { throw new IllegalArgumentException("ScheduleOrUpdateConsumers msg is only valid for" + "pipelined partitions."); } } protected void notifyAndUpdateConsumers(IntermediateResultPartition partition) { getExecutionGraph().getGraphManager().notifyResultPartitionConsumable( getExecutionVertexID(), partition.getIntermediateResult().getId(), partition.getPartitionNumber(), getCurrentAssignedResourceLocation()); getExecutionGraph().getFutureExecutor().execute(() -> { currentExecution.updateConsumers(partition.getConsumers()); }); } public void cachePartitionInfo(PartialInputChannelDeploymentDescriptor partitionInfo){ getCurrentExecutionAttempt().cachePartitionInfo(partitionInfo); } void clearAssignedInputSplits() { assignedInputSplitsMap.clear(); inputSplitIndexMap.clear(); } /** * Finish all blocking result partitions whose receivers can be scheduled/updated and notify. */ void finishPartitionsAndNotify() { for (IntermediateResultPartition partition : resultPartitions.values()) { partition.markFinished(); // Blocking partitions are consumable on finished if (partition.getResultType().isBlocking()) { notifyAndUpdateConsumers(partition); } } } void resetResultPartitionID(ResultPartitionID[] partitionIds) { Map newResultPartitions = new LinkedHashMap<>(resultPartitions.size()); Iterator iterator = resultPartitions.values().iterator(); for (int i = 0; i < resultPartitions.size(); i++) { IntermediateResultPartition resultPartition = iterator.next(); IntermediateResultPartitionID originId = resultPartition.getPartitionId(); resultPartition.setPartitionId(partitionIds[i].getPartitionId()); resultPartition.getIntermediateResult().resetLookupHelper(originId, partitionIds[i].getPartitionId()); newResultPartitions.put(resultPartition.getPartitionId(), resultPartition); } this.resultPartitions = newResultPartitions; } // the following two method is added for region failover // record the input split assigned to this task public void inputSplitAssigned(OperatorID operatorID, InputSplit inputSplit) { assignedInputSplitsMap.putIfAbsent(operatorID, new LinkedList<>()); assignedInputSplitsMap.get(operatorID).add(inputSplit); inputSplitIndexMap.put(operatorID, inputSplitIndexMap.getOrDefault(operatorID, 0) + 1); Preconditions.checkArgument(inputSplitIndexMap.get(operatorID) == assignedInputSplitsMap.get(operatorID).size()); } public InputSplit getNextInputSplitFromAssgined(OperatorID operatorID) { List assignedInputSplits = assignedInputSplitsMap.getOrDefault(operatorID, Collections.emptyList()); Integer inputSplitIndex = inputSplitIndexMap.getOrDefault(operatorID, 0); if (assignedInputSplits.isEmpty() || inputSplitIndex >= assignedInputSplits.size()) { return null; } InputSplit split = assignedInputSplits.get(inputSplitIndex++); inputSplitIndexMap.put(operatorID, inputSplitIndex); return split; } public Map> getAssignedInputSplits() { return Collections.unmodifiableMap(assignedInputSplitsMap); } /** * Recover the execution vertex status after job master failover. * * @param state The state in the log. * @param assignedInputSplits The assigned input splits of a finished executions. * @param resultDescriptor The result information of a finished executions. */ public void recoverStatus( ExecutionState state, Map> assignedInputSplits, ResultDescriptor resultDescriptor) { if (!ExecutionState.FINISHED.equals(state) && (assignedInputSplits != null && resultDescriptor != null)) { throw new FlinkRuntimeException("Can not assign input split or result partion when execution is " + state); } switch (state) { case FINISHED: currentExecution.getTaskManagerLocationFuture().complete(resultDescriptor.getTaskManagerLocation()); resetResultPartitionID(resultDescriptor.getResultPartitionIds()); currentExecution.markFinished(); if (assignedInputSplits != null) { assignedInputSplitsMap.clear(); assignedInputSplitsMap.putAll(assignedInputSplits); for (Map.Entry> opToInputs : assignedInputSplits.entrySet()) { getJobVertex().getSplitAssigner(opToInputs.getKey()).inputSplitsAssigned(subTaskIndex, opToInputs.getValue()); } } break; case RUNNING: currentExecution.switchToRunning(); break; case DEPLOYING: currentExecution.recoverState(state); break; default: throw new FlinkRuntimeException("Unsupported replaying the state " + state); } } /** * Recover the pipelined result partition consume status after job master failover. * * @param resultId The intermediate data set id in the log. */ public void recoverResultPartitionStatus( IntermediateDataSetID resultId, TaskManagerLocation location) { IntermediateResultPartition partitionToRecover = null; for (IntermediateResultPartition irp : getProducedPartitions().values()) { if (irp.getIntermediateResult().getId().equals(resultId)) { partitionToRecover = irp; } } if (partitionToRecover == null) { throw new FlinkRuntimeException("Can not find the intermediate result " + resultId + " on " + getTaskNameWithSubtaskIndex()); } if (!(ExecutionState.RUNNING.equals(currentExecution.getState()) && partitionToRecover.getResultType().isPipelined())) { throw new FlinkRuntimeException("Invalid state " + currentExecution.getState() + " for " + getTaskNameWithSubtaskIndex()); } currentExecution.getTaskManagerLocationFuture().complete(location); scheduleOrUpdateConsumers(new ResultPartitionID(partitionToRecover.getPartitionId(), currentExecution.getAttemptId())); } /** * Reconcile the execution with the running info reported by task executor. * @param executionId * @param attemptNumber * @param startTimestamp * @param partitionIds */ public boolean reconcileExecution( ExecutionState state, ExecutionAttemptID executionId, int attemptNumber, long startTimestamp, ResultPartitionID[] partitionIds, boolean[] partitionsConsumable, Map> assignedInputSplits, LogicalSlot slot) { LOG.debug("Reconcile execution vertex {} for current execution.", getTaskNameWithSubtaskIndex()); if (resultPartitions.size() != partitionIds.length) { LOG.info("Reconcile execution failed due to partition number with actual {}, expect {}.", partitionIds.length, resultPartitions.size()); return false; } // first, update the IntermediateResultPartition and reset the map resetResultPartitionID(partitionIds); // second, update the pipelined partition info to its consumers. for (int i = 0; i < partitionIds.length; i++) { IntermediateResultPartition partition = resultPartitions.get(partitionIds[i].getPartitionId()); if (partition.getResultType().isPipelined()) { if (partition.hasDataProduced() != partitionsConsumable[i]) { LOG.info("Reconcile execution {} failed due to partition {} consumable not equals to {}.", getTaskNameWithSubtaskIndex(), partition.getPartitionId(), partition.hasDataProduced()); currentExecution.getReconcileFuture().complete(currentExecution.getAttemptId()); return false; } } } // third, reset execution basic information getExecutionGraph().deregisterExecution(currentExecution); if (currentExecution.reconcileStatus(state, executionId, attemptNumber, startTimestamp, slot)) { getExecutionGraph().registerExecution(currentExecution); // forth, build the input split map inputSplitIndexMap.clear(); assignedInputSplitsMap.clear(); for (Map.Entry> opToInputs : assignedInputSplits.entrySet()) { for (InputSplit inputSplit : opToInputs.getValue()) { inputSplitAssigned(opToInputs.getKey(), inputSplit); } getJobVertex().getSplitAssigner(opToInputs.getKey()).inputSplitsAssigned(subTaskIndex, opToInputs.getValue()); } return true; } else { getExecutionGraph().registerExecution(currentExecution); return false; } } // -------------------------------------------------------------------------------------------- // Notifications from the Execution Attempt // -------------------------------------------------------------------------------------------- void executionFinished(Execution execution) { getExecutionGraph().vertexFinished(); } void executionCanceled(Execution execution) { // nothing to do } void executionFailed(Execution execution, Throwable cause) { // nothing to do } // -------------------------------------------------------------------------------------------- // Miscellaneous // -------------------------------------------------------------------------------------------- /** * Simply forward this notification. */ void notifyStateTransition(Execution execution, ExecutionState newState, Throwable error) { // only forward this notification if the execution is still the current execution // otherwise we have an outdated execution if (currentExecution == execution) { getExecutionGraph().notifyExecutionChange(execution, newState, error); } } /** * Creates a task deployment descriptor to deploy a subtask to the given target slot. * * TODO: This should actually be in the EXECUTION */ TaskDeploymentDescriptor createDeploymentDescriptor( ExecutionAttemptID executionId, LogicalSlot targetSlot, @Nullable JobManagerTaskRestore taskRestore, int attemptNumber) throws ExecutionGraphException { // Produced intermediate results List producedPartitions = new ArrayList<>(resultPartitions.size()); // Consumed intermediate results List consumedPartitions = new ArrayList<>(inputEdges.length); boolean lazyScheduling = getExecutionGraph().isLazyDeploymentAllowed(); for (IntermediateResultPartition partition : resultPartitions.values()) { List> consumers = partition.getConsumers(); if (consumers.isEmpty()) { //TODO this case only exists for test, currently there has to be exactly one consumer in real jobs! producedPartitions.add(ResultPartitionDeploymentDescriptor.from( partition, KeyGroupRangeAssignment.UPPER_BOUND_MAX_PARALLELISM, lazyScheduling)); } else { Preconditions.checkState(1 == consumers.size(), "Only one consumer supported in the current implementation! Found: " + consumers.size()); List consumer = consumers.get(0); ExecutionJobVertex vertex = consumer.get(0).getTarget().getJobVertex(); int maxParallelism = vertex.getMaxParallelism(); producedPartitions.add(ResultPartitionDeploymentDescriptor.from(partition, maxParallelism, lazyScheduling)); } } ResultPartitionLocationTrackerProxy resultPartitionLocationTrackerProxy = currentExecution.getVertex().getExecutionGraph().getResultPartitionLocationTrackerProxy(); for (ExecutionEdge[] edges : inputEdges) { InputChannelDeploymentDescriptor[] partitions = InputChannelDeploymentDescriptor.fromEdges( resultPartitionLocationTrackerProxy, edges, targetSlot.getTaskManagerLocation(), lazyScheduling); // If the produced partition has multiple consumers registered, we // need to request the one matching our sub task index. // TODO Refactor after removing the consumers from the intermediate result partitions int numConsumerEdges = edges[0].getSource().getConsumers().get(0).size(); int queueToRequest = subTaskIndex % numConsumerEdges; IntermediateResult consumedIntermediateResult = edges[0].getSource().getIntermediateResult(); final IntermediateDataSetID resultId = consumedIntermediateResult.getId(); final ResultPartitionType partitionType = consumedIntermediateResult.getResultType(); consumedPartitions.add(new InputGateDeploymentDescriptor(resultId, partitionType, queueToRequest, partitions)); } final Either, PermanentBlobKey> jobInformationOrBlobKey = getExecutionGraph().getJobInformationOrBlobKey(); final TaskDeploymentDescriptor.MaybeOffloaded serializedJobInformation; if (jobInformationOrBlobKey.isLeft()) { serializedJobInformation = new TaskDeploymentDescriptor.NonOffloaded<>(jobInformationOrBlobKey.left()); } else { serializedJobInformation = new TaskDeploymentDescriptor.Offloaded<>(jobInformationOrBlobKey.right()); } final Either, PermanentBlobKey> taskInformationOrBlobKey; try { taskInformationOrBlobKey = jobVertex.getTaskInformationOrBlobKey(); } catch (IOException e) { throw new ExecutionGraphException( "Could not create a serialized JobVertexInformation for " + jobVertex.getJobVertexId(), e); } final TaskDeploymentDescriptor.MaybeOffloaded serializedTaskInformation; if (taskInformationOrBlobKey.isLeft()) { serializedTaskInformation = new TaskDeploymentDescriptor.NonOffloaded<>(taskInformationOrBlobKey.left()); } else { serializedTaskInformation = new TaskDeploymentDescriptor.Offloaded<>(taskInformationOrBlobKey.right()); } return new TaskDeploymentDescriptor( getJobId(), serializedJobInformation, serializedTaskInformation, executionId, targetSlot.getAllocationId(), subTaskIndex, attemptNumber, targetSlot.getPhysicalSlotNumber(), createTimestamp, taskRestore, producedPartitions, consumedPartitions); } // -------------------------------------------------------------------------------------------- // Utilities // -------------------------------------------------------------------------------------------- @Override public String toString() { return getTaskNameWithSubtaskIndex(); } @Override public ArchivedExecutionVertex archive() { return new ArchivedExecutionVertex(this); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy