All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.runtime.executiongraph.failover.StrictRestartPipelinedRegionStrategy Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.executiongraph.failover;

import org.apache.flink.runtime.executiongraph.Execution;
import org.apache.flink.runtime.executiongraph.ExecutionEdge;
import org.apache.flink.runtime.executiongraph.ExecutionGraph;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.executiongraph.IntermediateResult;
import org.apache.flink.runtime.executiongraph.IntermediateResultPartition;
import org.apache.flink.runtime.io.network.partition.DataConsumptionException;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.FlinkException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.Executor;

/**
 * A strict version of {@link RestartPipelinedRegionStrategy} which ensures data consistency
 * even if there are non-deterministic shuffles (rebalance, rescale, ...).
 *
 * <p>On a task failure, the region the failure is attributed to is restarted together with
 * every region that transitively consumes its result partitions.
 */
public class StrictRestartPipelinedRegionStrategy extends RestartPipelinedRegionStrategy {

	/** The log object used for debugging. */
	private static final Logger LOG = LoggerFactory.getLogger(StrictRestartPipelinedRegionStrategy.class);

	/**
	 * Creates a new failover strategy to restart pipelined regions that works on the given
	 * execution graph and uses the execution graph's future executor to call restart actions.
	 *
	 * @param executionGraph The execution graph on which this FailoverStrategy will work
	 * @param regionFailLimit The max number a region can fail
	 */
	public StrictRestartPipelinedRegionStrategy(ExecutionGraph executionGraph, int regionFailLimit) {
		this(executionGraph, executionGraph.getFutureExecutor(), regionFailLimit);
	}

	/**
	 * Creates a new failover strategy to restart pipelined regions that works on the given
	 * execution graph and uses the given executor to call restart actions.
	 *
	 * @param executionGraph The execution graph on which this FailoverStrategy will work
	 * @param executor  The executor used for future actions
	 * @param regionFailLimit The max number a region can fail
	 */
	public StrictRestartPipelinedRegionStrategy(ExecutionGraph executionGraph, Executor executor, int regionFailLimit) {
		super(executionGraph, executor, regionFailLimit);
	}

	// ------------------------------------------------------------------------
	//  failover implementation
	// ------------------------------------------------------------------------

	/**
	 * Handles the failure of a single task execution.
	 *
	 * <p>If the failed vertex does not belong to any known failover region, the whole execution
	 * graph is failed globally. Otherwise all involved regions are computed and each of them is
	 * notified of the failure, in the order in which they appear in {@code sortedRegions}.
	 *
	 * @param taskExecution The execution attempt that failed
	 * @param cause The failure cause
	 */
	@Override
	public void onTaskFailure(Execution taskExecution, Throwable cause) {
		final ExecutionVertex ev = taskExecution.getVertex();
		final FailoverRegion failoverRegion = vertexToRegion.get(ev);
		if (failoverRegion == null) {
			executionGraph.failGlobal(new FlinkException(
				"Can not find a failover region for the execution " + ev.getTaskNameWithSubtaskIndex(), cause));
			return;
		}

		final List<FailoverRegion> sortedRegionsToRestart = sortRegionsTopologically(
			getRegionsToRestart(ev, cause));

		// Cancel and restart the region of the target vertex
		LOG.info("Recovering task failure for {} #{} ({}) via restarting {} failover regions",
				ev.getTaskNameWithSubtaskIndex(),
				taskExecution.getAttemptNumber(),
				taskExecution.getAttemptId(),
				sortedRegionsToRestart.size());

		for (FailoverRegion regionToRestart : sortedRegionsToRestart) {
			regionToRestart.onExecutionFail(taskExecution.getGlobalModVersion(), cause);
		}
	}

	/**
	 * All 'involved' regions are proposed to be restarted.
	 * The 'involved' regions are calculated with rules below:
	 * 1. The region containing the failed task is always involved
	 * 2. If an input result partition of an involved region is not available, i.e. Missing or Corrupted,
	 *    the region containing the partition producer task is involved
	 * 3. If a region is involved, all of its consumer regions are involved
	 *
	 * <p>The traversal is a breadth-first walk over the consumer edges, starting from the region
	 * returned by {@link #getRootFailedRegion(ExecutionVertex, Throwable)}.
	 *
	 * @param failedVertex The vertex whose execution failed
	 * @param cause The failure cause
	 * @return The regions to restart; membership of the returned set is identity based
	 */
	private Set<FailoverRegion> getRegionsToRestart(ExecutionVertex failedVertex, Throwable cause) {
		// identity semantics: regions are tracked by reference, the mapped values are unused
		final IdentityHashMap<FailoverRegion, Object> regionsToRestart = new IdentityHashMap<>();
		final IdentityHashMap<FailoverRegion, Object> visitedRegions = new IdentityHashMap<>();

		final Queue<FailoverRegion> regionsToVisit = new ArrayDeque<>();
		final FailoverRegion rootFailedRegion = getRootFailedRegion(failedVertex, cause);
		visitedRegions.put(rootFailedRegion, null);
		regionsToVisit.add(rootFailedRegion);

		// start from the failed region to visit all involved regions
		while (!regionsToVisit.isEmpty()) {
			final FailoverRegion regionToRestart = regionsToVisit.poll();

			// an involved region should be restarted
			regionsToRestart.put(regionToRestart, null);

			// all consumer regions of an involved region should be involved
			for (ExecutionVertex vertex : regionToRestart.getAllExecutionVertices()) {
				for (IntermediateResultPartition resultPartition : vertex.getProducedPartitions().values()) {
					for (List<ExecutionEdge> edges : resultPartition.getConsumers()) {
						for (ExecutionEdge edge : edges) {
							final ExecutionVertex consumerVertex = edge.getTarget();
							final FailoverRegion consumerRegion = vertexToRegion.get(consumerVertex);
							// enqueue every consumer region only once
							if (!visitedRegions.containsKey(consumerRegion)) {
								visitedRegions.put(consumerRegion, null);
								regionsToVisit.add(consumerRegion);
							}
						}
					}
				}
			}
		}

		return regionsToRestart.keySet();
	}

	/**
	 * Determines the region the failure should be attributed to.
	 *
	 * <p>If the cause chain contains a {@link DataConsumptionException}, the failure is attributed
	 * to the region of the task that produced the affected result partition. If that producer
	 * cannot be resolved (for example because the partition has already been disposed), or if the
	 * cause is not a data consumption problem, the region of the failed vertex itself is returned.
	 *
	 * @param failedVertex The vertex whose execution failed
	 * @param cause The failure cause
	 * @return The region from which the restart traversal starts
	 */
	private FailoverRegion getRootFailedRegion(ExecutionVertex failedVertex, Throwable cause) {
		final Optional<DataConsumptionException> dataConsumptionException = ExceptionUtils.findThrowable(
			cause, DataConsumptionException.class);
		if (!dataConsumptionException.isPresent()) {
			return vertexToRegion.get(failedVertex);
		}

		LOG.info("Try restarting producer of {} due to DataConsumptionException", failedVertex);
		final ResultPartitionID predecessorResultPartition = dataConsumptionException.get().getResultPartitionId();
		Execution producer = executionGraph.getRegisteredExecutions().get(predecessorResultPartition.getProducerId());
		if (producer == null) {
			// If the producer has finished, it is removed from registeredExecutions and we need to locate it via the
			// ResultPartitionID and the down-stream task.
			producer = findProducerViaConsumer(failedVertex, predecessorResultPartition);
		}

		if (producer == null) {
			// The producing attempt is unknown, so there is nothing to restart upstream.
			// Dereferencing it would throw a NullPointerException; restart from the failed vertex instead.
			LOG.info("Can not locate the producer of partition {}, restarting from the region of {} instead.",
				predecessorResultPartition, failedVertex);
			return vertexToRegion.get(failedVertex);
		}
		return vertexToRegion.get(producer.getVertex());
	}

	/**
	 * Looks up the execution that produced the given partition by scanning the inputs of the
	 * consuming vertex.
	 *
	 * @param consumerVertex The vertex that consumes the partition
	 * @param partitionId The id of the partition whose producer is searched
	 * @return The current execution attempt of the producer if its attempt id matches the producer
	 *         id recorded in {@code partitionId}; {@code null} if the partition is not found among
	 *         the inputs or was produced by a different attempt
	 */
	private Execution findProducerViaConsumer(ExecutionVertex consumerVertex, ResultPartitionID partitionId) {
		for (IntermediateResult intermediateResult : consumerVertex.getJobVertex().getInputs()) {
			final IntermediateResultPartition resultPartition = intermediateResult.getPartitionOrNullById(
				partitionId.getPartitionId());
			if (resultPartition == null) {
				continue;
			}

			// only the first input that holds the partition is considered
			final Execution producerVertexCurrentAttempt = resultPartition.getProducer().getCurrentExecutionAttempt();
			if (producerVertexCurrentAttempt.getAttemptId().equals(partitionId.getProducerId())) {
				return producerVertexCurrentAttempt;
			}
			LOG.warn("partition {} has already been disposed, skip restarting the producer.",
				partitionId);
			return null;
		}
		return null;
	}

	/**
	 * Orders the given regions by their position in {@code sortedRegions}.
	 *
	 * <p>NOTE(review): {@code sortedRegions} is maintained by the super class and is presumably in
	 * topological order - confirm against {@link RestartPipelinedRegionStrategy}.
	 *
	 * @param regions The regions to order
	 * @return A new list with the members of {@code regions} that also occur in {@code sortedRegions},
	 *         in the iteration order of {@code sortedRegions}
	 */
	private List<FailoverRegion> sortRegionsTopologically(Set<FailoverRegion> regions) {
		final List<FailoverRegion> regionsSorted = new ArrayList<>();
		for (FailoverRegion region : sortedRegions) {
			if (regions.contains(region)) {
				regionsSorted.add(region);
			}
		}
		return regionsSorted;
	}

	@Override
	public String getStrategyName() {
		return "Strict Pipelined Region Failover";
	}

	// ------------------------------------------------------------------------
	//  factory
	// ------------------------------------------------------------------------

	/**
	 * Factory that instantiates the StrictRestartPipelinedRegionStrategy.
	 */
	public static class Factory implements FailoverStrategy.Factory {

		/** The max number a region can fail, passed to every created strategy; defaults to 100. */
		private int regionFailLimit = 100;

		@Override
		public FailoverStrategy create(ExecutionGraph executionGraph) {
			return new StrictRestartPipelinedRegionStrategy(executionGraph, regionFailLimit);
		}

		/**
		 * Sets the region fail limit used for strategies created afterwards.
		 *
		 * @param regionFailLimit The max number a region can fail
		 */
		public void setRegionFailLimit(int regionFailLimit) {
			this.regionFailLimit = regionFailLimit;
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy