org.apache.flink.runtime.scheduler.strategy.LazyFromSourcesSchedulingStrategy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.scheduler.strategy;

import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.scheduler.DeploymentOption;
import org.apache.flink.runtime.scheduler.ExecutionVertexDeploymentOption;
import org.apache.flink.runtime.scheduler.SchedulerOperations;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static org.apache.flink.runtime.execution.ExecutionState.CREATED;
import static org.apache.flink.runtime.execution.ExecutionState.FINISHED;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * {@link SchedulingStrategy} instance for batch jobs which schedules vertices as soon as their input data are ready.
 */
public class LazyFromSourcesSchedulingStrategy implements SchedulingStrategy {

	private static final Predicate<SchedulingExecutionVertex> IS_IN_CREATED_EXECUTION_STATE = schedulingExecutionVertex -> CREATED == schedulingExecutionVertex.getState();

	private final SchedulerOperations schedulerOperations;

	private final SchedulingTopology schedulingTopology;

	private final Map<ExecutionVertexID, DeploymentOption> deploymentOptions;

	private final InputDependencyConstraintChecker inputConstraintChecker;

	public LazyFromSourcesSchedulingStrategy(
			SchedulerOperations schedulerOperations,
			SchedulingTopology schedulingTopology) {

		this.schedulerOperations = checkNotNull(schedulerOperations);
		this.schedulingTopology = checkNotNull(schedulingTopology);
		this.deploymentOptions = new HashMap<>();
		this.inputConstraintChecker = new InputDependencyConstraintChecker();
	}

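	/**
	 * Registers every produced result partition with the input constraint checker,
	 * assigns each vertex a deployment option (the "update" option if it produces at
	 * least one pipelined result partition), and then tries to schedule all vertices
	 * whose input constraints are already satisfied, typically the sources.
	 */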
	@Override
	public void startScheduling() {
		final DeploymentOption updateOption = new DeploymentOption(true);
		final DeploymentOption nonUpdateOption = new DeploymentOption(false);

		for (SchedulingExecutionVertex schedulingVertex : schedulingTopology.getVertices()) {
			DeploymentOption option = nonUpdateOption;
			for (SchedulingResultPartition srp : schedulingVertex.getProducedResultPartitions()) {
				if (srp.getPartitionType().isPipelined()) {
					option = updateOption;
				}
				inputConstraintChecker.addSchedulingResultPartition(srp);
			}
			deploymentOptions.put(schedulingVertex.getId(), option);
		}

		allocateSlotsAndDeployExecutionVertexIds(getAllVerticesFromTopology());
	}

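	/**
	 * Resets the input constraint checker's bookkeeping for all result partitions
	 * produced by the vertices to restart, then attempts to reschedule those vertices.
	 */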
	@Override
	public void restartTasks(Set<ExecutionVertexID> verticesToRestart) {
		// increase counter of the dataset first
		verticesToRestart
			.stream()
			.map(schedulingTopology::getVertexOrThrow)
			.flatMap(vertex -> vertex.getProducedResultPartitions().stream())
			.forEach(inputConstraintChecker::resetSchedulingResultPartition);

		allocateSlotsAndDeployExecutionVertexIds(verticesToRestart);
	}

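	/**
	 * Reacts only to FINISHED transitions: the finished vertex's result partitions are
	 * marked as finished in the constraint checker, and the consumers of every partition
	 * the checker reports back become new scheduling candidates.
	 */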
	@Override
	public void onExecutionStateChange(ExecutionVertexID executionVertexId, ExecutionState executionState) {
		if (!FINISHED.equals(executionState)) {
			return;
		}

		final Set<SchedulingExecutionVertex> verticesToSchedule = schedulingTopology.getVertexOrThrow(executionVertexId)
			.getProducedResultPartitions()
			.stream()
			.flatMap(partition -> inputConstraintChecker.markSchedulingResultPartitionFinished(partition).stream())
			.flatMap(partition -> partition.getConsumers().stream())
			.collect(Collectors.toSet());

		allocateSlotsAndDeployExecutionVertices(verticesToSchedule);
	}

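	/**
	 * Handles "partition consumable" notifications. Only pipelined partitions are relevant
	 * here: their consumers become scheduling candidates as soon as data is available.
	 * Blocking partitions are handled via {@link #onExecutionStateChange} instead.
	 */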
	@Override
	public void onPartitionConsumable(ExecutionVertexID executionVertexId, ResultPartitionID resultPartitionId) {
		final SchedulingResultPartition resultPartition = schedulingTopology
			.getResultPartitionOrThrow(resultPartitionId.getPartitionId());

		if (!resultPartition.getPartitionType().isPipelined()) {
			return;
		}

		final SchedulingExecutionVertex producerVertex = schedulingTopology.getVertexOrThrow(executionVertexId);
		if (!producerVertex.getProducedResultPartitions().contains(resultPartition)) {
			throw new IllegalStateException("partition " + resultPartitionId
					+ " is not the produced partition of " + executionVertexId);
		}

		allocateSlotsAndDeployExecutionVertices(resultPartition.getConsumers());
	}

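	/** Resolves the given vertex IDs against the topology and delegates to the vertex-based variant. */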
	private void allocateSlotsAndDeployExecutionVertexIds(Set<ExecutionVertexID> verticesToSchedule) {
		allocateSlotsAndDeployExecutionVertices(
			verticesToSchedule
				.stream()
				.map(schedulingTopology::getVertexOrThrow)
				.collect(Collectors.toList()));
	}

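	/**
	 * Filters the candidates down to vertices that are still in CREATED state and whose
	 * input dependency constraint is satisfied, then hands them (together with their
	 * recorded deployment options) to the scheduler for slot allocation and deployment.
	 */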
	private void allocateSlotsAndDeployExecutionVertices(final Collection<SchedulingExecutionVertex> schedulingExecutionVertices) {
		schedulerOperations.allocateSlotsAndDeploy(
			schedulingExecutionVertices
				.stream()
				.filter(isInputConstraintSatisfied().and(IS_IN_CREATED_EXECUTION_STATE))
				.map(SchedulingExecutionVertex::getId)
				.map(executionVertexID -> new ExecutionVertexDeploymentOption(
					executionVertexID,
					deploymentOptions.get(executionVertexID)))
				.collect(Collectors.toSet()));
	}

	private Predicate<SchedulingExecutionVertex> isInputConstraintSatisfied() {
		return inputConstraintChecker::check;
	}

	private Set<ExecutionVertexID> getAllVerticesFromTopology() {
		return StreamSupport
			.stream(schedulingTopology.getVertices().spliterator(), false)
			.map(SchedulingExecutionVertex::getId)
			.collect(Collectors.toSet());
	}

	/**
	 * The factory for creating {@link LazyFromSourcesSchedulingStrategy}.
	 */
	public static class Factory implements SchedulingStrategyFactory {
		@Override
		public SchedulingStrategy createInstance(
				SchedulerOperations schedulerOperations,
				SchedulingTopology schedulingTopology,
				JobGraph jobGraph) {
			return new LazyFromSourcesSchedulingStrategy(schedulerOperations, schedulingTopology);
		}
	}
}
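
The class above is driven by Flink's scheduler rather than used directly. The snippet below is a minimal usage sketch, not part of the file above: it assumes the collaborators (a SchedulerOperations, a SchedulingTopology, a JobGraph and the ExecutionVertexID of a finished task) are provided by the surrounding scheduler, and the class and method names of the sketch itself are hypothetical; only the API shown in the file is used.

package org.apache.flink.runtime.scheduler.strategy;

import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.scheduler.SchedulerOperations;

/** Illustrative sketch of how a scheduler component could wire up and drive the strategy. */
final class LazyFromSourcesStrategyUsageSketch {

	static SchedulingStrategy createAndStart(
			SchedulerOperations schedulerOperations,
			SchedulingTopology schedulingTopology,
			JobGraph jobGraph) {

		// The nested Factory is the intended entry point; this particular strategy
		// does not use the JobGraph argument.
		SchedulingStrategy strategy = new LazyFromSourcesSchedulingStrategy.Factory()
			.createInstance(schedulerOperations, schedulingTopology, jobGraph);

		// Schedules every vertex whose input constraints are already satisfied,
		// typically the source vertices.
		strategy.startScheduling();
		return strategy;
	}

	static void onTaskFinished(SchedulingStrategy strategy, ExecutionVertexID finishedVertex) {
		// A FINISHED producer may make its blocking result partitions consumable,
		// which in turn can unlock downstream vertices for scheduling.
		strategy.onExecutionStateChange(finishedVertex, ExecutionState.FINISHED);
	}
}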