org.apache.flink.runtime.jobmaster.BestEffortExecutionSlotAllocator Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of flink-runtime_2.11 Show documentation
There is a newer version: 1.5.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.jobmaster;

import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.clusterframework.types.SlotProfile;
import org.apache.flink.runtime.concurrent.FutureUtils;
import org.apache.flink.runtime.executiongraph.Execution;
import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit;
import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;

/**
 * A slot allocator which will try its best to allocate slots.
 * And when failed to allocate all slots, it will assign the returned slot to executions and notice the caller.
 */
public class BestEffortExecutionSlotAllocator implements ExecutionSlotAllocator{

	static final Logger LOG = LoggerFactory.getLogger(BestEffortExecutionSlotAllocator.class);

	private final SlotProvider slotProvider;

	private final boolean allowQueuedScheduling;

	public BestEffortExecutionSlotAllocator(
			SlotProvider slotProvider,
			boolean allowQueuedScheduling){
		this.slotProvider = slotProvider;
		this.allowQueuedScheduling = allowQueuedScheduling;
	}

	@Override
	public CompletableFuture> allocateSlotsFor(Collection executions, Time allocationTimeout) {

		// Important: reserve all the space we need up front.
		// that way we do not have any operation that can fail between allocating the slots
		// and adding them to the list. If we had a failure in between there, that would
		// cause the slots to get lost
		final boolean queued = allowQueuedScheduling;

		List slotRequestIds = new ArrayList<>(executions.size());
		List scheduledUnits = new ArrayList<>(executions.size());
		List slotProfiles = new ArrayList<>(executions.size());
		List scheduledExecutions = new ArrayList<>(executions.size());

		for (Execution exec : executions) {
			Tuple2 scheduleUnitAndSlotProfile = exec.prepareSchedulingResources();
			slotRequestIds.add(new SlotRequestId());
			scheduledUnits.add(scheduleUnitAndSlotProfile.f0);
			slotProfiles.add(scheduleUnitAndSlotProfile.f1);
			scheduledExecutions.add(exec);
		}

		if (slotRequestIds.isEmpty()) {
			return CompletableFuture.completedFuture(null);
		}

		LOG.info("Request {} slots in a group.", slotRequestIds.size());

		List> allocationFutures =
				slotProvider.allocateSlots(slotRequestIds, scheduledUnits, queued, slotProfiles, allocationTimeout);
		for (int i = 0; i < allocationFutures.size(); i++) {
			final int index = i;
			allocationFutures.get(i).whenComplete(
					(ignore, throwable) -> {
						if (throwable != null) {
							slotProvider.cancelSlotRequest(
									slotRequestIds.get(index),
									scheduledUnits.get(index).getSlotSharingGroupId(),
									scheduledUnits.get(index).getCoLocationConstraint(),
									throwable);
						}
					}
			);
		}
		// this future is complete once all slot futures are complete.
		// the future fails once one slot future fails.
		final FutureUtils.ConjunctFuture> allAllocationFutures = FutureUtils.combineAllInOrder(allocationFutures);

		CompletableFuture> returnFuture = allAllocationFutures
				.handle(
						(slots, throwable) -> {
							if (throwable != null) {
								LOG.info("Batch request {} slots, but only {} are fulfilled.",
										allAllocationFutures.getNumFuturesTotal(), allAllocationFutures.getNumFuturesCompleted(), throwable);

								// Complete all futures first, or else the execution fail may cause global failover,
								// and other executions may fail first without clear the pending request.
								List returnSlots = new ArrayList<>(allocationFutures.size());
								for (int i = 0; i < allocationFutures.size(); i++) {
									if (allocationFutures.get(i).completeExceptionally(throwable)) {
										returnSlots.add(i, null);
									} else {
										try {
											returnSlots.add(i, allocationFutures.get(i).get());
										} catch (Exception e) {
											returnSlots.add(i, null);
										}
									}
								}
								return returnSlots;
							} else {
								return slots;
							}
						}
				);
		returnFuture.exceptionally(
				throwable -> {
					if (throwable instanceof CancellationException) {
						for (int i = 0; i < allocationFutures.size(); i++) {
							if (!allocationFutures.get(i).completeExceptionally(throwable)) {
								try {
									LogicalSlot slot = allocationFutures.get(i).get();
									if (slot != null) {
										slot.releaseSlot(throwable);
									}
								} catch (Exception e) {
								}
							}
						}
						return null;
					} else {
						throw new CompletionException(throwable);
					}
				});
		return returnFuture;
	}
}