// org.apache.flink.runtime.jobmanager.scheduler.Scheduler (Maven / Gradle / Ivy artifact page header)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmanager.scheduler;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.LinkedBlockingQueue;
import akka.dispatch.Futures;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.flink.runtime.instance.SlotSharingGroupAssignment;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.instance.SharedSlot;
import org.apache.flink.runtime.instance.SimpleSlot;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.instance.Instance;
import org.apache.flink.runtime.instance.InstanceDiedException;
import org.apache.flink.runtime.instance.InstanceListener;
import org.apache.flink.util.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.concurrent.ExecutionContext;
/**
* The scheduler is responsible for distributing the ready-to-run tasks among instances and slots.
*
* The scheduler supports two scheduling modes:
*
* - Immediate scheduling: A request for a task slot immediately returns a task slot, if one is
* available, or throws a {@link NoResourceAvailableException}.
* - Queued Scheduling: A request for a task slot is queued and returns a future that will be
* fulfilled as soon as a slot becomes available.
*
*/
public class Scheduler implements InstanceListener, SlotAvailabilityListener {
/** Scheduler-wide logger */
private static final Logger LOG = LoggerFactory.getLogger(Scheduler.class);
/** All modifications to the scheduler structures are performed under a global scheduler lock */
private final Object globalLock = new Object();
/** All instances that the scheduler can deploy to */
private final Set allInstances = new HashSet();
/** All instances by hostname */
private final HashMap> allInstancesByHost = new HashMap>();
/** All instances that still have available resources */
private final Queue instancesWithAvailableResources = new SetQueue();
/** All tasks pending to be scheduled */
private final Queue taskQueue = new ArrayDeque();
private final BlockingQueue newlyAvailableInstances = new LinkedBlockingQueue();
/** The number of slot allocations that had no location preference */
private int unconstrainedAssignments;
/** The number of slot allocations where locality could be respected */
private int localizedAssignments;
/** The number of slot allocations where locality could not be respected */
private int nonLocalizedAssignments;
/** The ExecutionContext which is used to execute newSlotAvailable futures. */
private final ExecutionContext executionContext;
// ------------------------------------------------------------------------
/**
 * Creates a new scheduler.
 *
 * @param executionContext the context used to run asynchronous {@code newSlotAvailable}
 *                         handlers; must not be null
 * @throws NullPointerException if the execution context is null
 */
public Scheduler(ExecutionContext executionContext) {
	// fail fast here rather than later, asynchronously, inside newSlotAvailable
	if (executionContext == null) {
		throw new NullPointerException("The execution context must not be null.");
	}
	this.executionContext = executionContext;
}
/**
 * Shuts the scheduler down. After shut down no more tasks can be added to the scheduler.
 */
public void shutdown() {
	synchronized (globalLock) {
		// detach this scheduler from every instance and give all slots back
		for (Instance instance : allInstances) {
			instance.removeSlotListener();
			instance.cancelAndReleaseAllSlots();
		}
		// drop all bookkeeping state
		allInstances.clear();
		allInstancesByHost.clear();
		instancesWithAvailableResources.clear();
		taskQueue.clear();
	}
}
// ------------------------------------------------------------------------
// Scheduling
// ------------------------------------------------------------------------
/**
 * Schedules the given unit immediately.
 *
 * @param task the unit of work to schedule
 * @return the allocated slot for the task
 * @throws NoResourceAvailableException if no slot is currently available
 */
public SimpleSlot scheduleImmediately(ScheduledUnit task) throws NoResourceAvailableException {
	Object ret = scheduleTask(task, false);
	if (ret instanceof SimpleSlot) {
		return (SimpleSlot) ret;
	}
	else {
		// non-queued scheduling must always yield a slot or throw; anything else is a bug
		throw new RuntimeException("Internal error: instantaneous scheduling did not return a slot.");
	}
}
/**
 * Schedules the given unit, queuing the request if no resource is immediately available.
 *
 * @param task the unit of work to schedule
 * @return a future that is either fulfilled immediately (a slot was available) or as soon
 *         as a slot becomes available
 * @throws NoResourceAvailableException if the request can neither be fulfilled nor queued
 */
public SlotAllocationFuture scheduleQueued(ScheduledUnit task) throws NoResourceAvailableException {
	Object ret = scheduleTask(task, true);
	if (ret instanceof SimpleSlot) {
		return new SlotAllocationFuture((SimpleSlot) ret);
	}
	if (ret instanceof SlotAllocationFuture) {
		return (SlotAllocationFuture) ret;
	}
	else {
		// scheduleTask must return one of the two types above; anything else is a bug
		throw new RuntimeException("Internal error: scheduling returned neither a slot nor a future.");
	}
}
/**
 * Central scheduling routine.
 *
 * <p>Returns either a {@link org.apache.flink.runtime.instance.SimpleSlot} (a slot could be
 * allocated right away) or a {@link SlotAllocationFuture} (the request was queued).
 *
 * @param task the unit of work to schedule; must not be null
 * @param queueIfNoResource if true, a request without an available slot is queued and a future
 *                          is returned; only legal for tasks without a slot sharing group
 * @return a {@code SimpleSlot} or a {@code SlotAllocationFuture}
 * @throws NoResourceAvailableException if no slot is available and queuing is not permitted
 */
private Object scheduleTask(ScheduledUnit task, boolean queueIfNoResource) throws NoResourceAvailableException {
	if (task == null) {
		throw new NullPointerException();
	}
	if (LOG.isDebugEnabled()) {
		LOG.debug("Scheduling task " + task);
	}

	final ExecutionVertex vertex = task.getTaskToExecute().getVertex();

	final Iterable<Instance> preferredLocations = vertex.getPreferredLocations();
	// an external location constraint only takes effect if there actually are preferred locations
	final boolean forceExternalLocation = vertex.isScheduleLocalOnly() &&
			preferredLocations != null && preferredLocations.iterator().hasNext();

	synchronized (globalLock) {

		SlotSharingGroup sharingUnit = task.getSlotSharingGroup();

		if (sharingUnit != null) {
			// 1) === If the task has a slot sharing group, schedule with shared slots ===

			if (queueIfNoResource) {
				throw new IllegalArgumentException(
						"A task with a vertex sharing group was scheduled in a queued fashion.");
			}

			final SlotSharingGroupAssignment assignment = sharingUnit.getTaskAssignment();
			final CoLocationConstraint constraint = task.getLocationConstraint();

			// sanity check that we do not use an externally forced location and a co-location constraint together
			if (constraint != null && forceExternalLocation) {
				throw new IllegalArgumentException("The scheduling cannot be constrained simultaneously by a "
						+ "co-location constraint and an external location constraint.");
			}

			// get a slot from the group, if the group has one for us (and can fulfill the constraint)
			final SimpleSlot slotFromGroup;
			if (constraint == null) {
				slotFromGroup = assignment.getSlotForTask(vertex);
			}
			else {
				slotFromGroup = assignment.getSlotForTask(vertex, constraint);
			}

			SimpleSlot newSlot = null;
			SimpleSlot toUse = null;

			// the following needs to make sure any allocated slot is released in case of an error
			try {
				// check whether the slot from the group is already what we want.
				// any slot that is local, or where the assignment was unconstrained is good!
				if (slotFromGroup != null && slotFromGroup.getLocality() != Locality.NON_LOCAL) {
					// if this is the first slot for the co-location constraint, we lock
					// the location, because we are quite happy with the slot
					if (constraint != null && !constraint.isAssigned()) {
						constraint.lockLocation();
					}
					updateLocalityCounters(slotFromGroup, vertex);
					return slotFromGroup;
				}

				// the group did not have a local slot for us. see if we can get one (or a better one).
				// our location preference is either determined by the location constraint, or by the
				// vertex's preferred locations
				final Iterable<Instance> locations;
				final boolean localOnly;
				if (constraint != null && constraint.isAssigned()) {
					locations = Collections.singleton(constraint.getLocation());
					localOnly = true;
				}
				else {
					locations = vertex.getPreferredLocations();
					localOnly = forceExternalLocation;
				}

				newSlot = getNewSlotForSharingGroup(vertex, locations, assignment, constraint, localOnly);

				if (newSlot == null) {
					if (slotFromGroup == null) {
						// both null, which means there is nothing available at all
						if (constraint != null && constraint.isAssigned()) {
							// nothing is available on the node where the co-location constraint forces us to
							throw new NoResourceAvailableException("Could not allocate a slot on instance " +
									constraint.getLocation() + ", as required by the co-location constraint.");
						}
						else if (forceExternalLocation) {
							// could not satisfy the external location constraint
							String hosts = getHostnamesFromInstances(preferredLocations);
							throw new NoResourceAvailableException("Could not schedule task " + vertex
									+ " to any of the required hosts: " + hosts);
						}
						else {
							// simply nothing is available
							throw new NoResourceAvailableException(task, getNumberOfAvailableInstances(),
									getTotalNumberOfSlots(), getNumberOfAvailableSlots());
						}
					}
					else {
						// got a non-local from the group, and no new one, so we use the non-local
						// slot from the sharing group
						toUse = slotFromGroup;
					}
				}
				else if (slotFromGroup == null || !slotFromGroup.isAlive() || newSlot.getLocality() == Locality.LOCAL) {
					// if there is no slot from the group, or the new slot is local,
					// then we use the new slot
					if (slotFromGroup != null) {
						slotFromGroup.releaseSlot();
					}
					toUse = newSlot;
				}
				else {
					// both are available and usable. neither is local. in that case, we may
					// as well use the slot from the sharing group, to minimize the number of
					// instances that the job occupies
					newSlot.releaseSlot();
					toUse = slotFromGroup;
				}

				// if this is the first slot for the co-location constraint, we lock
				// the location, because we are going to use that slot
				if (constraint != null && !constraint.isAssigned()) {
					constraint.lockLocation();
				}

				updateLocalityCounters(toUse, vertex);
			}
			catch (NoResourceAvailableException e) {
				throw e;
			}
			catch (Throwable t) {
				// release both candidate slots so the failure does not leak resources
				if (slotFromGroup != null) {
					slotFromGroup.releaseSlot();
				}
				if (newSlot != null) {
					newSlot.releaseSlot();
				}
				ExceptionUtils.rethrow(t, "An error occurred while allocating a slot in a sharing group");
			}

			return toUse;
		}
		else {
			// 2) === schedule without hints and sharing ===

			SimpleSlot slot = getFreeSlotForTask(vertex, preferredLocations, forceExternalLocation);
			if (slot != null) {
				updateLocalityCounters(slot, vertex);
				return slot;
			}
			else {
				// no resource available now, so queue the request
				if (queueIfNoResource) {
					SlotAllocationFuture future = new SlotAllocationFuture();
					this.taskQueue.add(new QueuedTask(task, future));
					return future;
				}
				else if (forceExternalLocation) {
					String hosts = getHostnamesFromInstances(preferredLocations);
					throw new NoResourceAvailableException("Could not schedule task " + vertex
							+ " to any of the required hosts: " + hosts);
				}
				else {
					throw new NoResourceAvailableException(getNumberOfAvailableInstances(),
							getTotalNumberOfSlots(), getNumberOfAvailableSlots());
				}
			}
		}
	}
}
/**
 * Gets a suitable instance to schedule the vertex execution to and allocates a simple
 * slot on that instance.
 *
 * <p>NOTE: This method is not thread-safe, it needs to be synchronized by the caller.
 *
 * @param vertex The task to run.
 * @param requestedLocations The instances considered local. May be null or empty if the
 *                           vertex has no location preferences.
 * @param localOnly Flag to indicate if only local instances may be chosen.
 * @return The slot to run the vertex in, or {@code null}, if no instance is available.
 */
protected SimpleSlot getFreeSlotForTask(ExecutionVertex vertex,
										Iterable<Instance> requestedLocations,
										boolean localOnly) {
	// we need potentially to loop multiple times, because there may be false positives
	// in the set-with-available-instances
	while (true) {
		Pair<Instance, Locality> instanceLocalityPair = findInstance(requestedLocations, localOnly);

		if (instanceLocalityPair == null) {
			// nothing is available at all
			return null;
		}

		Instance instanceToUse = instanceLocalityPair.getLeft();
		Locality locality = instanceLocalityPair.getRight();

		try {
			SimpleSlot slot = instanceToUse.allocateSimpleSlot(vertex.getJobId());

			// if the instance has further available slots, re-add it to the set of available resources.
			if (instanceToUse.hasResourcesAvailable()) {
				this.instancesWithAvailableResources.add(instanceToUse);
			}

			if (slot != null) {
				slot.setLocality(locality);
				return slot;
			}
		}
		catch (InstanceDiedException e) {
			// the instance died and this has not yet been propagated to the scheduler;
			// remove the instance from the set of available instances
			removeInstance(instanceToUse);
		}

		// if we failed to get a slot, fall through the loop and try the next candidate
	}
}
/**
 * Tries to allocate a new slot for a vertex that is part of a slot sharing group. If one
 * of the instances has a slot available, the method will allocate it as a shared slot, add that
 * shared slot to the sharing group, and allocate a simple slot from that shared slot.
 *
 * <p>This method will try to allocate a slot from one of the local instances, and fall back to
 * non-local instances, if permitted.
 *
 * @param vertex The vertex to allocate the slot for.
 * @param requestedLocations The locations that are considered local. May be null or empty, if the
 *                           vertex has no location preferences.
 * @param groupAssignment The slot sharing group of the vertex. Mandatory parameter.
 * @param constraint The co-location constraint of the vertex. May be null.
 * @param localOnly Flag to indicate if non-local choices are acceptable.
 *
 * @return A sub-slot for the given vertex, or {@code null}, if no slot is available.
 */
protected SimpleSlot getNewSlotForSharingGroup(ExecutionVertex vertex,
												Iterable<Instance> requestedLocations,
												SlotSharingGroupAssignment groupAssignment,
												CoLocationConstraint constraint,
												boolean localOnly)
{
	// we need potentially to loop multiple times, because there may be false positives
	// in the set-with-available-instances
	while (true) {
		Pair<Instance, Locality> instanceLocalityPair = findInstance(requestedLocations, localOnly);

		if (instanceLocalityPair == null) {
			// nothing is available
			return null;
		}

		final Instance instanceToUse = instanceLocalityPair.getLeft();
		final Locality locality = instanceLocalityPair.getRight();

		try {
			JobVertexID groupID = vertex.getJobvertexId();

			// allocate a shared slot from the instance
			SharedSlot sharedSlot = instanceToUse.allocateSharedSlot(vertex.getJobId(), groupAssignment);

			// if the instance has further available slots, re-add it to the set of available resources.
			if (instanceToUse.hasResourcesAvailable()) {
				this.instancesWithAvailableResources.add(instanceToUse);
			}

			if (sharedSlot != null) {
				// add the shared slot to the assignment group and allocate a sub-slot
				SimpleSlot slot = constraint == null ?
						groupAssignment.addSharedSlotAndAllocateSubSlot(sharedSlot, locality, groupID) :
						groupAssignment.addSharedSlotAndAllocateSubSlot(sharedSlot, locality, constraint);

				if (slot != null) {
					return slot;
				}
				else {
					// could not add and allocate the sub-slot, so release shared slot
					sharedSlot.releaseSlot();
				}
			}
		}
		catch (InstanceDiedException e) {
			// the instance died and this has not yet been propagated to the scheduler;
			// remove the instance from the set of available instances
			removeInstance(instanceToUse);
		}

		// if we failed to get a slot, fall through the loop and try the next candidate
	}
}
/**
 * Tries to find a requested instance. If no such instance is available it will return a non-
 * local instance. If no such instance exists (all slots occupied), then return null.
 *
 * <p>NOTE: This method is not thread-safe, it needs to be synchronized by the caller.
 *
 * @param requestedLocations The list of preferred instances. May be null or empty, which indicates that
 *                           no locality preference exists.
 * @param localOnly Flag to indicate whether only one of the exact local instances can be chosen.
 * @return A pair of chosen instance and the achieved locality, or {@code null} if nothing is available.
 */
private Pair<Instance, Locality> findInstance(Iterable<Instance> requestedLocations, boolean localOnly) {

	// drain the queue of newly available instances into the (deduplicating) set of
	// instances with available resources; poll() may return null under concurrent drains
	while (!this.newlyAvailableInstances.isEmpty()) {
		Instance queuedInstance = this.newlyAvailableInstances.poll();
		if (queuedInstance != null) {
			this.instancesWithAvailableResources.add(queuedInstance);
		}
	}

	// if nothing is available at all, return null
	if (this.instancesWithAvailableResources.isEmpty()) {
		return null;
	}

	Iterator<Instance> locations = requestedLocations == null ? null : requestedLocations.iterator();

	if (locations != null && locations.hasNext()) {
		// we have a locality preference

		while (locations.hasNext()) {
			Instance location = locations.next();
			// remove() doubles as the membership test: it only succeeds if the
			// preferred instance currently has resources available
			if (location != null && this.instancesWithAvailableResources.remove(location)) {
				return new ImmutablePair<Instance, Locality>(location, Locality.LOCAL);
			}
		}

		// no local instance available
		if (localOnly) {
			return null;
		}
		else {
			Instance instanceToUse = this.instancesWithAvailableResources.poll();
			return new ImmutablePair<Instance, Locality>(instanceToUse, Locality.NON_LOCAL);
		}
	}
	else {
		// no location preference, so use some instance
		Instance instanceToUse = this.instancesWithAvailableResources.poll();
		return new ImmutablePair<Instance, Locality>(instanceToUse, Locality.UNCONSTRAINED);
	}
}
@Override
public void newSlotAvailable(final Instance instance) {
// WARNING: The asynchrony here is necessary, because we cannot guarantee the order
// of lock acquisition (global scheduler, instance) and otherwise lead to potential deadlocks:
//
// -> The scheduler needs to grab them (1) global scheduler lock
// (2) slot/instance lock
// -> The slot releasing grabs (1) slot/instance (for releasing) and
// (2) scheduler (to check whether to take a new task item
//
// that leads with a high probability to deadlocks, when scheduling fast
this.newlyAvailableInstances.add(instance);
Futures.future(new Callable