// org.apache.flink.runtime.jobmanager.scheduler.Scheduler (Maven / Gradle / Ivy artifact page header)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmanager.scheduler;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.LinkedBlockingQueue;
import akka.dispatch.Futures;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.flink.runtime.instance.SlotSharingGroupAssignment;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.instance.SharedSlot;
import org.apache.flink.runtime.instance.SimpleSlot;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.instance.Instance;
import org.apache.flink.runtime.instance.InstanceDiedException;
import org.apache.flink.runtime.instance.InstanceListener;
import org.apache.flink.util.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.concurrent.ExecutionContext;
/**
* The scheduler is responsible for distributing the ready-to-run tasks among instances and slots.
*
* The scheduler supports two scheduling modes:
*
* - Immediate scheduling: A request for a task slot immediately returns a task slot, if one is
* available, or throws a {@link NoResourceAvailableException}.
* - Queued Scheduling: A request for a task slot is queued and returns a future that will be
* fulfilled as soon as a slot becomes available.
*
*/
public class Scheduler implements InstanceListener, SlotAvailabilityListener {
/** Scheduler-wide logger */
private static final Logger LOG = LoggerFactory.getLogger(Scheduler.class);
/** All modifications to the scheduler structures are performed under a global scheduler lock */
private final Object globalLock = new Object();
/** All instances that the scheduler can deploy to */
private final Set allInstances = new HashSet();
/** All instances by hostname */
private final HashMap> allInstancesByHost = new HashMap>();
/** All instances that still have available resources */
private final Queue instancesWithAvailableResources = new SetQueue();
/** All tasks pending to be scheduled */
private final Queue taskQueue = new ArrayDeque();
private final BlockingQueue newlyAvailableInstances = new LinkedBlockingQueue();
/** The number of slot allocations that had no location preference */
private int unconstrainedAssignments;
/** The number of slot allocations where locality could be respected */
private int localizedAssignments;
/** The number of slot allocations where locality could not be respected */
private int nonLocalizedAssignments;
/** The ExecutionContext which is used to execute newSlotAvailable futures. */
private final ExecutionContext executionContext;
// ------------------------------------------------------------------------
/**
 * Creates a new scheduler.
 *
 * @param executionContext the context used to run asynchronous {@code newSlotAvailable}
 *                         handlers; must not be null
 * @throws NullPointerException if the execution context is null
 */
public Scheduler(ExecutionContext executionContext) {
	// fail fast here rather than later, asynchronously, inside newSlotAvailable
	if (executionContext == null) {
		throw new NullPointerException("The execution context must not be null.");
	}
	this.executionContext = executionContext;
}
/**
 * Shuts the scheduler down. After shut down no more tasks can be added to the scheduler.
 */
public void shutdown() {
	synchronized (globalLock) {
		// detach this scheduler from every instance and give all slots back
		for (Instance instance : allInstances) {
			instance.removeSlotListener();
			instance.cancelAndReleaseAllSlots();
		}
		// drop all bookkeeping state
		allInstances.clear();
		allInstancesByHost.clear();
		instancesWithAvailableResources.clear();
		taskQueue.clear();
	}
}
// ------------------------------------------------------------------------
// Scheduling
// ------------------------------------------------------------------------
/**
 * Schedules the given unit immediately.
 *
 * @param task the unit of work to schedule
 * @return the allocated slot for the task
 * @throws NoResourceAvailableException if no slot is currently available
 */
public SimpleSlot scheduleImmediately(ScheduledUnit task) throws NoResourceAvailableException {
	Object ret = scheduleTask(task, false);
	if (ret instanceof SimpleSlot) {
		return (SimpleSlot) ret;
	}
	else {
		// non-queued scheduling must always yield a slot or throw; anything else is a bug
		throw new RuntimeException("Internal error: instantaneous scheduling did not return a slot.");
	}
}
/**
 * Schedules the given unit, queuing the request if no resource is immediately available.
 *
 * @param task the unit of work to schedule
 * @return a future that is either fulfilled immediately (a slot was available) or as soon
 *         as a slot becomes available
 * @throws NoResourceAvailableException if the request can neither be fulfilled nor queued
 */
public SlotAllocationFuture scheduleQueued(ScheduledUnit task) throws NoResourceAvailableException {
	Object ret = scheduleTask(task, true);
	if (ret instanceof SimpleSlot) {
		return new SlotAllocationFuture((SimpleSlot) ret);
	}
	if (ret instanceof SlotAllocationFuture) {
		return (SlotAllocationFuture) ret;
	}
	else {
		// scheduleTask must return one of the two types above; anything else is a bug
		throw new RuntimeException("Internal error: scheduling returned neither a slot nor a future.");
	}
}
/**
 * Central scheduling routine.
 *
 * <p>Returns either a {@link org.apache.flink.runtime.instance.SimpleSlot} (a slot could be
 * allocated right away) or a {@link SlotAllocationFuture} (the request was queued).
 *
 * @param task the unit of work to schedule; must not be null
 * @param queueIfNoResource if true, a request without an available slot is queued and a future
 *                          is returned; only legal for tasks without a slot sharing group
 * @return a {@code SimpleSlot} or a {@code SlotAllocationFuture}
 * @throws NoResourceAvailableException if no slot is available and queuing is not permitted
 */
private Object scheduleTask(ScheduledUnit task, boolean queueIfNoResource) throws NoResourceAvailableException {
	if (task == null) {
		throw new NullPointerException();
	}
	if (LOG.isDebugEnabled()) {
		LOG.debug("Scheduling task " + task);
	}

	final ExecutionVertex vertex = task.getTaskToExecute().getVertex();

	final Iterable<Instance> preferredLocations = vertex.getPreferredLocations();
	// an external location constraint only takes effect if there actually are preferred locations
	final boolean forceExternalLocation = vertex.isScheduleLocalOnly() &&
			preferredLocations != null && preferredLocations.iterator().hasNext();

	synchronized (globalLock) {

		SlotSharingGroup sharingUnit = task.getSlotSharingGroup();

		if (sharingUnit != null) {
			// 1) === If the task has a slot sharing group, schedule with shared slots ===

			if (queueIfNoResource) {
				throw new IllegalArgumentException(
						"A task with a vertex sharing group was scheduled in a queued fashion.");
			}

			final SlotSharingGroupAssignment assignment = sharingUnit.getTaskAssignment();
			final CoLocationConstraint constraint = task.getLocationConstraint();

			// sanity check that we do not use an externally forced location and a co-location constraint together
			if (constraint != null && forceExternalLocation) {
				throw new IllegalArgumentException("The scheduling cannot be constrained simultaneously by a "
						+ "co-location constraint and an external location constraint.");
			}

			// get a slot from the group, if the group has one for us (and can fulfill the constraint)
			final SimpleSlot slotFromGroup;
			if (constraint == null) {
				slotFromGroup = assignment.getSlotForTask(vertex);
			}
			else {
				slotFromGroup = assignment.getSlotForTask(vertex, constraint);
			}

			SimpleSlot newSlot = null;
			SimpleSlot toUse = null;

			// the following needs to make sure any allocated slot is released in case of an error
			try {
				// check whether the slot from the group is already what we want.
				// any slot that is local, or where the assignment was unconstrained is good!
				if (slotFromGroup != null && slotFromGroup.getLocality() != Locality.NON_LOCAL) {
					// if this is the first slot for the co-location constraint, we lock
					// the location, because we are quite happy with the slot
					if (constraint != null && !constraint.isAssigned()) {
						constraint.lockLocation();
					}
					updateLocalityCounters(slotFromGroup, vertex);
					return slotFromGroup;
				}

				// the group did not have a local slot for us. see if we can get one (or a better one).
				// our location preference is either determined by the location constraint, or by the
				// vertex's preferred locations
				final Iterable<Instance> locations;
				final boolean localOnly;
				if (constraint != null && constraint.isAssigned()) {
					locations = Collections.singleton(constraint.getLocation());
					localOnly = true;
				}
				else {
					locations = vertex.getPreferredLocations();
					localOnly = forceExternalLocation;
				}

				newSlot = getNewSlotForSharingGroup(vertex, locations, assignment, constraint, localOnly);

				if (newSlot == null) {
					if (slotFromGroup == null) {
						// both null, which means there is nothing available at all
						if (constraint != null && constraint.isAssigned()) {
							// nothing is available on the node where the co-location constraint forces us to
							throw new NoResourceAvailableException("Could not allocate a slot on instance " +
									constraint.getLocation() + ", as required by the co-location constraint.");
						}
						else if (forceExternalLocation) {
							// could not satisfy the external location constraint
							String hosts = getHostnamesFromInstances(preferredLocations);
							throw new NoResourceAvailableException("Could not schedule task " + vertex
									+ " to any of the required hosts: " + hosts);
						}
						else {
							// simply nothing is available
							throw new NoResourceAvailableException(task, getNumberOfAvailableInstances(),
									getTotalNumberOfSlots(), getNumberOfAvailableSlots());
						}
					}
					else {
						// got a non-local from the group, and no new one, so we use the non-local
						// slot from the sharing group
						toUse = slotFromGroup;
					}
				}
				else if (slotFromGroup == null || !slotFromGroup.isAlive() || newSlot.getLocality() == Locality.LOCAL) {
					// if there is no slot from the group, or the new slot is local,
					// then we use the new slot
					if (slotFromGroup != null) {
						slotFromGroup.releaseSlot();
					}
					toUse = newSlot;
				}
				else {
					// both are available and usable. neither is local. in that case, we may
					// as well use the slot from the sharing group, to minimize the number of
					// instances that the job occupies
					newSlot.releaseSlot();
					toUse = slotFromGroup;
				}

				// if this is the first slot for the co-location constraint, we lock
				// the location, because we are going to use that slot
				if (constraint != null && !constraint.isAssigned()) {
					constraint.lockLocation();
				}

				updateLocalityCounters(toUse, vertex);
			}
			catch (NoResourceAvailableException e) {
				throw e;
			}
			catch (Throwable t) {
				// release both candidate slots so the failure does not leak resources
				if (slotFromGroup != null) {
					slotFromGroup.releaseSlot();
				}
				if (newSlot != null) {
					newSlot.releaseSlot();
				}
				ExceptionUtils.rethrow(t, "An error occurred while allocating a slot in a sharing group");
			}

			return toUse;
		}
		else {
			// 2) === schedule without hints and sharing ===

			SimpleSlot slot = getFreeSlotForTask(vertex, preferredLocations, forceExternalLocation);
			if (slot != null) {
				updateLocalityCounters(slot, vertex);
				return slot;
			}
			else {
				// no resource available now, so queue the request
				if (queueIfNoResource) {
					SlotAllocationFuture future = new SlotAllocationFuture();
					this.taskQueue.add(new QueuedTask(task, future));
					return future;
				}
				else if (forceExternalLocation) {
					String hosts = getHostnamesFromInstances(preferredLocations);
					throw new NoResourceAvailableException("Could not schedule task " + vertex
							+ " to any of the required hosts: " + hosts);
				}
				else {
					throw new NoResourceAvailableException(getNumberOfAvailableInstances(),
							getTotalNumberOfSlots(), getNumberOfAvailableSlots());
				}
			}
		}
	}
}
/**
 * Gets a suitable instance to schedule the vertex execution to and allocates a simple
 * slot on that instance.
 *
 * <p>NOTE: This method is not thread-safe, it needs to be synchronized by the caller.
 *
 * @param vertex The task to run.
 * @param requestedLocations The instances considered local. May be null or empty if the
 *                           vertex has no location preferences.
 * @param localOnly Flag to indicate if only local instances may be chosen.
 * @return The slot to run the vertex in, or {@code null}, if no instance is available.
 */
protected SimpleSlot getFreeSlotForTask(ExecutionVertex vertex,
										Iterable<Instance> requestedLocations,
										boolean localOnly) {
	// we need potentially to loop multiple times, because there may be false positives
	// in the set-with-available-instances
	while (true) {
		Pair<Instance, Locality> instanceLocalityPair = findInstance(requestedLocations, localOnly);

		if (instanceLocalityPair == null) {
			// nothing is available at all
			return null;
		}

		Instance instanceToUse = instanceLocalityPair.getLeft();
		Locality locality = instanceLocalityPair.getRight();

		try {
			SimpleSlot slot = instanceToUse.allocateSimpleSlot(vertex.getJobId());

			// if the instance has further available slots, re-add it to the set of available resources.
			if (instanceToUse.hasResourcesAvailable()) {
				this.instancesWithAvailableResources.add(instanceToUse);
			}

			if (slot != null) {
				slot.setLocality(locality);
				return slot;
			}
		}
		catch (InstanceDiedException e) {
			// the instance died and this has not yet been propagated to the scheduler;
			// remove the instance from the set of available instances
			removeInstance(instanceToUse);
		}

		// if we failed to get a slot, fall through the loop and try the next candidate
	}
}
/**
 * Tries to allocate a new slot for a vertex that is part of a slot sharing group. If one
 * of the instances has a slot available, the method will allocate it as a shared slot, add that
 * shared slot to the sharing group, and allocate a simple slot from that shared slot.
 *
 * <p>This method will try to allocate a slot from one of the local instances, and fall back to
 * non-local instances, if permitted.
 *
 * @param vertex The vertex to allocate the slot for.
 * @param requestedLocations The locations that are considered local. May be null or empty, if the
 *                           vertex has no location preferences.
 * @param groupAssignment The slot sharing group of the vertex. Mandatory parameter.
 * @param constraint The co-location constraint of the vertex. May be null.
 * @param localOnly Flag to indicate if non-local choices are acceptable.
 *
 * @return A sub-slot for the given vertex, or {@code null}, if no slot is available.
 */
protected SimpleSlot getNewSlotForSharingGroup(ExecutionVertex vertex,
												Iterable<Instance> requestedLocations,
												SlotSharingGroupAssignment groupAssignment,
												CoLocationConstraint constraint,
												boolean localOnly)
{
	// we need potentially to loop multiple times, because there may be false positives
	// in the set-with-available-instances
	while (true) {
		Pair<Instance, Locality> instanceLocalityPair = findInstance(requestedLocations, localOnly);

		if (instanceLocalityPair == null) {
			// nothing is available
			return null;
		}

		final Instance instanceToUse = instanceLocalityPair.getLeft();
		final Locality locality = instanceLocalityPair.getRight();

		try {
			JobVertexID groupID = vertex.getJobvertexId();

			// allocate a shared slot from the instance
			SharedSlot sharedSlot = instanceToUse.allocateSharedSlot(vertex.getJobId(), groupAssignment);

			// if the instance has further available slots, re-add it to the set of available resources.
			if (instanceToUse.hasResourcesAvailable()) {
				this.instancesWithAvailableResources.add(instanceToUse);
			}

			if (sharedSlot != null) {
				// add the shared slot to the assignment group and allocate a sub-slot
				SimpleSlot slot = constraint == null ?
						groupAssignment.addSharedSlotAndAllocateSubSlot(sharedSlot, locality, groupID) :
						groupAssignment.addSharedSlotAndAllocateSubSlot(sharedSlot, locality, constraint);

				if (slot != null) {
					return slot;
				}
				else {
					// could not add and allocate the sub-slot, so release shared slot
					sharedSlot.releaseSlot();
				}
			}
		}
		catch (InstanceDiedException e) {
			// the instance died and this has not yet been propagated to the scheduler;
			// remove the instance from the set of available instances
			removeInstance(instanceToUse);
		}

		// if we failed to get a slot, fall through the loop and try the next candidate
	}
}
/**
 * Tries to find a requested instance. If no such instance is available it will return a non-
 * local instance. If no such instance exists (all slots occupied), then return null.
 *
 * <p>NOTE: This method is not thread-safe, it needs to be synchronized by the caller.
 *
 * @param requestedLocations The list of preferred instances. May be null or empty, which indicates that
 *                           no locality preference exists.
 * @param localOnly Flag to indicate whether only one of the exact local instances can be chosen.
 * @return A pair of chosen instance and the achieved locality, or {@code null} if nothing is available.
 */
private Pair<Instance, Locality> findInstance(Iterable<Instance> requestedLocations, boolean localOnly) {

	// drain the queue of newly available instances into the (deduplicating) set of
	// instances with available resources; poll() may return null under concurrent drains
	while (!this.newlyAvailableInstances.isEmpty()) {
		Instance queuedInstance = this.newlyAvailableInstances.poll();
		if (queuedInstance != null) {
			this.instancesWithAvailableResources.add(queuedInstance);
		}
	}

	// if nothing is available at all, return null
	if (this.instancesWithAvailableResources.isEmpty()) {
		return null;
	}

	Iterator<Instance> locations = requestedLocations == null ? null : requestedLocations.iterator();

	if (locations != null && locations.hasNext()) {
		// we have a locality preference

		while (locations.hasNext()) {
			Instance location = locations.next();
			// remove() doubles as the membership test: it only succeeds if the
			// preferred instance currently has resources available
			if (location != null && this.instancesWithAvailableResources.remove(location)) {
				return new ImmutablePair<Instance, Locality>(location, Locality.LOCAL);
			}
		}

		// no local instance available
		if (localOnly) {
			return null;
		}
		else {
			Instance instanceToUse = this.instancesWithAvailableResources.poll();
			return new ImmutablePair<Instance, Locality>(instanceToUse, Locality.NON_LOCAL);
		}
	}
	else {
		// no location preference, so use some instance
		Instance instanceToUse = this.instancesWithAvailableResources.poll();
		return new ImmutablePair<Instance, Locality>(instanceToUse, Locality.UNCONSTRAINED);
	}
}
@Override
public void newSlotAvailable(final Instance instance) {
// WARNING: The asynchrony here is necessary, because we cannot guarantee the order
// of lock acquisition (global scheduler, instance) and otherwise lead to potential deadlocks:
//
// -> The scheduler needs to grab them (1) global scheduler lock
// (2) slot/instance lock
// -> The slot releasing grabs (1) slot/instance (for releasing) and
// (2) scheduler (to check whether to take a new task item
//
// that leads with a high probability to deadlocks, when scheduling fast
this.newlyAvailableInstances.add(instance);
Futures.future(new Callable