Maven / Gradle / Ivy
* Copyright 2015 Netflix, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
* A scheduling service that you can use to optimize the assignment of tasks to hosts within a Mesos framework.
* Call the {@link #scheduleOnce scheduleOnce()} method with a list of task requests and a list of new resource
* lease offers, and that method will return a set of task assignments.
* The {@code TaskScheduler} stores any unused lease offers and will apply them during future calls to
* {@code scheduleOnce()} until a time expires, which is defined by the lease offer expiry time that you set
* when you build the {@code TaskScheduler} (the default is 120 seconds). Upon reaching the expiry time, the
* {@code TaskScheduler} rejects expired resource lease offers by invoking the action you supplied when you
* built the {@code TaskScheduler}.
* Note that when you launch a task that has been scheduled by the {@code TaskScheduler}, you should call
* the task assigner action available from the {@link #getTaskAssigner getTaskAssigner()} method. When that
* task completes, you should call the task unassigner action available from the
* {@link #getTaskUnAssigner getTaskUnAssigner()} method. These actions make the {@code TaskScheduler} keep
* track of launched tasks. The {@code TaskScheduler} then makes these tracked tasks available to its
* scheduling optimization functions.
* Do not call the scheduler concurrently. The scheduler assigns tasks in the order that they are received in a
* particular list. It checks each task against available resources until it finds a match.
* You create your {@code TaskScheduler} by means of the {@link TaskScheduler.Builder}. It provides methods with
* which you can adjust the scheduler's autoscaling rules, fitness calculators, and so forth.
* @see Wikipedia: Builder pattern
public class TaskScheduler {
// Number of VMs per evaluation batch: doSchedule divides the VM count by this value (rounding up)
// to decide how many concurrent assignment evaluations to launch per task.
private static final int PARALLEL_SCHED_EVAL_MIN_BATCH_SIZE = 30;
* The Builder is how you construct a {@link TaskScheduler} object with particular characteristics. Chain
* its methods and then call {@link #build build()} to create a {@code TaskScheduler}.
* @see Wikipedia: Builder pattern
public final static class Builder {
private Action1 leaseRejectAction=null;
private long leaseOfferExpirySecs=120;
private int maxOffersToReject=4;
private boolean rejectAllExpiredOffers=false;
private VMTaskFitnessCalculator fitnessCalculator = new DefaultFitnessCalculator();
private String autoScaleByAttributeName=null;
private String autoScalerMapHostnameAttributeName=null;
private String autoScaleDownBalancedByAttributeName=null;
private ScaleDownOrderEvaluator scaleDownOrderEvaluator;
private Map weightedScaleDownConstraintEvaluators;
private Action1 autoscalerCallback=null;
private long delayAutoscaleUpBySecs=0L;
private long delayAutoscaleDownBySecs=0L;
private List autoScaleRules=new ArrayList<>();
private Func1 isFitnessGoodEnoughFunction = new Func1() {
public Boolean call(Double f) {
return f>1.0;
private boolean disableShortfallEvaluation=false;
private Map resAllocs=null;
private boolean singleOfferMode=false;
* (Required) Call this method to establish a method that your task scheduler will call to notify you
* that it has rejected a resource offer. In this method, you should tell Mesos that you are declining
* the associated offer.
* @param leaseRejectAction the action to trigger when the task scheduler rejects a VM lease, with the
* lease being rejected as the only argument
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
public Builder withLeaseRejectAction(Action1 leaseRejectAction) {
this.leaseRejectAction = leaseRejectAction;
return this;
* Call this method to set the expiration time for resource offers. Your task scheduler will reject any
* offers that remain unused if this expiration period from the time of the offer expires. This ensures
* your scheduler will not hoard unuseful offers. The default is 120 seconds.
* @param leaseOfferExpirySecs the amount of time the scheduler will keep an unused lease available for
* a later-scheduled task before it considers the lease to have expired, in
* seconds
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
public Builder withLeaseOfferExpirySecs(long leaseOfferExpirySecs) {
this.leaseOfferExpirySecs = leaseOfferExpirySecs;
return this;
* Call this method to set the maximum number of offers to reject within a time period equal to lease expiry
* seconds, set with {@code leaseOfferExpirySecs()}. Default is 4.
* @param maxOffersToReject Maximum number of offers to reject.
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
public Builder withMaxOffersToReject(int maxOffersToReject) {
this.maxOffersToReject = maxOffersToReject;
return this;
* Indicate that all offers older than the set expiry time must be rejected. By default this is set to false.
* If false, Fenzo rejects a maximum number of offers set using {@link #withMaxOffersToReject(int)} per each
* time period spanning the expiry time, set by {@link #withLeaseOfferExpirySecs(long)}.
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
public Builder withRejectAllExpiredOffers() {
this.rejectAllExpiredOffers = true;
this.maxOffersToReject = Integer.MAX_VALUE;
return this;
* Call this method to add a fitness calculator that your scheduler will use to compute the suitability
* of a particular host for a particular task. You can only add a single fitness calculator to a
* scheduler; if you attempt to add a second fitness calculator, it will override the first one.
* @param fitnessCalculator the fitness calculator you want this scheduler to use in its evaluations
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
* @see Fitness Calculators
public Builder withFitnessCalculator(VMTaskFitnessCalculator fitnessCalculator) {
this.fitnessCalculator = fitnessCalculator;
return this;
* Call this method to indicate which host attribute you want your task scheduler to use in order to
* distinguish which hosts are in which autoscaling groups. You must call this method before you call
* {@link #withAutoScaleRule(AutoScaleRule)}.
* @param name the name of the host attribute that defines which autoscaling group it is in
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
* @see Autoscaling
public Builder withAutoScaleByAttributeName(String name) {
this.autoScaleByAttributeName = name;
return this;
* Use the given host attribute name to determine the alternate hostname of virtual machine to use as an
* argument for an autoscaling action.
* In some circumstances (for instance with Amazon Web Services), the host name is not the correct
* identifier for the host in the context of an autoscaling action (for instance, in AWS, you need the
* EC2 instance identifier). If this is the case for your system, you need to implement a function that
* maps the host name to the identifier for the host in an autoscaling context so that Fenzo can perform
* autoscaling properly. You provide this function to the task manager by means of this builder method.
* @param name the attribute name to use as the alternate host identifier in an autoscaling context
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
* @see Autoscaling
public Builder withAutoScalerMapHostnameAttributeName(String name) {
this.autoScalerMapHostnameAttributeName = name;
return this;
* Call this method to tell the autoscaler to try to maintain a balance of host varieties when it scales
* down a cluster. Pass the method a host attribute, and the autoscaler will attempt to scale down in
* such a way as to maintain a similar number of hosts with each value for that attribute.
* @param name the name of the attribute
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
* @see Autoscaling
public Builder withAutoScaleDownBalancedByAttributeName(String name) {
this.autoScaleDownBalancedByAttributeName = name;
return this;
* Call this method to set {@link ScaleDownOrderEvaluator}.
* @param scaleDownOrderEvaluator scale down ordering evaluator
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
public Builder withScaleDownOrderEvaluator(ScaleDownOrderEvaluator scaleDownOrderEvaluator) {
this.scaleDownOrderEvaluator = scaleDownOrderEvaluator;
return this;
* Ordered list of scale down constraints evaluators.
* @param weightedScaleDownConstraintEvaluators scale down evaluators
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
public Builder withWeightedScaleDownConstraintEvaluators(Map weightedScaleDownConstraintEvaluators) {
this.weightedScaleDownConstraintEvaluators = weightedScaleDownConstraintEvaluators;
return this;
* Use the given function to determine if the fitness of a host for a task is good enough that the task
* scheduler should stop looking for a more fit host. Pass this method a function that takes a value
* between 0.0 (completely unfit) and 1.0 (perfectly fit) that describes the fitness of a particular
* host for a particular task, and decides, by returning a boolean value, whether that value is a "good
* enough" fit such that the task scheduler should go ahead and assign the task to the host. If you
* write this function to only return true for values at or near 1.0, the task scheduler will spend more
* time searching for a good fit; if you write the function to return true for lower values, the task
* scheduler will be able to find a host to assign the task to more quickly.
* By default, if you do not build your task scheduler by passing a function into this method, the
* task scheduler will always search all of the available hosts for the best possible fit for every
* task.
* @param f a single-argument function that accepts a double parameter, representing the fitness, and
* returns a {@code Boolean} indicating whether the fitness is good enough to constitute a
* successful match between the host and task
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
public Builder withFitnessGoodEnoughFunction(Func1 f) {
this.isFitnessGoodEnoughFunction = f;
return this;
* Disable resource shortfall evaluation. The shortfall evaluation is performed when evaluating the
* autoscaling needs. This is useful for evaluating the actual resources needed to scale up by, for
* pending tasks, which may be greater than the number of resources scaled up by thresholds based scale
* up.
* This evaluation can be computaionally expensive and/or may scale up aggressively, initially, to more
* resources than needed. The initial aggressive scale up is corrected later by scale down, which is
* triggered by scale down evaluation after a cool down period transpires.
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
* @see Autoscaling
public Builder disableShortfallEvaluation() {
disableShortfallEvaluation = true;
return this;
* Call this method to set the initial limitations on how many resources will be available to each task
* group.
* @param resAllocs a Map with the task group name as keys and resource allocation limits as values
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
* @see Resource Allocation
* Limits
public Builder withInitialResAllocs(Map resAllocs) {
this.resAllocs = resAllocs;
return this;
* Adds an autoscaling rule that governs the behavior by which this scheduler will autoscale hosts of a
* certain type. You can chain this method multiple times, adding a new autoscaling rule each time (one
* for each autoscale group).
* Before you call this method you must first call
* {@link #withAutoScaleByAttributeName withAutoScaleByAttributeName()} to indicate which host
* attribute you are using to identify which hosts are in which autoscaling groups.
* @param rule the autoscaling rule to add
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
* @throws IllegalArgumentException if you have not properly initialized autoscaling or if your rule is
* poorly formed
* @see Autoscaling
// Rejects the rule with IllegalArgumentException unless an autoscale-by attribute name has been set first.
// NOTE(review): this copy of the method is damaged. The conditions guarding the two "Min Idle"
// exceptions, the statement that records the rule, and the closing brace are missing, so as written
// the second throw is unconditional. Restore from version control before building.
public Builder withAutoScaleRule(AutoScaleRule rule) {
if(autoScaleByAttributeName==null || autoScaleByAttributeName.isEmpty())
throw new IllegalArgumentException("Auto scale by attribute name must be set before setting rules");
// NOTE(review): guard condition missing (presumably a check on the rule's minimum idle hosts; confirm).
throw new IllegalArgumentException("Min Idle must be >0");
// NOTE(review): guard condition missing (presumably min idle compared against max idle; confirm).
throw new IllegalArgumentException("Min Idle must be <= Max Idle hosts");
return this;
* The callback you pass to this method receives an indication when an autoscale action is to be
* performed. This indicates which autoscale rule prompted the action and whether the action is to scale
* up or scale down the autoscale group. The callback then initiates the appropriate scaling actions.
* @see Autoscaling
public Builder withAutoScalerCallback(Action1 callback) {
this.autoscalerCallback = callback;
return this;
* Delay the autoscale up actions to reduce unnecessary actions due to short periods of breach of scale up
* policy rules. Such scale ups can be caused by, for example, the periodic offer rejections that result in
* offers coming back shortly. They can also be caused by certain environments where tasks are first scheduled
* to replace existing tasks.
* The autoscaler takes the scale up action based on the latest scale up request value after the delay.
* The default is 0 secs. Ideally, you should set this to be at least two times the larger of the two values:
* - Delay between successive calls to {@link TaskScheduler#scheduleOnce(List, List)}.
* - Delay in get a rejected offer back from Mesos.
* @param delayAutoscaleUpBySecs Delay autoscale up actions by this many seconds.
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
* @throws IllegalArgumentException if you give negative number for {@code delayAutoscalerbySecs}.
* @see Autoscaling
public Builder withDelayAutoscaleUpBySecs(long delayAutoscaleUpBySecs) {
if(delayAutoscaleUpBySecs < 0L)
throw new IllegalArgumentException("Delay secs can't be negative: " + delayAutoscaleUpBySecs);
this.delayAutoscaleUpBySecs = delayAutoscaleUpBySecs;
return this;
* Delay the autoscale down actions to reduce unnecessary actions due to short periods of breach of scale down
* policy rules. Such scale downs can be caused by, for example, certain environments where existing tasks are
* removed before replacing them with new tasks.
* The autoscaler takes the scale down action based on the latest scale down request value after the delay.
* The default is 0 secs. Ideally, you should set this to be at least two times the delay before terminated
* tasks are replaced successfully.
* @param delayAutoscaleDownBySecs Delay autoscale down actions by this many seconds.
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
* @throws IllegalArgumentException if you give negative number for {@code delayAutoscalerbySecs}.
* @see Autoscaling
public Builder withDelayAutoscaleDownBySecs(long delayAutoscaleDownBySecs) {
if(delayAutoscaleDownBySecs < 0L)
throw new IllegalArgumentException("Delay secs can't be negative: " + delayAutoscaleDownBySecs);
this.delayAutoscaleDownBySecs = delayAutoscaleDownBySecs;
return this;
* Indicate that the cluster receives resource offers only once per VM (host). Normally, Mesos sends resource
* offers multiple times, as resources free up on the host upon completion of various tasks. This method
* provides an experimental support for a mode where Fenzo can be made aware of the entire set of resources
* on hosts once, in a model similar to Amazon ECS. Fenzo internally keeps track of total versus used resources
* on the host based on tasks assigned and then later unassigned. No further resource offers are expected after
* the initial one.
* @param b True if only one resource offer is expected per host, false by default.
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
public Builder withSingleOfferPerVM(boolean b) {
this.singleOfferMode = b;
return this;
* Creates a {@link TaskScheduler} based on the various builder methods you have chained.
* @return a {@code TaskScheduler} built according to the specifications you indicated
public TaskScheduler build() {
if(scaleDownOrderEvaluator == null) {
if(weightedScaleDownConstraintEvaluators != null) {
scaleDownOrderEvaluator = new NoOpScaleDownOrderEvaluator();
} else {
if(weightedScaleDownConstraintEvaluators == null) {
weightedScaleDownConstraintEvaluators = Collections.emptyMap();
return new TaskScheduler(this);
private static class EvalResult {
List assignmentResults;
TaskAssignmentResult result;
int numAllocationTrials;
Exception exception;
private EvalResult(List assignmentResults, TaskAssignmentResult result, int numAllocationTrials, Exception e) {
this.assignmentResults = assignmentResults;
this.result = result;
this.numAllocationTrials = numAllocationTrials;
this.exception = e;
// Tracks the VMs and their leases; constructed from the builder's lease settings.
private final AssignableVMs assignableVMs;
private static final Logger logger = LoggerFactory.getLogger(TaskScheduler.class);
// Minimum interval, in seconds, between purges of inactive VMs performed after a scheduling run.
private static final long purgeVMsIntervalSecs = 60;
// Wall-clock time (millis) of the last inactive-VM purge.
private long lastVMPurgeAt=System.currentTimeMillis();
private final Builder builder;
// Entered for the duration of each scheduling run (see scheduleOnce).
private final StateMonitor stateMonitor;
// May be null: only created when the builder has a non-empty autoScaleByAttributeName.
private final AutoScaler autoScaler;
// Size of the fixed thread pool below: one thread per available processor.
private final int EXEC_SVC_THREADS=Runtime.getRuntime().availableProcessors();
private final ExecutorService executorService = Executors.newFixedThreadPool(EXEC_SVC_THREADS);
private final AtomicBoolean isShutdown = new AtomicBoolean();
private final ResAllocsEvaluater resAllocsEvaluator;
private final TaskTracker taskTracker;
// When true, the public scheduleOnce(List, List) refuses direct calls with usingSchedSvcMesg.
private volatile boolean usingSchedulingService = false;
private final String usingSchedSvcMesg = "Invalid call when using task scheduling service";
// Constructs the scheduler from a Builder. Throws IllegalArgumentException when the builder has no
// lease reject action. Creates the state monitor, task tracker, resource-allocation evaluator and
// AssignableVMs, and creates an AutoScaler only when autoScaleByAttributeName is non-empty.
// NOTE(review): this copy is damaged. The statements controlled by the three trailing `if`s, the
// body of the final `else`, and all closing braces are missing. Restore from version control.
private TaskScheduler(Builder builder) {
if(builder.leaseRejectAction ==null)
throw new IllegalArgumentException("Lease reject action must be non-null");
this.builder = builder;
this.stateMonitor = new StateMonitor();
taskTracker = new TaskTracker();
resAllocsEvaluator = new ResAllocsEvaluater(taskTracker, builder.resAllocs);
// NOTE(review): builder.autoScaleByAttributeName is passed twice below; confirm against the
// AssignableVMs constructor signature that both positions really expect this value.
assignableVMs = new AssignableVMs(taskTracker, builder.leaseRejectAction,
builder.leaseOfferExpirySecs, builder.maxOffersToReject, builder.autoScaleByAttributeName,
builder.singleOfferMode, builder.autoScaleByAttributeName);
if(builder.autoScaleByAttributeName != null && !builder.autoScaleByAttributeName.isEmpty()) {
// No constraint executor is created unless a scale down order evaluator was configured.
ScaleDownConstraintExecutor scaleDownConstraintExecutor = builder.scaleDownOrderEvaluator == null
? null : new ScaleDownConstraintExecutor(builder.scaleDownOrderEvaluator, builder.weightedScaleDownConstraintEvaluators);
autoScaler = new AutoScaler(builder.autoScaleByAttributeName, builder.autoScalerMapHostnameAttributeName,
builder.autoScaleRules, assignableVMs,
builder.disableShortfallEvaluation, assignableVMs.getActiveVmGroups(),
assignableVMs.getVmCollection(), scaleDownConstraintExecutor);
// NOTE(review): the statements controlled by the next three conditions are missing.
if(builder.autoscalerCallback != null)
if(builder.delayAutoscaleDownBySecs > 0L)
if(builder.delayAutoscaleUpBySecs > 0L)
else {
void checkIfShutdown() throws IllegalStateException {
throw new IllegalStateException("TaskScheduler already shutdown");
* Set the autoscale call back action. The callback you pass to this method receives an indication when an
* autoscale action is to be performed, telling it which autoscale rule prompted the action and whether the
* action is to scale up or scale down the autoscale group. The callback then initiates the appropriate
* scaling actions.
* @param callback the callback to invoke for autoscale actions
* @throws IllegalStateException if no autoscaler was established
* @see Autoscaling
// NOTE(review): this copy is damaged. The condition guarding the throw (the documented "no autoscaler
// was established" case), the statement that hands the callback to the autoscaler, and the closing
// brace are missing; as written the method always throws. Restore from version control.
public void setAutoscalerCallback(Action1 callback) throws IllegalStateException {
throw new IllegalStateException("No autoScaler setup");
public TaskTracker getTaskTracker() {
return taskTracker;
private TaskAssignmentResult getSuccessfulResult(List results) {
double bestFitness=0.0;
TaskAssignmentResult bestResult=null;
for(int r=results.size()-1; r>=0; r--) {
// change to using fitness value from assignment result
TaskAssignmentResult res = results.get(r);
if(res!=null && res.isSuccessful()) {
if(bestResult==null || res.getFitness()>bestFitness) {
bestFitness = res.getFitness();
bestResult = res;
return bestResult;
private boolean isGoodEnough(TaskAssignmentResult result) {
* Get the current mapping of resource allocations registered with the scheduler.
* @return current mapping of resource allocations
* @see Resource Allocation
* Limits
public Map getResAllocs() {
return resAllocsEvaluator.getResAllocs();
* Add a new resource allocation, or replace an existing one of the same name.
* @param resAllocs the resource allocation to add or replace
* @see Resource Allocation
* Limits
// NOTE(review): the body and closing brace of this method are missing from this copy of the file;
// restore them from version control before building.
public void addOrReplaceResAllocs(ResAllocs resAllocs) {
* Remove a resource allocation associated with the specified name.
* @param groupName the name of the resource allocation to remove
* @see Resource Allocation
* Limits
// NOTE(review): the body and closing brace of this method are missing from this copy of the file;
// restore them from version control before building.
public void removeResAllocs(String groupName) {
* Get the autoscale rules currently registered with the scheduler.
* @return a collection of currently registered autoscale rules
* @see Autoscaling
public Collection getAutoScaleRules() {
return Collections.emptyList();
return autoScaler.getRules();
* Add a new autoscale rule to those used by this scheduler. If a rule with the same name exists, it is
* replaced. This autoscale rule will be used the next time the scheduler invokes its autoscale action.
* @param rule the autoscale rule to add
* @see Autoscaling
// NOTE(review): the body and closing brace of this method are missing from this copy of the file;
// restore them from version control before building.
public void addOrReplaceAutoScaleRule(AutoScaleRule rule) {
* Remove the autoscale rule associated with the given name from those used by the scheduler.
* @param ruleName name of the autoscale rule to remove
* @see Autoscaling
// NOTE(review): the body and closing brace of this method are missing from this copy of the file;
// restore them from version control before building.
public void removeAutoScaleRule(String ruleName) {
/* package */ void setUsingSchedulingService(boolean b) {
usingSchedulingService = b;
// NOTE(review): this copy is damaged. The parameter's generic type arguments are truncated
// ("Func1>"), and the statement guarded by the null check plus the closing brace are missing.
// Restore from version control before building.
/* package */ void setTaskToClusterAutoScalerMapGetter(Func1> getter) {
if (autoScaler != null)
/* package */ AutoScaler getAutoScaler() {
return autoScaler;
* Schedule a list of task requests by using any newly-added resource leases in addition to any
* previously-unused leases. This is the main scheduling method that attempts to assign resources to task
* requests. Resource leases are associated with a host name. A host can have zero or more leases. Leases
* that the scheduler does not use in this scheduling run it stores for later use until they expire.
* You cannot add a lease object with an Id equal to that of a stored lease object; {@code scheduleOnce()}
* will throw an {@code IllegalStateException}. Upon throwing this exception, if you provided multiple
* leases in the {@code newLeases} argument, the state of internally maintained list of unused leases will
* be in an indeterminate state - some of the leases may have been successfully added.
* The task scheduler rejects any expired leases before scheduling begins. Then, it combines all leases of a
* host to determine the total available resources on the host. The scheduler then tries each task request,
* in the order that they appear in the given list, for assignment against the available hosts until
* successful. For each task the scheduler returns either a successful assignment result, or, a set of
* assignment failures.
* After the scheduler evaluates all assignments, it will reject remaining leases if they are unused and
* their offer time is further in the past than lease expiration interval. This prevents the scheduler from
* hoarding leases. If you provided an autoscaler, the scheduler then launches autoscale evaluation to run
* asynchronously, which runs each registered autoscale rule based on its policy.
* The successful assignments contain hosts to which tasks have been successfully assigned and the offers for that
* host that were used for the assignments. Fenzo removes those offers from its internal state. Normally, you
* would use those offers to launch the tasks. For any reason if you do not launch those tasks, you must either
* reject the offers to Mesos, or, re-add them to Fenzo with the next call to {@link #scheduleOnce(List, List)}.
* Otherwise, those offers would be "leaked out".
* Unexpected exceptions may arise during scheduling, for example, due to uncaught exceptions in user provided
* plugins. The scheduling routine stops upon catching any unexpected exceptions. These exceptions are surfaced to
* you in one or both of two ways.
* - The returned result object will contain the exceptions encountered in
* {@link SchedulingResult#getExceptions()}. In this case, no assignments would have been made.
* - This method may throw {@code IllegalStateException} with its cause set to the uncaught exception. In this
* case the internal state of Fenzo will be undefined.
* @param requests a list of task requests to match with resources, in their given order
* @param newLeases new resource leases from hosts that the scheduler can use along with any previously
* unused leases
* @return a {@link SchedulingResult} object that contains a task assignment results map and other summaries
* @throws IllegalStateException if you call this method concurrently, or, if you try to add an existing lease
* again, or, if there was unexpected exception during the scheduling iteration, or, if using
* {@link TaskSchedulingService}, which will instead invoke scheduling from within. Unexpected exceptions
* can arise from uncaught exceptions in user defined plugins. It is also thrown if the scheduler has been shutdown
* via the {@link #shutdown()} method.
// Public entry point: refuses to run when a scheduling service owns this scheduler, wraps the request
// list in a TaskIterator and delegates to the iterator-based scheduleOnce overload.
// NOTE(review): this copy is damaged. Generic wildcards are truncated ("List extends TaskRequest>"),
// the fallback iterator used when `requests` is null is missing, the argument to Assignable.success is
// missing, and closing braces are gone. Restore from version control before building.
public SchedulingResult scheduleOnce(
List extends TaskRequest> requests,
List newLeases) throws IllegalStateException {
if (usingSchedulingService)
throw new IllegalStateException(usingSchedSvcMesg);
final Iterator extends TaskRequest> iterator =
requests != null ?
requests.iterator() :
TaskIterator taskIterator = new TaskIterator() {
public Assignable next() {
if (iterator.hasNext())
return Assignable.success(;
// A null return signals the end of the task stream to the consumer of this iterator.
return null;
return scheduleOnce(taskIterator, newLeases);
* Variant of {@link #scheduleOnce(List, List)} that takes a task iterator instead of task list.
* @param taskIterator Iterator for tasks to assign resources to.
* @param newLeases new resource leases from hosts that the scheduler can use along with any previously
* unused leases
* @return a {@link SchedulingResult} object that contains a task assignment results map and other summaries
* @throws IllegalStateException if you call this method concurrently, or, if you try to add an existing lease
* again, or, if there was unexpected exception during the scheduling iteration. For example, unexpected exceptions
* can arise from uncaught exceptions in user defined plugins. It is also thrown if the scheduler has been shutdown
* via the {@link #shutdown()} method.
// Runs one scheduling iteration inside the state monitor, periodically purges inactive VMs, records
// the run time on the result, and converts any non-IllegalStateException failure into an
// IllegalStateException that carries the original exception as its cause.
// NOTE(review): this copy is damaged. The logging call around "Purging inactive VMs" is truncated, the
// purgeInactiveVMs call is not closed, and closing braces are missing. Restore from version control.
/* package */ SchedulingResult scheduleOnce(
TaskIterator taskIterator,
List newLeases) throws IllegalStateException {
try (AutoCloseable
ac = stateMonitor.enter()) {
long start = System.currentTimeMillis();
final SchedulingResult schedulingResult = doSchedule(taskIterator, newLeases);
// Purge at most once per purgeVMsIntervalSecs.
if((lastVMPurgeAt + purgeVMsIntervalSecs*1000) < System.currentTimeMillis()) {
lastVMPurgeAt = System.currentTimeMillis();"Purging inactive VMs");
assignableVMs.purgeInactiveVMs( // explicitly exclude VMs that have assignments
schedulingResult.getResultMap() == null?
Collections.emptySet() :
new HashSet<>(schedulingResult.getResultMap().keySet())
schedulingResult.setRuntime(System.currentTimeMillis() - start);
return schedulingResult;
} catch (Exception e) {
logger.error("Error with scheduling run: " + e.getMessage(), e);
// IllegalStateException is part of this method's contract and is rethrown unchanged.
if(e instanceof IllegalStateException)
throw (IllegalStateException)e;
else {
logger.warn("Unexpected exception: " + e.getMessage());
throw new IllegalStateException("Unexpected exception during scheduling run: " + e.getMessage(), e);
// Core scheduling pass. From what is visible here: it obtains the ordered assignable VMs for the new
// leases, prepares the resource-allocation evaluator, then iterates over the tasks. Tasks that hit a
// resource-allocation limit or exceed the maximum resources are recorded as failures on the
// SchedulingResult; the remaining tasks are evaluated against the VMs through futures of EvalResult,
// and the best successful result is selected with getSuccessfulResult. Afterwards, per-VM assignment
// results are collected into the result map and an AutoScalerInput is built.
// NOTE(review): this copy is heavily damaged. The iterator calls ("=;"), loop exits, the batch-launch
// loop header ("for(int b=0; b() {"), several statement bodies and all closing braces are missing.
// Do not rely on the statement order below; restore the method from version control.
private SchedulingResult doSchedule(
TaskIterator taskIterator,
List newLeases) throws Exception {
AtomicInteger rejectedCount = new AtomicInteger();
List avms = assignableVMs.prepareAndGetOrderedVMs(newLeases, rejectedCount);
logger.debug("Found " + avms.size() + " VMs with non-zero offers to assign from");
final boolean hasResAllocs = resAllocsEvaluator.prepare();
//"Got " + avms.size() + " AVMs to schedule on");
int totalNumAllocations=0;
Set failedTasksForAutoScaler = new HashSet<>();
Map resultMap = new HashMap<>(avms.size());
final SchedulingResult schedulingResult = new SchedulingResult(resultMap);
if(avms.isEmpty()) {
while (true) {
final Assignable extends TaskRequest> taskOrFailure =;
if (taskOrFailure == null)
} else {
while (true) {
final Assignable extends TaskRequest> taskOrFailure =;
//System.out.println("*************** TaskSched: task=" + (task == null? "null" : task.getId()));
if (taskOrFailure == null)
if(taskOrFailure.hasFailure()) {
Collections.singletonList(new TaskAssignmentResult(
TaskRequest task = taskOrFailure.getTask();
if(hasResAllocs) {
if(resAllocsEvaluator.taskGroupFailed(task.taskGroupName())) {
logger.debug("Resource allocation limits reached for task: " + task.getId());
final AssignmentFailure resAllocsFailure = resAllocsEvaluator.hasResAllocs(task);
if(resAllocsFailure != null) {
final List failures = Collections.singletonList(new TaskAssignmentResult(assignableVMs.getDummyVM(),
task, false, Collections.singletonList(resAllocsFailure), null, 0.0));
schedulingResult.addFailures(task, failures);
failedTasksForAutoScaler.remove(task); // don't scale up for resAllocs failures
logger.debug("Resource allocation limit reached for task " + task.getId() + ": " + resAllocsFailure);
final AssignmentFailure maxResourceFailure = assignableVMs.getFailedMaxResource(null, task);
if(maxResourceFailure != null) {
final List failures = Collections.singletonList(new TaskAssignmentResult(assignableVMs.getDummyVM(), task, false,
Collections.singletonList(maxResourceFailure), null, 0.0));
schedulingResult.addFailures(task, failures);
logger.debug("Task " + task.getId() + ": maxResource failure: " + maxResourceFailure);
// create batches of VMs to evaluate assignments concurrently across the batches
final BlockingQueue virtualMachines = new ArrayBlockingQueue<>(avms.size(), false, avms);
int nThreads = (int)Math.ceil((double)avms.size()/ PARALLEL_SCHED_EVAL_MIN_BATCH_SIZE);
List> futures = new ArrayList<>();
logger.debug("Launching " + nThreads + " threads for evaluating assignments for task " + task.getId());
for(int b=0; b() {
public EvalResult call() throws Exception {
return evalAssignments(task, virtualMachines);
List results = new ArrayList<>();
List bestResults = new ArrayList<>();
for(Future f: futures) {
try {
EvalResult evalResult = f.get();
if(evalResult.exception!=null) {
logger.warn("Error during concurrent task assignment eval - " + evalResult.exception.getMessage(),
else {
logger.debug("Task " + task.getId() + ": best result so far: " + evalResult.result);
totalNumAllocations += evalResult.numAllocationTrials;
} catch (InterruptedException|ExecutionException e) {
logger.error("Unexpected during concurrent task assignment eval - " + e.getMessage(), e);
TaskAssignmentResult successfulResult = getSuccessfulResult(bestResults);
List failures = new ArrayList<>();
if(successfulResult == null) {
logger.debug("Task " + task.getId() + ": no successful results");
for(EvalResult er: results)
schedulingResult.addFailures(task, failures);
else {
logger.debug("Task " + task.getId() + ": found successful assignment on host " + successfulResult.getHostname());
List idleResourcesList = new ArrayList<>();
if(schedulingResult.getExceptions().isEmpty()) {
List expirableLeases = new ArrayList<>();
for (AssignableVirtualMachine avm : avms) {
VMAssignmentResult assignmentResult = avm.resetAndGetSuccessfullyAssignedRequests();
if (assignmentResult == null) {
if (!avm.hasPreviouslyAssignedTasks())
} else {
resultMap.put(avm.getHostname(), assignmentResult);
final AutoScalerInput autoScalerInput = new AutoScalerInput(idleResourcesList, failedTasksForAutoScaler);
if (autoScaler != null)
return schedulingResult;
// Delegates pseudo-host creation to assignableVMs, forwarding groupCounts unchanged.
// The second argument is a lookup function: autoScaler::getRule when an autoscaler is configured,
// otherwise a function that returns null for every name.
// NOTE(review): the generic type arguments in this signature look incomplete - confirm against the full file.
/* package */ Map> createPseudoHosts(Map groupCounts) {
return assignableVMs.createPseudoHosts(groupCounts, autoScaler == null? name -> null : autoScaler::getRule);
/* package */ void removePsuedoHosts(Map> hostsMap) {
/* package */ void removePsuedoAssignments() {
taskTracker.clearAssignedTasks(); // this should suffice for pseudo assignments
* Returns the state of resources on all known hosts. You can use this for debugging or informational
* purposes (occasionally). This method obtains and holds a lock for the duration of creating the state
* information. Scheduling runs are blocked around the lock.
* @return a Map of state information with the hostname as the key and a Map of resource state as the value.
* The resource state Map contains a resource as the key and a two element Double array - the first
* element of which contains the amount of the resource used and the second element contains the
* amount still available (available does not include used).
* @see How to Learn Which Resources Are Available on Which Hosts
* @throws IllegalStateException if called concurrently with {@link #scheduleOnce(List, List)} or if called when
* using a {@link TaskSchedulingService}.
public Map> getResourceStatus() throws IllegalStateException {
if (usingSchedulingService)
throw new IllegalStateException(usingSchedSvcMesg);
return getResourceStatusIntl();
/* package */ Map> getResourceStatusIntl() {
try (AutoCloseable ac = stateMonitor.enter()) {
return assignableVMs.getResourceStatus();
} catch (Exception e) {
logger.error("Unexpected error from state monitor: " + e.getMessage());
throw new RuntimeException(e);
* Returns the current state of all known hosts. You might occasionally use this for debugging or
* informational purposes. If you call this method, it will obtain and hold a lock for as long as it takes
* to create the state information. Scheduling runs are blocked around the lock.
* @return a list containing the current state of all known VMs
* @throws IllegalStateException if called concurrently with {@link #scheduleOnce(List, List)} or if called when
* using a {@link TaskSchedulingService}.
* @see How to Learn the Amount of Resources Currently Available on Particular Hosts
public List getVmCurrentStates() throws IllegalStateException {
if (usingSchedulingService)
throw new IllegalStateException(usingSchedSvcMesg);
return getVmCurrentStatesIntl();
/* package */ List getVmCurrentStatesIntl() throws IllegalStateException {
try (AutoCloseable ac = stateMonitor.enter()) {
return assignableVMs.getVmCurrentStates();
catch (Exception e) {
logger.error("Unexpected error from state monitor: " + e.getMessage(), e);
throw new IllegalStateException(e);
private EvalResult evalAssignments(TaskRequest task, BlockingQueue virtualMachines) {
// This number below sort of controls minimum machines to eval, choose carefully.
// Having it too small increases overhead of getting next machine to evaluate on.
// Having it too high increases latency of thread before it returns when done
try {
int N=10;
List buf = new ArrayList<>(N);
List results = new ArrayList<>();
while(true) {
int n = virtualMachines.drainTo(buf, N);
if(n == 0)
return new EvalResult(results, getSuccessfulResult(results), results.size(), null);
for(int m=0; m
* In addition, in your framework's task completion callback that you supply to Mesos, you must call your
* task scheduler's {@link #getTaskUnAssigner() getTaskUnassigner().call()} method to notify Fenzo that the
* task is no longer assigned.
* Some scheduling optimizers need to know not only which tasks are waiting to be scheduled and which hosts
* have resource offers available, but also which tasks have previously been assigned and are currently
* running on hosts. These two methods help Fenzo provide this information to these scheduling optimizers.
* Note that you may not call the task assigner action concurrently with
* {@link #scheduleOnce(java.util.List, java.util.List) scheduleOnce()}. If you do so, the task assigner
* action will throw an {@code IllegalStateException}.
* @return a task assigner action
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
public Action2 getTaskAssigner() throws IllegalStateException {
if (usingSchedulingService)
throw new IllegalStateException(usingSchedSvcMesg);
return getTaskAssignerIntl();
/* package */Action2 getTaskAssignerIntl() throws IllegalStateException {
return new Action2() {
public void call(TaskRequest request, String hostname) {
try (AutoCloseable ac = stateMonitor.enter()) {
assignableVMs.setTaskAssigned(request, hostname);
} catch (Exception e) {
logger.error("Unexpected error from state monitor: " + e.getMessage(), e);
throw new IllegalStateException(e);
* Get the task unassigner action. Call this object's {@code call()} method to unassign an assignment you
* have previously set for each task that completes so that internal state is maintained correctly. Pass two
* String arguments to this call method: the taskId and the hostname.
* For each task you assign and launch, you must call your task scheduler's
* {@link #getTaskAssigner() getTaskAssigner().call()} method in order to notify Fenzo that the task has
* actually been deployed on a host.
* In addition, in your framework's task completion callback that you supply to Mesos, you must call your
* task scheduler's {@code getTaskUnassigner().call()} method to notify Fenzo that the
* task is no longer assigned.
* Some scheduling optimizers need to know not only which tasks are waiting to be scheduled and which hosts
* have resource offers available, but also which tasks have previously been assigned and are currently
* running on hosts. These two methods help Fenzo provide this information to these scheduling optimizers.
* This method is safe to be called concurrently with other calls to {@code TaskScheduler}. The tasks to be
* unassigned are stored internally and actually unassigned at the beginning of the next scheduling iteration,
* that is, the next time {@link #scheduleOnce(List, List)} is called.
* @return the task un-assigner action
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
public Action2 getTaskUnAssigner() throws IllegalStateException {
return new Action2() {
public void call(String taskId, String hostname) {
assignableVMs.unAssignTask(taskId, hostname);
* Disable the virtual machine with the specified hostname. If the scheduler is not yet aware of the host
* with that hostname, it creates a new object for it, and therefore your disabling of it will be remembered
* when offers that concern that host come in later. The scheduler will not use disabled hosts for
* allocating resources to tasks.
* @param hostname the name of the host to disable
* @param durationMillis the length of time, starting from now, in milliseconds, during which the host will
* be disabled
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
public void disableVM(String hostname, long durationMillis) throws IllegalStateException {"Disable VM " + hostname + " for " + durationMillis + " millis");
assignableVMs.disableUntil(hostname, System.currentTimeMillis()+durationMillis);
* Disable the virtual machine with the specified ID. The ID is first resolved to a hostname; if no known
* VM has that ID, nothing is disabled and this method returns {@code false}. Otherwise the host is
* disabled exactly as by {@link #disableVM(String, long)}. The scheduler will not use disabled hosts for
* allocating resources to tasks.
* @param vmID the ID of the host to disable
* @param durationMillis the length of time, starting from now, in milliseconds, during which the host will
* be disabled
* @return {@code true} if the ID matches a known VM, {@code false} otherwise.
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
public boolean disableVMByVMId(String vmID, long durationMillis) throws IllegalStateException {
final String hostname = assignableVMs.getHostnameFromVMId(vmID);
if(hostname == null)
return false;
disableVM(hostname, durationMillis);
return true;
* Enable the VM with the specified host name. Hosts start in an enabled state, so you only need to call
* this method if you have previously explicitly disabled the host.
* @param hostname the name of the host to enable
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
public void enableVM(String hostname) throws IllegalStateException {"Enabling VM " + hostname);
* Set how the scheduler determines to which group the VM (host) belongs. You can group hosts. Which group a
* host belongs to is determined by the value of a particular attribute in its offers. You can set which
* attribute defines group membership by naming it in this method.
* @param attributeName the name of the attribute that determines a VM's group
public void setActiveVmGroupAttributeName(String attributeName) {
* Set the list of VM group names that are active. VMs (hosts) that belong to groups that you do not include
* in this list are said to be disabled. The scheduler does not use the resources of disabled hosts when it
* allocates tasks. If you pass in a null list, this indicates that the scheduler should consider all groups
* to be enabled.
* @param vmGroups a list of VM group names that the scheduler is to consider to be enabled, or {@code null}
* if the scheduler is to consider every group to be enabled
public void setActiveVmGroups(List vmGroups) {
* Mark task scheduler as shutdown and shutdown any thread pool executors created.
public void shutdown() {
if(isShutdown.compareAndSet(false, true)) {
if(autoScaler != null)