// com.netflix.fenzo.TaskScheduler (Maven / Gradle / Ivy)
/*
* Copyright 2015 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.fenzo;
import com.netflix.fenzo.plugins.NoOpScaleDownOrderEvaluator;
import com.netflix.fenzo.queues.Assignable;
import com.netflix.fenzo.queues.QueuableTask;
import com.netflix.fenzo.sla.ResAllocs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.netflix.fenzo.functions.Action1;
import com.netflix.fenzo.functions.Action2;
import com.netflix.fenzo.functions.Func1;
import java.util.*;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
/**
* A scheduling service that you can use to optimize the assignment of tasks to hosts within a Mesos framework.
* Call the {@link #scheduleOnce scheduleOnce()} method with a list of task requests and a list of new resource
* lease offers, and that method will return a set of task assignments.
*
* The {@code TaskScheduler} stores any unused lease offers and will apply them during future calls to
* {@code scheduleOnce()} until a time expires, which is defined by the lease offer expiry time that you set
* when you build the {@code TaskScheduler} (the default is 120 seconds). Upon reaching the expiry time, the
* {@code TaskScheduler} rejects expired resource lease offers by invoking the action you supplied when you
* built the {@code TaskScheduler}.
*
* Note that when you launch a task that has been scheduled by the {@code TaskScheduler}, you should call
* the task assigner action available from the {@link #getTaskAssigner getTaskAssigner()} method. When that
* task completes, you should call the task unassigner action available from the
* {@link #getTaskUnAssigner getTaskUnAssigner()} method. These actions make the {@code TaskScheduler} keep
* track of launched tasks. The {@code TaskScheduler} then makes these tracked tasks available to its
* scheduling optimization functions.
*
* Do not call the scheduler concurrently. The scheduler assigns tasks in the order that they are received in a
* particular list. It checks each task against available resources until it finds a match.
*
* You create your {@code TaskScheduler} by means of the {@link TaskScheduler.Builder}. It provides methods with
* which you can adjust the scheduler's autoscaling rules, fitness calculators, and so forth.
*
* @see "Wikipedia: Builder pattern"
*/
public class TaskScheduler {
// NOTE(review): not referenced in the visible part of this file; judging by the name, presumably the
// smallest batch size for which scheduling evaluation is run in parallel — confirm against its usage.
private static final int PARALLEL_SCHED_EVAL_MIN_BATCH_SIZE = 30;
/**
 * The Builder is how you construct a {@link TaskScheduler} object with particular characteristics. Chain
 * its methods and then call {@link #build build()} to create a {@code TaskScheduler}.
 *
 * @see "Wikipedia: Builder pattern"
 */
public final static class Builder {
    private Action1 leaseRejectAction = null;
    private long leaseOfferExpirySecs = 120;
    private int maxOffersToReject = 4;
    private boolean rejectAllExpiredOffers = false;
    private VMTaskFitnessCalculator fitnessCalculator = new DefaultFitnessCalculator();
    private String autoScaleByAttributeName = null;
    private String autoScalerMapHostnameAttributeName = null;
    private String autoScaleDownBalancedByAttributeName = null;
    private ScaleDownOrderEvaluator scaleDownOrderEvaluator;
    private Map weightedScaleDownConstraintEvaluators;
    private Action1 autoscalerCallback = null;
    private long delayAutoscaleUpBySecs = 0L;
    private long delayAutoscaleDownBySecs = 0L;
    private long disabledVmDurationInSecs = 0L;
    private List autoScaleRules = new ArrayList<>();
    // Fitness values are documented as lying between 0.0 and 1.0, so "f > 1.0" never holds: by default
    // no fit is ever "good enough" and the scheduler keeps looking for the best possible host.
    private Func1<Double, Boolean> isFitnessGoodEnoughFunction = new Func1<Double, Boolean>() {
        @Override
        public Boolean call(Double f) {
            return f > 1.0;
        }
    };
    private boolean disableShortfallEvaluation = false;
    private Map resAllocs = null;
    private boolean singleOfferMode = false;

    /**
     * (Required) Call this method to establish a method that your task scheduler will call to notify you
     * that it has rejected a resource offer. In this method, you should tell Mesos that you are declining
     * the associated offer.
     *
     * @param leaseRejectAction the action to trigger when the task scheduler rejects a VM lease, with the
     *                          lease being rejected as the only argument
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     */
    public Builder withLeaseRejectAction(Action1 leaseRejectAction) {
        this.leaseRejectAction = leaseRejectAction;
        return this;
    }

    /**
     * Call this method to set the expiration time for resource offers. Your task scheduler will reject any
     * offers that remain unused once this expiration period, counted from the time of the offer, has
     * elapsed. This ensures your scheduler will not hoard offers it has no use for. The default is 120
     * seconds.
     *
     * @param leaseOfferExpirySecs the amount of time the scheduler will keep an unused lease available for
     *                             a later-scheduled task before it considers the lease to have expired, in
     *                             seconds
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     */
    public Builder withLeaseOfferExpirySecs(long leaseOfferExpirySecs) {
        this.leaseOfferExpirySecs = leaseOfferExpirySecs;
        return this;
    }

    /**
     * Call this method to set the maximum number of offers to reject within a time period equal to lease
     * expiry seconds, set with {@link #withLeaseOfferExpirySecs(long)}. Default is 4.
     *
     * The value is ignored if {@link #withRejectAllExpiredOffers()} has already been called on this builder.
     *
     * @param maxOffersToReject Maximum number of offers to reject.
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     */
    public Builder withMaxOffersToReject(int maxOffersToReject) {
        if (!rejectAllExpiredOffers) {
            this.maxOffersToReject = maxOffersToReject;
        }
        return this;
    }

    /**
     * Indicate that all offers older than the set expiry time must be rejected. By default this is set to
     * false. If false, Fenzo rejects a maximum number of offers set using {@link #withMaxOffersToReject(int)}
     * per each time period spanning the expiry time, set by {@link #withLeaseOfferExpirySecs(long)}.
     *
     * Calling this method raises the maximum number of offers to reject to {@link Integer#MAX_VALUE}.
     *
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     */
    public Builder withRejectAllExpiredOffers() {
        this.rejectAllExpiredOffers = true;
        this.maxOffersToReject = Integer.MAX_VALUE;
        return this;
    }

    /**
     * Call this method to add a fitness calculator that your scheduler will use to compute the suitability
     * of a particular host for a particular task. You can only add a single fitness calculator to a
     * scheduler; if you attempt to add a second fitness calculator, it will override the first one.
     *
     * @param fitnessCalculator the fitness calculator you want this scheduler to use in its evaluations
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @see "Fitness Calculators"
     */
    public Builder withFitnessCalculator(VMTaskFitnessCalculator fitnessCalculator) {
        this.fitnessCalculator = fitnessCalculator;
        return this;
    }

    /**
     * Call this method to indicate which host attribute you want your task scheduler to use in order to
     * distinguish which hosts are in which autoscaling groups. You must call this method before you call
     * {@link #withAutoScaleRule(AutoScaleRule)}.
     *
     * @param name the name of the host attribute that defines which autoscaling group it is in
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @see "Autoscaling"
     */
    public Builder withAutoScaleByAttributeName(String name) {
        this.autoScaleByAttributeName = name;
        return this;
    }

    /**
     * Use the given host attribute name to determine the alternate hostname of virtual machine to use as an
     * argument for an autoscaling action.
     *
     * In some circumstances (for instance with Amazon Web Services), the host name is not the correct
     * identifier for the host in the context of an autoscaling action (for instance, in AWS, you need the
     * EC2 instance identifier). If this is the case for your system, name here the host attribute whose
     * value is the identifier for the host in an autoscaling context so that Fenzo can perform autoscaling
     * properly.
     *
     * @param name the attribute name to use as the alternate host identifier in an autoscaling context
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @see "Autoscaling"
     */
    public Builder withAutoScalerMapHostnameAttributeName(String name) {
        this.autoScalerMapHostnameAttributeName = name;
        return this;
    }

    /**
     * Call this method to tell the autoscaler to try to maintain a balance of host varieties when it scales
     * down a cluster. Pass the method a host attribute, and the autoscaler will attempt to scale down in
     * such a way as to maintain a similar number of hosts with each value for that attribute.
     *
     * @param name the name of the attribute
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @see "Autoscaling"
     */
    public Builder withAutoScaleDownBalancedByAttributeName(String name) {
        this.autoScaleDownBalancedByAttributeName = name;
        return this;
    }

    /**
     * Call this method to set {@link ScaleDownOrderEvaluator}.
     *
     * @param scaleDownOrderEvaluator scale down ordering evaluator
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     */
    public Builder withScaleDownOrderEvaluator(ScaleDownOrderEvaluator scaleDownOrderEvaluator) {
        this.scaleDownOrderEvaluator = scaleDownOrderEvaluator;
        return this;
    }

    /**
     * Ordered list of scale down constraints evaluators.
     *
     * @param weightedScaleDownConstraintEvaluators scale down evaluators
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     */
    public Builder withWeightedScaleDownConstraintEvaluators(Map weightedScaleDownConstraintEvaluators) {
        this.weightedScaleDownConstraintEvaluators = weightedScaleDownConstraintEvaluators;
        return this;
    }

    /**
     * Use the given function to determine if the fitness of a host for a task is good enough that the task
     * scheduler should stop looking for a more fit host. Pass this method a function that takes a value
     * between 0.0 (completely unfit) and 1.0 (perfectly fit) that describes the fitness of a particular
     * host for a particular task, and decides, by returning a boolean value, whether that value is a "good
     * enough" fit such that the task scheduler should go ahead and assign the task to the host. If you
     * write this function to only return true for values at or near 1.0, the task scheduler will spend more
     * time searching for a good fit; if you write the function to return true for lower values, the task
     * scheduler will be able to find a host to assign the task to more quickly.
     *
     * By default, if you do not build your task scheduler by passing a function into this method, the
     * task scheduler will always search all of the available hosts for the best possible fit for every
     * task.
     *
     * @param f a single-argument function that accepts a double parameter, representing the fitness, and
     *          returns a {@code Boolean} indicating whether the fitness is good enough to constitute a
     *          successful match between the host and task
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     */
    public Builder withFitnessGoodEnoughFunction(Func1<Double, Boolean> f) {
        this.isFitnessGoodEnoughFunction = f;
        return this;
    }

    /**
     * Disable resource shortfall evaluation. The shortfall evaluation is performed when evaluating the
     * autoscaling needs. This is useful for evaluating the actual resources needed to scale up by, for
     * pending tasks, which may be greater than the number of resources scaled up by thresholds based scale
     * up.
     *
     * This evaluation can be computationally expensive and/or may scale up aggressively, initially, to more
     * resources than needed. The initial aggressive scale up is corrected later by scale down, which is
     * triggered by scale down evaluation after a cool down period transpires.
     *
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @see "Autoscaling"
     */
    public Builder disableShortfallEvaluation() {
        disableShortfallEvaluation = true;
        return this;
    }

    /**
     * Call this method to set the initial limitations on how many resources will be available to each task
     * group.
     *
     * @param resAllocs a Map with the task group name as keys and resource allocation limits as values
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @see "Resource Allocation Limits"
     */
    public Builder withInitialResAllocs(Map resAllocs) {
        this.resAllocs = resAllocs;
        return this;
    }

    /**
     * Adds an autoscaling rule that governs the behavior by which this scheduler will autoscale hosts of a
     * certain type. You can chain this method multiple times, adding a new autoscaling rule each time (one
     * for each autoscale group).
     *
     * Before you call this method you must first call
     * {@link #withAutoScaleByAttributeName withAutoScaleByAttributeName()} to indicate which host
     * attribute you are using to identify which hosts are in which autoscaling groups.
     *
     * @param rule the autoscaling rule to add
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @throws IllegalArgumentException if the autoscale-by attribute name has not been set, if the rule's
     *                                  minimum idle host count is less than 1, or if it exceeds the rule's
     *                                  maximum idle host count
     * @see "Autoscaling"
     */
    public Builder withAutoScaleRule(AutoScaleRule rule) {
        if (autoScaleByAttributeName == null || autoScaleByAttributeName.isEmpty()) {
            throw new IllegalArgumentException("Auto scale by attribute name must be set before setting rules");
        }
        if (rule.getMinIdleHostsToKeep() < 1) {
            throw new IllegalArgumentException("Min Idle must be >0");
        }
        if (rule.getMinIdleHostsToKeep() > rule.getMaxIdleHostsToKeep()) {
            throw new IllegalArgumentException("Min Idle must be <= Max Idle hosts");
        }
        this.autoScaleRules.add(rule);
        return this;
    }

    /**
     * Call this method to set the autoscale callback. The callback you pass to this method receives an
     * indication when an autoscale action is to be performed. This indicates which autoscale rule prompted
     * the action and whether the action is to scale up or scale down the autoscale group. The callback then
     * initiates the appropriate scaling actions.
     *
     * @param callback the callback to invoke for autoscale actions
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @see "Autoscaling"
     */
    public Builder withAutoScalerCallback(Action1 callback) {
        this.autoscalerCallback = callback;
        return this;
    }

    /**
     * Delay the autoscale up actions to reduce unnecessary actions due to short periods of breach of scale up
     * policy rules. Such scale ups can be caused by, for example, the periodic offer rejections that result in
     * offers coming back shortly. They can also be caused by certain environments where tasks are first
     * scheduled to replace existing tasks.
     *
     * The autoscaler takes the scale up action based on the latest scale up request value after the delay.
     *
     * The default is 0 secs. Ideally, you should set this to be at least two times the larger of the two
     * values:
     * <ul>
     *     <li>Delay between successive calls to {@link TaskScheduler#scheduleOnce(List, List)}.</li>
     *     <li>Delay in getting a rejected offer back from Mesos.</li>
     * </ul>
     *
     * @param delayAutoscaleUpBySecs Delay autoscale up actions by this many seconds.
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @throws IllegalArgumentException if you give negative number for {@code delayAutoscaleUpBySecs}.
     * @see "Autoscaling"
     */
    public Builder withDelayAutoscaleUpBySecs(long delayAutoscaleUpBySecs) {
        if (delayAutoscaleUpBySecs < 0L) {
            throw new IllegalArgumentException("Delay secs can't be negative: " + delayAutoscaleUpBySecs);
        }
        this.delayAutoscaleUpBySecs = delayAutoscaleUpBySecs;
        return this;
    }

    /**
     * Delay the autoscale down actions to reduce unnecessary actions due to short periods of breach of scale
     * down policy rules. Such scale downs can be caused by, for example, certain environments where existing
     * tasks are removed before replacing them with new tasks.
     *
     * The autoscaler takes the scale down action based on the latest scale down request value after the delay.
     *
     * The default is 0 secs. Ideally, you should set this to be at least two times the delay before terminated
     * tasks are replaced successfully.
     *
     * @param delayAutoscaleDownBySecs Delay autoscale down actions by this many seconds.
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @throws IllegalArgumentException if you give negative number for {@code delayAutoscaleDownBySecs}.
     * @see "Autoscaling"
     */
    public Builder withDelayAutoscaleDownBySecs(long delayAutoscaleDownBySecs) {
        if (delayAutoscaleDownBySecs < 0L) {
            throw new IllegalArgumentException("Delay secs can't be negative: " + delayAutoscaleDownBySecs);
        }
        this.delayAutoscaleDownBySecs = delayAutoscaleDownBySecs;
        return this;
    }

    /**
     * How long to disable a VM when going through a scale down action. Note that the value used will be the
     * max between this value and the {@link AutoScaleRule#getCoolDownSecs()} value and that this value should
     * be greater than the {@link AutoScaleRule#getCoolDownSecs()} value. If the supplied
     * {@link AutoScaleAction} does not actually terminate the instance in this time frame then the VM will
     * become enabled. This option is useful when you want to increase the disabled time of a VM because the
     * implementation of the {@link AutoScaleAction} may take longer than the cooldown period.
     *
     * @param disabledVmDurationInSecs Disable VMs about to be terminated by this many seconds.
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     * @throws IllegalArgumentException if {@code disabledVmDurationInSecs} is not greater than 0.
     * @see "Autoscaling"
     */
    public Builder withAutoscaleDisabledVmDurationInSecs(long disabledVmDurationInSecs) {
        if (disabledVmDurationInSecs <= 0L) {
            throw new IllegalArgumentException("disabledVmDurationInSecs must be greater than 0: " + disabledVmDurationInSecs);
        }
        this.disabledVmDurationInSecs = disabledVmDurationInSecs;
        return this;
    }

    /**
     * Indicate that the cluster receives resource offers only once per VM (host). Normally, Mesos sends
     * resource offers multiple times, as resources free up on the host upon completion of various tasks.
     * This method provides an experimental support for a mode where Fenzo can be made aware of the entire
     * set of resources on hosts once, in a model similar to Amazon ECS. Fenzo internally keeps track of total
     * versus used resources on the host based on tasks assigned and then later unassigned. No further
     * resource offers are expected after the initial one.
     *
     * @param b True if only one resource offer is expected per host, false by default.
     * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
     */
    public Builder withSingleOfferPerVM(boolean b) {
        this.singleOfferMode = b;
        return this;
    }

    /**
     * Creates a {@link TaskScheduler} based on the various builder methods you have chained.
     *
     * If only one of the scale down order evaluator and the weighted scale down constraint evaluators was
     * supplied, the missing one is filled in first: a {@link NoOpScaleDownOrderEvaluator} for the former,
     * an empty map for the latter. If neither was supplied, both are left unset.
     *
     * @return a {@code TaskScheduler} built according to the specifications you indicated
     */
    public TaskScheduler build() {
        if (scaleDownOrderEvaluator == null) {
            if (weightedScaleDownConstraintEvaluators != null) {
                scaleDownOrderEvaluator = new NoOpScaleDownOrderEvaluator();
            }
        } else if (weightedScaleDownConstraintEvaluators == null) {
            weightedScaleDownConstraintEvaluators = Collections.emptyMap();
        }
        return new TaskScheduler(this);
    }
}
/**
 * Plain holder for the outcome of evaluating one task: the per-host assignment results, the chosen
 * result, the number of allocation trials made, and any exception that was caught along the way.
 */
private static class EvalResult {
    List assignmentResults;
    TaskAssignmentResult result;
    int numAllocationTrials;
    Exception exception;

    private EvalResult(List assignmentResults, TaskAssignmentResult result, int numAllocationTrials, Exception e) {
        this.exception = e;
        this.numAllocationTrials = numAllocationTrials;
        this.result = result;
        this.assignmentResults = assignmentResults;
    }
}
// Holds the VMs (hosts) and their offers; created in the constructor and consulted on every scheduling run.
private final AssignableVMs assignableVMs;
private static final Logger logger = LoggerFactory.getLogger(TaskScheduler.class);
// Minimum time, in seconds, between two purges of inactive VMs (see doScheduling()).
private static final long purgeVMsIntervalSecs = 60;
// Wall-clock time, in milliseconds, of the last purge of inactive VMs; initialized to construction time.
private long lastVMPurgeAt=System.currentTimeMillis();
// The builder this scheduler was created from; kept to read settings such as the "good enough" function.
private final Builder builder;
// Entered around each scheduling run started through scheduleOnce().
private final StateMonitor stateMonitor;
// Null when no autoscale-by attribute name was configured on the builder.
private final AutoScaler autoScaler;
// Size of the executor's thread pool: one thread per available processor.
private final int EXEC_SVC_THREADS=Runtime.getRuntime().availableProcessors();
// NOTE(review): not used in the visible part of this file — presumably runs parallel evaluations; confirm.
private final ExecutorService executorService = Executors.newFixedThreadPool(EXEC_SVC_THREADS);
// Read by checkIfShutdown(); once true, guarded methods throw IllegalStateException.
private final AtomicBoolean isShutdown = new AtomicBoolean();
private final ResAllocsEvaluater resAllocsEvaluator;
private final TaskTracker taskTracker;
// When true, scheduleOnce(List, List) refuses to run and throws with the message below.
private volatile boolean usingSchedulingService = false;
private final String usingSchedSvcMesg = "Invalid call when using task scheduling service";
private TaskScheduler(Builder builder) {
if(builder.leaseRejectAction ==null)
throw new IllegalArgumentException("Lease reject action must be non-null");
this.builder = builder;
this.stateMonitor = new StateMonitor();
taskTracker = new TaskTracker();
resAllocsEvaluator = new ResAllocsEvaluater(taskTracker, builder.resAllocs);
assignableVMs = new AssignableVMs(taskTracker, builder.leaseRejectAction,
builder.leaseOfferExpirySecs, builder.maxOffersToReject, builder.autoScaleByAttributeName,
builder.singleOfferMode, builder.autoScaleByAttributeName);
if(builder.autoScaleByAttributeName != null && !builder.autoScaleByAttributeName.isEmpty()) {
ScaleDownConstraintExecutor scaleDownConstraintExecutor = builder.scaleDownOrderEvaluator == null
? null : new ScaleDownConstraintExecutor(builder.scaleDownOrderEvaluator, builder.weightedScaleDownConstraintEvaluators);
autoScaler = new AutoScaler(builder.autoScaleByAttributeName, builder.autoScalerMapHostnameAttributeName,
builder.autoScaleDownBalancedByAttributeName,
builder.autoScaleRules, assignableVMs,
builder.disableShortfallEvaluation, assignableVMs.getActiveVmGroups(),
assignableVMs.getVmCollection(), scaleDownConstraintExecutor);
if(builder.autoscalerCallback != null)
autoScaler.setCallback(builder.autoscalerCallback);
if(builder.delayAutoscaleDownBySecs > 0L)
autoScaler.setDelayScaleDownBySecs(builder.delayAutoscaleDownBySecs);
if(builder.delayAutoscaleUpBySecs > 0L)
autoScaler.setDelayScaleUpBySecs(builder.delayAutoscaleUpBySecs);
if (builder.disabledVmDurationInSecs > 0L) {
autoScaler.setDisabledVmDurationInSecs(builder.disabledVmDurationInSecs);
}
}
else {
autoScaler=null;
}
}
/**
 * Guards entry points that must not run once the scheduler has been shut down.
 *
 * @throws IllegalStateException if the shutdown flag is set
 */
void checkIfShutdown() throws IllegalStateException {
    final boolean alreadyShutdown = isShutdown.get();
    if (alreadyShutdown) {
        throw new IllegalStateException("TaskScheduler already shutdown");
    }
}
/**
 * Installs the callback that is invoked for autoscale actions. The callback is told which autoscale
 * rule prompted the action and whether the autoscale group is to be scaled up or down, and is expected
 * to initiate the corresponding scaling action.
 *
 * @param callback the callback to invoke for autoscale actions
 * @throws IllegalStateException if the scheduler has been shut down, or if no autoscaler was established
 * @see "Autoscaling"
 */
public void setAutoscalerCallback(Action1 callback) throws IllegalStateException {
    checkIfShutdown();
    if (autoScaler != null) {
        autoScaler.setCallback(callback);
        return;
    }
    throw new IllegalStateException("No autoScaler setup");
}
/**
 * Returns the task tracker owned by this scheduler.
 *
 * @return the task tracker created together with this scheduler
 */
public TaskTracker getTaskTracker() {
    return this.taskTracker;
}
/**
 * Picks the best successful assignment out of the given results.
 *
 * The result with the highest fitness wins; between results of equal fitness the one whose host name
 * sorts first wins. Null entries and unsuccessful results are ignored. The list is scanned from its
 * last element to its first, so among otherwise indistinguishable candidates the later entry is kept.
 *
 * @param results the candidate assignment results; may contain nulls
 * @return the best successful result, or {@code null} if none of the results is successful
 */
private TaskAssignmentResult getSuccessfulResult(List<TaskAssignmentResult> results) {
    double bestFitness = 0.0;
    TaskAssignmentResult bestResult = null;
    for (int r = results.size() - 1; r >= 0; r--) {
        final TaskAssignmentResult res = results.get(r);
        if (res == null || !res.isSuccessful()) {
            continue;
        }
        // The first success is always taken, even at fitness 0.0; later ones must beat it on fitness,
        // or tie on fitness and sort ahead of it by host name.
        final boolean better = bestResult == null
                || res.getFitness() > bestFitness
                || (res.getFitness() == bestFitness
                        && res.getHostname().compareTo(bestResult.getHostname()) < 0);
        if (better) {
            bestFitness = res.getFitness();
            bestResult = res;
        }
    }
    return bestResult;
}
/**
 * Asks the configured "good enough" function whether the fitness of the given result is sufficient.
 *
 * @param result the assignment result whose fitness is to be judged
 * @return whatever the builder's fitness-good-enough function returns for that fitness
 */
private boolean isGoodEnough(TaskAssignmentResult result) {
    final double fitness = result.getFitness();
    return builder.isFitnessGoodEnoughFunction.call(fitness);
}
/**
 * Returns the resource allocations currently registered with the scheduler, as reported by the
 * resource allocations evaluator.
 *
 * @return current mapping of resource allocations
 * @see "Resource Allocation Limits"
 */
public Map getResAllocs() {
    return this.resAllocsEvaluator.getResAllocs();
}
/**
 * Registers the given resource allocation, replacing any existing one of the same name.
 *
 * @param resAllocs the resource allocation to add or replace
 * @see "Resource Allocation Limits"
 */
public void addOrReplaceResAllocs(ResAllocs resAllocs) {
    this.resAllocsEvaluator.replaceResAllocs(resAllocs);
}
/**
 * Unregisters the resource allocation that goes by the given name.
 *
 * @param groupName the name of the resource allocation to remove
 * @see "Resource Allocation Limits"
 */
public void removeResAllocs(String groupName) {
    this.resAllocsEvaluator.remResAllocs(groupName);
}
/**
 * Returns the autoscale rules currently registered with the scheduler.
 *
 * @return the autoscaler's rules, or an empty list when this scheduler has no autoscaler
 * @see "Autoscaling"
 */
public Collection getAutoScaleRules() {
    if (autoScaler != null) {
        return autoScaler.getRules();
    }
    return Collections.emptyList();
}
/**
 * Add a new autoscale rule to those used by this scheduler. If a rule with the same name exists, it is
 * replaced. This autoscale rule will be used the next time the scheduler invokes its autoscale action.
 *
 * @param rule the autoscale rule to add
 * @throws IllegalStateException if no autoscaler was established for this scheduler
 * @see "Autoscaling"
 */
public void addOrReplaceAutoScaleRule(AutoScaleRule rule) {
    // Fail with a descriptive error instead of a bare NullPointerException when the scheduler was
    // built without autoscaling.
    if (autoScaler == null) {
        throw new IllegalStateException("No autoScaler setup");
    }
    autoScaler.replaceRule(rule);
}
/**
 * Remove the autoscale rule associated with the given name from those used by the scheduler.
 *
 * @param ruleName name of the autoscale rule to remove
 * @throws IllegalStateException if no autoscaler was established for this scheduler
 * @see "Autoscaling"
 */
public void removeAutoScaleRule(String ruleName) {
    // Fail with a descriptive error instead of a bare NullPointerException when the scheduler was
    // built without autoscaling.
    if (autoScaler == null) {
        throw new IllegalStateException("No autoScaler setup");
    }
    autoScaler.removeRule(ruleName);
}
/**
 * Marks whether this scheduler is being driven by a scheduling service. While the flag is set,
 * {@code scheduleOnce(List, List)} rejects direct calls.
 *
 * @param b the new value of the flag
 */
/* package */ void setUsingSchedulingService(boolean b) {
    this.usingSchedulingService = b;
}
/**
 * Hands the given getter to the autoscaler, if one is configured; does nothing otherwise.
 *
 * NOTE(review): the parameter's type arguments look truncated in this copy ("Func1> getter"); restore
 * them from the upstream declaration — confirm before compiling.
 *
 * @param getter the function passed through to the autoscaler's {@code setTaskToClustersGetter()}
 */
/* package */ void setTaskToClusterAutoScalerMapGetter(Func1> getter) {
if (autoScaler != null)
autoScaler.setTaskToClustersGetter(getter);
}
/**
 * Returns the autoscaler used by this scheduler.
 *
 * @return the autoscaler, or {@code null} if autoscaling was not configured
 */
/* package */ AutoScaler getAutoScaler() {
    return this.autoScaler;
}
/**
* Schedule a list of task requests by using any newly-added resource leases in addition to any
* previously-unused leases. This is the main scheduling method that attempts to assign resources to task
* requests. Resource leases are associated with a host name. A host can have zero or more leases. Leases
* that the scheduler does not use in this scheduling run it stores for later use until they expire.
*
* You cannot add a lease object with an Id equal to that of a stored lease object; {@code scheduleOnce()}
* will throw an {@code IllegalStateException}. Upon throwing this exception, if you provided multiple
* leases in the {@code newLeases} argument, the state of internally maintained list of unused leases will
* be in an indeterminate state - some of the leases may have been successfully added.
*
* The task scheduler rejects any expired leases before scheduling begins. Then, it combines all leases of a
* host to determine the total available resources on the host. The scheduler then tries each task request,
* in the order that they appear in the given list, for assignment against the available hosts until
* successful. For each task the scheduler returns either a successful assignment result, or, a set of
* assignment failures.
*
* After the scheduler evaluates all assignments, it will reject remaining leases if they are unused and
* their offer time is further in the past than lease expiration interval. This prevents the scheduler from
* hoarding leases. If you provided an autoscaler, the scheduler then launches autoscale evaluation to run
* asynchronously, which runs each registered autoscale rule based on its policy.
*
* The successful assignments contain hosts to which tasks have been successfully assigned and the offers for that
* host that were used for the assignments. Fenzo removes those offers from its internal state. Normally, you
* would use those offers to launch the tasks. For any reason if you do not launch those tasks, you must either
* reject the offers to Mesos, or, re-add them to Fenzo with the next call to {@link #scheduleOnce(List, List)}.
* Otherwise, those offers would be "leaked out".
*
* Unexpected exceptions may arise during scheduling, for example, due to uncaught exceptions in user provided
* plugins. The scheduling routine stops upon catching any unexpected exceptions. These exceptions are surfaced to
* you in one or both of two ways.
*
* - The returned result object will contain the exceptions encountered in
* {@link SchedulingResult#getExceptions()}. In this case, no assignments would have been made.
* - This method may throw {@code IllegalStateException} with its cause set to the uncaught exception. In this
* case the internal state of Fenzo will be undefined.
*
* If there are exceptions, the internal state of Fenzo may be corrupt with no way to undo any partial effects.
*
* @param requests a list of task requests to match with resources, in their given order
* @param newLeases new resource leases from hosts that the scheduler can use along with any previously
* ununsed leases
* @return a {@link SchedulingResult} object that contains a task assignment results map and other summaries
* @throws IllegalStateException if you call this method concurrently, or, if you try to add an existing lease
* again, or, if there was unexpected exception during the scheduling iteration, or, if using
* {@link TaskSchedulingService}, which will instead invoke scheduling from within. Unexpected exceptions
* can arise from uncaught exceptions in user defined plugins. It is also thrown if the scheduler has been shutdown
* via the {@link #shutdown()} method.
*/
public SchedulingResult scheduleOnce(
List extends TaskRequest> requests,
List newLeases) throws IllegalStateException {
if (usingSchedulingService)
throw new IllegalStateException(usingSchedSvcMesg);
final Iterator extends TaskRequest> iterator =
requests != null ?
requests.iterator() :
Collections.emptyIterator();
TaskIterator taskIterator = new TaskIterator() {
@Override
public Assignable next() {
if (iterator.hasNext())
return Assignable.success(iterator.next());
return null;
}
};
return scheduleOnce(taskIterator, newLeases);
}
/**
 * Variant of {@link #scheduleOnce(List, List)} that draws the tasks from an iterator rather than a list.
 *
 * The run takes place inside the state monitor. Any exception other than an
 * {@code IllegalStateException} is logged and rethrown wrapped in an {@code IllegalStateException}.
 *
 * @param taskIterator Iterator for tasks to assign resources to.
 * @param newLeases new resource leases from hosts that the scheduler can use along with any previously
 *                  unused leases
 * @return a {@link SchedulingResult} object that contains a task assignment results map and other summaries
 * @throws IllegalStateException if you call this method concurrently, or, if you try to add an existing
 * lease again, or, if there was unexpected exception during the scheduling iteration. For example,
 * unexpected exceptions can arise from uncaught exceptions in user defined plugins. It is also thrown if
 * the scheduler has been shutdown via the {@link #shutdown()} method.
 */
/* package */ SchedulingResult scheduleOnce(
        TaskIterator taskIterator,
        List newLeases) throws IllegalStateException {
    checkIfShutdown();
    try (AutoCloseable ignored = stateMonitor.enter()) {
        return doScheduling(taskIterator, newLeases);
    } catch (Exception e) {
        logger.error("Error with scheduling run: " + e.getMessage(), e);
        if (!(e instanceof IllegalStateException)) {
            logger.warn("Unexpected exception: " + e.getMessage());
            throw new IllegalStateException("Unexpected exception during scheduling run: " + e.getMessage(), e);
        }
        // Already the documented exception type: pass it through unchanged.
        throw (IllegalStateException) e;
    }
}
/**
 * Variant of {@link #scheduleOnce(List, List)} meant only for scheduling a pseudo iteration: it does not
 * enter the StateMonitor and supplies no new leases.
 *
 * @param taskIterator Iterator for tasks to assign resources to.
 * @return a {@link SchedulingResult} object that contains a task assignment results map and other summaries
 * @throws Exception whatever the scheduling run itself throws
 */
/* package */ SchedulingResult pseudoScheduleOnce(TaskIterator taskIterator) throws Exception {
    final List noNewLeases = Collections.emptyList();
    return doScheduling(taskIterator, noNewLeases);
}
/**
 * Runs a single scheduling pass through {@code doSchedule}, purges inactive VMs if the purge interval
 * has elapsed since the last purge, and stamps the elapsed wall-clock time on the result.
 *
 * @param taskIterator iterator over the tasks to assign resources to
 * @param newLeases new resource leases to add for this pass
 * @return the scheduling result with its runtime set
 * @throws Exception if the underlying scheduling pass throws
 */
private SchedulingResult doScheduling(TaskIterator taskIterator,
        List newLeases) throws Exception {
    final long startedAt = System.currentTimeMillis();
    final SchedulingResult result = doSchedule(taskIterator, newLeases);
    final boolean purgeDue = System.currentTimeMillis() > (lastVMPurgeAt + purgeVMsIntervalSecs*1000);
    if (purgeDue) {
        lastVMPurgeAt = System.currentTimeMillis();
        logger.info("Purging inactive VMs");
        // Hosts that received assignments in this pass are passed along so they are excluded from the purge.
        assignableVMs.purgeInactiveVMs(
                result.getResultMap() == null?
                        Collections.emptySet() :
                        new HashSet<>(result.getResultMap().keySet())
        );
    }
    result.setRuntime(System.currentTimeMillis() - startedAt);
    return result;
}
/**
 * Performs one scheduling pass over the tasks produced by {@code taskIterator}.
 *
 * The pass adds {@code newLeases} to the known VMs, then for every task either records a failure
 * (iterator-reported failure, resource allocation limit, or max-resource check) or evaluates the task
 * against the VMs concurrently and assigns it to the successful result, if any. Afterwards, provided no
 * exception was recorded, it collects per-host assignment results, determines idle and expirable leases,
 * and hands the idle resources and unassigned tasks to the autoscaler when one is configured.
 *
 * @param taskIterator source of tasks; iteration stops when {@code next()} returns {@code null}
 * @param newLeases new resource leases to add before scheduling
 * @return the scheduling result, including the result map, failures, exceptions and summary counters
 * @throws Exception propagated from the calls made during the pass
 */
private SchedulingResult doSchedule(
TaskIterator taskIterator,
List newLeases) throws Exception {
// rejectedCount is filled in by prepareAndGetOrderedVMs and later increased by removeLimitedLeases
AtomicInteger rejectedCount = new AtomicInteger();
List avms = assignableVMs.prepareAndGetOrderedVMs(newLeases, rejectedCount);
if(logger.isDebugEnabled())
logger.debug("Got {} avms", avms.size());
List inactiveAVMs = assignableVMs.getInactiveVMs();
// NOTE(review): this logs avms.size() a second time, the same value as the message above
if(logger.isDebugEnabled())
logger.debug("Found {} VMs with non-zero offers to assign from", avms.size());
final boolean hasResAllocs = resAllocsEvaluator.prepare();
//logger.info("Got " + avms.size() + " AVMs to schedule on");
int totalNumAllocations=0;
// Tasks still in this set at the end are passed to the autoscaler as unassigned
Set failedTasksForAutoScaler = new HashSet<>();
Map resultMap = new HashMap<>(avms.size());
final SchedulingResult schedulingResult = new SchedulingResult(resultMap);
if(avms.isEmpty()) {
// No VMs to assign from: drain the iterator and count every task as failed for autoscaling purposes
while (true) {
final Assignable extends TaskRequest> taskOrFailure = taskIterator.next();
if (taskOrFailure == null)
break;
failedTasksForAutoScaler.add(taskOrFailure.getTask());
}
} else {
while (true) {
final Assignable extends TaskRequest> taskOrFailure = taskIterator.next();
if(logger.isDebugEnabled())
logger.debug("TaskSched: task=" + (taskOrFailure == null? "null" : taskOrFailure.getTask().getId()));
if (taskOrFailure == null)
break;
// The iterator itself reported a failure for this task: record it against the dummy VM and move on
if(taskOrFailure.hasFailure()) {
schedulingResult.addFailures(
taskOrFailure.getTask(),
Collections.singletonList(new TaskAssignmentResult(
assignableVMs.getDummyVM(),
taskOrFailure.getTask(),
false,
Collections.singletonList(taskOrFailure.getAssignmentFailure()),
null,
0
)
));
continue;
}
TaskRequest task = taskOrFailure.getTask();
// Assume failure until an assignment succeeds (removed again below on success or resAllocs failure)
failedTasksForAutoScaler.add(task);
if(hasResAllocs) {
// The task's group has already been marked failed by the evaluator: skip without recording a new failure
if(resAllocsEvaluator.taskGroupFailed(task.taskGroupName())) {
if(logger.isDebugEnabled())
logger.debug("Resource allocation limits reached for task: " + task.getId());
continue;
}
final AssignmentFailure resAllocsFailure = resAllocsEvaluator.hasResAllocs(task);
if(resAllocsFailure != null) {
final List failures = Collections.singletonList(new TaskAssignmentResult(assignableVMs.getDummyVM(),
task, false, Collections.singletonList(resAllocsFailure), null, 0.0));
schedulingResult.addFailures(task, failures);
failedTasksForAutoScaler.remove(task); // don't scale up for resAllocs failures
if(logger.isDebugEnabled())
logger.debug("Resource allocation limit reached for task " + task.getId() + ": " + resAllocsFailure);
continue;
}
}
// Max-resource check; on failure the task stays in failedTasksForAutoScaler
final AssignmentFailure maxResourceFailure = assignableVMs.getFailedMaxResource(null, task);
if(maxResourceFailure != null) {
final List failures = Collections.singletonList(new TaskAssignmentResult(assignableVMs.getDummyVM(), task, false,
Collections.singletonList(maxResourceFailure), null, 0.0));
schedulingResult.addFailures(task, failures);
if(logger.isDebugEnabled())
logger.debug("Task {}: maxResource failure: {}", task.getId(), maxResourceFailure);
continue;
}
// create batches of VMs to evaluate assignments concurrently across the batches
// All evaluators drain from this one shared queue, which is pre-filled with every VM
final BlockingQueue virtualMachines = new ArrayBlockingQueue<>(avms.size(), false, avms);
int nThreads = (int)Math.ceil((double)avms.size()/ PARALLEL_SCHED_EVAL_MIN_BATCH_SIZE);
List> futures = new ArrayList<>();
if(logger.isDebugEnabled())
logger.debug("Launching {} threads for evaluating assignments for task {}", nThreads, task.getId());
// NOTE(review): the next line looks truncated by text extraction (loop bound and the submission of the
// Callable are missing) - restore it from the upstream source before compiling
for(int b=0; b() {
@Override
public EvalResult call() throws Exception {
return evalAssignments(task, virtualMachines);
}
}));
}
List results = new ArrayList<>();
List bestResults = new ArrayList<>();
// Wait for every evaluator and gather its results, or record the exception it reported
for(Future f: futures) {
try {
EvalResult evalResult = f.get();
if(evalResult.exception!=null) {
logger.warn("Error during concurrent task assignment eval - " + evalResult.exception.getMessage(),
evalResult.exception);
schedulingResult.addException(evalResult.exception);
}
else {
results.add(evalResult);
bestResults.add(evalResult.result);
if(logger.isDebugEnabled())
logger.debug("Task {}: best result so far: {}", task.getId(), evalResult.result);
totalNumAllocations += evalResult.numAllocationTrials;
}
} catch (InterruptedException|ExecutionException e) {
// NOTE(review): only logged - not added to schedulingResult and the interrupt flag is not restored
logger.error("Unexpected during concurrent task assignment eval - " + e.getMessage(), e);
}
}
// Any recorded exception ends the task loop; the remaining tasks are not pulled from the iterator
if(!schedulingResult.getExceptions().isEmpty())
break;
TaskAssignmentResult successfulResult = getSuccessfulResult(bestResults);
List failures = new ArrayList<>();
if(successfulResult == null) {
if(logger.isDebugEnabled())
logger.debug("Task {}: no successful results", task.getId());
for(EvalResult er: results)
failures.addAll(er.assignmentResults);
schedulingResult.addFailures(task, failures);
}
else {
if(logger.isDebugEnabled())
logger.debug("Task {}: found successful assignment on host {}", task.getId(),
successfulResult.getHostname());
successfulResult.assignResult();
failedTasksForAutoScaler.remove(task);
}
}
}
List idleResourcesList = new ArrayList<>();
// Result collection and autoscaling are skipped entirely when any exception was recorded
if(schedulingResult.getExceptions().isEmpty()) {
List expirableLeases = new ArrayList<>();
for (AssignableVirtualMachine avm : avms) {
VMAssignmentResult assignmentResult = avm.resetAndGetSuccessfullyAssignedRequests();
if (assignmentResult == null) {
// Nothing assigned on this VM in this pass: its lease is expirable, and it is idle only if it also
// has no previously assigned tasks
if (!avm.hasPreviouslyAssignedTasks())
idleResourcesList.add(avm.getCurrTotalLease());
expirableLeases.add(avm.getCurrTotalLease());
} else {
resultMap.put(avm.getHostname(), assignmentResult);
}
}
// Process inactive VMs
List idleInactiveAVMs = inactiveAVMs.stream()
.filter(vm -> vm.getCurrTotalLease() != null && !vm.hasPreviouslyAssignedTasks())
.map(AssignableVirtualMachine::getCurrTotalLease)
.collect(Collectors.toList());
// The value returned by removeLimitedLeases is added to the rejected-leases count
rejectedCount.addAndGet(assignableVMs.removeLimitedLeases(expirableLeases));
final AutoScalerInput autoScalerInput = new AutoScalerInput(idleResourcesList, idleInactiveAVMs, failedTasksForAutoScaler);
if (autoScaler != null)
autoScaler.doAutoscale(autoScalerInput);
}
// Summary counters are set whether or not exceptions were recorded
schedulingResult.setLeasesAdded(newLeases.size());
schedulingResult.setLeasesRejected(rejectedCount.get());
schedulingResult.setNumAllocations(totalNumAllocations);
schedulingResult.setTotalVMsCount(assignableVMs.getTotalNumVMs());
schedulingResult.setIdleVMsCount(idleResourcesList.size());
return schedulingResult;
}
/**
 * Creates pseudo hosts by delegating to {@code assignableVMs.createPseudoHosts}.
 * The second argument is a rule lookup: {@code autoScaler::getRule} when an autoscaler is configured,
 * otherwise a function that returns {@code null} for every name.
 *
 * @param groupCounts passed through unchanged to {@code assignableVMs.createPseudoHosts}
 * @return whatever {@code assignableVMs.createPseudoHosts} returns
 */
/* package */ Map> createPseudoHosts(Map groupCounts) {
return assignableVMs.createPseudoHosts(groupCounts, autoScaler == null? name -> null : autoScaler::getRule);
}
/**
 * Removes pseudo hosts by delegating to {@code assignableVMs.removePseudoHosts}.
 *
 * @param hostsMap passed through unchanged; presumably the map returned by {@code createPseudoHosts} -
 * TODO confirm against callers
 */
/* package */ void removePseudoHosts(Map> hostsMap) {
assignableVMs.removePseudoHosts(hostsMap);
}
/**
 * Discards pseudo assignments by clearing the assigned tasks held by the task tracker.
 */
/* package */ void removePseudoAssignments() {
    // Clearing the tracker's assigned tasks is considered sufficient for pseudo assignments.
    taskTracker.clearAssignedTasks();
}
/**
* Returns the state of resources on all known hosts. You can use this for debugging or informational
* purposes (occasionally). This method obtains and holds a lock for the duration of creating the state
* information. Scheduling runs are blocked around the lock.
*
* @return a Map of state information with the hostname as the key and a Map of resource state as the value.
* The resource state Map contains a resource as the key and a two element Double array - the first
* element of which contains the amount of the resource used and the second element contains the
* amount still available (available does not include used).
* @see How to Learn Which Resources Are Available on Which Hosts
* @throws IllegalStateException if called concurrently with {@link #scheduleOnce(List, List)} or if called when
* using a {@link TaskSchedulingService}.
*/
public Map> getResourceStatus() throws IllegalStateException {
// Direct calls are rejected while usingSchedulingService is set
if (usingSchedulingService)
throw new IllegalStateException(usingSchedSvcMesg);
// The internal variant enters the state monitor before reading the status
return getResourceStatusIntl();
}
/**
 * Internal variant of {@code getResourceStatus()} that skips the scheduling-service check.
 * Enters the state monitor, reads the resource status from {@code assignableVMs}, and releases the
 * monitor again via try-with-resources.
 *
 * @return the resource status reported by {@code assignableVMs.getResourceStatus()}
 * @throws IllegalStateException if entering or leaving the state monitor, or reading the status, fails;
 * the original exception is attached as the cause
 */
/* package */ Map> getResourceStatusIntl() {
    try (AutoCloseable ac = stateMonitor.enter()) {
        return assignableVMs.getResourceStatus();
    } catch (Exception e) {
        // Log with the stack trace and surface the failure as IllegalStateException, the type the public
        // API documents and the one getVmCurrentStatesIntl() already throws. IllegalStateException is a
        // RuntimeException, so callers catching the previous type keep working.
        logger.error("Unexpected error from state monitor: " + e.getMessage(), e);
        throw new IllegalStateException(e);
    }
}
/**
* Returns the current state of all known hosts. You might occasionally use this for debugging or
* informational purposes. If you call this method, it will obtain and hold a lock for as long as it takes
* to create the state information. Scheduling runs are blocked around the lock.
*
* @return a list containing the current state of all known VMs
* @throws IllegalStateException if called concurrently with {@link #scheduleOnce(List, List)} or if called when
* using a {@link TaskSchedulingService}.
* @see How to Learn the Amount of Resources Currently Available on Particular Hosts
*/
public List getVmCurrentStates() throws IllegalStateException {
    // Only callers that are not going through a scheduling service may read the state directly.
    if (!usingSchedulingService) {
        return getVmCurrentStatesIntl();
    }
    throw new IllegalStateException(usingSchedSvcMesg);
}
/* package */ List getVmCurrentStatesIntl() throws IllegalStateException {
    // The state monitor is entered for the duration of the read and closed by try-with-resources.
    try (AutoCloseable monitorGuard = stateMonitor.enter()) {
        return assignableVMs.getVmCurrentStates();
    } catch (Exception failure) {
        final String detail = "Unexpected error from state monitor: " + failure.getMessage();
        logger.error(detail, failure);
        throw new IllegalStateException(failure);
    }
}
private EvalResult evalAssignments(TaskRequest task, BlockingQueue virtualMachines) {
// This number below sort of controls minimum machines to eval, choose carefully.
// Having it too small increases overhead of getting next machine to evaluate on.
// Having it too high increases latency of thread before it returns when done
try {
int N=10;
List buf = new ArrayList<>(N);
List results = new ArrayList<>();
while(true) {
buf.clear();
int n = virtualMachines.drainTo(buf, N);
if(n == 0)
return new EvalResult(results, getSuccessfulResult(results), results.size(), null);
for(int m=0; m
* In addition, in your framework's task completion callback that you supply to Mesos, you must call your
* task scheduler's {@link #getTaskUnAssigner() getTaskUnassigner().call()} method to notify Fenzo that the
* task is no longer assigned.
*
* Some scheduling optimizers need to know not only which tasks are waiting to be scheduled and which hosts
* have resource offers available, but also which tasks have previously been assigned and are currently
* running on hosts. These two methods help Fenzo provide this information to these scheduling optimizers.
*
* Note that you may not call the task assigner action concurrently with
* {@link #scheduleOnce(java.util.List, java.util.List) scheduleOnce()}. If you do so, the task assigner
* action will throw an {@code IllegalStateException}.
*
* @return a task assigner action
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
*/
public Action2 getTaskAssigner() throws IllegalStateException {
    // The assigner is handed out directly only when no scheduling service is in use.
    if (!usingSchedulingService) {
        return getTaskAssignerIntl();
    }
    throw new IllegalStateException(usingSchedSvcMesg);
}
/* package */Action2 getTaskAssignerIntl() throws IllegalStateException {
    return new Action2() {
        @Override
        public void call(TaskRequest request, String hostname) {
            // Each invocation enters the state monitor and leaves it when the assignment is recorded.
            try (AutoCloseable monitorGuard = stateMonitor.enter()) {
                assignableVMs.setTaskAssigned(request, hostname);
            } catch (Exception failure) {
                final String detail = "Unexpected error from state monitor: " + failure.getMessage();
                logger.error(detail, failure);
                throw new IllegalStateException(failure);
            }
        }
    };
}
/**
* Get the task unassigner action. Call this object's {@code call()} method to unassign an assignment you
* have previously set for each task that completes so that internal state is maintained correctly. Pass two
* String arguments to this call method: the taskId and the hostname.
*
* For each task you assign and launch, you must call your task scheduler's
* {@link #getTaskAssigner() getTaskAssigner().call()} method in order to notify Fenzo that the task has
* actually been deployed on a host.
*
* In addition, in your framework's task completion callback that you supply to Mesos, you must call your
* task scheduler's {@code getTaskUnassigner().call()} method to notify Fenzo that the
* task is no longer assigned.
*
* Some scheduling optimizers need to know not only which tasks are waiting to be scheduled and which hosts
* have resource offers available, but also which tasks have previously been assigned and are currently
* running on hosts. These two methods help Fenzo provide this information to these scheduling optimizers.
*
* This method is safe to be called concurrently with other calls to {@code TaskScheduler}. The tasks to be
* unassigned are stored internally and actually unassigned at the beginning of the next scheduling iteration,
* that is, the next time {@link #scheduleOnce(List, List)} is called.
*
* @return the task un-assigner action
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
*/
public Action2 getTaskUnAssigner() throws IllegalStateException {
// A new action object is created on every call
return new Action2() {
@Override
public void call(String taskId, String hostname) {
// Hands the request straight to assignableVMs; unlike the task assigner action, the state monitor
// is not entered here
assignableVMs.unAssignTask(taskId, hostname);
}
};
}
/**
* Disable the virtual machine with the specified hostname. If the scheduler is not yet aware of the host
* with that hostname, it creates a new object for it, and therefore your disabling of it will be remembered
* when offers that concern that host come in later. The scheduler will not use disabled hosts for
* allocating resources to tasks.
*
* @param hostname the name of the host to disable
* @param durationMillis the length of time, starting from now, in milliseconds, during which the host will
* be disabled
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
*/
public void disableVM(String hostname, long durationMillis) throws IllegalStateException {
    logger.info("Disable VM " + hostname + " for " + durationMillis + " millis");
    final long now = System.currentTimeMillis();
    long disabledUntil = now + durationMillis;
    // A very large duration (for example Long.MAX_VALUE to mean "indefinitely") makes the addition wrap
    // around to a timestamp in the past. Clamp to Long.MAX_VALUE so the deadline stays in the future.
    if (durationMillis > 0 && disabledUntil < now) {
        disabledUntil = Long.MAX_VALUE;
    }
    assignableVMs.disableUntil(hostname, disabledUntil);
}
/**
* Disable the virtual machine with the specified ID. If the scheduler is not yet aware of the host with
* that hostname, it creates a new object for it, and therefore your disabling of it will be remembered when
* offers that concern that host come in later. The scheduler will not use disabled hosts for allocating
* resources to tasks.
*
* @param vmID the ID of the host to disable
* @param durationMillis the length of time, starting from now, in milliseconds, during which the host will
* be disabled
* @return {@code true} if the ID matches a known VM, {@code false} otherwise.
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
*/
public boolean disableVMByVMId(String vmID, long durationMillis) throws IllegalStateException {
    // Translate the VM ID to a hostname; a null result means the ID is not known.
    final String resolvedHost = assignableVMs.getHostnameFromVMId(vmID);
    final boolean known = resolvedHost != null;
    if (known) {
        disableVM(resolvedHost, durationMillis);
    }
    return known;
}
/**
* Enable the VM with the specified host name. Hosts start in an enabled state, so you only need to call
* this method if you have previously explicitly disabled the host.
*
* @param hostname the name of the host to enable
* @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method.
*/
public void enableVM(String hostname) throws IllegalStateException {
    final String message = "Enabling VM " + hostname;
    logger.info(message);
    // The actual state change is carried out by assignableVMs.
    assignableVMs.enableVM(hostname);
}
/**
* Set how the scheduler determines to which group the VM (host) belongs. You can group hosts. Which group a
* host belongs to is determined by the value of a particular attribute in its offers. You can set which
* attribute defines group membership by naming it in this method.
*
* @param attributeName the name of the attribute that determines a VM's group
*/
public void setActiveVmGroupAttributeName(String attributeName) {
    // Pure pass-through: the attribute name is stored and interpreted by assignableVMs.
    assignableVMs.setActiveVmGroupAttributeName(attributeName);
}
/**
* Set the list of VM group names that are active. VMs (hosts) that belong to groups that you do not include
* in this list are said to be disabled. The scheduler does not use the resources of disabled hosts when it
* allocates tasks. If you pass in a null list, this indicates that the scheduler should consider all groups
* to be enabled.
*
* @param vmGroups a list of VM group names that the scheduler is to consider to be enabled, or {@code null}
* if the scheduler is to consider every group to be enabled
*/
public void setActiveVmGroups(List vmGroups) {
    // Pure pass-through: the list (possibly null) is forwarded unchanged to assignableVMs.
    assignableVMs.setActiveVmGroups(vmGroups);
}
/**
* Mark the task scheduler as shut down and shut down any thread pool executors it created.
*/
public void shutdown() {
    // compareAndSet succeeds for the first caller only; every later call returns without doing anything.
    final boolean firstCall = isShutdown.compareAndSet(false, true);
    if (!firstCall) {
        return;
    }
    executorService.shutdown();
    if (autoScaler != null) {
        autoScaler.shutdown();
    }
}
}