All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.netflix.fenzo.TaskScheduler Maven / Gradle / Ivy

There is a newer version: 1.0.1
Show newest version
/*
 * Copyright 2015 Netflix, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netflix.fenzo;

import com.netflix.fenzo.plugins.NoOpScaleDownOrderEvaluator;
import com.netflix.fenzo.queues.Assignable;
import com.netflix.fenzo.queues.QueuableTask;
import com.netflix.fenzo.sla.ResAllocs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.netflix.fenzo.functions.Action1;
import com.netflix.fenzo.functions.Action2;
import com.netflix.fenzo.functions.Func1;

import java.util.*;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

/**
 * A scheduling service that you can use to optimize the assignment of tasks to hosts within a Mesos framework.
 * Call the {@link #scheduleOnce scheduleOnce()} method with a list of task requests and a list of new resource
 * lease offers, and that method will return a set of task assignments.
 * 

 * The {@code TaskScheduler} stores any unused lease offers and will apply them during future calls to
 * {@code scheduleOnce()} until a time expires, which is defined by the lease offer expiry time that you set
 * when you build the {@code TaskScheduler} (the default is 120 seconds). Upon reaching the expiry time, the
 * {@code TaskScheduler} rejects expired resource lease offers by invoking the action you supplied when you
 * built the {@code TaskScheduler}.
 *

* Note that when you launch a task that has been scheduled by the {@code TaskScheduler}, you should call * the task assigner action available from the {@link #getTaskAssigner getTaskAssigner()} method. When that * task completes, you should call the task unassigner action available from the * {@link #getTaskUnAssigner getTaskUnAssigner()} method. These actions make the {@code TaskScheduler} keep * track of launched tasks. The {@code TaskScheduler} then makes these tracked tasks available to its * scheduling optimization functions. * * Do not call the scheduler concurrently. The scheduler assigns tasks in the order that they are received in a * particular list. It checks each task against available resources until it finds a match. *

* You create your {@code TaskScheduler} by means of the {@link TaskScheduler.Builder}. It provides methods with
 * which you can adjust the scheduler's autoscaling rules, fitness calculators, and so forth.
 *
 * @see <a href="https://en.wikipedia.org/wiki/Builder_pattern">Wikipedia: Builder pattern</a>
 */
public class TaskScheduler {

    // Divisor applied to the number of VMs in doSchedule() to decide how many concurrent
    // evaluation batches to create.
    private static final int PARALLEL_SCHED_EVAL_MIN_BATCH_SIZE = 30;

    /**
     * The Builder is how you construct a {@link TaskScheduler} object with particular characteristics. Chain
     * its methods and then call {@link #build build()} to create a {@code TaskScheduler}.
     *
     * @see <a href="https://en.wikipedia.org/wiki/Builder_pattern">Wikipedia: Builder pattern</a>
     */
    public final static class Builder {

        // Required: the TaskScheduler constructor rejects a builder whose reject action is still null.
        private Action1 leaseRejectAction=null;
        // Seconds an unused offer is kept before it is considered expired.
        private long leaseOfferExpirySecs=120;
        // Upper bound on offers rejected per expiry period; overridden by withRejectAllExpiredOffers().
        private int maxOffersToReject=4;
        private boolean rejectAllExpiredOffers=false;
        private VMTaskFitnessCalculator fitnessCalculator = new DefaultFitnessCalculator();
        // Autoscaling is only set up by the TaskScheduler constructor when this is non-null and non-empty.
        private String autoScaleByAttributeName=null;
        private String autoScalerMapHostnameAttributeName=null;
        private String autoScaleDownBalancedByAttributeName=null;
        // The two scale-down settings below are reconciled with each other in build().
        private ScaleDownOrderEvaluator scaleDownOrderEvaluator;
        private Map weightedScaleDownConstraintEvaluators;
        private Action1 autoscalerCallback=null;
        private long delayAutoscaleUpBySecs=0L;
        private long delayAutoscaleDownBySecs=0L;
        private long disabledVmDurationInSecs =0L;
        private List autoScaleRules=new ArrayList<>();
        // Default "good enough" test. Fitness is documented on withFitnessGoodEnoughFunction() as a value
        // between 0.0 and 1.0, so f>1.0 is never satisfied for such values and no fit is accepted early.
        private Func1 isFitnessGoodEnoughFunction = new Func1() {
            @Override
            public Boolean call(Double f) {
                return f>1.0;
            }
        };
        private boolean disableShortfallEvaluation=false;
        private Map resAllocs=null;
        private boolean singleOfferMode=false;

        /**
         * (Required) Call this method to establish a method that your task scheduler will call to notify you
         * that it has rejected a resource offer. In this method, you should tell Mesos that you are declining
         * the associated offer.
* * @param leaseRejectAction the action to trigger when the task scheduler rejects a VM lease, with the * lease being rejected as the only argument * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} */ public Builder withLeaseRejectAction(Action1 leaseRejectAction) { this.leaseRejectAction = leaseRejectAction; return this; } /** * Call this method to set the expiration time for resource offers. Your task scheduler will reject any * offers that remain unused if this expiration period from the time of the offer expires. This ensures * your scheduler will not hoard unuseful offers. The default is 120 seconds. * * @param leaseOfferExpirySecs the amount of time the scheduler will keep an unused lease available for * a later-scheduled task before it considers the lease to have expired, in * seconds * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} */ public Builder withLeaseOfferExpirySecs(long leaseOfferExpirySecs) { this.leaseOfferExpirySecs = leaseOfferExpirySecs; return this; } /** * Call this method to set the maximum number of offers to reject within a time period equal to lease expiry * seconds, set with {@code leaseOfferExpirySecs()}. Default is 4. * @param maxOffersToReject Maximum number of offers to reject. * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} */ public Builder withMaxOffersToReject(int maxOffersToReject) { if(!rejectAllExpiredOffers) this.maxOffersToReject = maxOffersToReject; return this; } /** * Indicate that all offers older than the set expiry time must be rejected. By default this is set to false. * If false, Fenzo rejects a maximum number of offers set using {@link #withMaxOffersToReject(int)} per each * time period spanning the expiry time, set by {@link #withLeaseOfferExpirySecs(long)}. 
* @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} */ public Builder withRejectAllExpiredOffers() { this.rejectAllExpiredOffers = true; this.maxOffersToReject = Integer.MAX_VALUE; return this; } /** * Call this method to add a fitness calculator that your scheduler will use to compute the suitability * of a particular host for a particular task. You can only add a single fitness calculator to a * scheduler; if you attempt to add a second fitness calculator, it will override the first one. * * @param fitnessCalculator the fitness calculator you want this scheduler to use in its evaluations * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} * @see Fitness Calculators */ public Builder withFitnessCalculator(VMTaskFitnessCalculator fitnessCalculator) { this.fitnessCalculator = fitnessCalculator; return this; } /** * Call this method to indicate which host attribute you want your task scheduler to use in order to * distinguish which hosts are in which autoscaling groups. You must call this method before you call * {@link #withAutoScaleRule(AutoScaleRule)}. * * @param name the name of the host attribute that defines which autoscaling group it is in * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} * @see Autoscaling */ public Builder withAutoScaleByAttributeName(String name) { this.autoScaleByAttributeName = name; return this; } /** * Use the given host attribute name to determine the alternate hostname of virtual machine to use as an * argument for an autoscaling action. *

* In some circumstances (for instance with Amazon Web Services), the host name is not the correct
         * identifier for the host in the context of an autoscaling action (for instance, in AWS, you need the
         * EC2 instance identifier). If this is the case for your system, you need to implement a function that
         * maps the host name to the identifier for the host in an autoscaling context so that Fenzo can perform
         * autoscaling properly. You provide this function to the task manager by means of this builder method.
         *
         * @param name the attribute name to use as the alternate host identifier in an autoscaling context
         * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
         * @see Autoscaling
         */
        public Builder withAutoScalerMapHostnameAttributeName(String name) {
            this.autoScalerMapHostnameAttributeName = name;
            return this;
        }

        /**
         * Call this method to tell the autoscaler to try to maintain a balance of host varieties when it scales
         * down a cluster. Pass the method a host attribute, and the autoscaler will attempt to scale down in
         * such a way as to maintain a similar number of hosts with each value for that attribute.
         *
         * @param name the name of the attribute
         * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
         * @see Autoscaling
         */
        public Builder withAutoScaleDownBalancedByAttributeName(String name) {
            this.autoScaleDownBalancedByAttributeName = name;
            return this;
        }

        /**
         * Call this method to set {@link ScaleDownOrderEvaluator}. If no evaluator is set but weighted
         * scale down constraint evaluators are, {@link #build build()} falls back to a
         * {@code NoOpScaleDownOrderEvaluator}.
         *
         * @param scaleDownOrderEvaluator scale down ordering evaluator
         * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
         */
        public Builder withScaleDownOrderEvaluator(ScaleDownOrderEvaluator scaleDownOrderEvaluator) {
            this.scaleDownOrderEvaluator = scaleDownOrderEvaluator;
            return this;
        }

        /**
         * Ordered list of scale down constraints evaluators.
* If a scale down order evaluator is set but this map is not, {@link #build build()} substitutes an
         * empty map.
         *
         * @param weightedScaleDownConstraintEvaluators scale down evaluators
         * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
         */
        public Builder withWeightedScaleDownConstraintEvaluators(Map weightedScaleDownConstraintEvaluators) {
            // The map is stored by reference; no copy is taken here.
            this.weightedScaleDownConstraintEvaluators = weightedScaleDownConstraintEvaluators;
            return this;
        }

        /**
         * Use the given function to determine if the fitness of a host for a task is good enough that the task
         * scheduler should stop looking for a more fit host. Pass this method a function that takes a value
         * between 0.0 (completely unfit) and 1.0 (perfectly fit) that describes the fitness of a particular
         * host for a particular task, and decides, by returning a boolean value, whether that value is a "good
         * enough" fit such that the task scheduler should go ahead and assign the task to the host. If you
         * write this function to only return true for values at or near 1.0, the task scheduler will spend more
         * time searching for a good fit; if you write the function to return true for lower values, the task
         * scheduler will be able to find a host to assign the task to more quickly.
         *

* By default, if you do not build your task scheduler by passing a function into this method, the
         * task scheduler will always search all of the available hosts for the best possible fit for every
         * task.
         *
         * @param f a single-argument function that accepts a double parameter, representing the fitness, and
         *          returns a {@code Boolean} indicating whether the fitness is good enough to constitute a
         *          successful match between the host and task
         * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
         */
        public Builder withFitnessGoodEnoughFunction(Func1 f) {
            // Stored without a null check; the scheduler later invokes it through isGoodEnough().
            this.isFitnessGoodEnoughFunction = f;
            return this;
        }

        /**
         * Disable resource shortfall evaluation. The shortfall evaluation is performed when evaluating the
         * autoscaling needs. This is useful for evaluating the actual resources needed to scale up by, for
         * pending tasks, which may be greater than the number of resources scaled up by thresholds based scale
         * up.
         *

* This evaluation can be computaionally expensive and/or may scale up aggressively, initially, to more * resources than needed. The initial aggressive scale up is corrected later by scale down, which is * triggered by scale down evaluation after a cool down period transpires. * * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} * @see Autoscaling */ public Builder disableShortfallEvaluation() { disableShortfallEvaluation = true; return this; } /** * Call this method to set the initial limitations on how many resources will be available to each task * group. * * @param resAllocs a Map with the task group name as keys and resource allocation limits as values * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} * @see Resource Allocation * Limits */ public Builder withInitialResAllocs(Map resAllocs) { this.resAllocs = resAllocs; return this; } /** * Adds an autoscaling rule that governs the behavior by which this scheduler will autoscale hosts of a * certain type. You can chain this method multiple times, adding a new autoscaling rule each time (one * for each autoscale group). *

* Before you call this method you must first call * {@link #withAutoScaleByAttributeName withAutoScaleByAttributeName()} to indicate which host * attribute you are using to identify which hosts are in which autoscaling groups. * * @param rule the autoscaling rule to add * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} * @throws IllegalArgumentException if you have not properly initialized autoscaling or if your rule is * poorly formed * @see Autoscaling */ public Builder withAutoScaleRule(AutoScaleRule rule) { if(autoScaleByAttributeName==null || autoScaleByAttributeName.isEmpty()) throw new IllegalArgumentException("Auto scale by attribute name must be set before setting rules"); if(rule.getMinIdleHostsToKeep()<1) throw new IllegalArgumentException("Min Idle must be >0"); if(rule.getMinIdleHostsToKeep()>rule.getMaxIdleHostsToKeep()) throw new IllegalArgumentException("Min Idle must be <= Max Idle hosts"); this.autoScaleRules.add(rule); return this; } /* * The callback you pass to this method receives an indication when an autoscale action is to be * performed. This indicates which autoscale rule prompted the action and whether the action is to scale * up or scale down the autoscale group. The callback then initiates the appropriate scaling actions. * * @see Autoscaling */ public Builder withAutoScalerCallback(Action1 callback) { this.autoscalerCallback = callback; return this; } /** * Delay the autoscale up actions to reduce unnecessary actions due to short periods of breach of scale up * policy rules. Such scale ups can be caused by, for example, the periodic offer rejections that result in * offers coming back shortly. They can also be caused by certain environments where tasks are first scheduled * to replace existing tasks. *

* The autoscaler takes the scale up action based on the latest scale up request value after the delay. *

* The default is 0 secs. Ideally, you should set this to be at least two times the larger of the two values: *

    *
  • Delay between successive calls to {@link TaskScheduler#scheduleOnce(List, List)}.
  • *
  • Delay in getting a rejected offer back from Mesos.
  • *
* @param delayAutoscaleUpBySecs Delay autoscale up actions by this many seconds. * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} * @throws IllegalArgumentException if you give negative number for {@code delayAutoscalerbySecs}. * @see Autoscaling */ public Builder withDelayAutoscaleUpBySecs(long delayAutoscaleUpBySecs) { if(delayAutoscaleUpBySecs < 0L) throw new IllegalArgumentException("Delay secs can't be negative: " + delayAutoscaleUpBySecs); this.delayAutoscaleUpBySecs = delayAutoscaleUpBySecs; return this; } /** * Delay the autoscale down actions to reduce unnecessary actions due to short periods of breach of scale down * policy rules. Such scale downs can be caused by, for example, certain environments where existing tasks are * removed before replacing them with new tasks. *

* The autoscaler takes the scale down action based on the latest scale down request value after the delay. *

* The default is 0 secs. Ideally, you should set this to be at least two times the delay before terminated * tasks are replaced successfully. * @param delayAutoscaleDownBySecs Delay autoscale down actions by this many seconds. * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} * @throws IllegalArgumentException if you give negative number for {@code delayAutoscalerbySecs}. * @see Autoscaling */ public Builder withDelayAutoscaleDownBySecs(long delayAutoscaleDownBySecs) { if(delayAutoscaleDownBySecs < 0L) throw new IllegalArgumentException("Delay secs can't be negative: " + delayAutoscaleDownBySecs); this.delayAutoscaleDownBySecs = delayAutoscaleDownBySecs; return this; } /** * How long to disable a VM when going through a scale down action. Note that the value used will be the max * between this value and the {@link AutoScaleRule#getCoolDownSecs()} value and that this value should be * greater than the {@link AutoScaleRule#getCoolDownSecs()} value. If the supplied {@link AutoScaleAction} * does not actually terminate the instance in this time frame then the VM will become enabled. This option is useful * when you want to increase the disabled time of a VM because the implementation of the {@link AutoScaleAction} may * take longer than the cooldown period. * * @param disabledVmDurationInSecs Disable VMs about to be terminated by this many seconds. * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler} * @throws IllegalArgumentException if {@code disabledVmDurationInSecs} is not greater than 0. 
* @see Autoscaling
         */
        public Builder withAutoscaleDisabledVmDurationInSecs(long disabledVmDurationInSecs) {
            if (disabledVmDurationInSecs > 0L) {
                this.disabledVmDurationInSecs = disabledVmDurationInSecs;
                return this;
            }
            // Zero and negative durations are both rejected.
            throw new IllegalArgumentException(
                    "disabledVmDurationInSecs must be greater than 0: " + disabledVmDurationInSecs);
        }

        /**
         * Declares that the cluster delivers resource offers only once per VM (host). Mesos normally sends
         * offers repeatedly as resources free up on a host when tasks complete. This method provides
         * experimental support for a mode in which Fenzo is told about the entire set of resources on a host
         * once, in a model similar to Amazon ECS, and then internally tracks total versus used resources
         * based on the tasks assigned and later unassigned. No further resource offers are expected after
         * the initial one.
         *
         * @param b True if only one resource offer is expected per host, false by default.
         * @return this same {@code Builder}, suitable for further chaining or to build the {@link TaskScheduler}
         */
        public Builder withSingleOfferPerVM(boolean b) {
            this.singleOfferMode = b;
            return this;
        }

        /**
         * Creates a {@link TaskScheduler} based on the various builder methods you have chained.
* * @return a {@code TaskScheduler} built according to the specifications you indicated */ public TaskScheduler build() { if(scaleDownOrderEvaluator == null) { if(weightedScaleDownConstraintEvaluators != null) { scaleDownOrderEvaluator = new NoOpScaleDownOrderEvaluator(); } } else { if(weightedScaleDownConstraintEvaluators == null) { weightedScaleDownConstraintEvaluators = Collections.emptyMap(); } } return new TaskScheduler(this); } } private static class EvalResult { List assignmentResults; TaskAssignmentResult result; int numAllocationTrials; Exception exception; private EvalResult(List assignmentResults, TaskAssignmentResult result, int numAllocationTrials, Exception e) { this.assignmentResults = assignmentResults; this.result = result; this.numAllocationTrials = numAllocationTrials; this.exception = e; } } private final AssignableVMs assignableVMs; private static final Logger logger = LoggerFactory.getLogger(TaskScheduler.class); private static final long purgeVMsIntervalSecs = 60; private long lastVMPurgeAt=System.currentTimeMillis(); private final Builder builder; private final StateMonitor stateMonitor; private final AutoScaler autoScaler; private final int EXEC_SVC_THREADS=Runtime.getRuntime().availableProcessors(); private final ExecutorService executorService = Executors.newFixedThreadPool(EXEC_SVC_THREADS); private final AtomicBoolean isShutdown = new AtomicBoolean(); private final ResAllocsEvaluater resAllocsEvaluator; private final TaskTracker taskTracker; private volatile boolean usingSchedulingService = false; private final String usingSchedSvcMesg = "Invalid call when using task scheduling service"; private TaskScheduler(Builder builder) { if(builder.leaseRejectAction ==null) throw new IllegalArgumentException("Lease reject action must be non-null"); this.builder = builder; this.stateMonitor = new StateMonitor(); taskTracker = new TaskTracker(); resAllocsEvaluator = new ResAllocsEvaluater(taskTracker, builder.resAllocs); assignableVMs = new 
AssignableVMs(taskTracker, builder.leaseRejectAction, builder.leaseOfferExpirySecs, builder.maxOffersToReject, builder.autoScaleByAttributeName, builder.singleOfferMode, builder.autoScaleByAttributeName); if(builder.autoScaleByAttributeName != null && !builder.autoScaleByAttributeName.isEmpty()) { ScaleDownConstraintExecutor scaleDownConstraintExecutor = builder.scaleDownOrderEvaluator == null ? null : new ScaleDownConstraintExecutor(builder.scaleDownOrderEvaluator, builder.weightedScaleDownConstraintEvaluators); autoScaler = new AutoScaler(builder.autoScaleByAttributeName, builder.autoScalerMapHostnameAttributeName, builder.autoScaleDownBalancedByAttributeName, builder.autoScaleRules, assignableVMs, builder.disableShortfallEvaluation, assignableVMs.getActiveVmGroups(), assignableVMs.getVmCollection(), scaleDownConstraintExecutor); if(builder.autoscalerCallback != null) autoScaler.setCallback(builder.autoscalerCallback); if(builder.delayAutoscaleDownBySecs > 0L) autoScaler.setDelayScaleDownBySecs(builder.delayAutoscaleDownBySecs); if(builder.delayAutoscaleUpBySecs > 0L) autoScaler.setDelayScaleUpBySecs(builder.delayAutoscaleUpBySecs); if (builder.disabledVmDurationInSecs > 0L) { autoScaler.setDisabledVmDurationInSecs(builder.disabledVmDurationInSecs); } } else { autoScaler=null; } } void checkIfShutdown() throws IllegalStateException { if(isShutdown.get()) throw new IllegalStateException("TaskScheduler already shutdown"); } /** * Set the autoscale call back action. The callback you pass to this method receives an indication when an * autoscale action is to be performed, telling it which autoscale rule prompted the action and whether the * action is to scale up or scale down the autoscale group. The callback then initiates the appropriate * scaling actions. 
* * @param callback the callback to invoke for autoscale actions * @throws IllegalStateException if no autoscaler was established * @see Autoscaling */ public void setAutoscalerCallback(Action1 callback) throws IllegalStateException { checkIfShutdown(); if(autoScaler==null) throw new IllegalStateException("No autoScaler setup"); autoScaler.setCallback(callback); } public TaskTracker getTaskTracker() { return taskTracker; } private TaskAssignmentResult getSuccessfulResult(List results) { double bestFitness=0.0; TaskAssignmentResult bestResult=null; for(int r=results.size()-1; r>=0; r--) { // change to using fitness value from assignment result TaskAssignmentResult res = results.get(r); if(res!=null && res.isSuccessful()) { if(bestResult==null || res.getFitness()>bestFitness || (res.getFitness()==bestFitness && res.getHostname().compareTo(bestResult.getHostname())<0)) { bestFitness = res.getFitness(); bestResult = res; } } } return bestResult; } private boolean isGoodEnough(TaskAssignmentResult result) { return builder.isFitnessGoodEnoughFunction.call(result.getFitness()); } /** * Get the current mapping of resource allocations registered with the scheduler. * * @return current mapping of resource allocations * @see Resource Allocation * Limits */ public Map getResAllocs() { return resAllocsEvaluator.getResAllocs(); } /** * Add a new resource allocation, or replace an existing one of the same name. * * @param resAllocs the resource allocation to add or replace * @see Resource Allocation * Limits */ public void addOrReplaceResAllocs(ResAllocs resAllocs) { resAllocsEvaluator.replaceResAllocs(resAllocs); } /** * Remove a resource allocation associated with the specified name. * * @param groupName the name of the resource allocation to remove * @see Resource Allocation * Limits */ public void removeResAllocs(String groupName) { resAllocsEvaluator.remResAllocs(groupName); } /** * Get the autoscale rules currently registered with the scheduler. 
* * @return a collection of currently registered autoscale rules * @see Autoscaling */ public Collection getAutoScaleRules() { if(autoScaler==null) return Collections.emptyList(); return autoScaler.getRules(); } /** * Add a new autoscale rule to those used by this scheduler. If a rule with the same name exists, it is * replaced. This autoscale rule will be used the next time the scheduler invokes its autoscale action. * * @param rule the autoscale rule to add * @see Autoscaling */ public void addOrReplaceAutoScaleRule(AutoScaleRule rule) { autoScaler.replaceRule(rule); } /** * Remove the autoscale rule associated with the given name from those used by the scheduler. * * @param ruleName name of the autoscale rule to remove * @see Autoscaling */ public void removeAutoScaleRule(String ruleName) { autoScaler.removeRule(ruleName); } /* package */ void setUsingSchedulingService(boolean b) { usingSchedulingService = b; } /* package */ void setTaskToClusterAutoScalerMapGetter(Func1> getter) { if (autoScaler != null) autoScaler.setTaskToClustersGetter(getter); } /* package */ AutoScaler getAutoScaler() { return autoScaler; } /** * Schedule a list of task requests by using any newly-added resource leases in addition to any * previously-unused leases. This is the main scheduling method that attempts to assign resources to task * requests. Resource leases are associated with a host name. A host can have zero or more leases. Leases * that the scheduler does not use in this scheduling run it stores for later use until they expire. *

* You cannot add a lease object with an Id equal to that of a stored lease object; {@code scheduleOnce()} * will throw an {@code IllegalStateException}. Upon throwing this exception, if you provided multiple * leases in the {@code newLeases} argument, the state of internally maintained list of unused leases will * be in an indeterminate state - some of the leases may have been successfully added. *

* The task scheduler rejects any expired leases before scheduling begins. Then, it combines all leases of a * host to determine the total available resources on the host. The scheduler then tries each task request, * in the order that they appear in the given list, for assignment against the available hosts until * successful. For each task the scheduler returns either a successful assignment result, or, a set of * assignment failures. *

* After the scheduler evaluates all assignments, it will reject remaining leases if they are unused and * their offer time is further in the past than lease expiration interval. This prevents the scheduler from * hoarding leases. If you provided an autoscaler, the scheduler then launches autoscale evaluation to run * asynchronously, which runs each registered autoscale rule based on its policy. *

* The successful assignments contain hosts to which tasks have been successfully assigned and the offers for that * host that were used for the assignments. Fenzo removes those offers from its internal state. Normally, you * would use those offers to launch the tasks. For any reason if you do not launch those tasks, you must either * reject the offers to Mesos, or, re-add them to Fenzo with the next call to {@link #scheduleOnce(List, List)}. * Otherwise, those offers would be "leaked out". *

* Unexpected exceptions may arise during scheduling, for example, due to uncaught exceptions in user provided * plugins. The scheduling routine stops upon catching any unexpected exceptions. These exceptions are surfaced to * you in one or both of two ways. *

    *
  • The returned result object will contain the exceptions encountered in * {@link SchedulingResult#getExceptions()}. In this case, no assignments would have been made.
  • *
  • This method may throw {@code IllegalStateException} with its cause set to the uncaught exception. In this * case the internal state of Fenzo will be undefined.
  • *
* If there are exceptions, the internal state of Fenzo may be corrupt with no way to undo any partial effects. * * @param requests a list of task requests to match with resources, in their given order * @param newLeases new resource leases from hosts that the scheduler can use along with any previously * ununsed leases * @return a {@link SchedulingResult} object that contains a task assignment results map and other summaries * @throws IllegalStateException if you call this method concurrently, or, if you try to add an existing lease * again, or, if there was unexpected exception during the scheduling iteration, or, if using * {@link TaskSchedulingService}, which will instead invoke scheduling from within. Unexpected exceptions * can arise from uncaught exceptions in user defined plugins. It is also thrown if the scheduler has been shutdown * via the {@link #shutdown()} method. */ public SchedulingResult scheduleOnce( List requests, List newLeases) throws IllegalStateException { if (usingSchedulingService) throw new IllegalStateException(usingSchedSvcMesg); final Iterator iterator = requests != null ? requests.iterator() : Collections.emptyIterator(); TaskIterator taskIterator = new TaskIterator() { @Override public Assignable next() { if (iterator.hasNext()) return Assignable.success(iterator.next()); return null; } }; return scheduleOnce(taskIterator, newLeases); } /** * Variant of {@link #scheduleOnce(List, List)} that takes a task iterator instead of task list. * @param taskIterator Iterator for tasks to assign resources to. * @param newLeases new resource leases from hosts that the scheduler can use along with any previously * ununsed leases * @return a {@link SchedulingResult} object that contains a task assignment results map and other summaries * @throws IllegalStateException if you call this method concurrently, or, if you try to add an existing lease * again, or, if there was unexpected exception during the scheduling iteration. 
For example, unexpected exceptions * can arise from uncaught exceptions in user defined plugins. It is also thrown if the scheduler has been shutdown * via the {@link #shutdown()} method. */ /* package */ SchedulingResult scheduleOnce( TaskIterator taskIterator, List newLeases) throws IllegalStateException { checkIfShutdown(); try (AutoCloseable ac = stateMonitor.enter()) { return doScheduling(taskIterator, newLeases); } catch (Exception e) { logger.error("Error with scheduling run: " + e.getMessage(), e); if(e instanceof IllegalStateException) throw (IllegalStateException)e; else { logger.warn("Unexpected exception: " + e.getMessage()); throw new IllegalStateException("Unexpected exception during scheduling run: " + e.getMessage(), e); } } } /** * Variant of {@link #scheduleOnce(List, List)} that should be only used to schedule a pseudo iteration as it * ignores the StateMonitor lock. * @param taskIterator Iterator for tasks to assign resources to. * @return a {@link SchedulingResult} object that contains a task assignment results map and other summaries */ /* package */ SchedulingResult pseudoScheduleOnce(TaskIterator taskIterator) throws Exception { return doScheduling(taskIterator, Collections.emptyList()); } private SchedulingResult doScheduling(TaskIterator taskIterator, List newLeases) throws Exception { long start = System.currentTimeMillis(); final SchedulingResult schedulingResult = doSchedule(taskIterator, newLeases); if((lastVMPurgeAt + purgeVMsIntervalSecs*1000) < System.currentTimeMillis()) { lastVMPurgeAt = System.currentTimeMillis(); logger.info("Purging inactive VMs"); assignableVMs.purgeInactiveVMs( // explicitly exclude VMs that have assignments schedulingResult.getResultMap() == null? 
Collections.emptySet() : new HashSet<>(schedulingResult.getResultMap().keySet()) ); } schedulingResult.setRuntime(System.currentTimeMillis() - start); return schedulingResult; } /** * Core of a scheduling iteration. Obtains the ordered assignable VMs for {@code newLeases}; when there are none, * every task returned by the iterator is recorded as failed for the autoscaler. Otherwise each task is checked * against resource allocation limits and the maximum resource limit, and is then evaluated concurrently across * batches of VMs; a successful result, if any, is assigned, else the collected failures are added to the result. * The task loop stops as soon as an evaluation exception has been recorded on the result. * If no exceptions were recorded, per-host assignments are put into the result map keyed by hostname, the count * returned by {@code removeLimitedLeases} is added to the rejected lease count, and the autoscaler, when one is * configured, is invoked with the idle resources and the tasks that could not be assigned. * @param taskIterator supplies tasks (or task failures) until it returns {@code null} * @param newLeases new resource leases to add for this iteration * @return the scheduling result with assignments, failures, exceptions and summary counts * @throws Exception if preparing the VMs or evaluating assignments fails unexpectedly */ private SchedulingResult doSchedule( TaskIterator taskIterator, List newLeases) throws Exception { AtomicInteger rejectedCount = new AtomicInteger(); List avms = assignableVMs.prepareAndGetOrderedVMs(newLeases, rejectedCount); if(logger.isDebugEnabled()) logger.debug("Got {} avms", avms.size()); List inactiveAVMs = assignableVMs.getInactiveVMs(); if(logger.isDebugEnabled()) logger.debug("Found {} VMs with non-zero offers to assign from", avms.size()); final boolean hasResAllocs = resAllocsEvaluator.prepare(); //logger.info("Got " + avms.size() + " AVMs to schedule on"); int totalNumAllocations=0; Set failedTasksForAutoScaler = new HashSet<>(); Map resultMap = new HashMap<>(avms.size()); final SchedulingResult schedulingResult = new SchedulingResult(resultMap); if(avms.isEmpty()) { while (true) { final Assignable taskOrFailure = taskIterator.next(); if (taskOrFailure == null) break; failedTasksForAutoScaler.add(taskOrFailure.getTask()); } } else { while (true) { final Assignable taskOrFailure = taskIterator.next(); if(logger.isDebugEnabled()) logger.debug("TaskSched: task=" + (taskOrFailure == null? 
"null" : taskOrFailure.getTask().getId())); if (taskOrFailure == null) break; if(taskOrFailure.hasFailure()) { schedulingResult.addFailures( taskOrFailure.getTask(), Collections.singletonList(new TaskAssignmentResult( assignableVMs.getDummyVM(), taskOrFailure.getTask(), false, Collections.singletonList(taskOrFailure.getAssignmentFailure()), null, 0 ) )); continue; } TaskRequest task = taskOrFailure.getTask(); failedTasksForAutoScaler.add(task); if(hasResAllocs) { if(resAllocsEvaluator.taskGroupFailed(task.taskGroupName())) { if(logger.isDebugEnabled()) logger.debug("Resource allocation limits reached for task: " + task.getId()); continue; } final AssignmentFailure resAllocsFailure = resAllocsEvaluator.hasResAllocs(task); if(resAllocsFailure != null) { final List failures = Collections.singletonList(new TaskAssignmentResult(assignableVMs.getDummyVM(), task, false, Collections.singletonList(resAllocsFailure), null, 0.0)); schedulingResult.addFailures(task, failures); failedTasksForAutoScaler.remove(task); // don't scale up for resAllocs failures if(logger.isDebugEnabled()) logger.debug("Resource allocation limit reached for task " + task.getId() + ": " + resAllocsFailure); continue; } } final AssignmentFailure maxResourceFailure = assignableVMs.getFailedMaxResource(null, task); if(maxResourceFailure != null) { final List failures = Collections.singletonList(new TaskAssignmentResult(assignableVMs.getDummyVM(), task, false, Collections.singletonList(maxResourceFailure), null, 0.0)); schedulingResult.addFailures(task, failures); if(logger.isDebugEnabled()) logger.debug("Task {}: maxResource failure: {}", task.getId(), maxResourceFailure); continue; } // create batches of VMs to evaluate assignments concurrently across the batches final BlockingQueue virtualMachines = new ArrayBlockingQueue<>(avms.size(), false, avms); int nThreads = (int)Math.ceil((double)avms.size()/ PARALLEL_SCHED_EVAL_MIN_BATCH_SIZE); List> futures = new ArrayList<>(); if(logger.isDebugEnabled()) 
logger.debug("Launching {} threads for evaluating assignments for task {}", nThreads, task.getId()); /* NOTE(review): the loop header and the submission of the Callable below appear truncated in this copy (the loop condition and the call that creates each future are missing) - restore from the upstream source before compiling. */ for(int b=0; b() { @Override public EvalResult call() throws Exception { return evalAssignments(task, virtualMachines); } })); } List results = new ArrayList<>(); List bestResults = new ArrayList<>(); for(Future f: futures) { try { EvalResult evalResult = f.get(); if(evalResult.exception!=null) { logger.warn("Error during concurrent task assignment eval - " + evalResult.exception.getMessage(), evalResult.exception); schedulingResult.addException(evalResult.exception); } else { results.add(evalResult); bestResults.add(evalResult.result); if(logger.isDebugEnabled()) logger.debug("Task {}: best result so far: {}", task.getId(), evalResult.result); totalNumAllocations += evalResult.numAllocationTrials; } } catch (InterruptedException|ExecutionException e) { logger.error("Unexpected during concurrent task assignment eval - " + e.getMessage(), e); } } if(!schedulingResult.getExceptions().isEmpty()) break; TaskAssignmentResult successfulResult = getSuccessfulResult(bestResults); List failures = new ArrayList<>(); if(successfulResult == null) { if(logger.isDebugEnabled()) logger.debug("Task {}: no successful results", task.getId()); for(EvalResult er: results) failures.addAll(er.assignmentResults); schedulingResult.addFailures(task, failures); } else { if(logger.isDebugEnabled()) logger.debug("Task {}: found successful assignment on host {}", task.getId(), successfulResult.getHostname()); successfulResult.assignResult(); failedTasksForAutoScaler.remove(task); } } } List idleResourcesList = new ArrayList<>(); if(schedulingResult.getExceptions().isEmpty()) { List expirableLeases = new ArrayList<>(); for (AssignableVirtualMachine avm : avms) { VMAssignmentResult assignmentResult = avm.resetAndGetSuccessfullyAssignedRequests(); if (assignmentResult == null) { if (!avm.hasPreviouslyAssignedTasks()) idleResourcesList.add(avm.getCurrTotalLease()); 
expirableLeases.add(avm.getCurrTotalLease()); } else { resultMap.put(avm.getHostname(), assignmentResult); } } // Process inactive VMs List idleInactiveAVMs = inactiveAVMs.stream() .filter(vm -> vm.getCurrTotalLease() != null && !vm.hasPreviouslyAssignedTasks()) .map(AssignableVirtualMachine::getCurrTotalLease) .collect(Collectors.toList()); rejectedCount.addAndGet(assignableVMs.removeLimitedLeases(expirableLeases)); final AutoScalerInput autoScalerInput = new AutoScalerInput(idleResourcesList, idleInactiveAVMs, failedTasksForAutoScaler); if (autoScaler != null) autoScaler.doAutoscale(autoScalerInput); } schedulingResult.setLeasesAdded(newLeases.size()); schedulingResult.setLeasesRejected(rejectedCount.get()); schedulingResult.setNumAllocations(totalNumAllocations); schedulingResult.setTotalVMsCount(assignableVMs.getTotalNumVMs()); schedulingResult.setIdleVMsCount(idleResourcesList.size()); return schedulingResult; } /* package */ Map> createPseudoHosts(Map groupCounts) { return assignableVMs.createPseudoHosts(groupCounts, autoScaler == null? name -> null : autoScaler::getRule); } /* package */ void removePseudoHosts(Map> hostsMap) { assignableVMs.removePseudoHosts(hostsMap); } /* package */ void removePseudoAssignments() { taskTracker.clearAssignedTasks(); // this should suffice for pseudo assignments } /** * Returns the state of resources on all known hosts. You can use this for debugging or informational * purposes (occasionally). This method obtains and holds a lock for the duration of creating the state * information. Scheduling runs are blocked around the lock. * * @return a Map of state information with the hostname as the key and a Map of resource state as the value. * The resource state Map contains a resource as the key and a two element Double array - the first * element of which contains the amount of the resource used and the second element contains the * amount still available (available does not include used). 
* @see How to Learn Which Resources Are Available on Which Hosts * @throws IllegalStateException if called concurrently with {@link #scheduleOnce(List, List)} or if called when * using a {@link TaskSchedulingService}. */ public Map> getResourceStatus() throws IllegalStateException { if (usingSchedulingService) throw new IllegalStateException(usingSchedSvcMesg); return getResourceStatusIntl(); } /* package */ Map> getResourceStatusIntl() { try (AutoCloseable ac = stateMonitor.enter()) { return assignableVMs.getResourceStatus(); } catch (Exception e) { logger.error("Unexpected error from state monitor: " + e.getMessage()); throw new RuntimeException(e); } } /** * Returns the current state of all known hosts. You might occasionally use this for debugging or * informational purposes. If you call this method, it will obtain and hold a lock for as long as it takes * to create the state information. Scheduling runs are blocked around the lock. * * @return a list containing the current state of all known VMs * @throws IllegalStateException if called concurrently with {@link #scheduleOnce(List, List)} or if called when * using a {@link TaskSchedulingService}. * @see How to Learn the Amount of Resources Currently Available on Particular Hosts */ public List getVmCurrentStates() throws IllegalStateException { if (usingSchedulingService) throw new IllegalStateException(usingSchedSvcMesg); return getVmCurrentStatesIntl(); } /* package */ List getVmCurrentStatesIntl() throws IllegalStateException { try (AutoCloseable ac = stateMonitor.enter()) { return assignableVMs.getVmCurrentStates(); } catch (Exception e) { logger.error("Unexpected error from state monitor: " + e.getMessage(), e); throw new IllegalStateException(e); } } private EvalResult evalAssignments(TaskRequest task, BlockingQueue virtualMachines) { // This number below sort of controls minimum machines to eval, choose carefully. // Having it too small increases overhead of getting next machine to evaluate on. 
// Having it too high increases latency of thread before it returns when done try { int N=10; List buf = new ArrayList<>(N); List results = new ArrayList<>(); while(true) { buf.clear(); int n = virtualMachines.drainTo(buf, N); if(n == 0) return new EvalResult(results, getSuccessfulResult(results), results.size(), null); for(int m=0; m * In addition, in your framework's task completion callback that you supply to Mesos, you must call your * task scheduler's {@link #getTaskUnAssigner() getTaskUnassigner().call()} method to notify Fenzo that the * task is no longer assigned. *

* Some scheduling optimizers need to know not only which tasks are waiting to be scheduled and which hosts * have resource offers available, but also which tasks have previously been assigned and are currently * running on hosts. These two methods help Fenzo provide this information to these scheduling optimizers. *

* Note that you may not call the task assigner action concurrently with * {@link #scheduleOnce(java.util.List, java.util.List) scheduleOnce()}. If you do so, the task assigner * action will throw an {@code IllegalStateException}. * * @return a task assigner action * @throws IllegalStateException if the scheduler is shutdown via the {@link #isShutdown} method. */ public Action2 getTaskAssigner() throws IllegalStateException { if (usingSchedulingService) throw new IllegalStateException(usingSchedSvcMesg); return getTaskAssignerIntl(); } /** * Internal variant of {@link #getTaskAssigner()} that skips the scheduling-service check. The returned action * enters the state monitor and then records the assignment of the given request to the given hostname; any * exception raised while doing so is logged and rethrown wrapped in an {@code IllegalStateException}. * * @return a task assigner action */ /* package */Action2 getTaskAssignerIntl() throws IllegalStateException { return new Action2() { @Override public void call(TaskRequest request, String hostname) { try (AutoCloseable ac = stateMonitor.enter()) { assignableVMs.setTaskAssigned(request, hostname); } catch (Exception e) { logger.error("Unexpected error from state monitor: " + e.getMessage(), e); throw new IllegalStateException(e); } } }; } /** * Get the task unassigner action. Call this object's {@code call()} method to unassign an assignment you * have previously set for each task that completes so that internal state is maintained correctly. Pass two * String arguments to this call method: the taskId and the hostname. *
<p>
* For each task you assign and launch, you must call your task scheduler's * {@link #getTaskAssigner() getTaskAssigner().call()} method in order to notify Fenzo that the task has * actually been deployed on a host. *
<p>
* In addition, in your framework's task completion callback that you supply to Mesos, you must call your * task scheduler's {@code getTaskUnassigner().call()} method to notify Fenzo that the * task is no longer assigned. *
<p>
* Some scheduling optimizers need to know not only which tasks are waiting to be scheduled and which hosts * have resource offers available, but also which tasks have previously been assigned and are currently * running on hosts. These two methods help Fenzo provide this information to these scheduling optimizers. *
<p>
* This method is safe to be called concurrently with other calls to {@code TaskScheduler}. The tasks to be * unassigned are stored internally and actually unassigned at the beginning of the next scheduling iteration, * that is, the next time {@link #scheduleOnce(List, List)} is called. * * @return the task un-assigner action * @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method. */ public Action2 getTaskUnAssigner() throws IllegalStateException { return new Action2() { @Override public void call(String taskId, String hostname) { assignableVMs.unAssignTask(taskId, hostname); } }; } /** * Disable the virtual machine with the specified hostname. If the scheduler is not yet aware of the host * with that hostname, it creates a new object for it, and therefore your disabling of it will be remembered * when offers that concern that host come in later. The scheduler will not use disabled hosts for * allocating resources to tasks. * * @param hostname the name of the host to disable * @param durationMillis the length of time, starting from now, in milliseconds, during which the host will * be disabled * @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method. */ public void disableVM(String hostname, long durationMillis) throws IllegalStateException { logger.info("Disable VM " + hostname + " for " + durationMillis + " millis"); assignableVMs.disableUntil(hostname, System.currentTimeMillis()+durationMillis); } /** * Disable the virtual machine with the specified ID. The ID is first resolved to a hostname; if it does not * match a known VM, nothing is disabled and {@code false} is returned. Otherwise the host is disabled in the * same way as by {@link #disableVM(String, long)}. The scheduler will not use disabled hosts for allocating * resources to tasks. 
* * @param vmID the ID of the host to disable * @param durationMillis the length of time, starting from now, in milliseconds, during which the host will * be disabled * @return {@code true} if the ID matches a known VM, {@code false} otherwise. * @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method. */ public boolean disableVMByVMId(String vmID, long durationMillis) throws IllegalStateException { final String hostname = assignableVMs.getHostnameFromVMId(vmID); if(hostname == null) return false; disableVM(hostname, durationMillis); return true; } /** * Enable the VM with the specified host name. Hosts start in an enabled state, so you only need to call * this method if you have previously explicitly disabled the host. * * @param hostname the name of the host to enable * @throws IllegalStateException if the scheduler has been shut down via the {@link #shutdown()} method. */ public void enableVM(String hostname) throws IllegalStateException { logger.info("Enabling VM " + hostname); assignableVMs.enableVM(hostname); } /** * Set how the scheduler determines to which group the VM (host) belongs. You can group hosts. Which group a * host belongs to is determined by the value of a particular attribute in its offers. You can set which * attribute defines group membership by naming it in this method. * * @param attributeName the name of the attribute that determines a VM's group */ public void setActiveVmGroupAttributeName(String attributeName) { assignableVMs.setActiveVmGroupAttributeName(attributeName); } /** * Set the list of VM group names that are active. VMs (hosts) that belong to groups that you do not include * in this list are said to be disabled. The scheduler does not use the resources of disabled hosts when it * allocates tasks. If you pass in a null list, this indicates that the scheduler should consider all groups * to be enabled. 
* * @param vmGroups a list of VM group names that the scheduler is to consider to be enabled, or {@code null} * if the scheduler is to consider every group to be enabled */ public void setActiveVmGroups(List vmGroups) { assignableVMs.setActiveVmGroups(vmGroups); } /** * Mark the task scheduler as shut down and shut down the thread pool executor it created, as well as the * autoscaler if one is configured. Only the first call has any effect; later calls return without doing * anything. */ public void shutdown() { if(isShutdown.compareAndSet(false, true)) { executorService.shutdown(); if(autoScaler != null) autoScaler.shutdown(); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy