nextflow.scheduler.Autoscaler.groovy
/*
* Copyright 2013-2019, Centre for Genomic Regulation (CRG)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.scheduler
import java.util.concurrent.ConcurrentLinkedQueue
import java.util.concurrent.Executors
import java.util.concurrent.ScheduledExecutorService
import java.util.concurrent.TimeUnit
import groovy.transform.PackageScope
import groovy.util.logging.Slf4j
import nextflow.cloud.CloudConfig
import nextflow.cloud.CloudDriver
import nextflow.cloud.CloudDriverFactory
import nextflow.cloud.types.CloudInstanceStatus
import nextflow.executor.IgBaseTask
import nextflow.processor.TaskId
import nextflow.scheduler.Protocol.NodeData
import nextflow.scheduler.Protocol.TaskHolder
import nextflow.util.Duration
import org.apache.ignite.Ignite
/**
* Implements an auto-scaling policy which adds new instances
* under scheduler request pressure and removes them when those
* instances become idle
*
* @author Paolo Di Tommaso
*/
@Slf4j
class Autoscaler implements Closeable {
/**
* The cloud driver name e.g. `aws`
*/
private String driverName
/**
* The reference to the cloud driver
*/
private CloudDriver driver
/**
* The reference to the underlying Ignite cluster
*/
private Ignite ignite
/**
* Map each node UUID to the associated {@link NodeData} descriptor
*/
private Map<UUID, NodeData> workerNodes
/**
* Map of scheduled tasks not yet completed
*/
private Map<TaskId, TaskHolder> scheduledTasks
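/**
 * Executor service running the periodic cluster watchdog task
 */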
private ScheduledExecutorService watchdog
/**
* The auto-scaler configuration object
*/
private CloudConfig.Autoscale scalerConfig
/**
* Whether the auto-scaling is enabled
*/
private volatile boolean enabled
/**
* The IDs of the instances requested by the auto-scaler
* that have not yet joined the cluster
*/
private volatile Queue<String> pendingInstanceIds
private Map<List, Byte> canRunTaskCache = new HashMap<>()
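/**
 * Timestamp taken when waiting tasks are first detected; reset to zero
 * when no tasks are waiting (see {@link #checkStarvingTasks()})
 */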
private long waitingTimestamp
/**
* Creates the auto-scaler object
*
* @param ignite Reference to the underlying {@link Ignite} cluster
* @param config A {@link CloudConfig} object holding the cloud configuration
*/
Autoscaler(Ignite ignite, CloudConfig config) {
this.ignite = ignite
this.scalerConfig = config.getAutoscale()
this.driverName = config.getDriverName()
this.enabled = scalerConfig.enabled
log.debug "### Auto-scaling enabled: $enabled"
}
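/*
 * Illustrative sketch only: the settings consumed by this class are expected
 * to come from the cloud auto-scaling configuration scope, for example
 *
 *   cloud {
 *       autoscale {
 *           enabled = true
 *           minInstances = 1
 *           maxInstances = 10
 *           instanceType = 'm4.xlarge'   // hypothetical value
 *           terminateWhenIdle = true
 *       }
 *   }
 *
 * The option names mirror the CloudConfig.Autoscale accessors used in this
 * class (enabled, minInstances, maxInstances, instanceType, starvingTimeout,
 * idleTimeout, terminateWhenIdle).
 */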
/**
* ONLY FOR TESTING PURPOSE
*/
protected Autoscaler() { }
/**
* Initialize the auto-scaling policy
*
* @param nodes The map holding the current cluster topology
* @param tasks The map of scheduled tasks
*/
@PackageScope
void init(Map<UUID, NodeData> nodes, Map<TaskId, TaskHolder> tasks) {
this.workerNodes = nodes
this.scheduledTasks = tasks
// -- init the cloud driver
if( enabled ) {
def driverName = driverName ?: CloudDriverFactory.getDefaultDriverName()
if( !driverName )
throw new IllegalStateException("No cloud driver name has been specified")
driver = CloudDriverFactory.getDriver(driverName)
if( !driver ) throw new IllegalStateException("Cannot load cloud driver: `$driverName`")
driver.validate(scalerConfig)
}
// --
this.watchdog = Executors.newScheduledThreadPool(1)
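// run the cluster watchdog once a minute, after an initial one-minute delay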
this.watchdog.scheduleWithFixedDelay(this.&clusterWatchdog as Runnable, 1, 1, TimeUnit.MINUTES)
}
/**
* Method invoked when a new node joins the cluster. If it is an
* instance requested by the auto-scaler, its ID is removed from the list of
* pending instances to be launched. Once no more instances are pending
* the auto-scaler is re-enabled
*
* @param data A {@link NodeData} object representing the new node
*/
@PackageScope
void onNodeStart(NodeData data) {
if( !pendingInstanceIds )
// nothing to do
return
def found = pendingInstanceIds.remove(data.instanceId)
if( found ) {
log.debug "### Autoscale node joined the cluster [${data.hostName}] instance-id=$data.instanceId -- Still missing ${pendingInstanceIds.size()} instances"
if( pendingInstanceIds.isEmpty() ) {
enabled = true
}
}
}
/**
* Implements the auto-scaler main tasks. This method is invoked once a minute
*/
@PackageScope
void clusterWatchdog() {
try {
checkClusterSize()
checkStarvingTasks()
checkIdleNodes()
checkPendingInstances()
}
catch (InterruptedException e) {
log.debug "Shutdown in progress [InterruptedException]"
}
catch( Throwable e ) {
log.debug "### Oops.. Something went wrong", e
}
}
/**
* @return the current number of nodes that make up the cloud cluster
*/
@PackageScope
int getClusterSize() {
return workerNodes.size()
}
/**
* Check that the cluster size is within its min - max size boundaries
*/
@PackageScope
void checkClusterSize() {
if( !enabled ) return
def currentSize = getClusterSize()
if( currentSize < scalerConfig.minInstances ) {
// -- add instances as needed to reach the `minInstances` count
def missingInstances = scalerConfig.minInstances - currentSize
log.debug "### Adding $missingInstances instance(s) to cloud cluster -- current-size: $currentSize; min-size: ${scalerConfig.minInstances}; "
requestNewInstances(missingInstances)
}
}
/**
* Launch new cloud instances when tasks have been waiting for execution longer than the configured starving timeout
*/
@PackageScope
void checkStarvingTasks() {
// find all waiting tasks i.e. not marked as `started`
final timeout = scalerConfig.starvingTimeout
final type = scalerConfig.instanceType
Collection<TaskHolder> waiting = scheduledTasks.values().findAll{ !it.started }
// no task waiting -- reset the timestamp
if( !waiting ) {
log.trace "### No tasks waiting for execution"
waitingTimestamp = 0
return
}
// some tasks are waiting -- set the timestamp
if( !waitingTimestamp ) {
waitingTimestamp = System.currentTimeMillis()
}
def delta = System.currentTimeMillis() - waitingTimestamp
log.trace "### Tasks waiting for execution: count=${waiting.size()}; delta=${Duration.of(delta)}; timeout=${timeout}"
if( delta < timeout.millis ) {
return
}
// make sure these tasks can run on the instance type chosen
// and count the number of required cpus
List<TaskId> tasks = []
int missingCpus = 0
def itr = waiting.iterator()
while( itr.hasNext() ) {
def it = itr.next()
if( it.isWaitingMoreThan(timeout) )
canRunOnExistingNodes(it.task)
def result = enabled && canRunOnNewInstance(it.task, type)
if( result ) {
missingCpus += it.task.getResources().cpus
tasks << it.task.taskId
}
}
// cleanup the cache
canRunTaskCache.clear()
if( missingCpus && enabled ) {
log.debug "### The following tasks have been waiting for more than $timeout -- required cpus=$missingCpus; taskIds=${tasks.join(',')}"
requestNewCpus(missingCpus)
}
}
/**
* Request a specific number of cpus by launching new instances in the cloud,
* making sure not to exceed the max instances limit
*
* @param cpus The number of cpus requested
*/
@PackageScope
void requestNewCpus( int cpus ) {
def type = driver.describeInstanceType(scalerConfig.instanceType)
if( !type ) {
log.warn "### Can't find a instance type description: ${scalerConfig.instanceType}"
return
}
def nodeNeeded = (int)Math.ceil( cpus / type.cpus )
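// e.g. 6 missing cpus on a 4-cpu instance type => ceil(6/4) = ceil(1.5) = 2 nodes
// (note: the Groovy `/` operator performs decimal, not integer, division)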
def currentSize = getClusterSize()
def maxSize = scalerConfig.getMaxInstances()
if( currentSize >= maxSize ) {
log.debug "### Can't grow the cluster more, current size reached the cluster limit -- missing-cpus: $cpus; node-needed: $nodeNeeded; current-size: $currentSize; max-size: ${maxSize}"
return
}
def num = Math.min( nodeNeeded, maxSize-currentSize )
log.debug "### Requesting $num instance(s) of type: $type -- missing-cpus: $cpus; node-needed: $nodeNeeded; current-size: $currentSize; max-size: ${maxSize}"
requestNewInstances(num)
}
/**
* Submit the request for new cloud instances
*
* @param num The number of instances requested
*/
@PackageScope
void requestNewInstances( int num ) {
// -- launch the requested nodes
def ids = driver.launchInstances(num, scalerConfig)
// -- disable cluster auto-scaling until all instances have joined the cluster
this.enabled = false
this.pendingInstanceIds = new ConcurrentLinkedQueue<>(ids)
// -- wait for the instances to start, then tag them
driver.waitInstanceStatus(ids, CloudInstanceStatus.STARTED)
driver.tagInstances(ids, scalerConfig)
}
/**
* Check if task resource requirements are fulfilled by a specified cloud instance type
*
* @param task A {@link IgBaseTask} instance modeling the task resource request
* @param instanceType The instance type identifier e.g. {@code m4.xlarge}
* @return {@code true} when the task can be executed on the specified instance type, {@code false} otherwise
*/
@PackageScope
boolean canRunOnNewInstance( IgBaseTask task, String instanceType ) {
final taskId = task.getTaskId()
final req = task.getResources()
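// a task with default requirements (a single cpu and no explicit memory
// limit) fits any instance type, so skip the check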
if( req.cpus == 1 && !req.memory )
return true
// -- make sure that tasks can be fulfilled by the new instance
def total = driver.describeInstanceType(instanceType)
if( !total ) {
log.warn "### Unknown instance type: $instanceType"
return false
}
if( req.cpus > total.cpus ) {
log.warn("### Task (id=${taskId}) exceed the number of CPUs provided by autoscaling instance type: ${instanceType} -- req: ${req.cpus}; provided: ${total.cpus}")
return false
}
if( req.memory && req.memory > total.memory ) {
log.warn("### Task (id=${taskId}) exceed the amount of memory provided by autoscaling instance type: ${instanceType} -- req: ${req.memory}; provided: ${total.memory}")
return false
}
return true
}
/**
* Check whether a task with a given resource request can be executed
* on any node in the current cluster topology
*
* @param task
* A {@link IgBaseTask} instance modeling the task requesting some computation resources
* @return
* {@code 0} when no node currently has enough free resources to execute the task,
* {@code 1} when at least one node has enough free resources to execute the task, and
* {@code 2} when no node in the cluster provides enough total resources to execute the task
*/
@PackageScope
byte canRunOnExistingNodes( IgBaseTask task ) {
final taskId = task.getTaskId()
final req = task.getResources()
// keep the result in a local cache to avoid making too many
// queries over the distributed cache
def key = (List)[ req.cpus, req.memory ]
def found = canRunTaskCache.get(key)
if( found != null )
return found
def overflow = true
def fulfil = false
try {
def itr = workerNodes.values().iterator()
while( itr.hasNext() ) {
def node = itr.next()
def total = node.resources
if( req.cpus <= total.cpus && ( !req.memory || req.memory <= total.memory) ) {
// at least one instance has enough resources => reset the `overflow` flag
overflow = false
}
def free = node.free
if( free && req.cpus <= free.cpus && (!req.memory || req.memory <= free.memory)) {
// at least one instance has enough free resources => set `fulfil` flag and exit
fulfil = true
break
}
}
}
catch ( Exception e ) {
log.debug "### Oops.. Cannot establish resources availability: taskId=$taskId", e
}
if( overflow ) {
log.warn "### Task (id=${taskId}) requests an amount of resources not available in any node in the current cluster topology -- CPUs: ${req.cpus}; memory: ${req.memory?:'-'}"
}
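// encode the outcome: 1 = some node has enough free resources right now,
// 2 = no node in the cluster could ever fit the task, 0 = the task fits the
// cluster but no node has enough free capacity at the moment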
byte result = fulfil ? 1 : ( overflow ? 2 : 0 )
canRunTaskCache.put(key, result)
return result
}
/**
* @return The ID of the local cluster node
*/
@PackageScope getLocalNodeId() {
ignite.cluster().localNode().id()
}
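/**
 * Terminate cloud instances that have been idle longer than the configured
 * idle timeout. The check runs only when auto-scaling and the
 * `terminateWhenIdle` option are enabled, and it never selects the node
 * on which the auto-scaler itself is running
 */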
@PackageScope void checkIdleNodes() {
if( !enabled || !scalerConfig.terminateWhenIdle )
return
final timeout = scalerConfig.getIdleTimeout()
final idleNodeIds = []
final itr = workerNodes.values().iterator()
while( itr.hasNext() ) {
final node = itr.next()
try {
if( node.isIdle(timeout) ) {
log.debug "### Idle node detected: $node"
idleNodeIds << node.nodeId
}
}
catch( Exception e ) {
log.debug "### Oops.. Failed to check idle node info: $node -- Cause: ${e.message ?: e}"
}
}
// -- never terminate the local node itself: remove it from the idle list
def local = getLocalNodeId()
idleNodeIds.remove(local)
if( !idleNodeIds )
return
def killList = applyTerminationPolicy(idleNodeIds)
if( killList ) {
killNodes(killList)
}
}
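/**
 * Map the idle node IDs to the corresponding cloud instance IDs,
 * skipping nodes for which no instance ID is known
 *
 * @param idleNodeIds The IDs of the nodes detected as idle
 * @return The list of cloud instance IDs that can be terminated
 */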
@PackageScope List<String> applyTerminationPolicy( Collection<UUID> idleNodeIds) {
def killList = new ArrayList(idleNodeIds.size())
def itr = idleNodeIds.iterator()
while( itr.hasNext() ) {
final nodeId = itr.next()
final data = workerNodes.get(nodeId)
if( !data.instanceId ) {
log.debug "### Oops.. Missing cloud instance id for node: [${data.hostName}] $nodeId -- Ignore termination request"
continue
}
killList << data.instanceId
}
return killList
}
@PackageScope void killNodes(List<String> killList) {
// avoid terminating too many instances
final instanceIds = ensureMinSize(killList)
log.debug "### Killing instances: ids=${instanceIds.join(', ')}"
requestTerminateInstances(instanceIds)
}
@PackageScope void requestTerminateInstances( List<String> instanceIds ) {
driver.terminateInstances(instanceIds)
driver.waitInstanceStatus(instanceIds, CloudInstanceStatus.TERMINATED)
}
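/**
 * Trim the list of instances to be terminated so that the cluster
 * does not shrink below the configured `minInstances` count
 *
 * @param killList The candidate instance IDs to terminate
 * @return The possibly shortened list of instance IDs
 */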
@PackageScope
List<String> ensureMinSize( List<String> killList ) {
if( killList.size() >= clusterSize )
throw new IllegalStateException("Can't kill all cluster nodes")
int newSize = clusterSize - killList.size()
def delta = newSize - scalerConfig.minInstances
if( delta<0 ) {
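// keep only the first `killList.size()+delta` entries: a negative range
// bound in Groovy counts from the end of the list, so `killList[0..delta-1]`
// drops the last `-delta` elements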
int last = delta-1
return killList[0..last]
}
return killList
}
/**
* Periodically verifies the status of the pending instances requested
* by the auto-scaler
*/
@PackageScope void checkPendingInstances() {
//TODO
}
/**
* Shutdown the auto-scaler thread
*/
@Override
void close() throws IOException {
watchdog.shutdownNow()
}
}