nextflow.executor.SlurmExecutor.groovy
/*
* Copyright 2013-2024, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.executor
import java.nio.file.Path
import java.util.regex.Pattern
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.fusion.FusionHelper
import nextflow.processor.TaskArrayRun
import nextflow.processor.TaskConfig
import nextflow.processor.TaskRun
/**
* Processor for SLURM resource manager
*
* See http://computing.llnl.gov/linux/slurm/
*
*
* @author Paolo Di Tommaso
*/
@Slf4j
@CompileStatic
class SlurmExecutor extends AbstractGridExecutor implements TaskArrayExecutor {
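// matches the job id in the standard `sbatch` confirmation message, e.g. `Submitted batch job 12345`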
static private Pattern SUBMIT_REGEX = ~/Submitted batch job (\d+)/
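// when true, memory is requested with `--mem-per-cpu` instead of `--mem` (see register() and getDirectives())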
private boolean perCpuMemAllocation
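// checks whether the user already provides a `--signal` option via clusterOptions,
// so that the default `--signal B:USR2@30` directive is not added twice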
private boolean hasSignalOpt(TaskConfig config) {
final opts = config.getClusterOptionsAsString()
return opts ? opts.contains('--signal ') || opts.contains('--signal=') : false
}
/**
* Gets the directives to submit the specified task to the cluster for execution
*
* @param task A {@link TaskRun} to be submitted
* @param result The {@link List} instance to which the job directives are added
* @return A {@link List} containing all directive tokens and values.
*/
protected List<String> getDirectives(TaskRun task, List<String> result) {
if( task instanceof TaskArrayRun ) {
final arraySize = task.getArraySize()
result << '--array' << "0-${arraySize - 1}".toString()
}
result << '-J' << getJobNameFor(task)
// -o OUTFILE and no -e option => stdout and stderr merged to stdout/OUTFILE
result << '-o' << (task.isArray() ? '/dev/null' : quote(task.workDir.resolve(TaskRun.CMD_LOG)))
result << '--no-requeue' << '' // note: directives need to be returned as pairs
if( !hasSignalOpt(task.config) ) {
// see https://github.com/nextflow-io/nextflow/issues/2163
// and https://slurm.schedmd.com/sbatch.html#OPT_signal
result << '--signal' << 'B:USR2@30'
}
if( task.config.getCpus() > 1 ) {
result << '-c' << task.config.getCpus().toString()
}
if( task.config.getTime() ) {
result << '-t' << task.config.getTime().format('HH:mm:ss')
}
if( task.config.getMemory() ) {
//NOTE: Enforcement of memory limits currently relies upon the task/cgroup plugin or
// enabling of accounting, which samples memory use on a periodic basis (data need not
// be stored, just collected). In both cases memory use is based upon the job's
// Resident Set Size (RSS). A task may exceed the memory limit until the next periodic
// accounting sample. -- https://slurm.schedmd.com/sbatch.html
final mem = task.config.getMemory().toMega()
if( perCpuMemAllocation )
result << '--mem-per-cpu' << mem.intdiv(task.config.getCpus()).toString() + 'M'
else
result << '--mem' << mem.toString() + 'M'
}
// the requested partition (a.k.a queue) name
if( task.config.queue ) {
result << '-p' << (task.config.queue.toString())
}
// -- append the user-provided cluster options, if any
addClusterOptionsDirective(task.config, result)
// add slurm account from config
final account = session.getExecConfigProp(getName(), 'account', null) as String
if( account ) {
result << '-A' << account
}
return result
}
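// token prefixed to each directive line in the job wrapper script, e.g. `#SBATCH -J <job name>`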
String getHeaderToken() { '#SBATCH' }
/**
* The command line to submit this job
*
* @param task The {@link TaskRun} instance to submit for execution to the cluster
* @param scriptFile The file containing the job launcher script
* @return A list representing the submit command line
*/
@Override
List<String> getSubmitCommandLine(TaskRun task, Path scriptFile) {
return pipeLauncherScript()
? List.of('sbatch')
: List.of('sbatch', scriptFile.getName())
}
/**
* Parse the string returned by the {@code sbatch} command and extract the job ID string
*
* @param text The string returned when submitting the job
* @return The actual job ID string
*/
@Override
def parseJobId(String text) {
for( String line : text.readLines() ) {
def m = SUBMIT_REGEX.matcher(line)
if( m.find() ) {
return m.group(1).toString()
}
}
// a customised `sbatch` command may return only the job id
def id = text.trim()
if( id.isLong() )
return id
throw new IllegalStateException("Invalid SLURM submit response:\n$text\n\n")
}
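// submitted jobs are cancelled with the `scancel` command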
@Override
protected List<String> getKillCommand() { ['scancel'] }
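// job status is polled with `squeue`; the format `%i %t` prints the job id and its
// compact state code, and `-t all` includes jobs in any state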
@Override
protected List<String> queueStatusCommand(Object queue) {
final result = ['squeue','--noheader','-o','%i %t', '-t', 'all']
if( queue )
result << '-p' << queue.toString()
final user = System.getProperty('user.name')
if( user )
result << '-u' << user
else
log.debug "Cannot retrieve current user"
return result
}
/*
* Maps SLURM job status to nextflow status
* see http://slurm.schedmd.com/squeue.html#SECTION_JOB-STATE-CODES
*/
static private Map<String,QueueStatus> STATUS_MAP = [
'PD': QueueStatus.PENDING, // (pending)
'R': QueueStatus.RUNNING, // (running)
'CA': QueueStatus.ERROR, // (cancelled)
'CF': QueueStatus.PENDING, // (configuring)
'CG': QueueStatus.RUNNING, // (completing)
'CD': QueueStatus.DONE, // (completed)
'F': QueueStatus.ERROR, // (failed)
'TO': QueueStatus.ERROR, // (timeout)
'NF': QueueStatus.ERROR, // (node failure)
'S': QueueStatus.HOLD, // (job suspended)
'ST': QueueStatus.HOLD, // (stopped)
'PR': QueueStatus.ERROR, // (Job terminated due to preemption)
'BF': QueueStatus.ERROR, // (boot fail, Job terminated due to launch failure)
]
@Override
protected Map<String,QueueStatus> parseQueueStatus(String text) {
final result = new LinkedHashMap<String,QueueStatus>()
text.eachLine { String line ->
def cols = line.split(/\s+/)
if( cols.size() == 2 ) {
result.put( cols[0], STATUS_MAP.get(cols[1]) )
}
else {
log.debug "[SLURM] invalid status line: `$line`"
}
}
return result
}
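// read the `perCpuMemAllocation` flag from the executor configuration when the executor is registered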
@Override
void register() {
super.register()
perCpuMemAllocation = session.getExecConfigProp(name, 'perCpuMemAllocation', false)
}
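// when Fusion is enabled the job script is piped to `sbatch` over stdin instead of
// being passed as a file argument (see getSubmitCommandLine)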
@Override
protected boolean pipeLauncherScript() {
return isFusionEnabled()
}
@Override
boolean isFusionEnabled() {
return FusionHelper.isFusionEnabled(session)
}
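// name of the environment variable holding the index of the current task in a SLURM array job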
@Override
String getArrayIndexName() {
return 'SLURM_ARRAY_TASK_ID'
}
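// array indices start at zero, matching the `--array 0-(n-1)` directive added in getDirectives()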
@Override
int getArrayIndexStart() {
return 0
}
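// SLURM identifies each task of an array job as `<jobId>_<index>`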
@Override
String getArrayTaskId(String jobId, int index) {
assert jobId, "Missing 'jobId' argument"
return "${jobId}_${index}"
}
}
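/*
 * Illustrative configuration sketch: the directives built by getDirectives() are
 * typically driven from `nextflow.config`. This is an assumption-based example
 * using the standard Nextflow `process` and `executor` scopes; exact option
 * scoping (e.g. `executor.$slurm`) may vary by Nextflow version.
 *
 *   process.executor             = 'slurm'
 *   process.queue                = 'long'         // emitted as the `-p` directive
 *   process.clusterOptions       = '--qos=normal' // forwarded by addClusterOptionsDirective()
 *   executor.account             = 'my-project'   // emitted as the `-A` directive
 *   executor.perCpuMemAllocation = true           // request memory with `--mem-per-cpu`
 */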