All Downloads are FREE. Search and download functionalities are using the official Maven repository.

nextflow.executor.SlurmExecutor.groovy Maven / Gradle / Ivy

Go to download

A DSL modelled around the UNIX pipe concept, that simplifies writing parallel and scalable pipelines in a portable manner

The newest version!
/*
 * Copyright 2013-2024, Seqera Labs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package nextflow.executor

import java.nio.file.Path
import java.util.regex.Pattern

import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.fusion.FusionHelper
import nextflow.processor.TaskArrayRun
import nextflow.processor.TaskConfig
import nextflow.processor.TaskRun
/**
 * Processor for SLURM resource manager
 *
 * See http://computing.llnl.gov/linux/slurm/
 *
 *
 * @author Paolo Di Tommaso 
 */
@Slf4j
@CompileStatic
class SlurmExecutor extends AbstractGridExecutor implements TaskArrayExecutor {

    // Matches the job id in the default `sbatch` output, e.g. `Submitted batch job 12345`.
    // NOTE: declared `final` — this is a constant and must not be reassignable.
    static private final Pattern SUBMIT_REGEX = ~/Submitted batch job (\d+)/

    // When true, memory is requested with `--mem-per-cpu` instead of `--mem`
    // (populated from the executor config in `register()`)
    private boolean perCpuMemAllocation

    /**
     * Check whether the user already specified a `--signal` option via the task
     * `clusterOptions`, so the default signal directive is not added twice.
     *
     * @param config The task configuration to inspect
     * @return {@code true} when a `--signal` option is present in the cluster options
     */
    private boolean hasSignalOpt(TaskConfig config) {
        final opts = config.getClusterOptionsAsString()
        return opts ? opts.contains('--signal ') || opts.contains('--signal=') : false
    }

    /**
     * Gets the directives to submit the specified task to the cluster for execution
     *
     * @param task A {@link TaskRun} to be submitted
     * @param result The {@link List} instance to which add the job directives
     * @return A {@link List} containing all directive tokens and values.
     */
    protected List getDirectives(TaskRun task, List result) {

        if( task instanceof TaskArrayRun ) {
            // SLURM array indexes are zero-based (see getArrayIndexStart)
            final arraySize = task.getArraySize()
            result << '--array' << "0-${arraySize - 1}".toString()
        }

        result << '-J' << getJobNameFor(task)

        // -o OUTFILE and no -e option => stdout and stderr merged to stdout/OUTFILE
        // array tasks redirect to /dev/null because each child task manages its own log file
        result << '-o' << (task.isArray() ? '/dev/null' : quote(task.workDir.resolve(TaskRun.CMD_LOG)))

        result << '--no-requeue' << '' // note: directive need to be returned as pairs

        if( !hasSignalOpt(task.config) ) {
            // see https://github.com/nextflow-io/nextflow/issues/2163
            // and https://slurm.schedmd.com/sbatch.html#OPT_signal
            result << '--signal' << 'B:USR2@30'
        }

        if( task.config.getCpus() > 1 ) {
            result << '-c' << task.config.getCpus().toString()
        }

        if( task.config.getTime() ) {
            result << '-t' << task.config.getTime().format('HH:mm:ss')
        }

        if( task.config.getMemory() ) {
            //NOTE: Enforcement of memory limits currently relies upon the task/cgroup plugin or
            // enabling of accounting, which samples memory use on a periodic basis (data need not
            // be stored, just collected). In both cases memory use is based upon the job's
            // Resident Set Size (RSS). A task may exceed the memory limit until the next periodic
            // accounting sample. -- https://slurm.schedmd.com/sbatch.html
            final mem = task.config.getMemory().toMega()
            if( perCpuMemAllocation )
                result << '--mem-per-cpu' << mem.intdiv(task.config.getCpus()).toString() + 'M'
            else
                result << '--mem' << mem.toString() + 'M'
        }

        // the requested partition (a.k.a queue) name
        if( task.config.queue ) {
            result << '-p' << (task.config.queue.toString())
        }

        // -- add any extra directives provided by the user via `clusterOptions`
        addClusterOptionsDirective(task.config, result)

        // add slurm account from config
        final account = session.getExecConfigProp(getName(), 'account', null) as String
        if( account ) {
            result << '-A' << account
        }

        return result
    }

    /** @return The token prefixing each directive line in the job script header */
    String getHeaderToken() { '#SBATCH' }

    /**
     * The command line to submit this job
     *
     * @param task The {@link TaskRun} instance to submit for execution to the cluster
     * @param scriptFile The file containing the job launcher script
     * @return A list representing the submit command line
     */
    @Override
    List getSubmitCommandLine(TaskRun task, Path scriptFile ) {
        // when the launcher script is piped via stdin (Fusion), no file name argument is needed
        return pipeLauncherScript()
                ? List.of('sbatch')
                : List.of('sbatch', scriptFile.getName())
    }

    /**
     * Parse the string returned by the {@code sbatch} command and extract the job ID string
     *
     * @param text The string returned when submitting the job
     * @return The actual job ID string
     * @throws IllegalStateException When no job id can be extracted from the given text
     */
    @Override
    def parseJobId(String text) {

        for( String line : text.readLines() ) {
            def m = SUBMIT_REGEX.matcher(line)
            if( m.find() ) {
                return m.group(1).toString()
            }
        }

        // customised `sbatch` command can return only the jobid
        def id = text.trim()
        if( id.isLong() )
            return id

        throw new IllegalStateException("Invalid SLURM submit response:\n$text\n\n")
    }

    /** @return The command used to cancel (kill) a SLURM job */
    @Override
    protected List getKillCommand() { ['scancel'] }

    /**
     * Build the command line used to poll the queue status.
     *
     * @param queue The target queue (partition) name, or {@code null} for all partitions
     * @return The `squeue` command line as a list of tokens
     */
    @Override
    protected List queueStatusCommand(Object queue) {

        // `-t all` includes completed/failed jobs so terminal states can be reported
        final result = ['squeue','--noheader','-o','%i %t', '-t', 'all']

        if( queue )
            result << '-p' << queue.toString()

        // restrict to the current user's jobs when the user name is available
        final user = System.getProperty('user.name')
        if( user )
            result << '-u' << user
        else
            log.debug "Cannot retrieve current user"

        return result
    }

    /*
     *  Maps SLURM job status to nextflow status
     *  see http://slurm.schedmd.com/squeue.html#SECTION_JOB-STATE-CODES
     *
     *  NOTE: declared `final` — this lookup table is a constant and must not be reassignable.
     */
    static private final Map STATUS_MAP = [
            'PD': QueueStatus.PENDING,  // (pending)
            'R': QueueStatus.RUNNING,   // (running)
            'CA': QueueStatus.ERROR,    // (cancelled)
            'CF': QueueStatus.PENDING,  // (configuring)
            'CG': QueueStatus.RUNNING,  // (completing)
            'CD': QueueStatus.DONE,     // (completed)
            'F': QueueStatus.ERROR,     // (failed),
            'TO': QueueStatus.ERROR,    // (timeout),
            'NF': QueueStatus.ERROR,    // (node failure)
            'S': QueueStatus.HOLD,      // (job suspended)
            'ST': QueueStatus.HOLD,     // (stopped)
            'PR': QueueStatus.ERROR,    // (Job terminated due to preemption)
            'BF': QueueStatus.ERROR,    // (boot fail, Job terminated due to launch failure)
    ]

    /**
     * Parse the `squeue` output mapping each job id to its queue status.
     *
     * @param text The raw `squeue` output, one `<job-id> <state-code>` pair per line
     * @return A map of job id to {@link QueueStatus}; unknown state codes map to {@code null}
     */
    @Override
    protected Map parseQueueStatus(String text) {

        final result = new LinkedHashMap()

        text.eachLine { String line ->
            def cols = line.split(/\s+/)
            if( cols.size() == 2 ) {
                result.put( cols[0], STATUS_MAP.get(cols[1]) )
            }
            else {
                log.debug "[SLURM] invalid status line: `$line`"
            }
        }

        return result
    }

    /**
     * Initialise the executor, reading the `perCpuMemAllocation` flag from the
     * executor configuration.
     */
    @Override
    void register() {
        super.register()
        perCpuMemAllocation = session.getExecConfigProp(name, 'perCpuMemAllocation', false)
    }

    /** @return {@code true} when the launcher script should be piped to `sbatch` via stdin */
    @Override
    protected boolean pipeLauncherScript() {
        return isFusionEnabled()
    }

    /** @return {@code true} when the Fusion file system is enabled for this session */
    @Override
    boolean isFusionEnabled() {
        return FusionHelper.isFusionEnabled(session)
    }

    /** @return The environment variable holding the SLURM array task index */
    @Override
    String getArrayIndexName() {
        return 'SLURM_ARRAY_TASK_ID'
    }

    /** @return The first index of a SLURM job array (zero-based, see `--array` in getDirectives) */
    @Override
    int getArrayIndexStart() {
        return 0
    }

    /**
     * Compose the id of a single array child task.
     *
     * @param jobId The parent array job id; must not be null or empty
     * @param index The zero-based child task index
     * @return The SLURM array task id in the form {@code <jobId>_<index>}
     */
    @Override
    String getArrayTaskId(String jobId, int index) {
        assert jobId, "Missing 'jobId' argument"
        return "${jobId}_${index}"
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy