All downloads are free. The search and download features use the official Maven repository.

nextflow.processor.TaskArrayCollector.groovy Maven / Gradle / Ivy

Go to download

A DSL modelled around the UNIX pipe concept, that simplifies writing parallel and scalable pipelines in a portable manner

There is a newer version: 24.11.0-edge
Show newest version
/*
 * Copyright 2013-2023, Seqera Labs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package nextflow.processor

import java.nio.file.Files
import java.util.concurrent.locks.Lock
import java.util.concurrent.locks.ReentrantLock

import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.executor.Executor
import nextflow.executor.TaskArrayExecutor
import nextflow.file.FileHelper
import nextflow.util.CacheHelper
import nextflow.util.Escape
/**
 * Task monitor that batches tasks and submits them as job arrays
 * to an underlying task monitor.
 *
 * Tasks are buffered by {@link #collect(TaskRun)} until {@code arraySize}
 * tasks are available, at which point they are wrapped in a single
 * {@link TaskArrayRun} and handed to the executor. {@link #close()} flushes
 * whatever is still buffered. Both methods run under the same lock, so
 * concurrent calls are serialized.
 *
 * @author Ben Sherman
 */
@Slf4j
@CompileStatic
class TaskArrayCollector {

    /**
     * The set of directives which are used by the job array.
     *
     * Only these keys are copied from the process config into the
     * config of the job array created by {@link #createTaskArray(List)}.
     */
    private static final List<String> ARRAY_DIRECTIVES = [
            'accelerator',
            'arch',
            'clusterOptions',
            'cpus',
            'disk',
            'machineType',
            'memory',
            'queue',
            'resourceLabels',
            'resourceLimits',
            'time',
            // only needed for container-native executors and/or Fusion
            'container',
            'containerOptions',
    ]

    /** Process whose tasks are being collected. */
    private final TaskProcessor processor

    /** Executor that receives both single tasks and job arrays. */
    private final TaskArrayExecutor executor

    /** Number of tasks that triggers the submission of a job array. */
    private final int arraySize

    /** Guards {@link #array} and {@link #closed}. */
    private final Lock sync = new ReentrantLock()

    /** Tasks buffered for the next job array; {@code null} once the collector is closed. */
    private List<TaskRun> array

    /** Whether {@link #close()} has completed; later tasks bypass the buffer. */
    private boolean closed = false

    /**
     * Create a collector for the given process.
     *
     * @param processor the process that owns the collected tasks
     * @param executor the executor to submit to; must implement {@link TaskArrayExecutor}
     * @param arraySize the number of tasks per job array
     * @throws IllegalArgumentException if the executor does not implement {@link TaskArrayExecutor}
     */
    TaskArrayCollector(TaskProcessor processor, Executor executor, int arraySize) {
        if( executor !instanceof TaskArrayExecutor )
            throw new IllegalArgumentException("Executor '${executor.name}' does not support job arrays")

        this.processor = processor
        this.executor = (TaskArrayExecutor)executor
        this.arraySize = arraySize
        this.array = new ArrayList<>(arraySize)
    }

    /**
     * Add a task to the current array, and submit the array when it
     * reaches the desired size.
     *
     * The task is submitted directly, without batching, when the collector
     * has already been closed or when the task is a retry.
     *
     * @param task the task to batch or submit
     */
    void collect(TaskRun task) {
        sync.lock()
        try {
            // submit task directly if the collector is closed
            // or if the task is retried (since it might have dynamic resources)
            if( closed || task.config.getAttempt() > 1 ) {
                executor.submit(task)
                return
            }

            // add task to the array
            array.add(task)

            // submit job array when it is ready; `>=` rather than `==` so that a
            // degenerate array size can never leave tasks buffered until close()
            if( array.size() >= arraySize ) {
                executor.submit(createTaskArray(array))
                // start a fresh buffer: the submitted list was passed to createTaskArray
                array = new ArrayList<>(arraySize)
            }
        }
        finally {
            sync.unlock()
        }
    }

    /**
     * Close the collector, submitting any remaining tasks as a partial job array.
     *
     * A single remaining task is submitted as a plain task instead of a
     * one-element array. Calling this method again after it has completed
     * is a no-op, so remaining tasks are never submitted twice.
     */
    void close() {
        sync.lock()
        try {
            // already flushed: avoid re-submitting tasks (and dereferencing the released buffer)
            if( closed )
                return

            final List<TaskRun> pending = array
            if( pending.size() == 1 ) {
                executor.submit(pending.first())
            }
            else if( pending.size() > 0 ) {
                executor.submit(createTaskArray(pending))
            }

            // release the buffer on every path; collect() no longer touches it once closed
            array = null
            closed = true
        }
        finally {
            sync.unlock()
        }
    }

    /**
     * Create the task run for a job array.
     *
     * A task handler is created and its launcher prepared for every child
     * task, a work directory is created for the array from a hash of the
     * child task hashes, and the array config is populated with the
     * {@link #ARRAY_DIRECTIVES} found in the process config. The array takes
     * its id and index from the child task with the lowest index.
     *
     * @param tasks the child tasks of the job array
     * @return the task run representing the job array
     */
    protected TaskArrayRun createTaskArray(List<TaskRun> tasks) {
        // prepare child job launcher scripts
        final List<TaskHandler> handlers = tasks.collect( t -> executor.createTaskHandler(t) )
        for( TaskHandler handler : handlers ) {
            handler.prepareLauncher()
        }

        // create work directory
        final hash = CacheHelper.hasher( tasks.collect( t -> t.getHash().asLong() ) ).hash()
        final workDir = FileHelper.getWorkFolder(executor.getWorkDir(), hash)
        Files.createDirectories(workDir)

        // create wrapper script
        final script = createArrayTaskScript(handlers)
        log.debug "Creating task array run >> $workDir\n$script"

        // create config for job array, skipping directives that are not set
        final rawConfig = new HashMap<String,Object>(ARRAY_DIRECTIVES.size())
        for( String key : ARRAY_DIRECTIVES ) {
            final value = processor.config.get(key)
            if( value != null )
                rawConfig[key] = value
        }

        // create job array
        final first = tasks.min( t -> t.index )
        final taskArray = new TaskArrayRun(
            id: first.id,
            index: first.index,
            processor: processor,
            type: processor.taskBody.type,
            config: new TaskConfig(rawConfig),
            context: new TaskContext(processor),
            hash: hash,
            workDir: workDir,
            script: script,
            children: handlers
        )
        taskArray.config.context = taskArray.context
        taskArray.config.process = taskArray.processor.name
        taskArray.config.executor = taskArray.processor.executor.name

        return taskArray
    }

    /**
     * Create the wrapper script for a job array.
     *
     * The script declares a shell array holding the escaped work directory
     * of every child task, exports {@code nxf_array_task_dir} as the entry
     * selected by {@link #getArrayIndexRef()}, and then runs the executor's
     * launch command for that directory.
     *
     * @param array the task handlers of the child tasks
     * @return the wrapper script text
     */
    protected String createArrayTaskScript(List<TaskHandler> array) {
        // get work directory and launch command for each task
        final workDirs = array.collect( h -> executor.getArrayWorkDir(h) )
        """
        array=( ${workDirs.collect( p -> Escape.path(p) ).join(' ')} )
        export nxf_array_task_dir=${getArrayIndexRef()}
        ${executor.getArrayLaunchCommand('$nxf_array_task_dir')}
        """.stripIndent().leftTrim()
    }

    /**
     * Build the shell expression that selects the current child task's
     * work directory from the {@code array} variable of the wrapper script.
     *
     * When the executor's array index start is greater than zero, it is
     * subtracted from the index name inside the subscript.
     *
     * @return an expression of the form <code>${array[INDEX]}</code>
     */
    protected String getArrayIndexRef() {
        final name = executor.getArrayIndexName()
        final start = executor.getArrayIndexStart()
        final index = start > 0 ? "${name} - ${start}" : name
        return '${array[' + index + ']}'
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy