nextflow.extension.SplitOp.groovy
A DSL modelled around the UNIX pipe concept, that simplifies writing parallel and scalable pipelines in a portable manner
/*
* Copyright 2013-2024, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.extension
import groovy.transform.CompileStatic
import groovy.transform.PackageScope
import groovy.util.logging.Slf4j
import groovyx.gpars.dataflow.DataflowQueue
import groovyx.gpars.dataflow.DataflowReadChannel
import groovyx.gpars.dataflow.DataflowWriteChannel
import nextflow.Channel
import nextflow.splitter.AbstractSplitter
import nextflow.splitter.FastqSplitter
import nextflow.splitter.SplitterFactory
/**
* Implements splitter operators:
* - splitCsv
* - splitFasta
* - splitFastq
* - splitJson
* - splitText
*
* @author Paolo Di Tommaso
*/
@Slf4j
@CompileStatic
class SplitOp {
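// A rough usage sketch, not taken from this source (file names and option values
// are illustrative only): in a pipeline script these operators are invoked directly
// on a channel, and each call builds a SplitOp with the operator name and the
// user-supplied options, then runs apply(), e.g.
//
//   Channel.fromPath('records.csv').splitCsv(header: true)
//   Channel.fromPath('proteins.fa').splitFasta(by: 10)
//   Channel.fromPath('sequences.txt').splitText(by: 4_000)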
/**
* The channel to which this operator is applied
*/
private DataflowReadChannel source
/**
* Operator named parameters
*/
@PackageScope Map params
/**
* Whether the splitter is applied to paired-end read files (only valid for the {@code splitFastq} operator)
*/
@PackageScope boolean pairedEnd
/**
* Whether the splitter is applied to multiple file elements
*/
@PackageScope boolean multiSplit
/**
* Indexes of the elements to which the split operation needs to be applied
*/
@PackageScope List indexes
/**
* The name of the operator e.g. {@code splitFasta}
*/
@PackageScope String methodName
/**
* Creates a splitter operator
*
* @param source The source channel to which the operator is applied
* @param methodName The operator method name e.g. {@code splitFasta}, {@code splitCsv}, etc.
* @param opts The operator named options
*/
SplitOp( DataflowReadChannel source, String methodName, Map opts ) {
this.source = source
this.params = opts != null ? new HashMap(opts) : new HashMap<>()
this.methodName = methodName
if( params.pe && methodName != 'splitFastq' )
throw new IllegalArgumentException("Unknown argument 'pe' for operator 'splitFastq'")
if( params.pe==true && params.elem )
throw new IllegalArgumentException("Parameter `pe` and `elem` conflicts")
if( params.pe == true ) {
indexes = [-1,-2]
multiSplit = true
pairedEnd = true
}
if( params.elem instanceof List ) {
indexes = params.elem as List
multiSplit = true
}
// -- validate options
if( params.containsKey('autoClose') )
throw new IllegalArgumentException('Parameter `autoClose` is not supported')
// turn off channel auto-close
params.autoClose = false
if( params.into && !(CH.isChannelQueue(params.into)) )
throw new IllegalArgumentException('Parameter `into` must reference a channel object')
}
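// For the paired-end case handled above, a plausible pipeline-level invocation
// (a hedged sketch; paths and chunk size are illustrative) would be:
//
//   Channel.fromFilePairs('reads_{1,2}.fastq', flat: true)
//       .splitFastq(by: 1_000, pe: true)
//
// Each incoming tuple looks like [sampleId, read1, read2]; the indexes [-1,-2]
// select the last two elements, i.e. the two read files, so both are split in
// lock-step and re-paired by the merge step applied later on.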
/**
* Applies the splitting operator
*
* @return the output channel emitting the resulting split chunks
*/
DataflowWriteChannel apply() {
multiSplit ? splitMultiEntries() : splitSingleEntry(source, params)
}
/**
* Splits more than one element. Each split operation is handled
* on a separate channel. All channels are then merged into a
* single output result channel.
*/
protected DataflowWriteChannel splitMultiEntries() {
assert indexes
final cardinality = indexes.size()
// -- creates a copy of `source` channel for each element to split
def copies = createSourceCopies(source, cardinality)
// -- applies the splitter to each channel copy
def splitted = new ArrayList(cardinality)
for( int i=0; i<cardinality; i++ ) {
def copy = (DataflowReadChannel)copies.get(i)
def opts = new HashMap(params)
opts.elem = indexes.get(i)
splitted.add( splitSingleEntry(copy, opts) )
}
// -- merges the split outputs into a single result channel
def output = CH.create()
applyMergingOperator(splitted, output, indexes)
return output
}
/**
* Applies the split operation to a single entry emitted by the source channel
*
* @param origin The channel emitting the entries to be split
* @param params The splitter named options
* @return the channel emitting the resulting split chunks
*/
protected DataflowWriteChannel splitSingleEntry(DataflowReadChannel origin, Map params) {
// -- get (or create) the channel receiving the split chunks
final output = getOrCreateWriteChannel(params)
// -- the splitter emits the chunks to the output channel via the `into` option
params.into = output
// -- create the splitter object honouring the operator options
def splitter = createSplitter(methodName, params)
// -- flag a multi-element split operation
if( multiSplit )
splitter.multiSplit = true
// -- emit the split index so paired-end fastq chunks can be re-paired
if( pairedEnd )
(splitter as FastqSplitter).emitSplitIndex = true
// -- subscribe the splitter to the source channel
applySplittingOperator(origin, output, splitter)
return output
}
@PackageScope
List<DataflowQueue> createSourceCopies(DataflowReadChannel source, int n) {
new IntoOp(source, n).apply().getOutputs()
}
@PackageScope
void applySplittingOperator( DataflowReadChannel origin, DataflowWriteChannel output, AbstractSplitter splitter ) {
final events = new HashMap(2)
events.onNext = { entry -> splitter.target(entry).apply() }
events.onComplete = { output << Channel.STOP }
DataflowHelper.subscribeImpl ( origin, events )
}
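// In other words: for every value emitted by `origin` the splitter is re-targeted
// at that value and run, writing its chunks to the splitter `into` channel (which
// splitSingleEntry sets to `output`); once the source completes, Channel.STOP is
// forwarded so that downstream operators can terminate as well.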
@PackageScope
AbstractSplitter createSplitter(String methodName, Map params) {
SplitterFactory
.create(methodName)
.options(params) as AbstractSplitter
}
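// SplitterFactory resolves the operator name to the corresponding splitter
// implementation in the nextflow.splitter package, e.g. 'splitFastq' maps to the
// FastqSplitter imported above; the other operator names are expected to map to
// their respective splitter classes in the same way.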
@PackageScope
void applyMergingOperator(List splitted, DataflowWriteChannel output, List indexes) {
DataflowHelper.newOperator(splitted, [output], new SplitterMergeClosure(indexes))
}
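// SplitterMergeClosure (defined elsewhere in this package) recombines the outputs
// of the per-element splits: each split channel emits the source tuple with its own
// element replaced by a chunk, and the closure merges the corresponding emissions
// back into a single tuple on the output channel, e.g. re-pairing forward/reverse
// fastq chunks in the paired-end case.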
@PackageScope
DataflowWriteChannel getOrCreateWriteChannel(Map params) {
def result
// create a new DataflowChannel that will receive the splitter entries
if( params.into instanceof DataflowWriteChannel ) {
result = (DataflowWriteChannel)params.into
}
else {
result = CH.create()
}
return result
}
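// A hedged note on `into`: when the caller supplies a write channel via the `into`
// option (validated in the constructor), the split chunks are emitted to that
// channel, otherwise a fresh queue channel is created. For example, an internal
// caller could do something along these lines (illustrative snippet only):
//
//   def out = CH.create()
//   new SplitOp(source, 'splitText', [into: out]).apply()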
}