nextflow.extension.SplitOp.groovy
A DSL modelled around the UNIX pipe concept, that simplifies writing parallel and scalable pipelines in a portable manner
/*
* Copyright 2013-2024, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.extension
import groovy.transform.CompileStatic
import groovy.transform.PackageScope
import groovy.util.logging.Slf4j
import groovyx.gpars.dataflow.DataflowQueue
import groovyx.gpars.dataflow.DataflowReadChannel
import groovyx.gpars.dataflow.DataflowWriteChannel
import nextflow.Channel
import nextflow.splitter.AbstractSplitter
import nextflow.splitter.FastqSplitter
import nextflow.splitter.SplitterFactory
/**
* Implements splitter operators:
* - splitCsv
* - splitFasta
* - splitFastq
* - splitJson
* - splitText
*
* @author Paolo Di Tommaso
*/
@Slf4j
@CompileStatic
class SplitOp {
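// A rough usage sketch, not taken from this source (file names and option values
// are illustrative only): in a pipeline script these operators are invoked directly
// on a channel, and each call builds a SplitOp with the operator name and the
// user-supplied options, then runs apply(), e.g.
//
//   Channel.fromPath('records.csv').splitCsv(header: true)
//   Channel.fromPath('proteins.fa').splitFasta(by: 10)
//   Channel.fromPath('sequences.txt').splitText(by: 4_000)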
/**
* The channel to which this operator is applied
*/
private DataflowReadChannel source
/**
* Operator named parameters
*/
@PackageScope Map params
/**
* Whether the splitter is applied to paired-end read files (only valid for the {@code splitFastq} operator)
*/
@PackageScope boolean pairedEnd
/**
* Whether the splitter is applied to multiple file elements
*/
@PackageScope boolean multiSplit
/**
* Indexes of the elements to which the split operation needs to be applied
*/
@PackageScope List indexes
/**
* The name of the operator e.g. {@code splitFasta}
*/
@PackageScope String methodName
/**
* Creates a splitter operator
*
* @param source The source channel to which the operator is applied
* @param methodName The operator method name e.g. {@code splitFasta}, {@code splitCsv}, etc.
* @param opts The operator named options
*/
SplitOp( DataflowReadChannel source, String methodName, Map opts ) {
this.source = source
this.params = opts != null ? new HashMap(opts) : new HashMap<>()
this.methodName = methodName
if( params.pe && methodName != 'splitFastq' )
throw new IllegalArgumentException("Unknown argument 'pe' for operator 'splitFastq'")
if( params.pe==true && params.elem )
throw new IllegalArgumentException("Parameter `pe` and `elem` conflicts")
if( params.pe == true ) {
indexes = [-1,-2]
multiSplit = true
pairedEnd = true
}
if( params.elem instanceof List ) {
indexes = params.elem as List
multiSplit = true
}
// -- validate options
if( params.containsKey('autoClose') )
throw new IllegalArgumentException('Parameter `autoClose` is not supported')
// turn off channel auto-close
params.autoClose = false
if( params.into && !(CH.isChannelQueue(params.into)) )
throw new IllegalArgumentException('Parameter `into` must reference a channel object')
}
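// For the paired-end case handled above, a plausible pipeline-level invocation
// (a hedged sketch; paths and chunk size are illustrative) would be:
//
//   Channel.fromFilePairs('reads_{1,2}.fastq', flat: true)
//       .splitFastq(by: 1_000, pe: true)
//
// Each incoming tuple looks like [sampleId, read1, read2]; the indexes [-1,-2]
// select the last two elements, i.e. the two read files, so both are split in
// lock-step and re-paired by the merge step applied later on.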
/**
* Applies the splitting operator
*
* @return the output channel emitting the resulting split chunks
*/
DataflowWriteChannel apply() {
multiSplit ? splitMultiEntries() : splitSingleEntry(source, params)
}
/**
* Splits more than one element. Each split operation is handled
* on a separate channel. All channels are then merged into a
* single output result channel.
*/
protected DataflowWriteChannel splitMultiEntries() {
assert indexes
final cardinality = indexes.size()
// -- creates a copy of `source` channel for each element to split
def copies = createSourceCopies(source, cardinality)
// -- applies the splitter to each channel copy
def splitted = new ArrayList(cardinality)
for( int i=0; i<cardinality; i++ ) {
def copy = (DataflowReadChannel)copies.get(i)
def opts = new HashMap(params)
opts.elem = indexes.get(i)
splitted.add( splitSingleEntry(copy, opts) )
}
// -- merges the split outputs into a single result channel
def output = CH.create()
applyMergingOperator(splitted, output, indexes)
return output
}
/**
* Applies the split operation to a single entry emitted by the source channel
*
* @param origin The channel emitting the entries to be split
* @param params The splitter named options
* @return the channel emitting the resulting split chunks
*/
protected DataflowWriteChannel splitSingleEntry(DataflowReadChannel origin, Map params) {
// -- get (or create) the channel receiving the split chunks
final output = getOrCreateWriteChannel(params)
// -- the splitter emits the chunks to the output channel via the `into` option
params.into = output
// -- create the splitter object honouring the operator options
def splitter = createSplitter(methodName, params)
// -- flag a multi-element split operation
if( multiSplit )
splitter.multiSplit = true
// -- emit the split index so paired-end fastq chunks can be re-paired
if( pairedEnd )
(splitter as FastqSplitter).emitSplitIndex = true
// -- subscribe the splitter to the source channel
applySplittingOperator(origin, output, splitter)
return output
}
@PackageScope
List<DataflowQueue> createSourceCopies(DataflowReadChannel source, int n) {
new IntoOp(source, n).apply().getOutputs()
}
@PackageScope
void applySplittingOperator( DataflowReadChannel origin, DataflowWriteChannel output, AbstractSplitter splitter ) {
final events = new HashMap(2)
events.onNext = { entry -> splitter.target(entry).apply() }
events.onComplete = { output << Channel.STOP }
DataflowHelper.subscribeImpl ( origin, events )
}
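// In other words: for every value emitted by `origin` the splitter is re-targeted
// at that value and run, writing its chunks to the splitter `into` channel (which
// splitSingleEntry sets to `output`); once the source completes, Channel.STOP is
// forwarded so that downstream operators can terminate as well.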
@PackageScope
AbstractSplitter createSplitter(String methodName, Map params) {
SplitterFactory
.create(methodName)
.options(params) as AbstractSplitter
}
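// SplitterFactory resolves the operator name to the corresponding splitter
// implementation in the nextflow.splitter package, e.g. 'splitFastq' maps to the
// FastqSplitter imported above; the other operator names are expected to map to
// their respective splitter classes in the same way.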
@PackageScope
void applyMergingOperator(List splitted, DataflowWriteChannel output, List indexes) {
DataflowHelper.newOperator(splitted, [output], new SplitterMergeClosure(indexes))
}
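// SplitterMergeClosure (defined elsewhere in this package) recombines the outputs
// of the per-element splits: each split channel emits the source tuple with its own
// element replaced by a chunk, and the closure merges the corresponding emissions
// back into a single tuple on the output channel, e.g. re-pairing forward/reverse
// fastq chunks in the paired-end case.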
@PackageScope
DataflowWriteChannel getOrCreateWriteChannel(Map params) {
def result
// create a new DataflowChannel that will receive the splitter entries
if( params.into instanceof DataflowWriteChannel ) {
result = (DataflowWriteChannel)params.into
}
else {
result = CH.create()
}
return result
}
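// A hedged note on `into`: when the caller supplies a write channel via the `into`
// option (validated in the constructor), the split chunks are emitted to that
// channel, otherwise a fresh queue channel is created. For example, an internal
// caller could do something along these lines (illustrative snippet only):
//
//   def out = CH.create()
//   new SplitOp(source, 'splitText', [into: out]).apply()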
}