nextflow.extension.CollectFileOp.groovy Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of nextflow Show documentation
Show all versions of nextflow Show documentation
A DSL modelled around the UNIX pipe concept, that simplifies writing parallel and scalable pipelines in a portable manner
/*
* Copyright 2013-2024, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.extension
import java.nio.file.Path
import groovy.util.logging.Slf4j
import groovyx.gpars.dataflow.DataflowReadChannel
import groovyx.gpars.dataflow.DataflowWriteChannel
import nextflow.Channel
import nextflow.Global
import nextflow.file.FileCollector
import nextflow.file.FileHelper
import nextflow.file.SimpleFileCollector
import nextflow.file.SortFileCollector
import nextflow.util.CacheHelper
import static nextflow.util.CacheHelper.HashMode
import static nextflow.util.CheckHelper.checkParams
/**
* Implements the body of {@link OperatorImpl#collectFile(groovyx.gpars.dataflow.DataflowReadChannel)} operator
*
* @author Paolo Di Tommaso
*/
@Slf4j
class CollectFileOp {
/**
 * Options recognised by the `collectFile` operator. Each key is associated with the
 * type(s) and/or literal values listed for it; the map is passed to `checkParams`
 * by the constructor.
 */
static final Map COLLECT_FILE_PARAMS = [
        sort                  : [Boolean, 'none', 'true', 'natural', 'index', 'hash', 'deep', Closure, Comparator],
        seed                  : Object,
        name                  : [Path, Object],
        storeDir              : [Path, File, CharSequence],
        tempDir               : [Path, File, CharSequence],
        newLine               : Boolean,
        sliceMaxSize          : Integer,
        sliceMaxItems         : Integer,
        deleteTempFilesOnClose: Boolean,
        cache                 : [Boolean, String],
        skip                  : Integer,
        keepHeader            : Boolean
]

// source channel subscribed to by `apply`
private DataflowReadChannel channel

// optional closure invoked on every received item
private final Closure closure

// operator options, as provided by the caller
private final Map params

// channel returned by `apply`
private DataflowWriteChannel result

// collector instance configured by the constructor
private FileCollector collector

// target directory resolved by `defineStoreDirAndFileName`
private Path storeDir

// file name derived from the `name` option, when given
private String fileName
/**
 * Creates the operator: validates the options, creates the result channel and
 * configures the file collector.
 *
 * @param channel the source channel read by {@link #apply()}
 * @param params the operator options, checked against {@link #COLLECT_FILE_PARAMS}
 * @param closure optional closure invoked on each received item
 */
CollectFileOp( final DataflowReadChannel channel, Map params, final Closure closure = null ) {
    checkParams('collectFile', params, COLLECT_FILE_PARAMS)

    this.channel = channel
    this.closure = closure
    this.params = params
    this.result = CH.create()

    // the collector must be created first: the hashing setup writes into it
    createFileCollector()
    defineStoreDirAndFileName()
    defineHashingParams()

    // register a cleanup hook so that the collector gets closed on termination
    Global.onCleanup((session) -> collector.safeClose())
}
/**
 * @return the {@link FileCollector} instance held by this operator
 */
protected FileCollector getCollector() {
    return collector
}
/**
 * Copies the resume/cache settings of the current session and of the operator
 * options into the collector.
 */
protected defineHashingParams() {
    final session = Global.session
    final cacheOpt = params?.cache

    // resume and cache flags; caching is switched off when the `cache` option reads 'false'
    collector.resumable = session.resumeMode
    collector.cacheable = session.cacheable && ( cacheOpt?.toString() != 'false' )

    // hash mode lookup order: operator `cache` option, then `process.cache` in the
    // session config, then the default mode
    collector.hashMode = HashMode.of(cacheOpt) ?: HashMode.of(session.config?.process?.cache) ?: HashMode.DEFAULT()

    // values assigned as the collector hash keys
    collector.hashKeys = [
            session.uniqueId,
            params?.storeDir,
            params?.seed,
            params?.newLine
    ]
}
/*
 * Resolves the `fileName` and `storeDir` fields from the operator options.
 *
 * When `name` is a Path, or its string form contains a '/', its last
 * component becomes the file name and its parent the store directory.
 * An explicit `storeDir` option takes precedence over that parent.
 */
protected defineStoreDirAndFileName() {
    final requested = params?.name
    if( requested ) {
        final boolean pathLike = requested instanceof Path || requested.toString().contains('/')
        if( pathLike ) {
            final target = requested as Path
            fileName = target.name
            storeDir = target.parent
        }
        else {
            fileName = requested
        }
    }

    // an explicitly provided 'storeDir' overrides the one derived from the name
    final explicitDir = params?.storeDir
    if( explicitDir )
        storeDir = explicitDir as Path

    // make sure the directory exists, otherwise fall back to a temp
    // folder in the session working directory
    if( storeDir )
        storeDir.createDirIfNotExists()
    else
        storeDir = FileHelper.createTempFolder(Global.session.workDir)
}
// NOTE(review): in this copy the text between a `<` and the following `>` had been
// stripped, which removed the tail of `processItem`, the whole of `emitItems` and the
// head of `createFileCollector`. They are restored below -- confirm against upstream.

/*
 * Each time a value is received, invoke the closure (when one was given)
 * and append the resulting value to a file in the collector
 */
protected processItem( item ) {
    def value = closure ? closure.call(item) : item

    // when the value is a list, the first item holds the grouping key
    // and all the other values are appended
    if( value instanceof List && value.size()>1 ) {
        for( int i=1; i<value.size(); i++ ) {
            collector.add(value[0] as String, value[i])
        }
    }

    // same as above, for arrays
    else if( value instanceof Object[] && value.size()>1 ) {
        for( int i=1; i<value.size(); i++ ) {
            collector.add(value[0] as String, value[i])
        }
    }

    // Path object: stored under the configured file name, or under its own name
    else if( value instanceof Path ) {
        if( fileName )
            collector.add(fileName, value)
        else
            collector.add(value.getName(), value)
    }

    // as above, for File objects
    else if( value instanceof File ) {
        if( fileName )
            collector.add(fileName, value)
        else
            collector.add(value.getName(), value)
    }

    // any other non-null value goes to the configured file name or a default one
    else if( value != null ) {
        collector.add(fileName ?: 'collect-file.data', value)
    }

    return null
}

/*
 * Emits the files when all values have been collected
 *
 * @param obj NOT USED. It is declared because this method is invoked as a closure
 */
protected emitItems( obj ) {
    // emit the collected files to the 'result' channel
    collector.saveTo(storeDir).each {
        result.bind(it)
    }
    // close the channel
    result.bind(Channel.STOP)
    // close the collector
    collector.safeClose()
}

/*
 * Creates the collector matching the `sort` option and applies the
 * remaining operator options to it
 *
 * @return the configured collector, also stored in the `collector` field
 * @throws IllegalArgumentException when `sort` has an unsupported value, when
 *      `keepHeader` is combined with `seed`, or with a `skip` lower than one
 */
protected FileCollector createFileCollector() {

    // when sorting is not required ('none' or false) use the unsorted collector
    if( params?.sort == 'none' || params?.sort == false ) {
        collector = new SimpleFileCollector()
    }
    else {
        collector = new SortFileCollector()
        switch( params?.sort ) {
            case true:
            case 'true':
            case 'natural':
                collector.sort = { it -> it }
                break

            case 'index':
                collector.sort = null
                break

            case null:
            case 'hash':
                collector.sort = { CacheHelper.hasher(it).hash().asLong() }
                break

            case 'deep':
                collector.sort = { CacheHelper.hasher(it, CacheHelper.HashMode.DEEP).hash().asLong() }
                break

            case Closure:
            case Comparator:
                collector.sort = params.sort
                break

            default:
                throw new IllegalArgumentException("Not a valid collectFile `sort` parameter: ${params.sort}")
        }

        // slicing options are only applied to the sorting collector
        if( params?.sliceMaxSize )
            collector.sliceMaxSize = params.sliceMaxSize
        if( params?.sliceMaxItems )
            collector.sliceMaxItems = params.sliceMaxItems
    }

    // set other params
    collector.tempDir = params?.tempDir as Path
    collector.newLine = params?.newLine as Boolean
    collector.seed = params?.seed
    if( params?.deleteTempFilesOnClose != null )
        collector.deleteTempFilesOnClose = params.deleteTempFilesOnClose as boolean
    if( params?.skip )
        collector.skipLines = params?.skip
    if( params?.keepHeader != null )
        collector.keepHeader = params.keepHeader as boolean

    if( collector.keepHeader ) {
        // validate `seed` parameter
        if( collector.seed != null )
            throw new IllegalArgumentException("Parameter `keepHeader` and `seed` conflict -- check operator `collectFile`")
        // validate `skip` parameter: defaults to one line when not specified
        if( params?.skip == null )
            collector.skipLines = 1
        else if( collector.skipLines < 1 )
            throw new IllegalArgumentException("Parameter `skip` must be greater than zero when `keepHeader` is specified -- check operator `collectFile`")
    }

    return collector
}
/**
 * Subscribes to the source channel, routing each item to `processItem` and the
 * completion event to `emitItems`.
 *
 * @return the channel held in the `result` field
 */
DataflowWriteChannel apply() {
    final handlers = [onNext: this.&processItem, onComplete: this.&emitItems]
    DataflowHelper.subscribeImpl( channel, handlers )
    return result
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy