nextflow.extension.GroupTupleOp.groovy Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of nextflow Show documentation
Show all versions of nextflow Show documentation
A DSL modelled around the UNIX pipe concept, that simplifies writing parallel and scalable pipelines in a portable manner
/*
* Copyright 2013-2024, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nextflow.extension
import groovy.util.logging.Slf4j
import groovyx.gpars.dataflow.DataflowReadChannel
import groovyx.gpars.dataflow.DataflowWriteChannel
import nextflow.Channel
import nextflow.util.ArrayBag
import nextflow.util.CacheHelper
import nextflow.util.CheckHelper
/**
* Implements {@link OperatorImpl#groupTuple} operator logic
*
* @author Paolo Di Tommaso
*/
@Slf4j
class GroupTupleOp {
static private Map GROUP_TUPLE_PARAMS = [ by: [Integer, List], sort: [Boolean, 'true','natural','deep','hash',Closure,Comparator], size: Integer, remainder: Boolean ]
static private List GROUP_DEFAULT_INDEX = [0]
/**
* Comparator used to sort tuple entries (when required)
*/
private Comparator comparator
private int size
private List indices
private DataflowWriteChannel target
private DataflowReadChannel channel
private boolean remainder
private Map groups = [:]
private sort
GroupTupleOp(Map params, DataflowReadChannel source) {
CheckHelper.checkParams('groupTuple', params, GROUP_TUPLE_PARAMS)
channel = source
indices = getGroupTupleIndices(params)
size = params?.size ?: 0
remainder = params?.remainder ?: false
sort = params?.sort
defineComparator()
}
GroupTupleOp setTarget(DataflowWriteChannel target) {
this.target = target
return this
}
static private List getGroupTupleIndices( Map params ) {
if( params?.by == null )
return GROUP_DEFAULT_INDEX
if( params.by instanceof List )
return params.by as List
if( params.by instanceof Integer || params.by.toString().isInteger() )
return [params.by as Integer]
throw new IllegalArgumentException("Not a valid `by` index for `groupTuple` operator: '${params.by}' -- It must be an integer value or a list of integers")
}
/*
* Collects received values grouping by key
*/
private void collect(List tuple) {
final key = tuple[indices] // the actual grouping key
final len = tuple.size()
final List items = groups.getOrCreate(key) { // get the group for the specified key
def result = new ArrayList(len) // create if does not exist
for( int i=0; i0 && sz==count ) {
bindTuple(items, sz)
groups.remove(key)
}
}
/*
* finalize the grouping binding the remaining values
*/
private void finalise(nop) {
groups.each { keys, items -> bindTuple(items, size ?: sizeBy(keys)) }
target.bind(Channel.STOP)
}
/*
* bind collected items to the target channel
*/
private void bindTuple( List items, int sz ) {
def tuple = new ArrayList(items)
if( !remainder && sz>0 ) {
// verify exist it contains 'size' elements
List list = items.find { it instanceof List }
if( list.size() != sz ) {
return
}
}
if( comparator ) {
sortInnerLists(tuple, comparator)
}
target.bind( tuple )
}
/**
* Define the comparator to be used depending the #sort property
*/
private void defineComparator( ) {
/*
* comparator logic used to sort tuple elements
*/
switch(sort) {
case null:
break
case true:
case 'true':
case 'natural':
comparator = { o1,o2 -> o1<=>o2 } as Comparator
break;
case 'hash':
comparator = { o1, o2 ->
def h1 = CacheHelper.hasher(o1).hash()
def h2 = CacheHelper.hasher(o2).hash()
return h1.asLong() <=> h2.asLong()
} as Comparator
break
case 'deep':
comparator = { o1, o2 ->
def h1 = CacheHelper.hasher(o1, CacheHelper.HashMode.DEEP).hash()
def h2 = CacheHelper.hasher(o2, CacheHelper.HashMode.DEEP).hash()
return h1.asLong() <=> h2.asLong()
} as Comparator
break
case Comparator:
comparator = sort as Comparator
break
case Closure:
final closure = (Closure)sort
if( closure.getMaximumNumberOfParameters()==2 ) {
comparator = sort as Comparator
}
else if( closure.getMaximumNumberOfParameters()==1 ) {
comparator = { o1, o2 ->
def v1 = closure.call(o1)
def v2 = closure.call(o2)
return v1 <=> v2
} as Comparator
}
else
throw new IllegalArgumentException("Invalid groupTuple option - The closure should have 1 or 2 arguments")
break
default:
throw new IllegalArgumentException("Not a valid sort argument: ${sort}")
}
}
/**
* Main method to invoke the operator
*
* @return The resulting channel
*/
DataflowWriteChannel apply() {
if( target == null )
target = CH.create()
/*
* apply the logic the the source channel
*/
DataflowHelper.subscribeImpl(channel, [onNext: this.&collect, onComplete: this.&finalise])
/*
* return the target channel
*/
return target
}
private static sortInnerLists( List tuple, Comparator c ) {
for( int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy