All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.engine.spark.RangePartitionCoalescer Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.engine.spark;

import org.apache.spark.Partition;
import org.apache.spark.rdd.PartitionCoalescer;
import org.apache.spark.rdd.PartitionGroup;
import org.apache.spark.rdd.RDD;
import scala.collection.Seq;
import scala.jdk.javaapi.CollectionConverters;
import java.io.Serial;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;

/**
 * A {@link PartitionCoalescer} that allows a range of partitions to be coalesced into groups.
 */
class RangePartitionCoalescer implements PartitionCoalescer, Serializable {

    @Serial
    private static final long serialVersionUID = 1L;

    private List maxEndPartitionIndexes;

    /**
     * @param maxEndPartitionIndexes the indexes of the end of each coalesced partition, so that
     *                               coalesced partition {@code i} in the coalesced partitions is made up of partitions
     *                               from index {@code i} to {@code maxEndPartitionIndexes.get(i)} (inclusive)
     */
    public RangePartitionCoalescer(List maxEndPartitionIndexes) {
        this.maxEndPartitionIndexes = maxEndPartitionIndexes;
    }

    @Override
    public PartitionGroup[] coalesce(int maxPartitions, RDD parent) {
        if (maxPartitions != parent.getNumPartitions()) {
            throw new IllegalArgumentException("Cannot use " + getClass().getSimpleName() +
                    " with a different number of partitions to the parent RDD.");
        }
        List partitions = Arrays.asList(parent.getPartitions());
        PartitionGroup[] groups = new PartitionGroup[partitions.size()];

        for (int i = 0; i < partitions.size(); i++) {
            Seq preferredLocations = parent.getPreferredLocations(partitions.get(i));
            scala.Option preferredLocation = scala.Option.apply
                    (preferredLocations.isEmpty() ? null : preferredLocations.apply(0));
            PartitionGroup group = new PartitionGroup(preferredLocation);
            List partitionsInGroup =
                    partitions.subList(i, maxEndPartitionIndexes.get(i) + 1);
            group.partitions().addAll(CollectionConverters.asScala(partitionsInGroup).toList());
            groups[i] = group;
        }
        return groups;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy