gorsat.spark.GorPartitioning Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of gor-spark Show documentation

GORpipe allows analysis of large sets of genomic and phenotypic tabular data using a declarative query language in a parallel execution engine.

There is a newer version: 4.3.2

Show newest version

package gorsat.spark;

import org.apache.spark.sql.connector.read.partitioning.ClusteredDistribution;
import org.apache.spark.sql.connector.read.partitioning.Distribution;
import org.apache.spark.sql.connector.read.partitioning.Partitioning;

import java.util.Arrays;

/**
 * {@link Partitioning} implementation that reports a fixed partition count and
 * accepts only clustered distributions whose column list includes {@code "i"}.
 *
 * <p>NOTE(review): both the partition count and the column name are hard-coded
 * here; confirm against the reader that produces the partitions that these
 * values match what it actually emits.
 */
public class GorPartitioning implements Partitioning {

    /**
     * Reports how many partitions this partitioning describes.
     *
     * @return always {@code 2}
     */
    @Override
    public int numPartitions() {
        return 2;
    }

    /**
     * Tells whether this partitioning meets the requested distribution.
     *
     * @param distribution the distribution requirement to check
     * @return {@code true} only when {@code distribution} is a
     *         {@link ClusteredDistribution} whose clustered columns contain
     *         {@code "i"}; {@code false} for every other distribution
     */
    @Override
    public boolean satisfy(Distribution distribution) {
        // Anything that is not a clustered distribution is rejected outright.
        if (!(distribution instanceof ClusteredDistribution)) {
            return false;
        }

        ClusteredDistribution clustered = (ClusteredDistribution) distribution;
        return Arrays.stream(clustered.clusteredColumns).anyMatch("i"::equals);
    }
}