gorsat.spark.GorReaderFactory Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of gor-spark Show documentation

GORpipe allows analysis of large sets of genomic and phenotypic tabular data using a declarative query language that runs on a parallel execution engine.

There is a newer version: 4.3.2

Show newest version

package gorsat.spark;

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.connector.read.InputPartition;
import org.apache.spark.sql.connector.read.PartitionReader;
import org.apache.spark.sql.connector.read.PartitionReaderFactory;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/**
 * {@link PartitionReaderFactory} that builds the partition reader for a
 * {@link GorRangeInputPartition}.
 *
 * <p>The reader implementation is selected in {@link #createReader(InputPartition)}
 * from the {@code useCpp} setting and the number of fields in the schema.
 */
public class GorReaderFactory implements PartitionReaderFactory {
    /** Schema whose fields drive the choice of reader; also handed to the GOR readers. */
    StructType schema;
    /** Passed through unchanged to the GOR readers. */
    String redisUri;
    /** Passed through unchanged to the GOR readers. */
    String jobId;
    /** Stored by the constructor; not read by {@link #createReader(InputPartition)}. */
    String cacheFile;
    /** Reader selector; also passed through to the GOR readers. */
    String useCpp;
    /** Passed through unchanged to the GOR readers. */
    String projectRoot;
    /** Passed through unchanged to the GOR readers. */
    String cacheDir;

    /**
     * Stores the given settings for later use when readers are created.
     *
     * @param schema      schema of the rows to be read
     * @param redisUri    value forwarded to the GOR readers
     * @param jobId       value forwarded to the GOR readers
     * @param cacheFile   value kept on the factory
     * @param projectRoot value forwarded to the GOR readers
     * @param cacheDir    value forwarded to the GOR readers
     * @param useCpp      reader selector, may be {@code null}
     */
    public GorReaderFactory(StructType schema, String redisUri, String jobId, String cacheFile, String projectRoot, String cacheDir, String useCpp) {
        this.schema = schema;
        this.redisUri = redisUri;
        this.jobId = jobId;
        this.cacheFile = cacheFile;
        this.projectRoot = projectRoot;
        this.cacheDir = cacheDir;
        this.useCpp = useCpp;
    }

    /**
     * Creates the reader for one partition.
     *
     * <ul>
     *   <li>{@code useCpp} equal to {@code "blue"} (ignoring case): a {@code NativePartitionReader};</li>
     *   <li>otherwise, a schema with more than one field: a {@code GorPartitionReader};</li>
     *   <li>otherwise: a {@code GorStringPartitionReader}.</li>
     * </ul>
     *
     * @param partition the partition to read; it is cast to {@link GorRangeInputPartition}
     * @return the reader selected by the rules above
     * @throws ClassCastException if {@code partition} is not a {@link GorRangeInputPartition}
     */
    @Override
    public PartitionReader createReader(InputPartition partition) {
        StructField[] columns = schema.fields();
        GorRangeInputPartition rangePartition = (GorRangeInputPartition) partition;

        // Constant-first comparison yields false for a null useCpp, so no separate null check is needed.
        if ("blue".equalsIgnoreCase(useCpp)) {
            return new NativePartitionReader(columns, rangePartition);
        }
        if (columns.length > 1) {
            return new GorPartitionReader(schema, rangePartition, redisUri, jobId, projectRoot, cacheDir, useCpp);
        }
        return new GorStringPartitionReader(schema, rangePartition, redisUri, jobId, projectRoot, cacheDir, useCpp);
    }
}