All downloads are free. Search and download functionality uses the official Maven repository.

gorsat.commands.PysparkAnalysis Maven / Gradle / Ivy

Go to download

GORpipe allows analysis of large sets of genomic and phenotypic tabular data using a declarative query language in a parallel execution engine

There is a newer version: 4.3.2
Show newest version
package gorsat.commands;

import gorsat.Commands.Analysis;
import org.apache.spark.api.python.Py4JServer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.gorpipe.spark.GorSparkUtilities;

import java.io.IOException;
import java.util.Map;

/**
 * Analysis step that runs an external {@code python3} process with access to a
 * Py4J gateway backed by the current Spark session.
 */
public class PysparkAnalysis extends Analysis {
    /** Gateway server shared by all invocations; created and started on first use. */
    static Py4JServer py4JServer = null;

    /**
     * Launches {@code python3} with the trimmed {@code cmd} as its single argument,
     * waits for it to finish, and returns the result of {@code select * from input}
     * on the Spark session.
     *
     * <p>The child process receives the gateway port and secret through the
     * {@code PYSPARK_GATEWAY_PORT} and {@code PYSPARK_GATEWAY_SECRET} environment
     * variables, and {@code PYSPARK_PIN_THREAD} is set to {@code "true"}.
     * The child's exit code and everything it wrote to stderr are printed to
     * {@code System.err}; its stdout is discarded.
     *
     * <p>NOTE(review): {@code ds} is not used by this method, and nothing here
     * registers a view named {@code input} - presumably the caller or the Python
     * script does so; confirm.
     *
     * @param ds  input dataset (currently unused by this method)
     * @param cmd argument handed to {@code python3} after trimming - presumably a script path
     * @return the dataset produced by {@code select * from input}
     * @throws IOException          if the process cannot be started or its stderr cannot be read
     * @throws InterruptedException if the calling thread is interrupted while waiting for the process
     */
    public static Dataset pyspark(Dataset ds, String cmd) throws IOException, InterruptedException {
        SparkSession spark = GorSparkUtilities.getSparkSession(null,null);
        if(py4JServer == null) {
            py4JServer = new Py4JServer(spark.sparkContext().conf());
            py4JServer.start();
        }

        ProcessBuilder pb = new ProcessBuilder("python3", cmd.trim());
        // Only stderr is consumed below. Leaving stdout as an unread pipe lets a
        // chatty script fill the pipe buffer and block forever, so discard it.
        pb.redirectOutput(ProcessBuilder.Redirect.DISCARD);

        Map<String, String> env = pb.environment();
        env.put("PYSPARK_GATEWAY_PORT",Integer.toString(py4JServer.getListeningPort()));
        env.put("PYSPARK_GATEWAY_SECRET",py4JServer.secret());
        env.put("PYSPARK_PIN_THREAD","true");

        Process p = pb.start();
        try {
            // readAllBytes returns once the child closes its stderr.
            String error = new String(p.getErrorStream().readAllBytes());
            int exitCode = p.waitFor();
            System.err.println("exitCode " + exitCode + " " + error);
        } finally {
            // If reading or waiting failed (I/O error, interruption), do not leave
            // the child process running in the background.
            if (p.isAlive()) {
                p.destroy();
            }
        }
        return spark.sql("select * from input");
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy