gorsat.commands.PysparkAnalysis Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gor-spark Show documentation
Show all versions of gor-spark Show documentation
GORpipe allows analysis of large sets of genomic and phenotypic tabular data using a declarative query language in a parallel execution engine
package gorsat.commands;
import gorsat.Commands.Analysis;
import org.apache.spark.api.python.Py4JServer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.gorpipe.spark.GorSparkUtilities;
import java.io.IOException;
import java.util.Map;
public class PysparkAnalysis extends Analysis {
static Py4JServer py4JServer = null;
public static Dataset pyspark(Dataset extends Row> ds, String cmd) throws IOException, InterruptedException {
SparkSession spark = GorSparkUtilities.getSparkSession(null,null);
if(py4JServer == null) {
py4JServer = new Py4JServer(spark.sparkContext().conf());
py4JServer.start();
}
ProcessBuilder pb = new ProcessBuilder("python3", cmd.trim());
Map env = pb.environment();
env.put("PYSPARK_GATEWAY_PORT",Integer.toString(py4JServer.getListeningPort()));
env.put("PYSPARK_GATEWAY_SECRET",py4JServer.secret());
env.put("PYSPARK_PIN_THREAD","true");
Process p = pb.start();
String error = new String(p.getErrorStream().readAllBytes());
int exitCode = p.waitFor();
System.err.println("exitCode " + exitCode + " " + error);
//return (Dataset)ds.filter("gene_symbol = 'BRCA2'");
return spark.sql("select * from input");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy