/*
 * gorsat.process.GorSpark (Maven / Gradle / Ivy)
 *
 * Repository-page navigation text captured with this file: "Go to download";
 * "Show more of this group"; "Show more artifacts with this name";
 * "Show all versions of gor-spark"; "Show documentation".
 *
 * GORpipe allows analysis of large sets of genomic and phenotypic tabular data
 * using a declarative query language in a parallel execution engine.
 */
package gorsat.process;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Iterator;
import java.util.stream.StreamSupport;
import gorsat.BatchedPipeStepIteratorAdaptor;
import gorsat.Commands.Analysis;
import org.apache.spark.api.java.function.MapPartitionsFunction;
import org.apache.spark.sql.types.StructType;
import org.gorpipe.gor.model.Row;
import org.gorpipe.gor.monitor.GorMonitor;
import org.gorpipe.gor.session.GorSession;
import org.gorpipe.spark.SparkGorMonitor;
import org.gorpipe.spark.SparkGorRow;
public class GorSpark implements MapPartitionsFunction {
StructType schema;
boolean nor;
String header;
String gorcmd;
String gorroot;
String uri;
String jobId;
public GorSpark(String inputHeader, boolean nor, StructType schema, String gorcmd, String gorroot) {
this.schema = schema;
this.nor = nor;
this.header = inputHeader;
this.gorcmd = gorcmd;
this.gorroot = gorroot;
}
public GorSpark(String inputHeader, boolean nor, StructType schema, String gorcmd, String gorroot, String uri, String jobId) {
this(inputHeader,nor,schema,gorcmd,gorroot);
this.uri = uri;
this.jobId = jobId;
}
public void setSchema(StructType st) {
schema = st;
}
public PipeInstance query() {
Path projectPath = Paths.get(gorroot);
GenericSessionFactory gsf = Files.exists(projectPath) ? new GenericSessionFactory(gorroot, "result_cache") : new GenericSessionFactory();
GorSession gps = gsf.create();
gps.setNorContext(nor);
if( uri != null ) {
GorMonitor gorMonitor = new SparkGorMonitor(uri, jobId);
gps.getSystemContext().setMonitor(gorMonitor);
}
PipeInstance pi = new PipeInstance(gps.getGorContext());
pi.init(gorcmd, true, header);
return pi;
}
BatchedPipeStepIteratorAdaptor getIterator(Iterator extends Row> iterator) {
PipeInstance pi = query();
Analysis an = pi.getPipeStep();
return new BatchedPipeStepIteratorAdaptor(iterator, an, header, GorPipe.brsConfig());
}
@Override
public Iterator call(Iterator iterator) {
//return getIterator(iterator);
return StreamSupport.stream(getIterator(iterator),false).map(r -> (Row)new SparkGorRow(r,schema)).iterator();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy