org.gorpipe.spark.PipeFunction Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gor-spark Show documentation
Show all versions of gor-spark Show documentation
GORpipe allows analysis of large sets of genomic and phenotypic tabular data using a declarative query language in a parallel execution engine
package org.gorpipe.spark;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import org.apache.spark.api.java.function.MapPartitionsFunction;
/**
 * Spark partition function that pipes the rows of a partition through an external command.
 *
 * <p>For every partition the command is started as a child process. The header line, followed
 * by one line per input row, is written to the process's standard input; the lines the process
 * prints on standard output, except the first one, are returned as the result. Anything the
 * process prints on standard error is forwarded to {@code System.err}.
 */
public class PipeFunction implements MapPartitionsFunction<String, String>, Serializable {
    /** Command line to execute; it is split on single spaces into program and arguments. */
    String cmd;
    /** Line written to the process before any row. */
    String header;

    /**
     * Creates a function that runs {@code cmd} once per partition.
     *
     * @param cmd    command line, with program and arguments separated by single spaces
     * @param header line sent to the process ahead of the partition's rows
     */
    public PipeFunction(String cmd, String header) {
        this.cmd = cmd;
        this.header = header;
    }

    /**
     * Starts the command and streams the partition through it.
     *
     * <p>Feeding standard input and forwarding standard error run on their own threads so that
     * the caller can consume standard output while the process is still being fed. The returned
     * iterator is lazy: it reads from the process as it is advanced, and this method neither
     * waits for the process to exit nor inspects its exit code.
     *
     * @param iterator rows of the partition, each written to the process as one line
     * @return the process's output lines with the first line skipped
     * @throws Exception if the process cannot be started
     */
    @Override
    public Iterator<String> call(Iterator<String> iterator) throws Exception {
        // Note: a plain split on " " does no quoting; consecutive spaces yield empty arguments.
        String[] split = cmd.split(" ");
        Process p = new ProcessBuilder(split).start();
        OutputStream os = p.getOutputStream();
        InputStream err = p.getErrorStream();
        String headerLine = header;

        Thread t = new Thread(() -> feedProcess(os, headerLine, iterator));
        t.start();

        Thread t2 = new Thread(() -> forwardErrors(err));
        t2.start();

        BufferedReader br = new BufferedReader(
                new InputStreamReader(p.getInputStream(), StandardCharsets.UTF_8));
        // The first output line is dropped -- presumably the header echoed/produced by the
        // command; confirm against the commands this is used with.
        return br.lines().skip(1).iterator();
    }

    /** Writes the header and every row as UTF-8 lines to the process, then closes its stdin. */
    private static void feedProcess(OutputStream stdin, String headerLine, Iterator<String> rows) {
        // try-with-resources guarantees the process sees end-of-input even when a write fails
        // or the upstream iterator throws; otherwise the child could wait on stdin forever.
        try (OutputStream out = stdin) {
            out.write(headerLine.getBytes(StandardCharsets.UTF_8));
            out.write('\n');
            while (rows.hasNext()) {
                out.write(rows.next().getBytes(StandardCharsets.UTF_8));
                out.write('\n');
            }
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException("Unable to write to procss", e);
        }
    }

    /** Copies the process's stderr to {@code System.err}, followed by a line break. */
    private static void forwardErrors(InputStream stderr) {
        try (InputStream in = stderr) {
            // Bytes are passed through untouched so multi-byte characters are not mangled.
            byte[] buffer = new byte[8192];
            int read = in.read(buffer);
            while (read != -1) {
                System.err.write(buffer, 0, read);
                read = in.read(buffer);
            }
            System.err.println();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy