gorsat.process.GorSparkMaterialize Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gor-spark Show documentation
Show all versions of gor-spark Show documentation
GORpipe allows analysis of large sets of genomic and phenotypic tabular data using a declarative query language in a parallel execution engine.
package gorsat.process;
import gorsat.BatchedPipeStepIteratorAdaptor;
import org.apache.spark.sql.types.StructType;
import org.gorpipe.gor.model.Row;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
/**
 * {@link GorSpark} variant whose {@link #call(Iterator)} drains at most {@code max}
 * elements from the pipeline into an in-memory list before handing them back.
 *
 * <p>NOTE(review): the raw {@code Iterator}/{@code List} types are kept exactly as in the
 * visible signature; confirm the element type against {@code GorSpark} before adding
 * generics, since changing it here could break the override.
 */
public class GorSparkMaterialize extends GorSpark {
    /** Upper bound on the number of elements collected by {@link #call(Iterator)}. */
    int max;

    /**
     * Creates an instance; every argument except {@code max} is forwarded unchanged to the
     * matching {@code GorSpark} constructor.
     *
     * @param inputHeader forwarded to {@code GorSpark}
     * @param nor         forwarded to {@code GorSpark}
     * @param schema      forwarded to {@code GorSpark}
     * @param gorcmd      forwarded to {@code GorSpark}
     * @param gorroot     forwarded to {@code GorSpark}
     * @param max         maximum number of elements {@link #call(Iterator)} collects
     */
    public GorSparkMaterialize(String inputHeader, boolean nor, StructType schema, String gorcmd, String gorroot, int max) {
        super(inputHeader, nor, schema, gorcmd, gorroot);
        this.max = max;
    }

    /**
     * Creates an instance; every argument except {@code max} is forwarded unchanged to the
     * matching {@code GorSpark} constructor.
     *
     * @param inputHeader forwarded to {@code GorSpark}
     * @param nor         forwarded to {@code GorSpark}
     * @param schema      forwarded to {@code GorSpark}
     * @param gorcmd      forwarded to {@code GorSpark}
     * @param gorroot     forwarded to {@code GorSpark}
     * @param uri         forwarded to {@code GorSpark}
     * @param jobId       forwarded to {@code GorSpark}
     * @param max         maximum number of elements {@link #call(Iterator)} collects
     */
    public GorSparkMaterialize(String inputHeader, boolean nor, StructType schema, String gorcmd, String gorroot, String uri, String jobId, int max) {
        super(inputHeader, nor, schema, gorcmd, gorroot, uri, jobId);
        this.max = max;
    }

    /**
     * Wraps {@code iterator} with {@code getIterator}, collects up to {@code max} elements
     * from the resulting adaptor into a list, closes the adaptor, and returns an iterator
     * over the collected list.
     *
     * @param iterator the incoming elements to run through the pipeline
     * @return an iterator over the collected elements (at most {@code max} of them)
     * @throws IllegalArgumentException if {@code max} is negative (raised by
     *                                  {@code Stream.limit}); the adaptor is still closed
     */
    @Override
    public Iterator call(Iterator iterator) {
        BatchedPipeStepIteratorAdaptor bpia = getIterator(iterator);
        List res;
        try {
            res = StreamSupport.stream(bpia, false).limit(max).collect(Collectors.toList());
        } finally {
            // Close the adaptor even when collection fails; otherwise a failing pipeline
            // step or an invalid limit would leave it open.
            bpia.close();
        }
        // The list is fully materialized, so its own iterator is enough.
        return res.iterator();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy