gorsat.process.GorSparkMaterialize Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of gor-spark Show documentation

GORpipe allows analysis of large sets of genomic and phenotypic tabular data using a declarative query language in a parallel execution engine

There is a newer version: 4.3.2

Show newest version

package gorsat.process;

import gorsat.BatchedPipeStepIteratorAdaptor;
import org.apache.spark.sql.types.StructType;
import org.gorpipe.gor.model.Row;

import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

/**
 * A {@link GorSpark} variant whose {@link #call(Iterator)} eagerly collects
 * ("materializes") the pipeline output into an in-memory list, capped at
 * {@link #max} elements, before handing it back as an iterator.
 */
public class GorSparkMaterialize extends GorSpark {
    /** Upper bound on the number of elements collected per {@link #call(Iterator)} invocation. */
    int max;

    /**
     * Creates a materializing step. All arguments except {@code max} are
     * forwarded unchanged to the matching {@link GorSpark} constructor.
     *
     * @param inputHeader forwarded to {@code GorSpark}
     * @param nor         forwarded to {@code GorSpark}
     * @param schema      forwarded to {@code GorSpark}
     * @param gorcmd      forwarded to {@code GorSpark}
     * @param gorroot     forwarded to {@code GorSpark}
     * @param max         maximum number of elements to collect per call
     */
    public GorSparkMaterialize(String inputHeader, boolean nor, StructType schema, String gorcmd, String gorroot, int max) {
        super(inputHeader, nor, schema, gorcmd, gorroot);
        this.max = max;
    }

    /**
     * Creates a materializing step. All arguments except {@code max} are
     * forwarded unchanged to the matching {@link GorSpark} constructor.
     *
     * @param inputHeader forwarded to {@code GorSpark}
     * @param nor         forwarded to {@code GorSpark}
     * @param schema      forwarded to {@code GorSpark}
     * @param gorcmd      forwarded to {@code GorSpark}
     * @param gorroot     forwarded to {@code GorSpark}
     * @param uri         forwarded to {@code GorSpark}
     * @param jobId       forwarded to {@code GorSpark}
     * @param max         maximum number of elements to collect per call
     */
    public GorSparkMaterialize(String inputHeader, boolean nor, StructType schema, String gorcmd, String gorroot, String uri, String jobId, int max) {
        super(inputHeader, nor, schema, gorcmd, gorroot, uri, jobId);
        this.max = max;
    }

    /**
     * Runs the inherited pipeline over {@code iterator}, collects at most
     * {@link #max} resulting elements into a list, closes the pipeline
     * adaptor, and returns an iterator over the collected list.
     *
     * @param iterator the input elements, passed to the inherited {@code getIterator}
     * @return an iterator over the collected (at most {@code max}) elements
     * @throws IllegalArgumentException if {@code max} is negative (raised by {@code Stream.limit})
     */
    @Override
    public Iterator call(Iterator iterator) {
        BatchedPipeStepIteratorAdaptor bpia = getIterator(iterator);
        try {
            // Sequential stream: elements are pulled in order and the pull
            // stops as soon as `max` elements have been collected.
            List<?> res = StreamSupport.stream(bpia, false)
                    .limit(max)
                    .collect(Collectors.toList());
            return res.iterator();
        } finally {
            // Close even when collecting fails, so the adaptor is never
            // leaked on the error path. The list is already fully built at
            // this point, so closing does not affect the returned iterator.
            bpia.close();
        }
    }
}