All downloads are free. Search and download functionality uses the official Maven repository.

gorsat.spark.NativePartitionReader Maven / Gradle / Ivy

package gorsat.spark;

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.sql.connector.read.PartitionReader;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.unsafe.types.UTF8String;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;

import static org.apache.spark.sql.types.DataTypes.*;
import static org.apache.spark.sql.types.DataTypes.LongType;

/**
 * Spark {@link PartitionReader} that produces rows by parsing the standard output of an
 * external {@code cgor -p <chr> <path>} process.
 *
 * <p>The process is started lazily on the first call to {@link #next()}. The first output
 * line is discarded (NOTE(review): presumably a header line — confirm against cgor output);
 * every following line is treated as one tab-separated record whose columns are converted
 * according to the {@link StructField} types supplied at construction.
 *
 * <p>A single {@link GenericInternalRow} instance is reused for every record, so the row
 * returned by {@link #get()} is overwritten by the next call to {@link #get()}.
 */
public class NativePartitionReader implements PartitionReader<InternalRow> {
    /** Reader over the child process' standard output; {@code null} until the first {@link #next()}. */
    BufferedReader br;
    /** The record line most recently read by {@link #next()}, or {@code null} at end of stream. */
    String currentLine;
    /** Reusable row buffer handed out by {@link #get()}. */
    GenericInternalRow gir;
    /** Schema columns, in the same order as the tab-separated columns of each line. */
    StructField[] fields;
    /** Partition descriptor supplying the {@code chr} and {@code path} arguments for cgor. */
    GorRangeInputPartition p;
    /** Child process handle, kept so {@link #close()} can terminate it. */
    private Process process;

    /**
     * @param fields                 schema columns describing each tab-separated column
     * @param gorRangeInputPartition partition whose {@code chr} and {@code path} are passed to cgor
     */
    public NativePartitionReader(StructField[] fields, GorRangeInputPartition gorRangeInputPartition) {
        this.fields = fields;
        gir = new GenericInternalRow(new Object[fields.length]);
        p = gorRangeInputPartition;
    }

    /**
     * Advances to the next record, starting the cgor process on first use.
     *
     * @return {@code true} if a record line was read, {@code false} at end of stream
     * @throws UncheckedIOException if the process cannot be started or its output cannot be read
     */
    @Override
    public boolean next() {
        try {
            if (br == null) {
                ProcessBuilder processBuilder = new ProcessBuilder("cgor", "-p", p.chr, p.path);
                // Forward the child's stderr instead of leaving it as an unread pipe: a full
                // stderr pipe buffer would otherwise block the child and stall this reader.
                processBuilder.redirectError(ProcessBuilder.Redirect.INHERIT);
                process = processBuilder.start();
                InputStream is = process.getInputStream();
                // NOTE(review): decodes with the platform default charset, as before —
                // confirm whether cgor output should be read as UTF-8 explicitly.
                InputStreamReader isr = new InputStreamReader(is);
                br = new BufferedReader(isr);
                // Discard the first output line.
                br.readLine();
            }
            currentLine = br.readLine();
        } catch (IOException e) {
            // Do not leave a stale line behind, and do not silently report end-of-data:
            // a failed read must fail the task rather than truncate or repeat rows.
            currentLine = null;
            throw new UncheckedIOException(
                    "Failed to read cgor output for chr=" + p.chr + " path=" + p.path, e);
        }
        return currentLine != null;
    }

    /**
     * Converts {@code currentLine.substring(start, last)} to the type of column {@code i}
     * and stores it in the reusable row.
     *
     * <p>Only string, integer, double and long columns are handled; for any other type the
     * row slot is left unchanged.
     *
     * @param start inclusive start offset of the column text within the current line
     * @param last  exclusive end offset of the column text within the current line
     * @param i     column index into {@code fields} and the row
     */
    public void updateRow(int start, int last, int i) {
        String str = currentLine.substring(start, last);
        StructField sf = fields[i];
        if (StringType.equals(sf.dataType())) {
            gir.update(i, UTF8String.fromString(str));
        } else if (IntegerType.equals(sf.dataType())) {
            gir.update(i, Integer.parseInt(str));
        } else if (DoubleType.equals(sf.dataType())) {
            gir.update(i, Double.parseDouble(str));
        } else if (LongType.equals(sf.dataType())) {
            gir.update(i, Long.parseLong(str));
        }
    }

    /**
     * Parses the current line into the reusable row and returns it.
     *
     * <p>The first {@code fields.length - 1} columns are delimited by tabs; the last column
     * receives the remainder of the line.
     *
     * @return the shared row instance populated from the current line
     * @throws IllegalStateException if the line has fewer tab-separated columns than the schema
     */
    @Override
    public InternalRow get() {
        if (fields.length == 0) {
            return gir;
        }
        int lastColumn = fields.length - 1;
        int start = 0;
        for (int i = 0; i < lastColumn; i++) {
            // Search from 'start' itself (not start + 1) so that an empty column, whose
            // terminating tab sits exactly at 'start', is recognised instead of skipped.
            int tab = currentLine.indexOf('\t', start);
            if (tab < 0) {
                throw new IllegalStateException(
                        "Expected " + fields.length + " tab-separated columns but found "
                                + (i + 1) + " in line: " + currentLine);
            }
            updateRow(start, tab, i);
            start = tab + 1;
        }
        updateRow(start, currentLine.length(), lastColumn);

        return gir;
    }

    /**
     * Closes the output reader, if one was opened, and terminates the child process.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        try {
            // next() may never have been called, in which case there is nothing to close.
            if (br != null) {
                br.close();
            }
        } finally {
            if (process != null) {
                process.destroy();
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy