All downloads are free. The search and download features use the official Maven repository.

io.castled.utils.CsvStructParser Maven / Gradle / Ivy

There is a newer version: 1.0.0
Show newest version
package io.castled.utils;

import io.castled.exceptions.CastledRuntimeException;
import io.castled.schema.IncompatibleValueException;
import io.castled.schema.SchemaMapper;
import io.castled.schema.models.FieldSchema;
import io.castled.schema.models.Tuple;
import io.castled.schema.models.RecordSchema;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

import java.io.*;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.zip.GZIPInputStream;

/**
 * Forward-only reader that pulls individual records out of a CSV file (optionally
 * gzip-compressed) and converts them into {@link Tuple}s according to a {@link RecordSchema}.
 *
 * <p>The file is parsed as RFC 4180 CSV whose first record is a header; the header is used to
 * look up column values by field name and is not returned as data. Line numbers are 1-based with
 * the header counted as line 1, so the first data record is line 2. Every CSV record advances the
 * counter by exactly one.
 *
 * <p>NOTE(review): a quoted field containing embedded newlines spans several physical lines but
 * still counts as a single "line" here - confirm this matches how callers number lines.
 *
 * <p>NOTE(review): the file is decoded with the platform default charset (same as the original
 * {@code FileReader}/{@code InputStreamReader} usage) - confirm whether UTF-8 should be forced.
 *
 * <p>Instances hold an open file handle; call {@link #close()} (or use try-with-resources) when
 * done.
 */
@Slf4j
public class CsvStructParser implements Closeable {

    private final Path filePath;
    // Kept so that the underlying reader/stream can be released in close().
    private final CSVParser csvParser;
    private final Iterator<CSVRecord> csvRecordIterator;
    private final SchemaMapper schemaMapper;
    private final RecordSchema recordSchema;
    // Line number of the last line consumed from the file.
    private long readLineNumber;

    /**
     * Opens the CSV file and positions the reader just after the header record.
     *
     * @param filePath     location of the CSV file
     * @param schemaMapper converter used to turn raw CSV strings into schema-typed values
     * @param recordSchema schema listing the fields to extract from every record
     * @param gzipped      {@code true} if the file content is gzip-compressed
     * @throws IOException if the file cannot be opened or its header cannot be read
     */
    public CsvStructParser(Path filePath, SchemaMapper schemaMapper,
                           RecordSchema recordSchema, boolean gzipped) throws IOException {
        this.filePath = filePath;
        this.schemaMapper = schemaMapper;
        this.recordSchema = recordSchema;

        this.csvParser = openParser(filePath, gzipped);
        this.csvRecordIterator = this.csvParser.iterator();
        // The header occupies line 1 and has already been consumed by the parser.
        this.readLineNumber = 1;
    }

    /**
     * Builds the CSV parser over the file, making sure the file handle is released if any of the
     * wrapping steps (gzip header check, CSV header read) fails.
     */
    private static CSVParser openParser(Path filePath, boolean gzipped) throws IOException {
        InputStream fileStream = new FileInputStream(filePath.toFile());
        try {
            InputStream dataStream = gzipped ? new GZIPInputStream(fileStream) : fileStream;
            Reader reader = new BufferedReader(new InputStreamReader(dataStream));
            return new CSVParser(reader, CSVFormat.RFC4180.withHeader().withSkipHeaderRecord());
        } catch (IOException | RuntimeException e) {
            try {
                fileStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Advances the reader to the requested line and returns that record as a {@link Tuple}.
     *
     * <p>The reader only moves forward: records between the current position and
     * {@code lineNumber} are skipped, and a line at or before the current position can no longer
     * be fetched. A request for an already-passed line fails without consuming further input, so
     * later requests for lines that are still ahead keep working.
     *
     * @param lineNumber 1-based line number of the record to read (header is line 1, first data
     *                   record is line 2)
     * @return the record at {@code lineNumber}, with one value per field of the record schema
     * @throws CastledRuntimeException if the reader is already at or past {@code lineNumber}, if
     *                                 the file ends before {@code lineNumber} is reached, or if a
     *                                 value cannot be converted to its field schema
     */
    public Tuple getRecord(long lineNumber) {
        try {
            while (this.csvRecordIterator.hasNext()) {
                // Check before consuming: the next record would be line readLineNumber + 1, so
                // the requested line is unreachable once readLineNumber >= lineNumber.
                if (readLineNumber >= lineNumber) {
                    throw new CastledRuntimeException(String.format("Reader has passed reading line number %d", lineNumber));
                }
                CSVRecord csvRecord = this.csvRecordIterator.next();
                readLineNumber++;
                if (readLineNumber == lineNumber) {
                    Tuple.Builder recordBuilder = Tuple.builder();
                    for (FieldSchema field : recordSchema.getFieldSchemas()) {
                        // Column lookup is by header name; the raw string is converted to the field's schema type.
                        recordBuilder.put(field, this.schemaMapper.transformValue(csvRecord.get(field.getName()), field.getSchema()));
                    }
                    return recordBuilder.build();
                }
            }
            throw new CastledRuntimeException(String.format("Line number %d not present in file", lineNumber));
        } catch (IncompatibleValueException e) {
            log.error("Failed to read csv record in file {}", filePath.toString(), e);
            throw new CastledRuntimeException(e);
        }

    }

    /**
     * Closes the CSV parser and, through it, the underlying file reader.
     *
     * @throws IOException if closing the underlying reader fails
     */
    @Override
    public void close() throws IOException {
        this.csvParser.close();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy