All downloads are free. Search and download functionality uses the official Maven repository.

apoc.export.parquet.ExportParquetStrategy Maven / Gradle / Ivy

package apoc.export.parquet;

import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.schema.MessageType;

import java.io.IOException;
import java.util.List;

/**
 * Strategy for exporting data to Parquet.
 *
 * <p>Implementations provide {@link #export}; the default methods are shared helpers for
 * flushing a batch of rows to a writer and for finishing the configuration of a writer builder.
 *
 * @param <IN>  type of the data passed to {@link #export}
 * @param <OUT> type of the result returned by {@link #export}
 */
public interface ExportParquetStrategy<IN, OUT> {

    /**
     * Exports the given data.
     *
     * @param data   the data to export
     * @param config the export configuration
     * @return the result of the export
     */
    OUT export(IN data, ParquetConfig config);

    /**
     * Converts every element of {@code rows} with {@code type.toRecord(schema, row)}, writes the
     * resulting record to {@code writer}, and finally empties {@code rows}.
     *
     * <p>If a write fails, the exception propagates and {@code rows} is left uncleared.
     * Exceptions raised by {@code toRecord} are not wrapped.
     *
     * <p>NOTE(review): {@code writer} and {@code type} are kept as raw types because their
     * parameterization is not visible in this file; {@code writer} is presumably a
     * {@code ParquetWriter<Group>} -- confirm and tighten.
     *
     * @param rows   rows to write; cleared after all of them have been written
     * @param writer destination writer
     * @param type   converter turning a row into a writable record
     * @param schema schema passed to the converter
     * @param <TYPE> element type of {@code rows}
     * @throws RuntimeException wrapping any exception thrown by {@code writer.write}
     */
    default <TYPE> void writeRows(List<TYPE> rows, ParquetWriter writer, ParquetExportType type, MessageType schema) {
        rows.stream()
                .map(row -> type.toRecord(schema, row))
                .forEach(record -> {
                    try {
                        writer.write(record);
                    } catch (Exception e) {
                        // write failures are surfaced unchecked, with the original cause preserved
                        throw new RuntimeException(e);
                    }
                });
        rows.clear();
    }

    /**
     * Applies the schema and the fixed writer settings to {@code builder} and builds the writer.
     *
     * <p>Validation is disabled and the write mode is {@code OVERWRITE}.
     *
     * @param schema  schema set on the builder via {@code withType}
     * @param builder builder to finish configuring
     * @return the writer produced by {@code builder.build()}
     * @throws RuntimeException wrapping the {@link IOException} thrown while building
     */
    default ParquetWriter getBuild(MessageType schema, ExampleParquetWriter.Builder builder)  {
        try {
            return builder
                    .withType(schema)
                    // TODO - configurable. This generates a .crc file
                    .withValidation(false)
                    // TODO - check other configs, e.g. .enableDictionaryEncoding(), .withDictionaryPageSize(2*1024) etc..
                    .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
                    .build();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy