/*
* Copyright (c) 2022-2024 Snowflake Computing Inc. All rights reserved.
*/
package org.apache.parquet.hadoop;
import com.google.common.annotations.VisibleForTesting;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import net.snowflake.ingest.utils.ErrorCode;
import net.snowflake.ingest.utils.SFException;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.ParquetReadOptions;
import org.apache.parquet.hadoop.api.InitContext;
import org.apache.parquet.hadoop.api.ReadSupport;
import org.apache.parquet.io.DelegatingSeekableInputStream;
import org.apache.parquet.io.InputFile;
import org.apache.parquet.io.SeekableInputStream;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.Converter;
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
/**
* BDEC specific parquet reader.
*
* Resides in the parquet package because it uses {@link InternalParquetRecordReader},
* which is package private.
*/
public class BdecParquetReader implements AutoCloseable {
private final InternalParquetRecordReader<List<Object>> reader;
private final ParquetFileReader fileReader;
/**
* @param data buffer containing the Parquet data to be read
* @throws IOException
*/
public BdecParquetReader(byte[] data) throws IOException {
ParquetReadOptions options = ParquetReadOptions.builder().build();
fileReader = ParquetFileReader.open(new BdecInputFile(data), options);
reader = new InternalParquetRecordReader<>(new BdecReadSupport(), options.getRecordFilter());
reader.initialize(fileReader, options);
}
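/*
 * Usage sketch (illustrative, not part of the original class): assuming "data"
 * holds a complete BDEC Parquet file in memory and that read() returns null once
 * all rows have been consumed, the rows can be drained as below. consume(...) is
 * a hypothetical callback; try-with-resources works because the class implements
 * AutoCloseable.
 *
 *   try (BdecParquetReader parquetReader = new BdecParquetReader(data)) {
 *     List<Object> row;
 *     while ((row = parquetReader.read()) != null) {
 *       consume(row);
 *     }
 *   }
 */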
/**
* Reads the current row, i.e. the list of values.
*
* @return current row
* @throws IOException
*/
public List<Object> read() throws IOException {