io.trino.parquet.reader.ParquetReader
Trino - Parquet file format support
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.parquet.reader;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ListMultimap;
import com.google.errorprone.annotations.FormatMethod;
import io.airlift.log.Logger;
import io.trino.memory.context.AggregatedMemoryContext;
import io.trino.parquet.ChunkKey;
import io.trino.parquet.Column;
import io.trino.parquet.DiskRange;
import io.trino.parquet.Field;
import io.trino.parquet.GroupField;
import io.trino.parquet.ParquetCorruptionException;
import io.trino.parquet.ParquetDataSource;
import io.trino.parquet.ParquetReaderOptions;
import io.trino.parquet.ParquetWriteValidation;
import io.trino.parquet.PrimitiveField;
import io.trino.parquet.metadata.ColumnChunkMetadata;
import io.trino.parquet.metadata.PrunedBlockMetadata;
import io.trino.parquet.predicate.TupleDomainParquetPredicate;
import io.trino.parquet.reader.FilteredOffsetIndex.OffsetRange;
import io.trino.plugin.base.metrics.LongCount;
import io.trino.spi.Page;
import io.trino.spi.block.ArrayBlock;
import io.trino.spi.block.Block;
import io.trino.spi.block.DictionaryBlock;
import io.trino.spi.block.RowBlock;
import io.trino.spi.block.RunLengthEncodedBlock;
import io.trino.spi.metrics.Metric;
import io.trino.spi.metrics.Metrics;
import io.trino.spi.type.ArrayType;
import io.trino.spi.type.MapType;
import io.trino.spi.type.RowType;
import io.trino.spi.type.Type;
import jakarta.annotation.Nullable;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.internal.column.columnindex.OffsetIndex;
import org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter;
import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore;
import org.joda.time.DateTimeZone;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.trino.parquet.ParquetValidationUtils.validateParquet;
import static io.trino.parquet.ParquetWriteValidation.StatisticsValidation;
import static io.trino.parquet.ParquetWriteValidation.StatisticsValidation.createStatisticsValidationBuilder;
import static io.trino.parquet.ParquetWriteValidation.WriteChecksumBuilder;
import static io.trino.parquet.ParquetWriteValidation.WriteChecksumBuilder.createWriteChecksumBuilder;
import static io.trino.parquet.reader.ListColumnReader.calculateCollectionOffsets;
import static io.trino.parquet.reader.PageReader.createPageReader;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
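/**
 * Streaming, batch-oriented reader for a single Parquet file. Callers pull lazily materialized
 * {@link Page}s until the reader is exhausted and then close it to release buffered data.
 *
 * <p>A minimal usage sketch, assuming the surrounding engine code has already prepared the
 * constructor arguments (row groups, column fields, data source, and so on); the
 * {@code createReader()} and {@code process()} helpers below are purely illustrative:
 * <pre>{@code
 * try (ParquetReader reader = createReader()) {
 *     Page page;
 *     while ((page = reader.nextPage()) != null) {
 *         // blocks are lazy; load them before the reader advances to the next page
 *         process(page.getLoadedPage());
 *     }
 * }
 * }</pre>
 */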
public class ParquetReader
implements Closeable
{
private static final Logger log = Logger.get(ParquetReader.class);
private static final int INITIAL_BATCH_SIZE = 1;
private static final int BATCH_SIZE_GROWTH_FACTOR = 2;
public static final String PARQUET_CODEC_METRIC_PREFIX = "ParquetReaderCompressionFormat_";
public static final String COLUMN_INDEX_ROWS_FILTERED = "ParquetColumnIndexRowsFiltered";
private final Optional<String> fileCreatedBy;
private final List<RowGroupInfo> rowGroups;
private final List<Column> columnFields;
private final List<PrimitiveField> primitiveFields;
private final ParquetDataSource dataSource;
private final ColumnReaderFactory columnReaderFactory;
private final AggregatedMemoryContext memoryContext;
private int currentRowGroup = -1;
private PrunedBlockMetadata currentBlockMetadata;
private long currentGroupRowCount;
/**
* Index in the Parquet file of the first row of the current group
*/
private long firstRowIndexInGroup;
/**
* Index in the current group of the next row
*/
private long nextRowInGroup;
private int batchSize;
private int nextBatchSize = INITIAL_BATCH_SIZE;
private final Map<Integer, ColumnReader> columnReaders;
private final Map<Integer, Double> maxBytesPerCell;
private double maxCombinedBytesPerRow;
private final ParquetReaderOptions options;
private int maxBatchSize;
private AggregatedMemoryContext currentRowGroupMemoryContext;
private final Map<ChunkKey, ChunkedInputStream> chunkReaders;
private final Optional<ParquetWriteValidation> writeValidation;
private final Optional<WriteChecksumBuilder> writeChecksumBuilder;
private final Optional<StatisticsValidation> rowGroupStatisticsValidation;
private final FilteredRowRanges[] blockRowRanges;
private final ParquetBlockFactory blockFactory;
private final Map<String, Metric<?>> codecMetrics;
private long columnIndexRowsFiltered = -1;
public ParquetReader(
Optional<String> fileCreatedBy,
List<Column> columnFields,
List<RowGroupInfo> rowGroups,
ParquetDataSource dataSource,
DateTimeZone timeZone,
AggregatedMemoryContext memoryContext,
ParquetReaderOptions options,
Function<Exception, RuntimeException> exceptionTransform,
Optional<TupleDomainParquetPredicate> parquetPredicate,
Optional<ParquetWriteValidation> writeValidation)
throws IOException
{
this.fileCreatedBy = requireNonNull(fileCreatedBy, "fileCreatedBy is null");
requireNonNull(columnFields, "columnFields is null");
this.columnFields = ImmutableList.copyOf(columnFields);
this.primitiveFields = getPrimitiveFields(columnFields.stream().map(Column::field).collect(toImmutableList()));
this.rowGroups = requireNonNull(rowGroups, "rowGroups is null");
this.dataSource = requireNonNull(dataSource, "dataSource is null");
this.columnReaderFactory = new ColumnReaderFactory(timeZone, options);
this.memoryContext = requireNonNull(memoryContext, "memoryContext is null");
this.currentRowGroupMemoryContext = memoryContext.newAggregatedMemoryContext();
this.options = requireNonNull(options, "options is null");
this.maxBatchSize = options.getMaxReadBlockRowCount();
this.columnReaders = new HashMap<>();
this.maxBytesPerCell = new HashMap<>();
this.writeValidation = requireNonNull(writeValidation, "writeValidation is null");
validateWrite(
validation -> fileCreatedBy.equals(Optional.of(validation.getCreatedBy())),
"Expected created by %s, found %s",
writeValidation.map(ParquetWriteValidation::getCreatedBy),
fileCreatedBy);
validateBlockMetadata(rowGroups);
this.writeChecksumBuilder = writeValidation.map(validation -> createWriteChecksumBuilder(validation.getTypes()));
this.rowGroupStatisticsValidation = writeValidation.map(validation -> createStatisticsValidationBuilder(validation.getTypes()));
requireNonNull(parquetPredicate, "parquetPredicate is null");
Optional<FilterPredicate> filter = Optional.empty();
if (parquetPredicate.isPresent() && options.isUseColumnIndex()) {
filter = parquetPredicate.get().toParquetFilter(timeZone);
}
this.blockRowRanges = calculateFilteredRowRanges(rowGroups, filter, primitiveFields);
this.blockFactory = new ParquetBlockFactory(exceptionTransform);
ListMultimap<ChunkKey, DiskRange> ranges = ArrayListMultimap.create();
Map<String, Metric<?>> codecMetrics = new HashMap<>();
for (int rowGroup = 0; rowGroup < rowGroups.size(); rowGroup++) {
PrunedBlockMetadata blockMetadata = rowGroups.get(rowGroup).prunedBlockMetadata();
long rowGroupRowCount = blockMetadata.getRowCount();
for (PrimitiveField field : primitiveFields) {
int columnId = field.getId();
ColumnChunkMetadata chunkMetadata = blockMetadata.getColumnChunkMetaData(field.getDescriptor());
ColumnPath columnPath = chunkMetadata.getPath();
long startingPosition = chunkMetadata.getStartingPos();
long totalLength = chunkMetadata.getTotalSize();
long totalDataSize = 0;
FilteredOffsetIndex filteredOffsetIndex = null;
if (blockRowRanges[rowGroup] != null) {
filteredOffsetIndex = getFilteredOffsetIndex(blockRowRanges[rowGroup], rowGroup, rowGroupRowCount, columnPath);
}
if (filteredOffsetIndex == null) {
DiskRange range = new DiskRange(startingPosition, totalLength);
totalDataSize = range.getLength();
ranges.put(new ChunkKey(columnId, rowGroup), range);
}
else {
List<OffsetRange> offsetRanges = filteredOffsetIndex.calculateOffsetRanges(startingPosition);
for (OffsetRange offsetRange : offsetRanges) {
DiskRange range = new DiskRange(offsetRange.getOffset(), offsetRange.getLength());
totalDataSize += range.getLength();
ranges.put(new ChunkKey(columnId, rowGroup), range);
}
// Initialize columnIndexRowsFiltered only when column indexes are found and used
columnIndexRowsFiltered = 0;
}
// Update the metrics which record the codecs used along with data size
codecMetrics.merge(
PARQUET_CODEC_METRIC_PREFIX + chunkMetadata.getCodec().name(),
new LongCount(totalDataSize),
LongCount::mergeWith);
}
}
this.codecMetrics = ImmutableMap.copyOf(codecMetrics);
this.chunkReaders = dataSource.planRead(ranges, memoryContext);
}
@Override
public void close()
throws IOException
{
// Release memory usage from column readers
columnReaders.clear();
currentRowGroupMemoryContext.close();
for (ChunkedInputStream chunkedInputStream : chunkReaders.values()) {
chunkedInputStream.close();
}
dataSource.close();
if (writeChecksumBuilder.isPresent()) {
ParquetWriteValidation parquetWriteValidation = writeValidation.orElseThrow();
parquetWriteValidation.validateChecksum(dataSource.getId(), writeChecksumBuilder.get().build());
}
}
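/**
 * Returns the next batch of rows as a lazily materialized {@link Page}, or {@code null}
 * when all row groups have been exhausted. Blocks are produced on first access through
 * the block factory, so they should be loaded before the reader advances to the next page.
 */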
public Page nextPage()
throws IOException
{
int batchSize = nextBatch();
if (batchSize <= 0) {
return null;
}
// create a lazy page
blockFactory.nextPage();
Block[] blocks = new Block[columnFields.size()];
for (int channel = 0; channel < columnFields.size(); channel++) {
Field field = columnFields.get(channel).field();
blocks[channel] = blockFactory.createBlock(batchSize, () -> readBlock(field));
}
Page page = new Page(batchSize, blocks);
validateWritePageChecksum(page);
return page;
}
/**
* Get the global row index of the first row in the last batch.
*/
public long lastBatchStartRow()
{
return firstRowIndexInGroup + nextRowInGroup - batchSize;
}
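/**
 * Advances the reader by one batch and returns its size, or -1 when no row groups remain.
 * The batch size grows geometrically from INITIAL_BATCH_SIZE toward the configured maximum
 * (and the adaptive maxBatchSize), clamped to the rows remaining in the current row group.
 */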
private int nextBatch()
throws IOException
{
if (nextRowInGroup >= currentGroupRowCount && !advanceToNextRowGroup()) {
return -1;
}
batchSize = min(nextBatchSize, maxBatchSize);
nextBatchSize = min(batchSize * BATCH_SIZE_GROWTH_FACTOR, options.getMaxReadBlockRowCount());
batchSize = toIntExact(min(batchSize, currentGroupRowCount - nextRowInGroup));
nextRowInGroup += batchSize;
columnReaders.values().forEach(reader -> reader.prepareNextRead(batchSize));
return batchSize;
}
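/**
 * Releases resources of the current row group, runs optional write-validation checks,
 * and positions the reader at the next row group. Row groups whose column-index filter
 * eliminates every row are skipped entirely.
 */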
private boolean advanceToNextRowGroup()
throws IOException
{
currentRowGroupMemoryContext.close();
currentRowGroupMemoryContext = memoryContext.newAggregatedMemoryContext();
freeCurrentRowGroupBuffers();
if (currentRowGroup >= 0 && rowGroupStatisticsValidation.isPresent()) {
StatisticsValidation statisticsValidation = rowGroupStatisticsValidation.get();
writeValidation.orElseThrow().validateRowGroupStatistics(dataSource.getId(), currentBlockMetadata, statisticsValidation.build());
statisticsValidation.reset();
}
currentRowGroup++;
if (currentRowGroup == rowGroups.size()) {
return false;
}
RowGroupInfo rowGroupInfo = rowGroups.get(currentRowGroup);
currentBlockMetadata = rowGroupInfo.prunedBlockMetadata();
firstRowIndexInGroup = rowGroupInfo.fileRowOffset();
currentGroupRowCount = currentBlockMetadata.getRowCount();
FilteredRowRanges currentGroupRowRanges = blockRowRanges[currentRowGroup];
log.debug("advanceToNextRowGroup dataSource %s, currentRowGroup %d, rowRanges %s, currentBlockMetadata %s", dataSource.getId(), currentRowGroup, currentGroupRowRanges, currentBlockMetadata);
if (currentGroupRowRanges != null) {
long rowCount = currentGroupRowRanges.getRowCount();
columnIndexRowsFiltered += currentGroupRowCount - rowCount;
if (rowCount == 0) {
// Filters on multiple columns with page indexes may yield non-overlapping row ranges and eliminate the entire row group.
// Advance to next row group to ensure that we don't return a null Page and close the page source before all row groups are processed
return advanceToNextRowGroup();
}
currentGroupRowCount = rowCount;
}
nextRowInGroup = 0L;
initializeColumnReaders();
return true;
}
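/**
 * Closes the chunked input streams of the current row group so that their buffers
 * can be released before advancing to the next group.
 */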
private void freeCurrentRowGroupBuffers()
{
if (currentRowGroup < 0) {
return;
}
for (PrimitiveField field : primitiveFields) {
ChunkedInputStream chunkedStream = chunkReaders.get(new ChunkKey(field.getId(), currentRowGroup));
if (chunkedStream != null) {
chunkedStream.close();
}
}
}
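/**
 * Reassembles a repeated Parquet group into an {@link ArrayBlock}, using the element
 * column's definition and repetition levels to compute per-row offsets and nulls.
 */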
private ColumnChunk readArray(GroupField field)
throws IOException
{
List<Type> parameters = field.getType().getTypeParameters();
checkArgument(parameters.size() == 1, "Arrays must have a single type parameter, found %s", parameters.size());
Optional<Field> children = field.getChildren().get(0);
if (children.isEmpty()) {
return new ColumnChunk(field.getType().createNullBlock(), new int[] {}, new int[] {});
}
Field elementField = children.get();
ColumnChunk columnChunk = readColumnChunk(elementField);
ListColumnReader.BlockPositions collectionPositions = calculateCollectionOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
int positionsCount = collectionPositions.offsets().length - 1;
Block arrayBlock = ArrayBlock.fromElementBlock(positionsCount, collectionPositions.isNull(), collectionPositions.offsets(), columnChunk.getBlock());
return new ColumnChunk(arrayBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
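/**
 * Reassembles the key and value columns of a Parquet map group into a map block.
 * A value column that was pruned from the file is replaced with a null block of the
 * map's value type; offsets and nulls are derived from the key column's levels.
 */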
private ColumnChunk readMap(GroupField field)
throws IOException
{
List<Type> parameters = field.getType().getTypeParameters();
checkArgument(parameters.size() == 2, "Maps must have two type parameters, found %s", parameters.size());
Block[] blocks = new Block[parameters.size()];
ColumnChunk columnChunk = readColumnChunk(field.getChildren().get(0).get());
blocks[0] = columnChunk.getBlock();
Optional<Field> valueField = field.getChildren().get(1);
blocks[1] = valueField.isPresent() ? readColumnChunk(valueField.get()).getBlock() : parameters.get(1).createNullBlock();
ListColumnReader.BlockPositions collectionPositions = calculateCollectionOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
Block mapBlock = ((MapType) field.getType()).createBlockFromKeyValue(collectionPositions.isNull(), collectionPositions.offsets(), blocks[0], blocks[1]);
return new ColumnChunk(mapBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
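/**
 * Reassembles the children of a Parquet group into a {@link RowBlock}. Children pruned
 * from the file are filled with run-length-encoded null blocks, and present children are
 * expanded so that they carry a position for every struct row, including rows where the
 * struct itself is null.
 */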
private ColumnChunk readStruct(GroupField field)
throws IOException
{
Block[] blocks = new Block[field.getType().getTypeParameters().size()];
ColumnChunk columnChunk = null;
List<Optional<Field>> parameters = field.getChildren();
for (int i = 0; i < blocks.length; i++) {
Optional<Field> parameter = parameters.get(i);
if (parameter.isPresent()) {
columnChunk = readColumnChunk(parameter.get());
blocks[i] = columnChunk.getBlock();
}
}
if (columnChunk == null) {
throw new ParquetCorruptionException(dataSource.getId(), "Struct field does not have any children: %s", field);
}
StructColumnReader.RowBlockPositions structIsNull = StructColumnReader.calculateStructOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
Optional<boolean[]> isNull = structIsNull.isNull();
for (int i = 0; i < blocks.length; i++) {
if (blocks[i] == null) {
blocks[i] = RunLengthEncodedBlock.create(field.getType().getTypeParameters().get(i), null, structIsNull.positionsCount());
}
else if (isNull.isPresent()) {
blocks[i] = toNotNullSupressedBlock(structIsNull.positionsCount(), isNull.get(), blocks[i]);
}
}
Block rowBlock = RowBlock.fromNotNullSuppressedFieldBlocks(structIsNull.positionsCount(), structIsNull.isNull(), blocks);
return new ColumnChunk(rowBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
}
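/**
 * Expands a field block that omits positions for null struct rows into one with a position
 * per struct row, by wrapping it in a {@link DictionaryBlock} that points every null row at
 * a shared null position (appended to the block if it does not already contain one).
 */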
private static Block toNotNullSupressedBlock(int positionCount, boolean[] rowIsNull, Block fieldBlock)
{
// find an existing position in the block that is null
int nullIndex = -1;
if (fieldBlock.mayHaveNull()) {
for (int position = 0; position < fieldBlock.getPositionCount(); position++) {
if (fieldBlock.isNull(position)) {
nullIndex = position;
break;
}
}
}
// if there are no null positions, append a null to the end of the block
if (nullIndex == -1) {
fieldBlock = fieldBlock.getLoadedBlock();
nullIndex = fieldBlock.getPositionCount();
fieldBlock = fieldBlock.copyWithAppendedNull();
}
// create a dictionary that maps null positions to the null index
int[] dictionaryIds = new int[positionCount];
int nullSuppressedPosition = 0;
for (int position = 0; position < positionCount; position++) {
if (rowIsNull[position]) {
dictionaryIds[position] = nullIndex;
}
else {
dictionaryIds[position] = nullSuppressedPosition;
nullSuppressedPosition++;
}
}
return DictionaryBlock.create(positionCount, fieldBlock, dictionaryIds);
}
@Nullable
private FilteredOffsetIndex getFilteredOffsetIndex(FilteredRowRanges rowRanges, int rowGroup, long rowGroupRowCount, ColumnPath columnPath)
{
Optional<ColumnIndexStore> rowGroupColumnIndexStore = this.rowGroups.get(rowGroup).columnIndexStore();
if (rowGroupColumnIndexStore.isEmpty()) {
return null;
}
// We have a selective rowRanges for the rowGroup, every column must have a valid offset index
// to figure out which rows need to be read from the required parquet pages
OffsetIndex offsetIndex = requireNonNull(
rowGroupColumnIndexStore.get().getOffsetIndex(columnPath),
format("Missing OffsetIndex for column %s", columnPath));
return FilteredOffsetIndex.filterOffsetIndex(offsetIndex, rowRanges.getParquetRowRanges(), rowGroupRowCount);
}
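/**
 * Reads the next batch from a leaf column. The page reader is created lazily on first use in
 * each row group, honoring any column-index offset filtering, and the observed bytes per cell
 * feed back into maxBatchSize so that the combined row width stays within the configured
 * maximum read block size.
 */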
private ColumnChunk readPrimitive(PrimitiveField field)
throws IOException
{
ColumnDescriptor columnDescriptor = field.getDescriptor();
int fieldId = field.getId();
ColumnReader columnReader = columnReaders.get(fieldId);
if (!columnReader.hasPageReader()) {
validateParquet(currentBlockMetadata.getRowCount() > 0, dataSource.getId(), "Row group has 0 rows");
ColumnChunkMetadata metadata = currentBlockMetadata.getColumnChunkMetaData(columnDescriptor);
FilteredRowRanges rowRanges = blockRowRanges[currentRowGroup];
OffsetIndex offsetIndex = null;
if (rowRanges != null) {
offsetIndex = getFilteredOffsetIndex(rowRanges, currentRowGroup, currentBlockMetadata.getRowCount(), metadata.getPath());
}
ChunkedInputStream columnChunkInputStream = chunkReaders.get(new ChunkKey(fieldId, currentRowGroup));
columnReader.setPageReader(
createPageReader(dataSource.getId(), columnChunkInputStream, metadata, columnDescriptor, offsetIndex, fileCreatedBy),
Optional.ofNullable(rowRanges));
}
ColumnChunk columnChunk = columnReader.readPrimitive();
// update max size per primitive column chunk
double bytesPerCell = ((double) columnChunk.getMaxBlockSize()) / batchSize;
double bytesPerCellDelta = bytesPerCell - maxBytesPerCell.getOrDefault(fieldId, 0.0);
if (bytesPerCellDelta > 0) {
// update batch size
maxCombinedBytesPerRow += bytesPerCellDelta;
maxBatchSize = toIntExact(min(maxBatchSize, max(1, (long) (options.getMaxReadBlockSize().toBytes() / maxCombinedBytesPerRow))));
maxBytesPerCell.put(fieldId, bytesPerCell);
}
return columnChunk;
}
public List<Column> getColumnFields()
{
return columnFields;
}
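/**
 * Reports the amount of data scheduled for reading per compression codec and, when column
 * indexes were used, the number of rows they filtered out.
 */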
public Metrics getMetrics()
{
ImmutableMap.Builder<String, Metric<?>> metrics = ImmutableMap.<String, Metric<?>>builder()
.putAll(codecMetrics);
if (columnIndexRowsFiltered >= 0) {
metrics.put(COLUMN_INDEX_ROWS_FILTERED, new LongCount(columnIndexRowsFiltered));
}
return new Metrics(metrics.buildOrThrow());
}
private void initializeColumnReaders()
{
for (PrimitiveField field : primitiveFields) {
columnReaders.put(
field.getId(),
columnReaderFactory.create(field, currentRowGroupMemoryContext));
}
}
public static List<PrimitiveField> getPrimitiveFields(List<Field> fields)
{
Map<Integer, PrimitiveField> primitiveFields = new HashMap<>();
fields.forEach(field -> parseField(field, primitiveFields));
return ImmutableList.copyOf(primitiveFields.values());
}
private static void parseField(Field field, Map<Integer, PrimitiveField> primitiveFields)
{
if (field instanceof PrimitiveField primitiveField) {
primitiveFields.put(primitiveField.getId(), primitiveField);
}
else if (field instanceof GroupField groupField) {
groupField.getChildren().stream()
.flatMap(Optional::stream)
.forEach(child -> parseField(child, primitiveFields));
}
}
public Block readBlock(Field field)
throws IOException
{
return readColumnChunk(field).getBlock();
}
private ColumnChunk readColumnChunk(Field field)
throws IOException
{
ColumnChunk columnChunk;
if (field.getType() instanceof RowType) {
columnChunk = readStruct((GroupField) field);
}
else if (field.getType() instanceof MapType) {
columnChunk = readMap((GroupField) field);
}
else if (field.getType() instanceof ArrayType) {
columnChunk = readArray((GroupField) field);
}
else {
columnChunk = readPrimitive((PrimitiveField) field);
}
return columnChunk;
}
public ParquetDataSource getDataSource()
{
return dataSource;
}
public AggregatedMemoryContext getMemoryContext()
{
return memoryContext;
}
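/**
 * Uses the Parquet column-index filter to compute, for each row group that has a column index
 * store, the ranges of rows that may match the predicate. An entry stays {@code null} when no
 * rows are pruned or no column index is available, signaling that the whole group must be read.
 */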
private static FilteredRowRanges[] calculateFilteredRowRanges(
List<RowGroupInfo> rowGroups,
Optional<FilterPredicate> filter,
List<PrimitiveField> primitiveFields)
{
FilteredRowRanges[] blockRowRanges = new FilteredRowRanges[rowGroups.size()];
if (filter.isEmpty()) {
return blockRowRanges;
}
Set<ColumnPath> paths = primitiveFields.stream()
.map(field -> ColumnPath.get(field.getDescriptor().getPath()))
.collect(toImmutableSet());
for (int rowGroup = 0; rowGroup < rowGroups.size(); rowGroup++) {
RowGroupInfo rowGroupInfo = rowGroups.get(rowGroup);
Optional<ColumnIndexStore> rowGroupColumnIndexStore = rowGroupInfo.columnIndexStore();
if (rowGroupColumnIndexStore.isEmpty()) {
continue;
}
long rowGroupRowCount = rowGroupInfo.prunedBlockMetadata().getRowCount();
FilteredRowRanges rowRanges = new FilteredRowRanges(ColumnIndexFilter.calculateRowRanges(
FilterCompat.get(filter.get()),
rowGroupColumnIndexStore.get(),
paths,
rowGroupRowCount));
if (rowRanges.getRowCount() < rowGroupRowCount) {
blockRowRanges[rowGroup] = rowRanges;
}
}
return blockRowRanges;
}
private void validateWritePageChecksum(Page page)
{
if (writeChecksumBuilder.isPresent()) {
page = page.getLoadedPage();
writeChecksumBuilder.get().addPage(page);
rowGroupStatisticsValidation.orElseThrow().addPage(page);
}
}
private void validateBlockMetadata(List<RowGroupInfo> rowGroups)
throws ParquetCorruptionException
{
if (writeValidation.isPresent()) {
writeValidation.get().validateBlocksMetadata(dataSource.getId(), rowGroups);
}
}
@SuppressWarnings("FormatStringAnnotation")
@FormatMethod
private void validateWrite(java.util.function.Predicate<ParquetWriteValidation> test, String messageFormat, Object... args)
throws ParquetCorruptionException
{
if (writeValidation.isPresent() && !test.test(writeValidation.get())) {
throw new ParquetCorruptionException(dataSource.getId(), "Write validation failed: " + messageFormat, args);
}
}
}