All downloads are free. The search and download features use the official Maven repository.

io.trino.plugin.hive.parquet.ParquetReaderConfig Maven / Gradle / Ivy

There is a newer version: 468
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hive.parquet;

import io.airlift.configuration.Config;
import io.airlift.configuration.ConfigDescription;
import io.airlift.configuration.DefunctConfig;
import io.airlift.configuration.LegacyConfig;
import io.airlift.units.DataSize;
import io.airlift.units.MaxDataSize;
import io.airlift.units.MinDataSize;
import io.trino.parquet.ParquetReaderOptions;
import jakarta.validation.constraints.Max;
import jakarta.validation.constraints.Min;
import jakarta.validation.constraints.NotNull;

/**
 * Configuration bean exposing the {@code parquet.*} reader properties.
 *
 * <p>The bean starts from a default-constructed {@link ParquetReaderOptions} and every
 * setter swaps the held instance for the one returned by the matching {@code with*}
 * method. Getters read straight from the held instance, and
 * {@link #toParquetReaderOptions()} hands that instance out.
 */
@DefunctConfig({
        "hive.parquet.fail-on-corrupted-statistics",
        "parquet.fail-on-corrupted-statistics",
        "parquet.optimized-reader.enabled",
        "parquet.optimized-nested-reader.enabled"
})
public class ParquetReaderConfig
{
    /** Upper bound applied to {@link #getSmallFileThreshold()} through {@code @MaxDataSize}. */
    public static final String PARQUET_READER_MAX_SMALL_FILE_THRESHOLD = "15MB";

    // Current option values; replaced as a whole by each setter.
    private ParquetReaderOptions options = new ParquetReaderOptions();

    // ---- parquet.ignore-statistics ----

    /**
     * @return the ignore-statistics flag held by the current options
     */
    public boolean isIgnoreStatistics()
    {
        return options.isIgnoreStatistics();
    }

    /**
     * @param ignoreStatistics new value of the ignore-statistics flag
     * @return this config, for chaining
     */
    @Config("parquet.ignore-statistics")
    @ConfigDescription("Ignore statistics from Parquet to allow querying files with corrupted or incorrect statistics")
    public ParquetReaderConfig setIgnoreStatistics(boolean ignoreStatistics)
    {
        return replaceOptions(options.withIgnoreStatistics(ignoreStatistics));
    }

    // ---- parquet.max-read-block-size ----

    /**
     * @return the maximum read block size held by the current options; validated as non-null
     */
    @NotNull
    public DataSize getMaxReadBlockSize()
    {
        return options.getMaxReadBlockSize();
    }

    /**
     * @param maxReadBlockSize new maximum read block size
     * @return this config, for chaining
     */
    @Config("parquet.max-read-block-size")
    @LegacyConfig("hive.parquet.max-read-block-size")
    public ParquetReaderConfig setMaxReadBlockSize(DataSize maxReadBlockSize)
    {
        return replaceOptions(options.withMaxReadBlockSize(maxReadBlockSize));
    }

    // ---- parquet.max-read-block-row-count ----

    /**
     * @return the maximum read block row count held by the current options;
     *         constrained to the range 128..65536 by the validation annotations
     */
    @Min(128)
    @Max(65536)
    public int getMaxReadBlockRowCount()
    {
        return options.getMaxReadBlockRowCount();
    }

    /**
     * @param length new maximum number of rows read in a batch
     * @return this config, for chaining
     */
    @Config("parquet.max-read-block-row-count")
    @ConfigDescription("Maximum number of rows read in a batch")
    public ParquetReaderConfig setMaxReadBlockRowCount(int length)
    {
        return replaceOptions(options.withMaxReadBlockRowCount(length));
    }

    // ---- parquet.max-merge-distance ----

    /**
     * @return the maximum merge distance held by the current options; validated as non-null
     */
    @NotNull
    public DataSize getMaxMergeDistance()
    {
        return options.getMaxMergeDistance();
    }

    /**
     * @param distance new maximum merge distance
     * @return this config, for chaining
     */
    @Config("parquet.max-merge-distance")
    public ParquetReaderConfig setMaxMergeDistance(DataSize distance)
    {
        return replaceOptions(options.withMaxMergeDistance(distance));
    }

    // ---- parquet.max-buffer-size ----

    /**
     * @return the maximum buffer size held by the current options; validated as
     *         non-null and at least 1MB
     */
    @NotNull
    @MinDataSize("1MB")
    public DataSize getMaxBufferSize()
    {
        return options.getMaxBufferSize();
    }

    /**
     * @param size new maximum buffer size
     * @return this config, for chaining
     */
    @Config("parquet.max-buffer-size")
    public ParquetReaderConfig setMaxBufferSize(DataSize size)
    {
        return replaceOptions(options.withMaxBufferSize(size));
    }

    // ---- parquet.use-column-index ----

    /**
     * @return the use-column-index flag held by the current options
     */
    public boolean isUseColumnIndex()
    {
        return options.isUseColumnIndex();
    }

    /**
     * @param useColumnIndex new value of the use-column-index flag
     * @return this config, for chaining
     */
    @Config("parquet.use-column-index")
    @ConfigDescription("Enable using Parquet column indexes")
    public ParquetReaderConfig setUseColumnIndex(boolean useColumnIndex)
    {
        return replaceOptions(options.withUseColumnIndex(useColumnIndex));
    }

    // ---- parquet.use-bloom-filter ----

    /**
     * @return the use-bloom-filter flag held by the current options
     */
    public boolean isUseBloomFilter()
    {
        return options.useBloomFilter();
    }

    /**
     * @param useBloomFilter new value of the use-bloom-filter flag
     * @return this config, for chaining
     */
    @Config("parquet.use-bloom-filter")
    @ConfigDescription("Use Parquet Bloom filters")
    public ParquetReaderConfig setUseBloomFilter(boolean useBloomFilter)
    {
        return replaceOptions(options.withBloomFilter(useBloomFilter));
    }

    // ---- parquet.small-file-threshold ----

    /**
     * @return the small-file threshold held by the current options; validated as
     *         non-null and at most {@link #PARQUET_READER_MAX_SMALL_FILE_THRESHOLD}
     */
    @NotNull
    @MaxDataSize(PARQUET_READER_MAX_SMALL_FILE_THRESHOLD)
    public DataSize getSmallFileThreshold()
    {
        return options.getSmallFileThreshold();
    }

    /**
     * @param smallFileThreshold new small-file threshold
     * @return this config, for chaining
     */
    @Config("parquet.small-file-threshold")
    @ConfigDescription("Size below which a parquet file will be read entirely")
    public ParquetReaderConfig setSmallFileThreshold(DataSize smallFileThreshold)
    {
        return replaceOptions(options.withSmallFileThreshold(smallFileThreshold));
    }

    // ---- parquet.experimental.vectorized-decoding.enabled ----

    /**
     * @return the vectorized-decoding flag held by the current options
     */
    public boolean isVectorizedDecodingEnabled()
    {
        return options.isVectorizedDecodingEnabled();
    }

    /**
     * @param vectorizedDecodingEnabled new value of the vectorized-decoding flag
     * @return this config, for chaining
     */
    @Config("parquet.experimental.vectorized-decoding.enabled")
    @ConfigDescription("Enable using Java Vector API for faster decoding of parquet files")
    public ParquetReaderConfig setVectorizedDecodingEnabled(boolean vectorizedDecodingEnabled)
    {
        return replaceOptions(options.withVectorizedDecodingEnabled(vectorizedDecodingEnabled));
    }

    /**
     * @return the {@link ParquetReaderOptions} instance currently held by this config
     */
    public ParquetReaderOptions toParquetReaderOptions()
    {
        return options;
    }

    /**
     * Stores {@code replacement} as the current options and returns this config so
     * that setters can be chained.
     */
    private ParquetReaderConfig replaceOptions(ParquetReaderOptions replacement)
    {
        options = replacement;
        return this;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy