All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.plugin.hudi.HudiConfig Maven / Gradle / Ivy

There is a newer version: 465
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hudi;

import com.google.common.collect.ImmutableList;
import io.airlift.configuration.Config;
import io.airlift.configuration.ConfigDescription;
import io.airlift.configuration.DefunctConfig;
import io.airlift.units.DataSize;
import jakarta.validation.constraints.DecimalMax;
import jakarta.validation.constraints.DecimalMin;
import jakarta.validation.constraints.Min;
import jakarta.validation.constraints.NotNull;

import java.util.List;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static java.util.Locale.ENGLISH;

@DefunctConfig({
        "hudi.min-partition-batch-size",
        "hudi.max-partition-batch-size",
        "hudi.metadata-enabled",
})
public class HudiConfig
{
    private List columnsToHide = ImmutableList.of();
    private boolean shouldUseParquetColumnNames = true;
    private boolean sizeBasedSplitWeightsEnabled = true;
    private DataSize standardSplitWeightSize = DataSize.of(128, MEGABYTE);
    private double minimumAssignedSplitWeight = 0.05;
    private int maxSplitsPerSecond = Integer.MAX_VALUE;
    private int maxOutstandingSplits = 1000;
    private int splitLoaderParallelism = 4;
    private int splitGeneratorParallelism = 4;
    private long perTransactionMetastoreCacheMaximumSize = 2000;
    private boolean queryPartitionFilterRequired;

    public List getColumnsToHide()
    {
        return columnsToHide;
    }

    @Config("hudi.columns-to-hide")
    @ConfigDescription("List of column names that will be hidden from the query output. " +
            "It can be used to hide Hudi meta fields. By default, no fields are hidden.")
    public HudiConfig setColumnsToHide(List columnsToHide)
    {
        this.columnsToHide = columnsToHide.stream()
                .map(s -> s.toLowerCase(ENGLISH))
                .collect(toImmutableList());
        return this;
    }

    @Config("hudi.parquet.use-column-names")
    @ConfigDescription("Access Parquet columns using names from the file. If disabled, then columns are accessed using index."
            + "Only applicable to Parquet file format.")
    public HudiConfig setUseParquetColumnNames(boolean shouldUseParquetColumnNames)
    {
        this.shouldUseParquetColumnNames = shouldUseParquetColumnNames;
        return this;
    }

    public boolean getUseParquetColumnNames()
    {
        return this.shouldUseParquetColumnNames;
    }

    @Config("hudi.size-based-split-weights-enabled")
    @ConfigDescription("Unlike uniform splitting, size-based splitting ensures that each batch of splits has enough data to process. " +
            "By default, it is enabled to improve performance.")
    public HudiConfig setSizeBasedSplitWeightsEnabled(boolean sizeBasedSplitWeightsEnabled)
    {
        this.sizeBasedSplitWeightsEnabled = sizeBasedSplitWeightsEnabled;
        return this;
    }

    public boolean isSizeBasedSplitWeightsEnabled()
    {
        return sizeBasedSplitWeightsEnabled;
    }

    @Config("hudi.standard-split-weight-size")
    @ConfigDescription("The split size corresponding to the standard weight (1.0) "
            + "when size based split weights are enabled.")
    public HudiConfig setStandardSplitWeightSize(DataSize standardSplitWeightSize)
    {
        this.standardSplitWeightSize = standardSplitWeightSize;
        return this;
    }

    @NotNull
    public DataSize getStandardSplitWeightSize()
    {
        return standardSplitWeightSize;
    }

    @Config("hudi.minimum-assigned-split-weight")
    @ConfigDescription("Minimum weight that a split can be assigned when size based split weights are enabled.")
    public HudiConfig setMinimumAssignedSplitWeight(double minimumAssignedSplitWeight)
    {
        this.minimumAssignedSplitWeight = minimumAssignedSplitWeight;
        return this;
    }

    @DecimalMax("1")
    @DecimalMin(value = "0", inclusive = false)
    public double getMinimumAssignedSplitWeight()
    {
        return minimumAssignedSplitWeight;
    }

    @Min(1)
    public int getMaxSplitsPerSecond()
    {
        return maxSplitsPerSecond;
    }

    @Config("hudi.max-splits-per-second")
    @ConfigDescription("Rate at which splits are enqueued for processing. The queue will throttle if this rate limit is breached.")
    public HudiConfig setMaxSplitsPerSecond(int maxSplitsPerSecond)
    {
        this.maxSplitsPerSecond = maxSplitsPerSecond;
        return this;
    }

    @Min(1)
    public int getMaxOutstandingSplits()
    {
        return maxOutstandingSplits;
    }

    @Config("hudi.max-outstanding-splits")
    @ConfigDescription("Maximum outstanding splits in a batch enqueued for processing.")
    public HudiConfig setMaxOutstandingSplits(int maxOutstandingSplits)
    {
        this.maxOutstandingSplits = maxOutstandingSplits;
        return this;
    }

    @Min(1)
    public int getSplitGeneratorParallelism()
    {
        return splitGeneratorParallelism;
    }

    @Config("hudi.split-generator-parallelism")
    @ConfigDescription("Number of threads to generate splits from partitions.")
    public HudiConfig setSplitGeneratorParallelism(int splitGeneratorParallelism)
    {
        this.splitGeneratorParallelism = splitGeneratorParallelism;
        return this;
    }

    @Min(1)
    public int getSplitLoaderParallelism()
    {
        return splitLoaderParallelism;
    }

    @Config("hudi.split-loader-parallelism")
    @ConfigDescription("Number of threads to run background split loader. A single background split loader is needed per query.")
    public HudiConfig setSplitLoaderParallelism(int splitLoaderParallelism)
    {
        this.splitLoaderParallelism = splitLoaderParallelism;
        return this;
    }

    @Min(1)
    public long getPerTransactionMetastoreCacheMaximumSize()
    {
        return perTransactionMetastoreCacheMaximumSize;
    }

    @Config("hudi.per-transaction-metastore-cache-maximum-size")
    public HudiConfig setPerTransactionMetastoreCacheMaximumSize(long perTransactionMetastoreCacheMaximumSize)
    {
        this.perTransactionMetastoreCacheMaximumSize = perTransactionMetastoreCacheMaximumSize;
        return this;
    }

    @Config("hudi.query-partition-filter-required")
    @ConfigDescription("Require a filter on at least one partition column")
    public HudiConfig setQueryPartitionFilterRequired(boolean queryPartitionFilterRequired)
    {
        this.queryPartitionFilterRequired = queryPartitionFilterRequired;
        return this;
    }

    public boolean isQueryPartitionFilterRequired()
    {
        return queryPartitionFilterRequired;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy