All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.plugin.hudi.HudiSessionProperties Maven / Gradle / Ivy

There is a newer version: 458
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hudi;

import com.google.common.collect.ImmutableList;
import com.google.inject.Inject;
import io.airlift.units.DataSize;
import io.trino.plugin.base.session.SessionPropertiesProvider;
import io.trino.plugin.hive.parquet.ParquetReaderConfig;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.session.PropertyMetadata;
import io.trino.spi.type.ArrayType;

import java.util.Collection;
import java.util.List;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.plugin.base.session.PropertyMetadataUtil.dataSizeProperty;
import static io.trino.plugin.base.session.PropertyMetadataUtil.validateMaxDataSize;
import static io.trino.plugin.hive.parquet.ParquetReaderConfig.PARQUET_READER_MAX_SMALL_FILE_THRESHOLD;
import static io.trino.spi.StandardErrorCode.INVALID_SESSION_PROPERTY;
import static io.trino.spi.session.PropertyMetadata.booleanProperty;
import static io.trino.spi.session.PropertyMetadata.doubleProperty;
import static io.trino.spi.session.PropertyMetadata.integerProperty;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;

public class HudiSessionProperties
        implements SessionPropertiesProvider
{
    private static final String COLUMNS_TO_HIDE = "columns_to_hide";
    private static final String USE_PARQUET_COLUMN_NAMES = "use_parquet_column_names";
    private static final String PARQUET_SMALL_FILE_THRESHOLD = "parquet_small_file_threshold";
    private static final String PARQUET_VECTORIZED_DECODING_ENABLED = "parquet_vectorized_decoding_enabled";
    private static final String SIZE_BASED_SPLIT_WEIGHTS_ENABLED = "size_based_split_weights_enabled";
    private static final String STANDARD_SPLIT_WEIGHT_SIZE = "standard_split_weight_size";
    private static final String MINIMUM_ASSIGNED_SPLIT_WEIGHT = "minimum_assigned_split_weight";
    private static final String MAX_SPLITS_PER_SECOND = "max_splits_per_second";
    private static final String MAX_OUTSTANDING_SPLITS = "max_outstanding_splits";
    private static final String SPLIT_GENERATOR_PARALLELISM = "split_generator_parallelism";
    private static final String QUERY_PARTITION_FILTER_REQUIRED = "query_partition_filter_required";

    private final List> sessionProperties;

    @Inject
    public HudiSessionProperties(HudiConfig hudiConfig, ParquetReaderConfig parquetReaderConfig)
    {
        sessionProperties = ImmutableList.of(
                new PropertyMetadata<>(
                        COLUMNS_TO_HIDE,
                        "List of column names that will be hidden",
                        new ArrayType(VARCHAR),
                        List.class,
                        hudiConfig.getColumnsToHide(),
                        false,
                        value -> ((Collection) value).stream()
                                .map(name -> ((String) name).toLowerCase(ENGLISH))
                                .collect(toImmutableList()),
                        value -> value),
                booleanProperty(
                        USE_PARQUET_COLUMN_NAMES,
                        "Access parquet columns using names from the file. If disabled, then columns are accessed using index.",
                        hudiConfig.getUseParquetColumnNames(),
                        false),
                dataSizeProperty(
                        PARQUET_SMALL_FILE_THRESHOLD,
                        "Parquet: Size below which a parquet file will be read entirely",
                        parquetReaderConfig.getSmallFileThreshold(),
                        value -> validateMaxDataSize(PARQUET_SMALL_FILE_THRESHOLD, value, DataSize.valueOf(PARQUET_READER_MAX_SMALL_FILE_THRESHOLD)),
                        false),
                booleanProperty(
                        PARQUET_VECTORIZED_DECODING_ENABLED,
                        "Enable using Java Vector API for faster decoding of parquet files",
                        parquetReaderConfig.isVectorizedDecodingEnabled(),
                        false),
                booleanProperty(
                        SIZE_BASED_SPLIT_WEIGHTS_ENABLED,
                        format("If enabled, size-based splitting ensures that each batch of splits has enough data to process as defined by %s", STANDARD_SPLIT_WEIGHT_SIZE),
                        hudiConfig.isSizeBasedSplitWeightsEnabled(),
                        false),
                dataSizeProperty(
                        STANDARD_SPLIT_WEIGHT_SIZE,
                        "The split size corresponding to the standard weight (1.0) when size-based split weights are enabled",
                        hudiConfig.getStandardSplitWeightSize(),
                        false),
                doubleProperty(
                        MINIMUM_ASSIGNED_SPLIT_WEIGHT,
                        "Minimum assigned split weight when size-based split weights are enabled",
                        hudiConfig.getMinimumAssignedSplitWeight(),
                        value -> {
                            if (!Double.isFinite(value) || value <= 0 || value > 1) {
                                throw new TrinoException(INVALID_SESSION_PROPERTY, format("%s must be > 0 and <= 1.0: %s", MINIMUM_ASSIGNED_SPLIT_WEIGHT, value));
                            }
                        },
                        false),
                integerProperty(
                        MAX_SPLITS_PER_SECOND,
                        "Rate at which splits are enqueued for processing. The queue will throttle if this rate limit is breached.",
                        hudiConfig.getMaxSplitsPerSecond(),
                        false),
                integerProperty(
                        MAX_OUTSTANDING_SPLITS,
                        "Maximum outstanding splits in a batch enqueued for processing",
                        hudiConfig.getMaxOutstandingSplits(),
                        false),
                integerProperty(
                        SPLIT_GENERATOR_PARALLELISM,
                        "Number of threads to generate splits from partitions",
                        hudiConfig.getSplitGeneratorParallelism(),
                        false),
                booleanProperty(
                        QUERY_PARTITION_FILTER_REQUIRED,
                        "Require a filter on at least one partition column",
                        hudiConfig.isQueryPartitionFilterRequired(),
                        false));
    }

    @Override
    public List> getSessionProperties()
    {
        return sessionProperties;
    }

    @SuppressWarnings("unchecked")
    public static List getColumnsToHide(ConnectorSession session)
    {
        return (List) session.getProperty(COLUMNS_TO_HIDE, List.class);
    }

    public static boolean shouldUseParquetColumnNames(ConnectorSession session)
    {
        return session.getProperty(USE_PARQUET_COLUMN_NAMES, Boolean.class);
    }

    public static DataSize getParquetSmallFileThreshold(ConnectorSession session)
    {
        return session.getProperty(PARQUET_SMALL_FILE_THRESHOLD, DataSize.class);
    }

    public static boolean isParquetVectorizedDecodingEnabled(ConnectorSession session)
    {
        return session.getProperty(PARQUET_VECTORIZED_DECODING_ENABLED, Boolean.class);
    }

    public static boolean isSizeBasedSplitWeightsEnabled(ConnectorSession session)
    {
        return session.getProperty(SIZE_BASED_SPLIT_WEIGHTS_ENABLED, Boolean.class);
    }

    public static DataSize getStandardSplitWeightSize(ConnectorSession session)
    {
        return session.getProperty(STANDARD_SPLIT_WEIGHT_SIZE, DataSize.class);
    }

    public static double getMinimumAssignedSplitWeight(ConnectorSession session)
    {
        return session.getProperty(MINIMUM_ASSIGNED_SPLIT_WEIGHT, Double.class);
    }

    public static int getMaxSplitsPerSecond(ConnectorSession session)
    {
        return session.getProperty(MAX_SPLITS_PER_SECOND, Integer.class);
    }

    public static int getMaxOutstandingSplits(ConnectorSession session)
    {
        return session.getProperty(MAX_OUTSTANDING_SPLITS, Integer.class);
    }

    public static int getSplitGeneratorParallelism(ConnectorSession session)
    {
        return session.getProperty(SPLIT_GENERATOR_PARALLELISM, Integer.class);
    }

    public static boolean isQueryPartitionFilterRequired(ConnectorSession session)
    {
        return session.getProperty(QUERY_PARTITION_FILTER_REQUIRED, Boolean.class);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy