io.trino.plugin.hive.HiveSessionProperties

This is a Databricks build of Trino's Hive plugin that adds support for HTTP-based transport for its Hive metastore Thrift interface.

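Every session property registered in this class can be overridden per query with Trino's standard SET SESSION syntax, for example SET SESSION hive.statistics_enabled = false (this assumes the connector is mounted as a catalog named hive).
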
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hive;

import com.google.common.collect.ImmutableList;
import io.airlift.units.DataSize;
import io.airlift.units.Duration;
import io.trino.orc.OrcWriteValidation.OrcWriteValidationMode;
import io.trino.plugin.base.session.SessionPropertiesProvider;
import io.trino.plugin.hive.orc.OrcReaderConfig;
import io.trino.plugin.hive.orc.OrcWriterConfig;
import io.trino.plugin.hive.parquet.ParquetReaderConfig;
import io.trino.plugin.hive.parquet.ParquetWriterConfig;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.session.PropertyMetadata;
import io.trino.spi.type.ArrayType;

import javax.inject.Inject;

import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.trino.plugin.base.session.PropertyMetadataUtil.dataSizeProperty;
import static io.trino.plugin.base.session.PropertyMetadataUtil.durationProperty;
import static io.trino.spi.StandardErrorCode.INVALID_SESSION_PROPERTY;
import static io.trino.spi.session.PropertyMetadata.booleanProperty;
import static io.trino.spi.session.PropertyMetadata.doubleProperty;
import static io.trino.spi.session.PropertyMetadata.enumProperty;
import static io.trino.spi.session.PropertyMetadata.integerProperty;
import static io.trino.spi.session.PropertyMetadata.stringProperty;
import static io.trino.spi.type.DoubleType.DOUBLE;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;

public final class HiveSessionProperties
        implements SessionPropertiesProvider
{
    private static final String BUCKET_EXECUTION_ENABLED = "bucket_execution_enabled";
    private static final String VALIDATE_BUCKETING = "validate_bucketing";
    private static final String TARGET_MAX_FILE_SIZE = "target_max_file_size";
    private static final String PARALLEL_PARTITIONED_BUCKETED_WRITES = "parallel_partitioned_bucketed_writes";
    private static final String FORCE_LOCAL_SCHEDULING = "force_local_scheduling";
    private static final String INSERT_EXISTING_PARTITIONS_BEHAVIOR = "insert_existing_partitions_behavior";
    private static final String CSV_NATIVE_READER_ENABLED = "csv_native_reader_enabled";
    private static final String CSV_NATIVE_WRITER_ENABLED = "csv_native_writer_enabled";
    private static final String JSON_NATIVE_READER_ENABLED = "json_native_reader_enabled";
    private static final String JSON_NATIVE_WRITER_ENABLED = "json_native_writer_enabled";
    private static final String REGEX_NATIVE_READER_ENABLED = "regex_native_reader_enabled";
    private static final String TEXT_FILE_NATIVE_READER_ENABLED = "text_file_native_reader_enabled";
    private static final String TEXT_FILE_NATIVE_WRITER_ENABLED = "text_file_native_writer_enabled";
    private static final String SEQUENCE_FILE_NATIVE_READER_ENABLED = "sequence_file_native_reader_enabled";
    private static final String SEQUENCE_FILE_NATIVE_WRITER_ENABLED = "sequence_file_native_writer_enabled";
    private static final String ORC_BLOOM_FILTERS_ENABLED = "orc_bloom_filters_enabled";
    private static final String ORC_MAX_MERGE_DISTANCE = "orc_max_merge_distance";
    private static final String ORC_MAX_BUFFER_SIZE = "orc_max_buffer_size";
    private static final String ORC_STREAM_BUFFER_SIZE = "orc_stream_buffer_size";
    private static final String ORC_TINY_STRIPE_THRESHOLD = "orc_tiny_stripe_threshold";
    private static final String ORC_MAX_READ_BLOCK_SIZE = "orc_max_read_block_size";
    private static final String ORC_LAZY_READ_SMALL_RANGES = "orc_lazy_read_small_ranges";
    private static final String ORC_NESTED_LAZY_ENABLED = "orc_nested_lazy_enabled";
    private static final String ORC_STRING_STATISTICS_LIMIT = "orc_string_statistics_limit";
    private static final String ORC_OPTIMIZED_WRITER_VALIDATE = "orc_optimized_writer_validate";
    private static final String ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE = "orc_optimized_writer_validate_percentage";
    private static final String ORC_OPTIMIZED_WRITER_VALIDATE_MODE = "orc_optimized_writer_validate_mode";
    private static final String ORC_OPTIMIZED_WRITER_MIN_STRIPE_SIZE = "orc_optimized_writer_min_stripe_size";
    private static final String ORC_OPTIMIZED_WRITER_MAX_STRIPE_SIZE = "orc_optimized_writer_max_stripe_size";
    private static final String ORC_OPTIMIZED_WRITER_MAX_STRIPE_ROWS = "orc_optimized_writer_max_stripe_rows";
    private static final String ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY = "orc_optimized_writer_max_dictionary_memory";
    private static final String ORC_USE_COLUMN_NAME = "orc_use_column_names";
    private static final String HIVE_STORAGE_FORMAT = "hive_storage_format";
    private static final String COMPRESSION_CODEC = "compression_codec";
    private static final String RESPECT_TABLE_FORMAT = "respect_table_format";
    private static final String CREATE_EMPTY_BUCKET_FILES = "create_empty_bucket_files";
    private static final String PARQUET_USE_COLUMN_NAME = "parquet_use_column_names";
    private static final String PARQUET_IGNORE_STATISTICS = "parquet_ignore_statistics";
    private static final String PARQUET_USE_COLUMN_INDEX = "parquet_use_column_index";
    private static final String PARQUET_USE_BLOOM_FILTER = "parquet_use_bloom_filter";
    private static final String PARQUET_MAX_READ_BLOCK_SIZE = "parquet_max_read_block_size";
    private static final String PARQUET_MAX_READ_BLOCK_ROW_COUNT = "parquet_max_read_block_row_count";
    private static final String PARQUET_OPTIMIZED_READER_ENABLED = "parquet_optimized_reader_enabled";
    private static final String PARQUET_OPTIMIZED_NESTED_READER_ENABLED = "parquet_optimized_nested_reader_enabled";
    private static final String PARQUET_WRITER_BLOCK_SIZE = "parquet_writer_block_size";
    private static final String PARQUET_WRITER_PAGE_SIZE = "parquet_writer_page_size";
    private static final String PARQUET_WRITER_BATCH_SIZE = "parquet_writer_batch_size";
    private static final String PARQUET_OPTIMIZED_WRITER_VALIDATION_PERCENTAGE = "parquet_optimized_writer_validation_percentage";
    private static final String MAX_SPLIT_SIZE = "max_split_size";
    private static final String MAX_INITIAL_SPLIT_SIZE = "max_initial_split_size";
    private static final String RCFILE_OPTIMIZED_WRITER_VALIDATE = "rcfile_optimized_writer_validate";
    private static final String SORTED_WRITING_ENABLED = "sorted_writing_enabled";
    private static final String PROPAGATE_TABLE_SCAN_SORTING_PROPERTIES = "propagate_table_scan_sorting_properties";
    private static final String STATISTICS_ENABLED = "statistics_enabled";
    private static final String PARTITION_STATISTICS_SAMPLE_SIZE = "partition_statistics_sample_size";
    private static final String IGNORE_CORRUPTED_STATISTICS = "ignore_corrupted_statistics";
    private static final String COLLECT_COLUMN_STATISTICS_ON_WRITE = "collect_column_statistics_on_write";
    private static final String OPTIMIZE_MISMATCHED_BUCKET_COUNT = "optimize_mismatched_bucket_count";
    private static final String S3_SELECT_PUSHDOWN_ENABLED = "s3_select_pushdown_enabled";
    private static final String TEMPORARY_STAGING_DIRECTORY_ENABLED = "temporary_staging_directory_enabled";
    private static final String TEMPORARY_STAGING_DIRECTORY_PATH = "temporary_staging_directory_path";
    private static final String DELEGATE_TRANSACTIONAL_MANAGED_TABLE_LOCATION_TO_METASTORE = "delegate_transactional_managed_table_location_to_metastore";
    private static final String IGNORE_ABSENT_PARTITIONS = "ignore_absent_partitions";
    private static final String QUERY_PARTITION_FILTER_REQUIRED = "query_partition_filter_required";
    private static final String QUERY_PARTITION_FILTER_REQUIRED_SCHEMAS = "query_partition_filter_required_schemas";
    private static final String PROJECTION_PUSHDOWN_ENABLED = "projection_pushdown_enabled";
    private static final String TIMESTAMP_PRECISION = "timestamp_precision";
    private static final String PARQUET_OPTIMIZED_WRITER_ENABLED = "parquet_optimized_writer_enabled";
    private static final String DYNAMIC_FILTERING_WAIT_TIMEOUT = "dynamic_filtering_wait_timeout";
    private static final String OPTIMIZE_SYMLINK_LISTING = "optimize_symlink_listing";
    private static final String HIVE_VIEWS_LEGACY_TRANSLATION = "hive_views_legacy_translation";
    private static final String ICEBERG_CATALOG_NAME = "iceberg_catalog_name";
    public static final String DELTA_LAKE_CATALOG_NAME = "delta_lake_catalog_name";
    public static final String HUDI_CATALOG_NAME = "hudi_catalog_name";
    public static final String SIZE_BASED_SPLIT_WEIGHTS_ENABLED = "size_based_split_weights_enabled";
    public static final String MINIMUM_ASSIGNED_SPLIT_WEIGHT = "minimum_assigned_split_weight";
    public static final String NON_TRANSACTIONAL_OPTIMIZE_ENABLED = "non_transactional_optimize_enabled";

    private final List<PropertyMetadata<?>> sessionProperties;

    public enum InsertExistingPartitionsBehavior
    {
        ERROR,
        APPEND,
        OVERWRITE,
        /**/;

        public static InsertExistingPartitionsBehavior valueOf(String value, boolean immutablePartitions)
        {
            InsertExistingPartitionsBehavior enumValue = valueOf(value.toUpperCase(ENGLISH));
            checkArgument(isValid(enumValue, immutablePartitions), "Trino is configured to treat Hive partitions as immutable. %s is not allowed to be set to %s", INSERT_EXISTING_PARTITIONS_BEHAVIOR, APPEND);
            return enumValue;
        }

        static boolean isValid(InsertExistingPartitionsBehavior value, boolean immutable)
        {
            return !(immutable && value == APPEND);
        }
    }
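
    // Example (catalog name "hive" is an assumption): the default behavior can be
    // overridden per query with
    //   SET SESSION hive.insert_existing_partitions_behavior = 'OVERWRITE';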

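    /**
     * Builds the property list. Defaults come from the injected config objects,
     * and the trailing boolean on each property factory marks the property as
     * hidden (it can still be set, but is not listed by SHOW SESSION).
     */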
    @Inject
    public HiveSessionProperties(
            HiveConfig hiveConfig,
            HiveFormatsConfig hiveFormatsConfig,
            OrcReaderConfig orcReaderConfig,
            OrcWriterConfig orcWriterConfig,
            ParquetReaderConfig parquetReaderConfig,
            ParquetWriterConfig parquetWriterConfig)
    {
        sessionProperties = ImmutableList.of(
                booleanProperty(
                        BUCKET_EXECUTION_ENABLED,
                        "Enable bucket-aware execution: only use a single worker per bucket",
                        hiveConfig.isBucketExecutionEnabled(),
                        false),
                booleanProperty(
                        VALIDATE_BUCKETING,
                        "Verify that data is bucketed correctly when reading",
                        hiveConfig.isValidateBucketing(),
                        false),
                dataSizeProperty(
                        TARGET_MAX_FILE_SIZE,
                        "Target maximum size of written files; the actual size may be larger",
                        hiveConfig.getTargetMaxFileSize(),
                        false),
                booleanProperty(
                        PARALLEL_PARTITIONED_BUCKETED_WRITES,
                        "Improve parallelism of partitioned and bucketed table writes",
                        hiveConfig.isParallelPartitionedBucketedWrites(),
                        false),
                booleanProperty(
                        FORCE_LOCAL_SCHEDULING,
                        "Only schedule splits on workers colocated with data node",
                        hiveConfig.isForceLocalScheduling(),
                        false),
                new PropertyMetadata<>(
                        INSERT_EXISTING_PARTITIONS_BEHAVIOR,
                        "Behavior on insert existing partitions; this session property doesn't control behavior on insert existing unpartitioned table",
                        VARCHAR,
                        InsertExistingPartitionsBehavior.class,
                        hiveConfig.getInsertExistingPartitionsBehavior(),
                        false,
                        value -> InsertExistingPartitionsBehavior.valueOf((String) value, hiveConfig.isImmutablePartitions()),
                        InsertExistingPartitionsBehavior::toString),
                booleanProperty(
                        CSV_NATIVE_READER_ENABLED,
                        "Use native CSV reader",
                        hiveFormatsConfig.isCsvNativeReaderEnabled(),
                        false),
                booleanProperty(
                        CSV_NATIVE_WRITER_ENABLED,
                        "Use native CSV writer",
                        hiveFormatsConfig.isCsvNativeWriterEnabled(),
                        false),
                booleanProperty(
                        JSON_NATIVE_READER_ENABLED,
                        "Use native JSON reader",
                        hiveFormatsConfig.isJsonNativeReaderEnabled(),
                        false),
                booleanProperty(
                        JSON_NATIVE_WRITER_ENABLED,
                        "Use native JSON writer",
                        hiveFormatsConfig.isJsonNativeWriterEnabled(),
                        false),
                booleanProperty(
                        REGEX_NATIVE_READER_ENABLED,
                        "Use native REGEX reader",
                        hiveFormatsConfig.isRegexNativeReaderEnabled(),
                        false),
                booleanProperty(
                        TEXT_FILE_NATIVE_READER_ENABLED,
                        "Use native text file reader",
                        hiveFormatsConfig.isTextFileNativeReaderEnabled(),
                        false),
                booleanProperty(
                        TEXT_FILE_NATIVE_WRITER_ENABLED,
                        "Use native text file writer",
                        hiveFormatsConfig.isTextFileNativeWriterEnabled(),
                        false),
                booleanProperty(
                        SEQUENCE_FILE_NATIVE_READER_ENABLED,
                        "Use native sequence file reader",
                        hiveFormatsConfig.isSequenceFileNativeReaderEnabled(),
                        false),
                booleanProperty(
                        SEQUENCE_FILE_NATIVE_WRITER_ENABLED,
                        "Use native sequence file writer",
                        hiveFormatsConfig.isSequenceFileNativeWriterEnabled(),
                        false),
                booleanProperty(
                        ORC_BLOOM_FILTERS_ENABLED,
                        "ORC: Enable bloom filters for predicate pushdown",
                        orcReaderConfig.isBloomFiltersEnabled(),
                        false),
                dataSizeProperty(
                        ORC_MAX_MERGE_DISTANCE,
                        "ORC: Maximum size of gap between two reads to merge into a single read",
                        orcReaderConfig.getMaxMergeDistance(),
                        false),
                dataSizeProperty(
                        ORC_MAX_BUFFER_SIZE,
                        "ORC: Maximum size of a single read",
                        orcReaderConfig.getMaxBufferSize(),
                        false),
                dataSizeProperty(
                        ORC_STREAM_BUFFER_SIZE,
                        "ORC: Size of buffer for streaming reads",
                        orcReaderConfig.getStreamBufferSize(),
                        false),
                dataSizeProperty(
                        ORC_TINY_STRIPE_THRESHOLD,
                        "ORC: Threshold below which an ORC stripe or file will read in its entirety",
                        orcReaderConfig.getTinyStripeThreshold(),
                        false),
                dataSizeProperty(
                        ORC_MAX_READ_BLOCK_SIZE,
                        "ORC: Soft max size of Trino blocks produced by ORC reader",
                        orcReaderConfig.getMaxBlockSize(),
                        false),
                booleanProperty(
                        ORC_LAZY_READ_SMALL_RANGES,
                        "Experimental: ORC: Read small file segments lazily",
                        orcReaderConfig.isLazyReadSmallRanges(),
                        false),
                booleanProperty(
                        ORC_NESTED_LAZY_ENABLED,
                        "Experimental: ORC: Lazily read nested data",
                        orcReaderConfig.isNestedLazy(),
                        false),
                dataSizeProperty(
                        ORC_STRING_STATISTICS_LIMIT,
                        "ORC: Maximum size of string statistics; drop if exceeding",
                        orcWriterConfig.getStringStatisticsLimit(),
                        false),
                booleanProperty(
                        ORC_OPTIMIZED_WRITER_VALIDATE,
                        "ORC: Force all validation for files",
                        orcWriterConfig.getValidationPercentage() > 0.0,
                        false),
                new PropertyMetadata<>(
                        ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE,
                        "ORC: sample percentage for validation for files",
                        DOUBLE,
                        Double.class,
                        orcWriterConfig.getValidationPercentage(),
                        false,
                        value -> {
                            double doubleValue = (double) value;
                            if (doubleValue < 0.0 || doubleValue > 100.0) {
                                throw new TrinoException(
                                        INVALID_SESSION_PROPERTY,
                                        format("%s must be between 0.0 and 100.0 inclusive: %s", ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE, doubleValue));
                            }
                            return doubleValue;
                        },
                        value -> value),
                enumProperty(
                        ORC_OPTIMIZED_WRITER_VALIDATE_MODE,
                        "ORC: Level of detail in ORC validation",
                        OrcWriteValidationMode.class,
                        orcWriterConfig.getValidationMode(),
                        false),
                dataSizeProperty(
                        ORC_OPTIMIZED_WRITER_MIN_STRIPE_SIZE,
                        "ORC: Min stripe size",
                        orcWriterConfig.getStripeMinSize(),
                        false),
                dataSizeProperty(
                        ORC_OPTIMIZED_WRITER_MAX_STRIPE_SIZE,
                        "ORC: Max stripe size",
                        orcWriterConfig.getStripeMaxSize(),
                        false),
                integerProperty(
                        ORC_OPTIMIZED_WRITER_MAX_STRIPE_ROWS,
                        "ORC: Max stripe row count",
                        orcWriterConfig.getStripeMaxRowCount(),
                        false),
                dataSizeProperty(
                        ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY,
                        "ORC: Max dictionary memory",
                        orcWriterConfig.getDictionaryMaxMemory(),
                        false),
                booleanProperty(
                        ORC_USE_COLUMN_NAME,
                        "ORC: Access ORC columns using names from the file",
                        orcReaderConfig.isUseColumnNames(),
                        false),
                enumProperty(
                        HIVE_STORAGE_FORMAT,
                        "Default storage format for new tables or partitions",
                        HiveStorageFormat.class,
                        hiveConfig.getHiveStorageFormat(),
                        false),
                enumProperty(
                        COMPRESSION_CODEC,
                        "Compression codec to use when writing files",
                        HiveCompressionOption.class,
                        hiveConfig.getHiveCompressionCodec(),
                        false),
                booleanProperty(
                        RESPECT_TABLE_FORMAT,
                        "Write new partitions using table format rather than default storage format",
                        hiveConfig.isRespectTableFormat(),
                        false),
                booleanProperty(
                        CREATE_EMPTY_BUCKET_FILES,
                        "Create empty files for buckets that have no data",
                        hiveConfig.isCreateEmptyBucketFiles(),
                        false),
                booleanProperty(
                        PARQUET_USE_COLUMN_NAME,
                        "Parquet: Access Parquet columns using names from the file",
                        hiveConfig.isUseParquetColumnNames(),
                        false),
                booleanProperty(
                        PARQUET_IGNORE_STATISTICS,
                        "Ignore statistics from Parquet to allow querying files with corrupted or incorrect statistics",
                        parquetReaderConfig.isIgnoreStatistics(),
                        false),
                booleanProperty(
                        PARQUET_USE_COLUMN_INDEX,
                        "Use Parquet column index",
                        parquetReaderConfig.isUseColumnIndex(),
                        false),
                booleanProperty(
                        PARQUET_USE_BLOOM_FILTER,
                        "Use Parquet bloomfilter",
                        parquetReaderConfig.isUseBloomFilter(),
                        false),
                dataSizeProperty(
                        PARQUET_MAX_READ_BLOCK_SIZE,
                        "Parquet: Maximum size of a block to read",
                        parquetReaderConfig.getMaxReadBlockSize(),
                        false),
                integerProperty(
                        PARQUET_MAX_READ_BLOCK_ROW_COUNT,
                        "Parquet: Maximum number of rows read in a batch",
                        parquetReaderConfig.getMaxReadBlockRowCount(),
                        value -> {
                            if (value < 128 || value > 65536) {
                                throw new TrinoException(
                                        INVALID_SESSION_PROPERTY,
                                        format("%s must be between 128 and 65536: %s", PARQUET_MAX_READ_BLOCK_ROW_COUNT, value));
                            }
                        },
                        false),
                booleanProperty(
                        PARQUET_OPTIMIZED_READER_ENABLED,
                        "Use optimized Parquet reader",
                        parquetReaderConfig.isOptimizedReaderEnabled(),
                        false),
                booleanProperty(
                        PARQUET_OPTIMIZED_NESTED_READER_ENABLED,
                        "Use optimized Parquet reader for nested columns",
                        parquetReaderConfig.isOptimizedNestedReaderEnabled(),
                        false),
                dataSizeProperty(
                        PARQUET_WRITER_BLOCK_SIZE,
                        "Parquet: Writer block size",
                        parquetWriterConfig.getBlockSize(),
                        false),
                dataSizeProperty(
                        PARQUET_WRITER_PAGE_SIZE,
                        "Parquet: Writer page size",
                        parquetWriterConfig.getPageSize(),
                        false),
                integerProperty(
                        PARQUET_WRITER_BATCH_SIZE,
                        "Parquet: Maximum number of rows passed to the writer in each batch",
                        parquetWriterConfig.getBatchSize(),
                        false),
                new PropertyMetadata<>(
                        PARQUET_OPTIMIZED_WRITER_VALIDATION_PERCENTAGE,
                        "Parquet: sample percentage for validation of written files",
                        DOUBLE,
                        Double.class,
                        parquetWriterConfig.getValidationPercentage(),
                        false,
                        value -> {
                            double doubleValue = (double) value;
                            if (doubleValue < 0.0 || doubleValue > 100.0) {
                                throw new TrinoException(
                                        INVALID_SESSION_PROPERTY,
                                        format("%s must be between 0.0 and 100.0 inclusive: %s", PARQUET_OPTIMIZED_WRITER_VALIDATION_PERCENTAGE, doubleValue));
                            }
                            return doubleValue;
                        },
                        value -> value),
                dataSizeProperty(
                        MAX_SPLIT_SIZE,
                        "Max split size",
                        hiveConfig.getMaxSplitSize(),
                        true),
                dataSizeProperty(
                        MAX_INITIAL_SPLIT_SIZE,
                        "Max initial split size",
                        hiveConfig.getMaxInitialSplitSize(),
                        true),
                booleanProperty(
                        RCFILE_OPTIMIZED_WRITER_VALIDATE,
                        "RCFile: Validate writer files",
                        hiveConfig.isRcfileWriterValidate(),
                        false),
                booleanProperty(
                        SORTED_WRITING_ENABLED,
                        "Enable writing to bucketed sorted tables",
                        hiveConfig.isSortedWritingEnabled(),
                        false),
                booleanProperty(
                        PROPAGATE_TABLE_SCAN_SORTING_PROPERTIES,
                        "Use sorted table layout to generate more efficient execution plans. May lead to incorrect results if files are not sorted as per table definition.",
                        hiveConfig.isPropagateTableScanSortingProperties(),
                        false),
                booleanProperty(
                        STATISTICS_ENABLED,
                        "Expose table statistics",
                        hiveConfig.isTableStatisticsEnabled(),
                        false),
                integerProperty(
                        PARTITION_STATISTICS_SAMPLE_SIZE,
                        "Maximum sample size of the partitions column statistics",
                        hiveConfig.getPartitionStatisticsSampleSize(),
                        false),
                booleanProperty(
                        IGNORE_CORRUPTED_STATISTICS,
                        "Experimental: Ignore corrupted statistics rather than failing",
                        hiveConfig.isIgnoreCorruptedStatistics(),
                        false),
                booleanProperty(
                        COLLECT_COLUMN_STATISTICS_ON_WRITE,
                        "Enables automatic column level statistics collection on write",
                        hiveConfig.isCollectColumnStatisticsOnWrite(),
                        false),
                booleanProperty(
                        OPTIMIZE_MISMATCHED_BUCKET_COUNT,
                        "Experimental: Enable optimization to avoid shuffle when bucket count is compatible but not the same",
                        hiveConfig.isOptimizeMismatchedBucketCount(),
                        false),
                booleanProperty(
                        S3_SELECT_PUSHDOWN_ENABLED,
                        "S3 Select pushdown enabled",
                        hiveConfig.isS3SelectPushdownEnabled(),
                        false),
                booleanProperty(
                        TEMPORARY_STAGING_DIRECTORY_ENABLED,
                        "Should use temporary staging directory for write operations",
                        hiveConfig.isTemporaryStagingDirectoryEnabled(),
                        false),
                stringProperty(
                        TEMPORARY_STAGING_DIRECTORY_PATH,
                        "Temporary staging directory location",
                        hiveConfig.getTemporaryStagingDirectoryPath(),
                        false),
                booleanProperty(
                        DELEGATE_TRANSACTIONAL_MANAGED_TABLE_LOCATION_TO_METASTORE,
                        "When transactional managed table is created via Trino the location will not be set in request sent to HMS and location will be determined by metastore; if this property is set to true CREATE TABLE AS queries are not supported.",
                        hiveConfig.isDelegateTransactionalManagedTableLocationToMetastore(),
                        true),
                booleanProperty(
                        IGNORE_ABSENT_PARTITIONS,
                        "Ignore partitions when the file system location does not exist rather than failing the query.",
                        hiveConfig.isIgnoreAbsentPartitions(),
                        false),
                booleanProperty(
                        QUERY_PARTITION_FILTER_REQUIRED,
                        "Require filter on partition column",
                        hiveConfig.isQueryPartitionFilterRequired(),
                        false),
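                // The decoder below rejects null or empty schema names and lower-cases
                // the rest, so the configured schema list is matched case-insensitively.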
                new PropertyMetadata<>(
                        QUERY_PARTITION_FILTER_REQUIRED_SCHEMAS,
                        "List of schemas for which filter on partition column is enforced.",
                        new ArrayType(VARCHAR),
                        Set.class,
                        hiveConfig.getQueryPartitionFilterRequiredSchemas(),
                        false,
                        object -> ((Collection<?>) object).stream()
                                .map(String.class::cast)
                                .peek(property -> {
                                    if (isNullOrEmpty(property)) {
                                        throw new TrinoException(INVALID_SESSION_PROPERTY, format("Invalid null or empty value in %s property", QUERY_PARTITION_FILTER_REQUIRED_SCHEMAS));
                                    }
                                })
                                .map(schema -> schema.toLowerCase(ENGLISH))
                                .collect(toImmutableSet()),
                        value -> value),
                booleanProperty(
                        PROJECTION_PUSHDOWN_ENABLED,
                        "Projection push down enabled for hive",
                        hiveConfig.isProjectionPushdownEnabled(),
                        false),
                enumProperty(
                        TIMESTAMP_PRECISION,
                        "Precision for timestamp columns in Hive tables",
                        HiveTimestampPrecision.class,
                        hiveConfig.getTimestampPrecision(),
                        false),
                booleanProperty(
                        PARQUET_OPTIMIZED_WRITER_ENABLED,
                        "Enable optimized writer",
                        parquetWriterConfig.isParquetOptimizedWriterEnabled(),
                        false),
                durationProperty(
                        DYNAMIC_FILTERING_WAIT_TIMEOUT,
                        "Duration to wait for completion of dynamic filters during split generation",
                        hiveConfig.getDynamicFilteringWaitTimeout(),
                        false),
                booleanProperty(
                        OPTIMIZE_SYMLINK_LISTING,
                        "Optimize listing for SymlinkTextFormat tables with files in a single directory",
                        hiveConfig.isOptimizeSymlinkListing(),
                        false),
                booleanProperty(
                        HIVE_VIEWS_LEGACY_TRANSLATION,
                        "Use legacy Hive view translation mechanism",
                        hiveConfig.isLegacyHiveViewTranslation(),
                        false),
                stringProperty(
                        ICEBERG_CATALOG_NAME,
                        "Catalog to redirect to when an Iceberg table is referenced",
                        hiveConfig.getIcebergCatalogName().orElse(null),
                        // Session-level redirections configuration does not work well with views, as view body is analyzed in context
                        // of a session with properties stripped off. Thus, this property is mainly useful for testing, or at most for proof-of-concept use.
                        true),
                booleanProperty(
                        SIZE_BASED_SPLIT_WEIGHTS_ENABLED,
                        "Enable estimating split weights based on size in bytes",
                        hiveConfig.isSizeBasedSplitWeightsEnabled(),
                        false),
                doubleProperty(
                        MINIMUM_ASSIGNED_SPLIT_WEIGHT,
                        "Minimum assigned split weight when size based split weighting is enabled",
                        hiveConfig.getMinimumAssignedSplitWeight(),
                        value -> {
                            if (!Double.isFinite(value) || value <= 0 || value > 1) {
                                throw new TrinoException(INVALID_SESSION_PROPERTY, format("%s must be > 0 and <= 1.0: %s", MINIMUM_ASSIGNED_SPLIT_WEIGHT, value));
                            }
                        },
                        false),
                booleanProperty(
                        NON_TRANSACTIONAL_OPTIMIZE_ENABLED,
                        "Enable OPTIMIZE table procedure",
                        false,
                        false),
                stringProperty(
                        DELTA_LAKE_CATALOG_NAME,
                        "Catalog to redirect to when a Delta Lake table is referenced",
                        hiveConfig.getDeltaLakeCatalogName().orElse(null),
                        // Session-level redirections configuration does not work well with views, as view body is analyzed in context
                        // of a session with properties stripped off. Thus, this property is mainly useful for testing, or at most for proof-of-concept use.
                        true),
                stringProperty(
                        HUDI_CATALOG_NAME,
                        "Catalog to redirect to when a Hudi table is referenced",
                        hiveConfig.getHudiCatalogName().orElse(null),
                        // Session-level redirections configuration does not work well with views, as view body is analyzed in context
                        // of a session with properties stripped off. Thus, this property is mainly useful for testing, or at most for proof-of-concept use.
                        true));
    }

    @Override
    public List<PropertyMetadata<?>> getSessionProperties()
    {
        return sessionProperties;
    }

    public static boolean isBucketExecutionEnabled(ConnectorSession session)
    {
        return session.getProperty(BUCKET_EXECUTION_ENABLED, Boolean.class);
    }

    public static boolean isValidateBucketing(ConnectorSession session)
    {
        return session.getProperty(VALIDATE_BUCKETING, Boolean.class);
    }

    public static DataSize getTargetMaxFileSize(ConnectorSession session)
    {
        return session.getProperty(TARGET_MAX_FILE_SIZE, DataSize.class);
    }

    public static boolean isParallelPartitionedBucketedWrites(ConnectorSession session)
    {
        return session.getProperty(PARALLEL_PARTITIONED_BUCKETED_WRITES, Boolean.class);
    }

    public static boolean isForceLocalScheduling(ConnectorSession session)
    {
        return session.getProperty(FORCE_LOCAL_SCHEDULING, Boolean.class);
    }

    public static InsertExistingPartitionsBehavior getInsertExistingPartitionsBehavior(ConnectorSession session)
    {
        return session.getProperty(INSERT_EXISTING_PARTITIONS_BEHAVIOR, InsertExistingPartitionsBehavior.class);
    }

    public static boolean isCsvNativeReaderEnabled(ConnectorSession session)
    {
        return session.getProperty(CSV_NATIVE_READER_ENABLED, Boolean.class);
    }

    public static boolean isCsvNativeWriterEnabled(ConnectorSession session)
    {
        return session.getProperty(CSV_NATIVE_WRITER_ENABLED, Boolean.class);
    }

    public static boolean isJsonNativeReaderEnabled(ConnectorSession session)
    {
        return session.getProperty(JSON_NATIVE_READER_ENABLED, Boolean.class);
    }

    public static boolean isJsonNativeWriterEnabled(ConnectorSession session)
    {
        return session.getProperty(JSON_NATIVE_WRITER_ENABLED, Boolean.class);
    }

    public static boolean isRegexNativeReaderEnabled(ConnectorSession session)
    {
        return session.getProperty(REGEX_NATIVE_READER_ENABLED, Boolean.class);
    }

    public static boolean isTextFileNativeReaderEnabled(ConnectorSession session)
    {
        return session.getProperty(TEXT_FILE_NATIVE_READER_ENABLED, Boolean.class);
    }

    public static boolean isTextFileNativeWriterEnabled(ConnectorSession session)
    {
        return session.getProperty(TEXT_FILE_NATIVE_WRITER_ENABLED, Boolean.class);
    }

    public static boolean isSequenceFileNativeReaderEnabled(ConnectorSession session)
    {
        return session.getProperty(SEQUENCE_FILE_NATIVE_READER_ENABLED, Boolean.class);
    }

    public static boolean isSequenceFileNativeWriterEnabled(ConnectorSession session)
    {
        return session.getProperty(SEQUENCE_FILE_NATIVE_WRITER_ENABLED, Boolean.class);
    }

    public static boolean isOrcBloomFiltersEnabled(ConnectorSession session)
    {
        return session.getProperty(ORC_BLOOM_FILTERS_ENABLED, Boolean.class);
    }

    public static DataSize getOrcMaxMergeDistance(ConnectorSession session)
    {
        return session.getProperty(ORC_MAX_MERGE_DISTANCE, DataSize.class);
    }

    public static DataSize getOrcMaxBufferSize(ConnectorSession session)
    {
        return session.getProperty(ORC_MAX_BUFFER_SIZE, DataSize.class);
    }

    public static DataSize getOrcStreamBufferSize(ConnectorSession session)
    {
        return session.getProperty(ORC_STREAM_BUFFER_SIZE, DataSize.class);
    }

    public static DataSize getOrcTinyStripeThreshold(ConnectorSession session)
    {
        return session.getProperty(ORC_TINY_STRIPE_THRESHOLD, DataSize.class);
    }

    public static DataSize getOrcMaxReadBlockSize(ConnectorSession session)
    {
        return session.getProperty(ORC_MAX_READ_BLOCK_SIZE, DataSize.class);
    }

    public static boolean getOrcLazyReadSmallRanges(ConnectorSession session)
    {
        return session.getProperty(ORC_LAZY_READ_SMALL_RANGES, Boolean.class);
    }

    public static boolean isOrcNestedLazy(ConnectorSession session)
    {
        return session.getProperty(ORC_NESTED_LAZY_ENABLED, Boolean.class);
    }

    public static DataSize getOrcStringStatisticsLimit(ConnectorSession session)
    {
        return session.getProperty(ORC_STRING_STATISTICS_LIMIT, DataSize.class);
    }

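    /**
     * Decides, per written file, whether ORC writer validation should run:
     * validation must be enabled, and each file is then sampled at the
     * configured percentage (100 validates every file, 0 validates none).
     */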
    public static boolean isOrcOptimizedWriterValidate(ConnectorSession session)
    {
        boolean validate = session.getProperty(ORC_OPTIMIZED_WRITER_VALIDATE, Boolean.class);
        double percentage = session.getProperty(ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE, Double.class);

        checkArgument(percentage >= 0.0 && percentage <= 100.0);

        // the session property can disable validation
        if (!validate) {
            return false;
        }

        // session property cannot force validation when sampling is enabled
        // todo change this if session properties support null
        return ThreadLocalRandom.current().nextDouble(100) < percentage;
    }

    public static OrcWriteValidationMode getOrcOptimizedWriterValidateMode(ConnectorSession session)
    {
        return session.getProperty(ORC_OPTIMIZED_WRITER_VALIDATE_MODE, OrcWriteValidationMode.class);
    }

    public static DataSize getOrcOptimizedWriterMinStripeSize(ConnectorSession session)
    {
        return session.getProperty(ORC_OPTIMIZED_WRITER_MIN_STRIPE_SIZE, DataSize.class);
    }

    public static DataSize getOrcOptimizedWriterMaxStripeSize(ConnectorSession session)
    {
        return session.getProperty(ORC_OPTIMIZED_WRITER_MAX_STRIPE_SIZE, DataSize.class);
    }

    public static int getOrcOptimizedWriterMaxStripeRows(ConnectorSession session)
    {
        return session.getProperty(ORC_OPTIMIZED_WRITER_MAX_STRIPE_ROWS, Integer.class);
    }

    public static DataSize getOrcOptimizedWriterMaxDictionaryMemory(ConnectorSession session)
    {
        return session.getProperty(ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY, DataSize.class);
    }

    public static boolean isUseOrcColumnNames(ConnectorSession session)
    {
        return session.getProperty(ORC_USE_COLUMN_NAME, Boolean.class);
    }

    public static HiveStorageFormat getHiveStorageFormat(ConnectorSession session)
    {
        return session.getProperty(HIVE_STORAGE_FORMAT, HiveStorageFormat.class);
    }

    public static HiveCompressionOption getCompressionCodec(ConnectorSession session)
    {
        return session.getProperty(COMPRESSION_CODEC, HiveCompressionOption.class);
    }

    public static boolean isRespectTableFormat(ConnectorSession session)
    {
        return session.getProperty(RESPECT_TABLE_FORMAT, Boolean.class);
    }

    public static boolean isCreateEmptyBucketFiles(ConnectorSession session)
    {
        return session.getProperty(CREATE_EMPTY_BUCKET_FILES, Boolean.class);
    }

    public static boolean isUseParquetColumnNames(ConnectorSession session)
    {
        return session.getProperty(PARQUET_USE_COLUMN_NAME, Boolean.class);
    }

    public static boolean isParquetIgnoreStatistics(ConnectorSession session)
    {
        return session.getProperty(PARQUET_IGNORE_STATISTICS, Boolean.class);
    }

    public static boolean isParquetUseColumnIndex(ConnectorSession session)
    {
        return session.getProperty(PARQUET_USE_COLUMN_INDEX, Boolean.class);
    }

    public static boolean useParquetBloomFilter(ConnectorSession session)
    {
        return session.getProperty(PARQUET_USE_BLOOM_FILTER, Boolean.class);
    }

    public static DataSize getParquetMaxReadBlockSize(ConnectorSession session)
    {
        return session.getProperty(PARQUET_MAX_READ_BLOCK_SIZE, DataSize.class);
    }

    public static int getParquetMaxReadBlockRowCount(ConnectorSession session)
    {
        return session.getProperty(PARQUET_MAX_READ_BLOCK_ROW_COUNT, Integer.class);
    }

    public static boolean isParquetOptimizedReaderEnabled(ConnectorSession session)
    {
        return session.getProperty(PARQUET_OPTIMIZED_READER_ENABLED, Boolean.class);
    }

    public static boolean isParquetOptimizedNestedReaderEnabled(ConnectorSession session)
    {
        return session.getProperty(PARQUET_OPTIMIZED_NESTED_READER_ENABLED, Boolean.class);
    }

    public static DataSize getParquetWriterBlockSize(ConnectorSession session)
    {
        return session.getProperty(PARQUET_WRITER_BLOCK_SIZE, DataSize.class);
    }

    public static DataSize getParquetWriterPageSize(ConnectorSession session)
    {
        return session.getProperty(PARQUET_WRITER_PAGE_SIZE, DataSize.class);
    }

    public static int getParquetBatchSize(ConnectorSession session)
    {
        return session.getProperty(PARQUET_WRITER_BATCH_SIZE, Integer.class);
    }

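    /**
     * Samples written Parquet files for validation at the configured
     * percentage, mirroring the ORC writer validation sampling above.
     */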
    public static boolean isParquetOptimizedWriterValidate(ConnectorSession session)
    {
        double percentage = session.getProperty(PARQUET_OPTIMIZED_WRITER_VALIDATION_PERCENTAGE, Double.class);
        checkArgument(percentage >= 0.0 && percentage <= 100.0);
        return ThreadLocalRandom.current().nextDouble(100) < percentage;
    }

    public static DataSize getMaxSplitSize(ConnectorSession session)
    {
        return session.getProperty(MAX_SPLIT_SIZE, DataSize.class);
    }

    public static DataSize getMaxInitialSplitSize(ConnectorSession session)
    {
        return session.getProperty(MAX_INITIAL_SPLIT_SIZE, DataSize.class);
    }

    public static boolean isRcfileOptimizedWriterValidate(ConnectorSession session)
    {
        return session.getProperty(RCFILE_OPTIMIZED_WRITER_VALIDATE, Boolean.class);
    }

    public static boolean isSortedWritingEnabled(ConnectorSession session)
    {
        return session.getProperty(SORTED_WRITING_ENABLED, Boolean.class);
    }

    public static boolean isPropagateTableScanSortingProperties(ConnectorSession session)
    {
        return session.getProperty(PROPAGATE_TABLE_SCAN_SORTING_PROPERTIES, Boolean.class);
    }

    public static boolean isS3SelectPushdownEnabled(ConnectorSession session)
    {
        return session.getProperty(S3_SELECT_PUSHDOWN_ENABLED, Boolean.class);
    }

    public static boolean isStatisticsEnabled(ConnectorSession session)
    {
        return session.getProperty(STATISTICS_ENABLED, Boolean.class);
    }

    public static int getPartitionStatisticsSampleSize(ConnectorSession session)
    {
        int size = session.getProperty(PARTITION_STATISTICS_SAMPLE_SIZE, Integer.class);
        if (size < 1) {
            throw new TrinoException(INVALID_SESSION_PROPERTY, format("%s must be greater than 0: %s", PARTITION_STATISTICS_SAMPLE_SIZE, size));
        }
        return size;
    }

    public static boolean isIgnoreCorruptedStatistics(ConnectorSession session)
    {
        return session.getProperty(IGNORE_CORRUPTED_STATISTICS, Boolean.class);
    }

    public static boolean isCollectColumnStatisticsOnWrite(ConnectorSession session)
    {
        return session.getProperty(COLLECT_COLUMN_STATISTICS_ON_WRITE, Boolean.class);
    }

    public static boolean isOptimizedMismatchedBucketCount(ConnectorSession session)
    {
        return session.getProperty(OPTIMIZE_MISMATCHED_BUCKET_COUNT, Boolean.class);
    }

    public static boolean isTemporaryStagingDirectoryEnabled(ConnectorSession session)
    {
        return session.getProperty(TEMPORARY_STAGING_DIRECTORY_ENABLED, Boolean.class);
    }

    public static String getTemporaryStagingDirectoryPath(ConnectorSession session)
    {
        return session.getProperty(TEMPORARY_STAGING_DIRECTORY_PATH, String.class);
    }

    public static boolean isDelegateTransactionalManagedTableLocationToMetastore(ConnectorSession session)
    {
        return session.getProperty(DELEGATE_TRANSACTIONAL_MANAGED_TABLE_LOCATION_TO_METASTORE, Boolean.class);
    }

    public static boolean isIgnoreAbsentPartitions(ConnectorSession session)
    {
        return session.getProperty(IGNORE_ABSENT_PARTITIONS, Boolean.class);
    }

    public static boolean isQueryPartitionFilterRequired(ConnectorSession session)
    {
        return session.getProperty(QUERY_PARTITION_FILTER_REQUIRED, Boolean.class);
    }

    @SuppressWarnings("unchecked cast")
    public static Set<String> getQueryPartitionFilterRequiredSchemas(ConnectorSession session)
    {
        Set<String> schemas = (Set<String>) session.getProperty(QUERY_PARTITION_FILTER_REQUIRED_SCHEMAS, Set.class);
        requireNonNull(schemas, "queryPartitionFilterRequiredSchemas is null");
        return schemas;
    }

    public static boolean isProjectionPushdownEnabled(ConnectorSession session)
    {
        return session.getProperty(PROJECTION_PUSHDOWN_ENABLED, Boolean.class);
    }

    public static HiveTimestampPrecision getTimestampPrecision(ConnectorSession session)
    {
        return session.getProperty(TIMESTAMP_PRECISION, HiveTimestampPrecision.class);
    }

    public static boolean isParquetOptimizedWriterEnabled(ConnectorSession session)
    {
        return session.getProperty(PARQUET_OPTIMIZED_WRITER_ENABLED, Boolean.class);
    }

    public static Duration getDynamicFilteringWaitTimeout(ConnectorSession session)
    {
        return session.getProperty(DYNAMIC_FILTERING_WAIT_TIMEOUT, Duration.class);
    }

    public static boolean isOptimizeSymlinkListing(ConnectorSession session)
    {
        return session.getProperty(OPTIMIZE_SYMLINK_LISTING, Boolean.class);
    }

    public static boolean isHiveViewsLegacyTranslation(ConnectorSession session)
    {
        return session.getProperty(HIVE_VIEWS_LEGACY_TRANSLATION, Boolean.class);
    }

    public static Optional<String> getIcebergCatalogName(ConnectorSession session)
    {
        return Optional.ofNullable(session.getProperty(ICEBERG_CATALOG_NAME, String.class));
    }

    public static boolean isSizeBasedSplitWeightsEnabled(ConnectorSession session)
    {
        return session.getProperty(SIZE_BASED_SPLIT_WEIGHTS_ENABLED, Boolean.class);
    }

    public static double getMinimumAssignedSplitWeight(ConnectorSession session)
    {
        return session.getProperty(MINIMUM_ASSIGNED_SPLIT_WEIGHT, Double.class);
    }

    public static boolean isNonTransactionalOptimizeEnabled(ConnectorSession session)
    {
        return session.getProperty(NON_TRANSACTIONAL_OPTIMIZE_ENABLED, Boolean.class);
    }

    public static Optional<String> getDeltaLakeCatalogName(ConnectorSession session)
    {
        return Optional.ofNullable(session.getProperty(DELTA_LAKE_CATALOG_NAME, String.class));
    }

    public static Optional<String> getHudiCatalogName(ConnectorSession session)
    {
        return Optional.ofNullable(session.getProperty(HUDI_CATALOG_NAME, String.class));
    }
}
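
Connector code reads these values through the static accessors above rather than by raw property name. A minimal, hypothetical call site (the ConnectorSession instance is assumed to be supplied by the engine):

    if (HiveSessionProperties.isOrcBloomFiltersEnabled(session)) {
        // let the ORC reader use bloom filters for predicate pushdown
    }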



