/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive.metastore;

import com.facebook.airlift.log.Logger;
import com.facebook.presto.common.Page;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.type.ArrayType;
import com.facebook.presto.common.type.BigintType;
import com.facebook.presto.common.type.BooleanType;
import com.facebook.presto.common.type.CharType;
import com.facebook.presto.common.type.DateType;
import com.facebook.presto.common.type.DecimalType;
import com.facebook.presto.common.type.Decimals;
import com.facebook.presto.common.type.DistinctType;
import com.facebook.presto.common.type.DoubleType;
import com.facebook.presto.common.type.EnumType;
import com.facebook.presto.common.type.IntegerType;
import com.facebook.presto.common.type.MapType;
import com.facebook.presto.common.type.RealType;
import com.facebook.presto.common.type.RowType;
import com.facebook.presto.common.type.SmallintType;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.common.type.TimestampType;
import com.facebook.presto.common.type.TinyintType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeWithName;
import com.facebook.presto.common.type.VarbinaryType;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.hive.ColumnConverter;
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HdfsEnvironment;
import com.facebook.presto.hive.HiveBasicStatistics;
import com.facebook.presto.hive.HiveType;
import com.facebook.presto.hive.PartitionNameWithVersion;
import com.facebook.presto.hive.PartitionOfflineException;
import com.facebook.presto.hive.TableOfflineException;
import com.facebook.presto.hive.TypeTranslator;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableMetadata;
import com.facebook.presto.spi.ConnectorViewDefinition;
import com.facebook.presto.spi.ErrorCodeSupplier;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.StandardErrorCode;
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.ViewNotFoundException;
import com.facebook.presto.spi.security.PrestoPrincipal;
import com.facebook.presto.spi.statistics.ColumnStatisticType;
import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.primitives.Longs;
import io.airlift.slice.Slice;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.metastore.ProtectMode;
import org.apache.hadoop.io.Text;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;

import javax.annotation.Nullable;

import java.io.IOException;
import java.math.BigInteger;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.OptionalLong;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.common.type.Chars.isCharType;
import static com.facebook.presto.common.type.Chars.padSpaces;
import static com.facebook.presto.common.type.DateType.DATE;
import static com.facebook.presto.common.type.TimestampType.TIMESTAMP;
import static com.facebook.presto.common.type.TypeUtils.isNumericType;
import static com.facebook.presto.common.type.VarbinaryType.VARBINARY;
import static com.facebook.presto.common.type.Varchars.isVarcharType;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE;
import static com.facebook.presto.hive.HiveType.HIVE_STRING;
import static com.facebook.presto.hive.HiveType.toHiveType;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.DELETE;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.INSERT;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.SELECT;
import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.UPDATE;
import static com.facebook.presto.hive.metastore.PrestoTableType.MANAGED_TABLE;
import static com.facebook.presto.hive.metastore.PrestoTableType.VIRTUAL_VIEW;
import static com.facebook.presto.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.facebook.presto.spi.security.PrincipalType.USER;
import static com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE;
import static com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE_SIZE_IN_BYTES;
import static com.facebook.presto.spi.statistics.ColumnStatisticType.MIN_VALUE;
import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES;
import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES;
import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_TRUE_VALUES;
import static com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.base.Strings.padEnd;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.io.BaseEncoding.base16;
import static java.lang.Float.intBitsToFloat;
import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Locale.ENGLISH;
import static java.util.stream.Collectors.toList;
import static org.apache.hadoop.hive.common.FileUtils.unescapePathName;
import static org.apache.hadoop.hive.metastore.ColumnType.typeToThriftType;
import static org.apache.hadoop.hive.metastore.ProtectMode.getProtectModeFromString;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_FIELD_NAME;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_OUTPUT_FORMAT;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES;
import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_DDL;
import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB;
import static org.joda.time.DateTimeZone.UTC;

public class MetastoreUtil
{
    private static final Logger log = Logger.get(MetastoreUtil.class);

    public static final String METASTORE_HEADERS = "metastore_headers";
    public static final String PRESTO_OFFLINE = "presto_offline";
    public static final String AVRO_SCHEMA_URL_KEY = "avro.schema.url";
    public static final String PRESTO_VIEW_FLAG = "presto_view";
    public static final String PRESTO_MATERIALIZED_VIEW_FLAG = "presto_materialized_view";
    public static final String PRESTO_QUERY_ID_NAME = "presto_query_id";
    public static final String HIVE_DEFAULT_DYNAMIC_PARTITION = "__HIVE_DEFAULT_PARTITION__";
    public static final String USER_DEFINED_TYPE_ENCODING_ENABLED = "user_defined_type_encoding";
    public static final String DEFAULT_METASTORE_USER = "presto";

    @SuppressWarnings("OctalInteger")
    public static final FsPermission ALL_PERMISSIONS = new FsPermission((short) 0777);

    private static final String PARTITION_VALUE_WILDCARD = "";
    private static final String NUM_FILES = "numFiles";
    private static final String NUM_ROWS = "numRows";
    private static final String RAW_DATA_SIZE = "rawDataSize";
    private static final String TOTAL_SIZE = "totalSize";
    // transient_lastDdlTime is the parameter recording the latest DDL time.
    // It is included in STATS_PROPERTIES so that it is skipped when the
    // statistics parameters are updated; Hive then notices the missing
    // parameter and creates a new transient_lastDdlTime with the current time
    // instead of copying the stale value onto the Hive partition.
    private static final String TRANSIENT_LAST_DDL_TIME = "transient_lastDdlTime";
    private static final Set<String> STATS_PROPERTIES = ImmutableSet.of(NUM_FILES, NUM_ROWS, RAW_DATA_SIZE, TOTAL_SIZE, TRANSIENT_LAST_DDL_TIME);
    private static final Set<ColumnStatisticType> ORDERABLE_STATS = ImmutableSet.of(MIN_VALUE, MAX_VALUE);
    public static final String ICEBERG_TABLE_TYPE_NAME = "table_type";
    public static final String ICEBERG_TABLE_TYPE_VALUE = "iceberg";
    public static final String SPARK_TABLE_PROVIDER_KEY = "spark.sql.sources.provider";
    public static final String DELTA_LAKE_PROVIDER = "delta";
    public static final String TABLE_COMMENT = "comment";
    public static final String PRESTO_VIEW_COMMENT = "Presto View";
    public static final String PRESTO_VERSION_NAME = "presto_version";
    public static final String PRESTO_VIEW_EXPANDED_TEXT_MARKER = "/* Presto View */";

    private MetastoreUtil()
    {
    }

    public static boolean isArrayType(Type type)
    {
        return type.getTypeSignature().getBase().equals(StandardTypes.ARRAY);
    }

    public static boolean isMapType(Type type)
    {
        return type.getTypeSignature().getBase().equals(StandardTypes.MAP);
    }

    public static boolean isRowType(Type type)
    {
        return type.getTypeSignature().getBase().equals(StandardTypes.ROW);
    }

    public static void checkCondition(boolean condition, ErrorCodeSupplier errorCode, String formatString, Object... args)
    {
        if (!condition) {
            throw new PrestoException(errorCode, format(formatString, args));
        }
    }

    public static Properties getHiveSchema(Table table)
    {
        // Mimics function in Hive: MetaStoreUtils.getTableMetadata(Table)
        return getHiveSchema(
                table.getStorage(),
                table.getDataColumns(),
                table.getDataColumns(),
                table.getParameters(),
                table.getDatabaseName(),
                table.getTableName(),
                table.getPartitionColumns().stream().map(column -> column.getName()).collect(toList()),
                table.getPartitionColumns().stream().map(column -> column.getType()).collect(toList()));
    }

    public static Properties getHiveSchema(Partition partition, Table table)
    {
        // Mimics function in Hive: MetaStoreUtils.getSchema(Partition, Table)
        return getHiveSchema(
                partition.getStorage(),
                partition.getColumns(),
                table.getDataColumns(),
                table.getParameters(),
                table.getDatabaseName(),
                table.getTableName(),
                table.getPartitionColumns().stream().map(column -> column.getName()).collect(toList()),
                table.getPartitionColumns().stream().map(column -> column.getType()).collect(toList()));
    }

    public static Properties getHiveSchema(
            Storage storage,
            List<Column> partitionDataColumns,
            List<Column> tableDataColumns,
            Map<String, String> tableParameters,
            String databaseName,
            String tableName,
            List<String> partitionKeyNames,
            List<HiveType> partitionKeyTypes)
    {
        // Mimics function in Hive:
        // MetaStoreUtils.getSchema(StorageDescriptor, StorageDescriptor, Map<String, String>, String, String, List<FieldSchema>)

        Properties schema = new Properties();

        schema.setProperty(FILE_INPUT_FORMAT, storage.getStorageFormat().getInputFormat());
        schema.setProperty(FILE_OUTPUT_FORMAT, storage.getStorageFormat().getOutputFormat());

        schema.setProperty(META_TABLE_NAME, databaseName + "." + tableName);
        schema.setProperty(META_TABLE_LOCATION, storage.getLocation());

        if (storage.getBucketProperty().isPresent()) {
            List<String> bucketedBy = storage.getBucketProperty().get().getBucketedBy();
            if (!bucketedBy.isEmpty()) {
                schema.setProperty(BUCKET_FIELD_NAME, Joiner.on(",").join(bucketedBy));
            }
            schema.setProperty(BUCKET_COUNT, Integer.toString(storage.getBucketProperty().get().getBucketCount()));
        }
        else {
            schema.setProperty(BUCKET_COUNT, "0");
        }

        for (Entry<String, String> param : storage.getSerdeParameters().entrySet()) {
            schema.setProperty(param.getKey(), (param.getValue() != null) ? param.getValue() : "");
        }
        schema.setProperty(SERIALIZATION_LIB, storage.getStorageFormat().getSerDe());

        StringBuilder columnNameBuilder = new StringBuilder();
        StringBuilder columnTypeBuilder = new StringBuilder();
        StringBuilder columnCommentBuilder = new StringBuilder();
        boolean first = true;
        for (Column column : tableDataColumns) {
            if (!first) {
                columnNameBuilder.append(",");
                columnTypeBuilder.append(":");
                columnCommentBuilder.append('\0');
            }
            columnNameBuilder.append(column.getName());
            columnTypeBuilder.append(column.getType());
            columnCommentBuilder.append(column.getComment().orElse(""));
            first = false;
        }
        String columnNames = columnNameBuilder.toString();
        String columnTypes = columnTypeBuilder.toString();
        schema.setProperty(META_TABLE_COLUMNS, columnNames);
        schema.setProperty(META_TABLE_COLUMN_TYPES, columnTypes);
        schema.setProperty("columns.comments", columnCommentBuilder.toString());

        schema.setProperty(SERIALIZATION_DDL, toThriftDdl(tableName, partitionDataColumns));

        String partString = "";
        String partStringSep = "";
        String partTypesString = "";
        String partTypesStringSep = "";

        for (int index = 0; index < partitionKeyNames.size(); ++index) {
            String name = partitionKeyNames.get(index);
            HiveType type = partitionKeyTypes.get(index);

            partString += partStringSep;
            partString += name;
            partTypesString += partTypesStringSep;
            partTypesString += type.getHiveTypeName().toString();
            if (partStringSep.length() == 0) {
                partStringSep = "/";
                partTypesStringSep = ":";
            }
        }

        if (partString.length() > 0) {
            schema.setProperty(META_TABLE_PARTITION_COLUMNS, partString);
            schema.setProperty(META_TABLE_PARTITION_COLUMN_TYPES, partTypesString);
        }

        if (tableParameters != null) {
            for (Entry<String, String> entry : tableParameters.entrySet()) {
                // add non-null parameters to the schema
                if (entry.getValue() != null) {
                    schema.setProperty(entry.getKey(), entry.getValue());
                }
            }
        }

        return schema;
    }
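
    /*
     * Illustrative note, added for this write-up rather than taken from the original
     * source: for a hypothetical table "db.t" with one data column "id" of Hive type
     * "bigint" and one partition key "ds" of Hive type "string", the Properties built
     * above would contain entries such as name=db.t, columns=id, columns.types=bigint,
     * partition_columns=ds and partition_columns.types=string (the key strings come
     * from hive_metastoreConstants).
     */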

    public static Properties getHiveSchema(Map<String, String> serdeParameters, Map<String, String> tableParameters)
    {
        Properties schema = new Properties();
        for (Entry<String, String> param : serdeParameters.entrySet()) {
            schema.setProperty(param.getKey(), (param.getValue() != null) ? param.getValue() : "");
        }
        for (Entry<String, String> entry : tableParameters.entrySet()) {
            // add non-null parameters to the schema
            if (entry.getValue() != null) {
                schema.setProperty(entry.getKey(), entry.getValue());
            }
        }
        return schema;
    }

    /**
     * Recreates partition schema based on the table schema and the column
     * coercions map.
     * <p>
     * partitionColumnCount is needed to handle cases when the partition
     * has fewer columns than the table.
     * <p>
     * If the partition has more columns than the table does, the partitionSchemaDifference
     * map is expected to contain information for the missing columns.
     */
    public static List<Column> reconstructPartitionSchema(List<Column> tableSchema, int partitionColumnCount, Map<Integer, Column> partitionSchemaDifference, Optional<Map<Integer, Integer>> tableToPartitionColumns)
    {
        ImmutableList.Builder<Column> columns = ImmutableList.builder();

        if (tableToPartitionColumns.isPresent()) {
            Map<Integer, Integer> partitionToTableColumns = tableToPartitionColumns.get()
                    .entrySet()
                    .stream()
                    .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
            for (int i = 0; i < partitionColumnCount; i++) {
                Column column = partitionSchemaDifference.get(i);
                if (column == null) {
                    column = tableSchema.get(partitionToTableColumns.get(i));
                }
                columns.add(column);
            }
        }
        else {
            for (int i = 0; i < partitionColumnCount; i++) {
                Column column = partitionSchemaDifference.get(i);
                if (column == null) {
                    checkArgument(
                            i < tableSchema.size(),
                            "column descriptor for column with hiveColumnIndex %s not found: tableSchema: %s, partitionSchemaDifference: %s",
                            i,
                            tableSchema,
                            partitionSchemaDifference);
                    column = tableSchema.get(i);
                }
                columns.add(column);
            }
        }

        return columns.build();
    }
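
    /*
     * Worked example, added for illustration (not in the original source): given a table
     * schema [c0, c1, c2], partitionColumnCount = 2, an empty partitionSchemaDifference,
     * and no tableToPartitionColumns mapping, the fallback branch above returns [c0, c1];
     * if partitionSchemaDifference instead maps index 1 to a coerced column c1', the
     * result is [c0, c1'].
     */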

    public static ProtectMode getProtectMode(Partition partition)
    {
        return getProtectMode(partition.getParameters());
    }

    public static ProtectMode getProtectMode(Table table)
    {
        return getProtectMode(table.getParameters());
    }

    public static String makePartName(List<Column> partitionColumns, List<String> values)
    {
        checkArgument(partitionColumns.size() == values.size(), "Partition value count does not match the partition column count");
        checkArgument(values.stream().allMatch(Objects::nonNull), "partitionValue must not have null elements");

        List<String> partitionColumnNames = partitionColumns.stream().map(Column::getName).collect(toList());
        return FileUtils.makePartName(partitionColumnNames, values);
    }

    public static String getPartitionLocation(Table table, Optional<Partition> partition)
    {
        if (!partition.isPresent()) {
            return table.getStorage().getLocation();
        }
        return partition.get().getStorage().getLocation();
    }

    private static String toThriftDdl(String structName, List<Column> columns)
    {
        // Mimics function in Hive:
        // MetaStoreUtils.getDDLFromFieldSchema(String, List<FieldSchema>)
        StringBuilder ddl = new StringBuilder();
        ddl.append("struct ");
        ddl.append(structName);
        ddl.append(" { ");
        boolean first = true;
        for (Column column : columns) {
            if (first) {
                first = false;
            }
            else {
                ddl.append(", ");
            }
            ddl.append(typeToThriftType(column.getType().getHiveTypeName().toString()));
            ddl.append(' ');
            ddl.append(column.getName());
        }
        ddl.append("}");
        return ddl.toString();
    }

    private static ProtectMode getProtectMode(Map<String, String> parameters)
    {
        if (!parameters.containsKey(ProtectMode.PARAMETER_NAME)) {
            return new ProtectMode();
        }
        else {
            return getProtectModeFromString(parameters.get(ProtectMode.PARAMETER_NAME));
        }
    }

    public static void verifyOnline(SchemaTableName tableName, Optional<String> partitionName, ProtectMode protectMode, Map<String, String> parameters)
    {
        if (protectMode.offline) {
            if (partitionName.isPresent()) {
                throw new PartitionOfflineException(tableName, partitionName.get(), false, null);
            }
            throw new TableOfflineException(tableName, false, null);
        }

        String prestoOffline = parameters.get(PRESTO_OFFLINE);
        if (!isNullOrEmpty(prestoOffline)) {
            if (partitionName.isPresent()) {
                throw new PartitionOfflineException(tableName, partitionName.get(), true, prestoOffline);
            }
            throw new TableOfflineException(tableName, true, prestoOffline);
        }
    }

    public static void verifyCanDropColumn(ExtendedHiveMetastore metastore, MetastoreContext metastoreContext, String databaseName, String tableName, String columnName)
    {
        Table table = metastore.getTable(metastoreContext, databaseName, tableName)
                .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));

        if (table.getPartitionColumns().stream().anyMatch(column -> column.getName().equals(columnName))) {
            throw new PrestoException(NOT_SUPPORTED, "Cannot drop partition columns");
        }
        if (table.getDataColumns().size() <= 1) {
            throw new PrestoException(NOT_SUPPORTED, "Cannot drop the only non-partition column in a table");
        }
    }

    public static FileSystem getFileSystem(HdfsEnvironment hdfsEnvironment, HdfsContext context, Path path)
    {
        try {
            return hdfsEnvironment.getFileSystem(context, path);
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_FILESYSTEM_ERROR, format("Error getting file system. Path: %s", path), e);
        }
    }

    public static void renameFile(FileSystem fileSystem, Path source, Path target)
    {
        try {
            if (fileSystem.exists(target) || !fileSystem.rename(source, target)) {
                throw new PrestoException(HIVE_FILESYSTEM_ERROR, getRenameErrorMessage(source, target));
            }
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_FILESYSTEM_ERROR, getRenameErrorMessage(source, target), e);
        }
    }

    public static Map<String, String> toPartitionNamesAndValues(String partitionName)
    {
        ImmutableMap.Builder<String, String> resultBuilder = ImmutableMap.builder();
        int index = 0;
        int length = partitionName.length();

        while (index < length) {
            int keyStart = index;
            while (index < length && partitionName.charAt(index) != '=') {
                index++;
            }
            checkState(index < length, "Invalid partition spec: " + partitionName);
            int keyEnd = index++;

            int valueStart = index;
            while (index < length && partitionName.charAt(index) != '/') {
                index++;
            }
            int valueEnd = index++;

            String key = unescapePathName(partitionName.substring(keyStart, keyEnd));
            String value = unescapePathName(partitionName.substring(valueStart, valueEnd));
            resultBuilder.put(key, value);
        }
        return resultBuilder.build();
    }

    public static List<String> toPartitionValues(String partitionName)
    {
        // mimics Warehouse.makeValsFromName
        ImmutableList.Builder<String> resultBuilder = ImmutableList.builder();
        int start = 0;
        while (true) {
            while (start < partitionName.length() && partitionName.charAt(start) != '=') {
                start++;
            }
            start++;
            int end = start;
            while (end < partitionName.length() && partitionName.charAt(end) != '/') {
                end++;
            }
            if (start > partitionName.length()) {
                break;
            }
            resultBuilder.add(unescapePathName(partitionName.substring(start, end)));
            start = end + 1;
        }
        return resultBuilder.build();
    }
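
    /*
     * Illustration, added (not in the original source): for the partition name
     * "ds=2020-01-01/country=US", toPartitionValues returns ["2020-01-01", "US"] and
     * toPartitionNamesAndValues returns {ds=2020-01-01, country=US}; percent-escaped
     * characters such as "%2F" are decoded by unescapePathName.
     */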

    public static List<String> extractPartitionValues(String partitionName)
    {
        return extractPartitionValues(partitionName, Optional.empty());
    }

    public static List<String> extractPartitionValues(String partitionName, Optional<List<String>> partitionColumnNames)
    {
        ImmutableList.Builder<String> values = ImmutableList.builder();
        ImmutableList.Builder<String> keys = ImmutableList.builder();

        boolean inKey = true;
        int valueStart = -1;
        int keyStart = 0;
        int keyEnd = -1;
        for (int i = 0; i < partitionName.length(); i++) {
            char current = partitionName.charAt(i);
            if (inKey) {
                checkArgument(current != '/', "Invalid partition spec: %s", partitionName);
                if (current == '=') {
                    inKey = false;
                    valueStart = i + 1;
                    keyEnd = i;
                }
            }
            else if (current == '/') {
                checkArgument(valueStart != -1, "Invalid partition spec: %s", partitionName);
                values.add(unescapePathName(partitionName.substring(valueStart, i)));
                keys.add(unescapePathName(partitionName.substring(keyStart, keyEnd)));
                inKey = true;
                valueStart = -1;
                keyStart = i + 1;
            }
        }
        checkArgument(!inKey, "Invalid partition spec: %s", partitionName);
        values.add(unescapePathName(partitionName.substring(valueStart, partitionName.length())));
        keys.add(unescapePathName(partitionName.substring(keyStart, keyEnd)));
        if (!partitionColumnNames.isPresent() || partitionColumnNames.get().size() == 1) {
            return values.build();
        }
        ImmutableList.Builder<String> orderedValues = ImmutableList.builder();
        partitionColumnNames.get()
                .forEach(columnName -> orderedValues.add(values.build().get(keys.build().indexOf(columnName))));
        return orderedValues.build();
    }
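
    /*
     * Illustration, added (not in the original source): extractPartitionValues("b=2/a=1",
     * Optional.of(ImmutableList.of("a", "b"))) returns ["1", "2"], because the extracted
     * values are reordered to match the supplied partition column order.
     */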

    public static List<String> createPartitionValues(List<Type> partitionColumnTypes, Page partitionColumns, int position)
    {
        ImmutableList.Builder<String> partitionValues = ImmutableList.builder();
        for (int field = 0; field < partitionColumns.getChannelCount(); field++) {
            Object value = getField(partitionColumnTypes.get(field), partitionColumns.getBlock(field), position);
            if (value == null) {
                partitionValues.add(HIVE_DEFAULT_DYNAMIC_PARTITION);
            }
            else {
                String valueString = value.toString();
                if (!CharMatcher.inRange((char) 0x20, (char) 0x7E).matchesAllOf(valueString)) {
                    throw new PrestoException(HIVE_INVALID_PARTITION_VALUE,
                            "Hive partition keys can only contain printable ASCII characters (0x20 - 0x7E). Invalid value: " +
                                    base16().withSeparator(" ", 2).encode(valueString.getBytes(UTF_8)));
                }
                partitionValues.add(valueString);
            }
        }
        return partitionValues.build();
    }

    public static Object getField(Type type, Block block, int position)
    {
        if (block.isNull(position)) {
            return null;
        }
        if (BooleanType.BOOLEAN.equals(type)) {
            return type.getBoolean(block, position);
        }
        if (BigintType.BIGINT.equals(type)) {
            return type.getLong(block, position);
        }
        if (IntegerType.INTEGER.equals(type)) {
            return (int) type.getLong(block, position);
        }
        if (SmallintType.SMALLINT.equals(type)) {
            return (short) type.getLong(block, position);
        }
        if (TinyintType.TINYINT.equals(type)) {
            return (byte) type.getLong(block, position);
        }
        if (RealType.REAL.equals(type)) {
            return intBitsToFloat((int) type.getLong(block, position));
        }
        if (DoubleType.DOUBLE.equals(type)) {
            return type.getDouble(block, position);
        }
        if (type instanceof VarcharType) {
            return new Text(type.getSlice(block, position).getBytes());
        }
        if (type instanceof CharType) {
            CharType charType = (CharType) type;
            return new Text(padEnd(type.getSlice(block, position).toStringUtf8(), charType.getLength(), ' '));
        }
        if (VarbinaryType.VARBINARY.equals(type)) {
            return type.getSlice(block, position).getBytes();
        }
        if (DateType.DATE.equals(type)) {
            long days = type.getLong(block, position);
            return new Date(UTC.getMillisKeepLocal(DateTimeZone.getDefault(), TimeUnit.DAYS.toMillis(days)));
        }
        if (TimestampType.TIMESTAMP.equals(type)) {
            long millisUtc = type.getLong(block, position);
            return new Timestamp(millisUtc);
        }
        if (type instanceof DecimalType) {
            DecimalType decimalType = (DecimalType) type;
            return getHiveDecimal(decimalType, block, position);
        }
        if (isArrayType(type)) {
            Type elementType = type.getTypeParameters().get(0);
            Block arrayBlock = block.getBlock(position);

            List<Object> list = new ArrayList<>(arrayBlock.getPositionCount());
            for (int i = 0; i < arrayBlock.getPositionCount(); i++) {
                Object element = getField(elementType, arrayBlock, i);
                list.add(element);
            }
            return Collections.unmodifiableList(list);
        }
        if (isMapType(type)) {
            Type keyType = type.getTypeParameters().get(0);
            Type valueType = type.getTypeParameters().get(1);
            Block mapBlock = block.getBlock(position);

            Map<Object, Object> map = new HashMap<>();
            for (int i = 0; i < mapBlock.getPositionCount(); i += 2) {
                Object key = getField(keyType, mapBlock, i);
                Object value = getField(valueType, mapBlock, i + 1);
                map.put(key, value);
            }
            return Collections.unmodifiableMap(map);
        }
        if (isRowType(type)) {
            Block rowBlock = block.getBlock(position);

            List<Type> fieldTypes = type.getTypeParameters();
            checkCondition(fieldTypes.size() == rowBlock.getPositionCount(), StandardErrorCode.GENERIC_INTERNAL_ERROR, "Expected row value field count does not match type field count");

            List<Object> row = new ArrayList<>(rowBlock.getPositionCount());
            for (int i = 0; i < rowBlock.getPositionCount(); i++) {
                Object element = getField(fieldTypes.get(i), rowBlock, i);
                row.add(element);
            }
            return Collections.unmodifiableList(row);
        }
        throw new PrestoException(NOT_SUPPORTED, "unsupported type: " + type);
    }

    public static HiveDecimal getHiveDecimal(DecimalType decimalType, Block block, int position)
    {
        BigInteger unscaledValue;
        if (decimalType.isShort()) {
            unscaledValue = BigInteger.valueOf(decimalType.getLong(block, position));
        }
        else {
            unscaledValue = Decimals.decodeUnscaledValue(decimalType.getSlice(block, position));
        }
        return HiveDecimal.create(unscaledValue, decimalType.getScale());
    }

    public static void createDirectory(HdfsContext context, HdfsEnvironment hdfsEnvironment, Path path)
    {
        try {
            if (!hdfsEnvironment.getFileSystem(context, path).mkdirs(path, ALL_PERMISSIONS)) {
                throw new IOException("mkdirs returned false");
            }
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_FILESYSTEM_ERROR, "Failed to create directory: " + path, e);
        }

        // explicitly set permission since the default umask overrides it on creation
        try {
            hdfsEnvironment.getFileSystem(context, path).setPermission(path, ALL_PERMISSIONS);
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_FILESYSTEM_ERROR, "Failed to set permission on directory: " + path, e);
        }
    }

    public static boolean pathExists(HdfsContext context, HdfsEnvironment hdfsEnvironment, Path path)
    {
        try {
            return hdfsEnvironment.getFileSystem(context, path).exists(path);
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_FILESYSTEM_ERROR, "Failed checking path: " + path, e);
        }
    }

    public static boolean isPrestoView(Table table)
    {
        return "true".equals(table.getParameters().get(PRESTO_VIEW_FLAG));
    }

    public static boolean isPrestoMaterializedView(Table table)
    {
        if ("true".equals(table.getParameters().get(PRESTO_MATERIALIZED_VIEW_FLAG))) {
            checkState(
                    !table.getViewOriginalText().map(Strings::isNullOrEmpty).orElse(true),
                    "viewOriginalText field is not set for the Table metadata of materialized view %s.",
                    table.getTableName());
            return true;
        }
        return false;
    }

    private static String getRenameErrorMessage(Path source, Path target)
    {
        return format("Error moving data files from %s to final location %s", source, target);
    }

    public static List<String> convertPredicateToParts(Map<Column, Domain> partitionPredicates)
    {
        List<String> filter = new ArrayList<>();

        for (Entry<Column, Domain> partitionPredicate : partitionPredicates.entrySet()) {
            Domain domain = partitionPredicate.getValue();
            if (!domain.isAll()) {
                if (domain.isNullableSingleValue()) {
                    Object value = domain.getNullableSingleValue();
                    Type type = domain.getType();
                    filter.add(convertRawValueToString(value, type));
                }
                else {
                    filter.add(PARTITION_VALUE_WILDCARD);
                }
            }
            else {
                filter.add(PARTITION_VALUE_WILDCARD);
            }
        }

        return filter;
    }

    public static String convertRawValueToString(Object value, Type type)
    {
        String val;
        if (value == null) {
            val = HIVE_DEFAULT_DYNAMIC_PARTITION;
        }
        else if (type instanceof CharType) {
            Slice slice = (Slice) value;
            val = padSpaces(slice, type).toStringUtf8();
        }
        else if (type instanceof VarcharType) {
            Slice slice = (Slice) value;
            val = slice.toStringUtf8();
        }
        else if (type instanceof DecimalType && !((DecimalType) type).isShort()) {
            Slice slice = (Slice) value;
            val = Decimals.toString(slice, ((DecimalType) type).getScale());
        }
        else if (type instanceof DecimalType && ((DecimalType) type).isShort()) {
            val = Decimals.toString((long) value, ((DecimalType) type).getScale());
        }
        else if (type instanceof DateType) {
            DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.date().withZoneUTC();
            val = dateTimeFormatter.print(TimeUnit.DAYS.toMillis((long) value));
        }
        else if (type instanceof TimestampType) {
            // we don't have time zone info, so just add a wildcard
            val = PARTITION_VALUE_WILDCARD;
        }
        else if (type instanceof TinyintType
                || type instanceof SmallintType
                || type instanceof IntegerType
                || type instanceof BigintType
                || type instanceof DoubleType
                || type instanceof RealType
                || type instanceof BooleanType) {
            val = value.toString();
        }
        else {
            throw new PrestoException(NOT_SUPPORTED, format("Unsupported partition key type: %s", type.getDisplayName()));
        }
        return val;
    }

    /**
     * Hive calculates NDV considering null as a distinct value
     */
    public static OptionalLong fromMetastoreDistinctValuesCount(OptionalLong distinctValuesCount, OptionalLong nullsCount, OptionalLong rowCount)
    {
        if (distinctValuesCount.isPresent() && nullsCount.isPresent() && rowCount.isPresent()) {
            return OptionalLong.of(fromMetastoreDistinctValuesCount(distinctValuesCount.getAsLong(), nullsCount.getAsLong(), rowCount.getAsLong()));
        }
        return OptionalLong.empty();
    }

    public static long fromMetastoreDistinctValuesCount(long distinctValuesCount, long nullsCount, long rowCount)
    {
        long nonNullsCount = rowCount - nullsCount;
        if (nullsCount > 0 && distinctValuesCount > 0) {
            distinctValuesCount--;
        }

        // normalize distinctValuesCount in case there is a non null element
        if (nonNullsCount > 0 && distinctValuesCount == 0) {
            distinctValuesCount = 1;
        }

        // the metastore may store an estimate, so the value stored may be higher than the total number of rows
        if (distinctValuesCount > nonNullsCount) {
            return nonNullsCount;
        }
        return distinctValuesCount;
    }
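
    /*
     * Worked example, added (not in the original source): with distinctValuesCount = 3
     * (Hive counted null as one of the distinct values), nullsCount = 2 and rowCount = 10,
     * the method above returns 2: the null "value" is subtracted, and 2 does not exceed
     * the 8 non-null rows.
     */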

    public static Set<ColumnStatisticType> getSupportedColumnStatistics(Type type)
    {
        if (type.equals(BOOLEAN)) {
            return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, NUMBER_OF_TRUE_VALUES);
        }
        if (isNumericType(type) || type.equals(DATE) || type.equals(TIMESTAMP)) {
            // TODO #7122 support non-legacy TIMESTAMP
            return ImmutableSet.of(MIN_VALUE, MAX_VALUE, NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES);
        }
        if (isVarcharType(type) || isCharType(type)) {
            // TODO Collect MIN,MAX once it is used by the optimizer
            return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, NUMBER_OF_DISTINCT_VALUES, TOTAL_SIZE_IN_BYTES, MAX_VALUE_SIZE_IN_BYTES);
        }
        if (type.equals(VARBINARY)) {
            return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, TOTAL_SIZE_IN_BYTES, MAX_VALUE_SIZE_IN_BYTES);
        }
        if (type instanceof ArrayType || type instanceof RowType || type instanceof MapType) {
            return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, TOTAL_SIZE_IN_BYTES);
        }
        if (type instanceof TypeWithName) {
            return getSupportedColumnStatistics(((TypeWithName) type).getType());
        }
        if (type instanceof DistinctType) {
            Set<ColumnStatisticType> stats = getSupportedColumnStatistics(((DistinctType) type).getBaseType());
            if (!type.isOrderable()) {
                stats = stats.stream().filter(stat -> !ORDERABLE_STATS.contains(stat)).collect(toImmutableSet());
            }
            return stats;
        }
        if (type instanceof EnumType) {
            return getSupportedColumnStatistics(((EnumType) type).getValueType());
        }

        // Throwing here to make sure this method is updated when a new type is added in Hive connector
        throw new IllegalArgumentException("Unsupported type: " + type);
    }

    public static HiveBasicStatistics getHiveBasicStatistics(Map<String, String> parameters)
    {
        OptionalLong numFiles = parse(parameters.get(NUM_FILES));
        OptionalLong numRows = parse(parameters.get(NUM_ROWS));
        OptionalLong inMemoryDataSizeInBytes = parse(parameters.get(RAW_DATA_SIZE));
        OptionalLong onDiskDataSizeInBytes = parse(parameters.get(TOTAL_SIZE));
        return new HiveBasicStatistics(numFiles, numRows, inMemoryDataSizeInBytes, onDiskDataSizeInBytes);
    }

    private static OptionalLong parse(@Nullable String parameterValue)
    {
        if (parameterValue == null) {
            return OptionalLong.empty();
        }
        Long longValue = Longs.tryParse(parameterValue);
        if (longValue == null || longValue < 0) {
            return OptionalLong.empty();
        }
        return OptionalLong.of(longValue);
    }

    public static Map<String, String> updateStatisticsParameters(Map<String, String> parameters, HiveBasicStatistics statistics)
    {
        ImmutableMap.Builder<String, String> result = ImmutableMap.builder();

        parameters.forEach((key, value) -> {
            if (!STATS_PROPERTIES.contains(key)) {
                result.put(key, value);
            }
        });

        statistics.getFileCount().ifPresent(count -> result.put(NUM_FILES, Long.toString(count)));
        statistics.getRowCount().ifPresent(count -> result.put(NUM_ROWS, Long.toString(count)));
        statistics.getInMemoryDataSizeInBytes().ifPresent(size -> result.put(RAW_DATA_SIZE, Long.toString(size)));
        statistics.getOnDiskDataSizeInBytes().ifPresent(size -> result.put(TOTAL_SIZE, Long.toString(size)));

        return result.build();
    }
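
    /*
     * Illustration, added (not in the original source): because transient_lastDdlTime is in
     * STATS_PROPERTIES, updateStatisticsParameters drops it, so the metastore assigns a fresh
     * DDL time on write instead of inheriting the stale one. For example, parameters
     * {comment=x, numRows=5, transient_lastDdlTime=123} combined with statistics whose row
     * count is 7 become {comment=x, numRows=7}.
     */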

    public static Optional<String> getMetastoreHeaders(ConnectorSession session)
    {
        try {
            return Optional.ofNullable(session.getProperty(METASTORE_HEADERS, String.class));
        }
        catch (Exception e) {
            return Optional.empty();
        }
    }

    public static boolean isUserDefinedTypeEncodingEnabled(ConnectorSession session)
    {
        try {
            return session.getProperty(USER_DEFINED_TYPE_ENCODING_ENABLED, Boolean.class);
        }
        catch (Exception e) {
            return false;
        }
    }

    public static boolean isManagedTable(String tableType)
    {
        return tableType.equals(MANAGED_TABLE.name());
    }

    public static void deleteDirectoryRecursively(HdfsContext context, HdfsEnvironment hdfsEnvironment, Path path, boolean recursive)
    {
        try {
            hdfsEnvironment.getFileSystem(context, path).delete(path, recursive);
        }
        catch (IOException | RuntimeException e) {
            // don't fail if unable to delete path
            log.warn(e, "Failed to delete path: " + path.toString());
        }
    }

    public static boolean isDeltaLakeTable(Table table)
    {
        return isDeltaLakeTable(table.getParameters());
    }

    public static boolean isDeltaLakeTable(Map<String, String> tableParameters)
    {
        return tableParameters.containsKey(SPARK_TABLE_PROVIDER_KEY)
                && tableParameters.get(SPARK_TABLE_PROVIDER_KEY).toLowerCase(ENGLISH).equals(DELTA_LAKE_PROVIDER);
    }

    public static boolean isIcebergTable(Table table)
    {
        return isIcebergTable(table.getParameters());
    }

    public static boolean isIcebergTable(Map<String, String> tableParameters)
    {
        return ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(tableParameters.get(ICEBERG_TABLE_TYPE_NAME));
    }

    public static PrincipalPrivileges buildInitialPrivilegeSet(String tableOwner)
    {
        PrestoPrincipal owner = new PrestoPrincipal(USER, tableOwner);
        return new PrincipalPrivileges(
                ImmutableMultimap.<String, HivePrivilegeInfo>builder()
                        .put(tableOwner, new HivePrivilegeInfo(SELECT, true, owner, owner))
                        .put(tableOwner, new HivePrivilegeInfo(INSERT, true, owner, owner))
                        .put(tableOwner, new HivePrivilegeInfo(UPDATE, true, owner, owner))
                        .put(tableOwner, new HivePrivilegeInfo(DELETE, true, owner, owner))
                        .build(),
                ImmutableMultimap.of());
    }

    public static Map<String, String> createViewProperties(ConnectorSession session, String prestoVersion)
    {
        return ImmutableMap.<String, String>builder()
                .put(TABLE_COMMENT, PRESTO_VIEW_COMMENT)
                .put(PRESTO_VIEW_FLAG, "true")
                .put(PRESTO_VERSION_NAME, prestoVersion)
                .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                .build();
    }

    public static Table createTableObjectForViewCreation(ConnectorSession session, ConnectorTableMetadata viewMetadata, Map<String, String> properties, TypeTranslator typeTranslator, MetastoreContext metastoreContext, String encodedViewData)
    {
        List<Column> columns = new ArrayList<>();
        ColumnConverter columnConverter = metastoreContext.getColumnConverter();

        for (ColumnMetadata column : viewMetadata.getColumns()) {
            try {
                HiveType hiveType = toHiveType(typeTranslator, column.getType());
                columns.add(new Column(column.getName(), hiveType, Optional.ofNullable(column.getComment()), columnConverter.getTypeMetadata(hiveType, column.getType().getTypeSignature())));
            }
            catch (PrestoException e) {
                // if a view uses any unsupported hive types, include only a dummy column value
                if (e.getErrorCode().equals(NOT_SUPPORTED.toErrorCode())) {
                    columns = ImmutableList.of(
                            new Column(
                                    "dummy",
                                    HIVE_STRING,
                                    Optional.of(format("Using dummy because column %s uses unsupported Hive type %s ", column.getName(), column.getType())),
                                    Optional.empty()));
                    break;
                }
                else {
                    throw e;
                }
            }
        }

        SchemaTableName viewName = viewMetadata.getTable();

        Table.Builder tableBuilder = Table.builder()
                .setDatabaseName(viewName.getSchemaName())
                .setTableName(viewName.getTableName())
                .setOwner(session.getUser())
                .setTableType(VIRTUAL_VIEW)
                .setDataColumns(columns)
                .setPartitionColumns(ImmutableList.of())
                .setParameters(properties)
                .setViewOriginalText(Optional.of(encodedViewData))
                .setViewExpandedText(Optional.of(PRESTO_VIEW_EXPANDED_TEXT_MARKER));

        tableBuilder.getStorageBuilder()
                .setStorageFormat(VIEW_STORAGE_FORMAT)
                .setLocation("");

        return tableBuilder.build();
    }

    public static void verifyAndPopulateViews(Table table, SchemaTableName schemaTableName, String decodedData, ImmutableMap.Builder<SchemaTableName, ConnectorViewDefinition> views)
    {
        views.put(schemaTableName, new ConnectorViewDefinition(
                schemaTableName,
                Optional.ofNullable(table.getOwner()),
                decodedData));
    }

    public static void checkIfNullView(ConnectorViewDefinition view, SchemaTableName viewName)
    {
        if (view == null) {
            throw new ViewNotFoundException(viewName);
        }
    }

    public static List<PartitionNameWithVersion> getPartitionNamesWithEmptyVersion(Collection<String> partitions)
    {
        return partitions.stream()
                .map(partition -> new PartitionNameWithVersion(partition, Optional.empty()))
                .collect(toImmutableList());
    }

    public static List<String> getPartitionNames(Collection<PartitionNameWithVersion> partitionNameWithVersions)
    {
        return partitionNameWithVersions.stream()
                .map(PartitionNameWithVersion::getPartitionName)
                .collect(toImmutableList());
    }
}