
io.prestosql.plugin.hive.HiveMetadata

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.hive;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Suppliers;
import com.google.common.base.VerifyException;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import io.airlift.json.JsonCodec;
import io.airlift.slice.Slice;
import io.prestosql.plugin.base.CatalogName;
import io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext;
import io.prestosql.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation;
import io.prestosql.plugin.hive.LocationService.WriteInfo;
import io.prestosql.plugin.hive.authentication.HiveIdentity;
import io.prestosql.plugin.hive.metastore.Column;
import io.prestosql.plugin.hive.metastore.Database;
import io.prestosql.plugin.hive.metastore.HiveColumnStatistics;
import io.prestosql.plugin.hive.metastore.HivePrincipal;
import io.prestosql.plugin.hive.metastore.Partition;
import io.prestosql.plugin.hive.metastore.PrincipalPrivileges;
import io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore;
import io.prestosql.plugin.hive.metastore.SortingColumn;
import io.prestosql.plugin.hive.metastore.StorageFormat;
import io.prestosql.plugin.hive.metastore.Table;
import io.prestosql.plugin.hive.security.AccessControlMetadata;
import io.prestosql.plugin.hive.statistics.HiveStatisticsProvider;
import io.prestosql.plugin.hive.util.HiveUtil;
import io.prestosql.plugin.hive.util.HiveWriteUtils;
import io.prestosql.spi.ErrorType;
import io.prestosql.spi.PrestoException;
import io.prestosql.spi.StandardErrorCode;
import io.prestosql.spi.block.Block;
import io.prestosql.spi.connector.Assignment;
import io.prestosql.spi.connector.CatalogSchemaName;
import io.prestosql.spi.connector.ColumnHandle;
import io.prestosql.spi.connector.ColumnMetadata;
import io.prestosql.spi.connector.ConnectorInsertTableHandle;
import io.prestosql.spi.connector.ConnectorNewTableLayout;
import io.prestosql.spi.connector.ConnectorOutputMetadata;
import io.prestosql.spi.connector.ConnectorOutputTableHandle;
import io.prestosql.spi.connector.ConnectorPartitioningHandle;
import io.prestosql.spi.connector.ConnectorSession;
import io.prestosql.spi.connector.ConnectorTableHandle;
import io.prestosql.spi.connector.ConnectorTableMetadata;
import io.prestosql.spi.connector.ConnectorTablePartitioning;
import io.prestosql.spi.connector.ConnectorTableProperties;
import io.prestosql.spi.connector.ConnectorViewDefinition;
import io.prestosql.spi.connector.Constraint;
import io.prestosql.spi.connector.ConstraintApplicationResult;
import io.prestosql.spi.connector.DiscretePredicates;
import io.prestosql.spi.connector.InMemoryRecordSet;
import io.prestosql.spi.connector.ProjectionApplicationResult;
import io.prestosql.spi.connector.SchemaNotFoundException;
import io.prestosql.spi.connector.SchemaTableName;
import io.prestosql.spi.connector.SchemaTablePrefix;
import io.prestosql.spi.connector.SystemTable;
import io.prestosql.spi.connector.TableNotFoundException;
import io.prestosql.spi.connector.ViewNotFoundException;
import io.prestosql.spi.expression.ConnectorExpression;
import io.prestosql.spi.expression.Variable;
import io.prestosql.spi.predicate.Domain;
import io.prestosql.spi.predicate.NullableValue;
import io.prestosql.spi.predicate.TupleDomain;
import io.prestosql.spi.security.GrantInfo;
import io.prestosql.spi.security.PrestoPrincipal;
import io.prestosql.spi.security.Privilege;
import io.prestosql.spi.security.RoleGrant;
import io.prestosql.spi.statistics.ColumnStatisticMetadata;
import io.prestosql.spi.statistics.ColumnStatisticType;
import io.prestosql.spi.statistics.ComputedStatistics;
import io.prestosql.spi.statistics.TableStatisticType;
import io.prestosql.spi.statistics.TableStatistics;
import io.prestosql.spi.statistics.TableStatisticsMetadata;
import io.prestosql.spi.type.TimestampType;
import io.prestosql.spi.type.Type;
import io.prestosql.spi.type.TypeManager;
import io.prestosql.spi.type.VarcharType;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.OpenCSVSerde;
import org.apache.hadoop.mapred.JobConf;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.Properties;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

import static com.google.common.base.MoreObjects.firstNonNull;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Streams.stream;
import static io.prestosql.plugin.hive.HiveAnalyzeProperties.getColumnNames;
import static io.prestosql.plugin.hive.HiveAnalyzeProperties.getPartitionList;
import static io.prestosql.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns;
import static io.prestosql.plugin.hive.HiveApplyProjectionUtil.find;
import static io.prestosql.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables;
import static io.prestosql.plugin.hive.HiveBasicStatistics.createEmptyStatistics;
import static io.prestosql.plugin.hive.HiveBasicStatistics.createZeroStatistics;
import static io.prestosql.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME;
import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY;
import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR;
import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED;
import static io.prestosql.plugin.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME;
import static io.prestosql.plugin.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME;
import static io.prestosql.plugin.hive.HiveColumnHandle.PARTITION_COLUMN_NAME;
import static io.prestosql.plugin.hive.HiveColumnHandle.PATH_COLUMN_NAME;
import static io.prestosql.plugin.hive.HiveColumnHandle.createBaseColumn;
import static io.prestosql.plugin.hive.HiveColumnHandle.updateRowIdHandle;
import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH;
import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED;
import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA;
import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_UNKNOWN_ERROR;
import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT;
import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR;
import static io.prestosql.plugin.hive.HivePartitionManager.extractPartitionValues;
import static io.prestosql.plugin.hive.HiveSessionProperties.getCompressionCodec;
import static io.prestosql.plugin.hive.HiveSessionProperties.getHiveStorageFormat;
import static io.prestosql.plugin.hive.HiveSessionProperties.getTimestampPrecision;
import static io.prestosql.plugin.hive.HiveSessionProperties.isBucketExecutionEnabled;
import static io.prestosql.plugin.hive.HiveSessionProperties.isCollectColumnStatisticsOnWrite;
import static io.prestosql.plugin.hive.HiveSessionProperties.isCreateEmptyBucketFiles;
import static io.prestosql.plugin.hive.HiveSessionProperties.isOptimizedMismatchedBucketCount;
import static io.prestosql.plugin.hive.HiveSessionProperties.isProjectionPushdownEnabled;
import static io.prestosql.plugin.hive.HiveSessionProperties.isRespectTableFormat;
import static io.prestosql.plugin.hive.HiveSessionProperties.isSortedWritingEnabled;
import static io.prestosql.plugin.hive.HiveSessionProperties.isStatisticsEnabled;
import static io.prestosql.plugin.hive.HiveTableProperties.ANALYZE_COLUMNS_PROPERTY;
import static io.prestosql.plugin.hive.HiveTableProperties.AVRO_SCHEMA_URL;
import static io.prestosql.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY;
import static io.prestosql.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY;
import static io.prestosql.plugin.hive.HiveTableProperties.CSV_ESCAPE;
import static io.prestosql.plugin.hive.HiveTableProperties.CSV_QUOTE;
import static io.prestosql.plugin.hive.HiveTableProperties.CSV_SEPARATOR;
import static io.prestosql.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY;
import static io.prestosql.plugin.hive.HiveTableProperties.NULL_FORMAT_PROPERTY;
import static io.prestosql.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS;
import static io.prestosql.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP;
import static io.prestosql.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY;
import static io.prestosql.plugin.hive.HiveTableProperties.SKIP_FOOTER_LINE_COUNT;
import static io.prestosql.plugin.hive.HiveTableProperties.SKIP_HEADER_LINE_COUNT;
import static io.prestosql.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY;
import static io.prestosql.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY;
import static io.prestosql.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR;
import static io.prestosql.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR_ESCAPE;
import static io.prestosql.plugin.hive.HiveTableProperties.getAnalyzeColumns;
import static io.prestosql.plugin.hive.HiveTableProperties.getAvroSchemaUrl;
import static io.prestosql.plugin.hive.HiveTableProperties.getBucketProperty;
import static io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation;
import static io.prestosql.plugin.hive.HiveTableProperties.getFooterSkipCount;
import static io.prestosql.plugin.hive.HiveTableProperties.getHeaderSkipCount;
import static io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat;
import static io.prestosql.plugin.hive.HiveTableProperties.getNullFormat;
import static io.prestosql.plugin.hive.HiveTableProperties.getOrcBloomFilterColumns;
import static io.prestosql.plugin.hive.HiveTableProperties.getOrcBloomFilterFpp;
import static io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy;
import static io.prestosql.plugin.hive.HiveTableProperties.getSingleCharacterProperty;
import static io.prestosql.plugin.hive.HiveTableProperties.isTransactional;
import static io.prestosql.plugin.hive.HiveType.HIVE_STRING;
import static io.prestosql.plugin.hive.HiveType.toHiveType;
import static io.prestosql.plugin.hive.HiveWriterFactory.computeBucketedFileName;
import static io.prestosql.plugin.hive.PartitionUpdate.UpdateMode.APPEND;
import static io.prestosql.plugin.hive.PartitionUpdate.UpdateMode.NEW;
import static io.prestosql.plugin.hive.PartitionUpdate.UpdateMode.OVERWRITE;
import static io.prestosql.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet;
import static io.prestosql.plugin.hive.metastore.MetastoreUtil.getHiveSchema;
import static io.prestosql.plugin.hive.metastore.MetastoreUtil.getProtectMode;
import static io.prestosql.plugin.hive.metastore.MetastoreUtil.verifyOnline;
import static io.prestosql.plugin.hive.metastore.PrincipalPrivileges.fromHivePrivilegeInfos;
import static io.prestosql.plugin.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT;
import static io.prestosql.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat;
import static io.prestosql.plugin.hive.util.CompressionConfigUtil.configureCompression;
import static io.prestosql.plugin.hive.util.ConfigurationUtils.toJobConf;
import static io.prestosql.plugin.hive.util.HiveBucketing.bucketedOnTimestamp;
import static io.prestosql.plugin.hive.util.HiveBucketing.getHiveBucketHandle;
import static io.prestosql.plugin.hive.util.HiveUtil.PRESTO_VIEW_FLAG;
import static io.prestosql.plugin.hive.util.HiveUtil.buildHiveViewConnectorDefinition;
import static io.prestosql.plugin.hive.util.HiveUtil.columnExtraInfo;
import static io.prestosql.plugin.hive.util.HiveUtil.decodeViewData;
import static io.prestosql.plugin.hive.util.HiveUtil.encodeViewData;
import static io.prestosql.plugin.hive.util.HiveUtil.getPartitionKeyColumnHandles;
import static io.prestosql.plugin.hive.util.HiveUtil.hiveColumnHandles;
import static io.prestosql.plugin.hive.util.HiveUtil.isPrestoView;
import static io.prestosql.plugin.hive.util.HiveUtil.toPartitionValues;
import static io.prestosql.plugin.hive.util.HiveUtil.verifyPartitionTypeSupported;
import static io.prestosql.plugin.hive.util.HiveWriteUtils.checkTableIsWritable;
import static io.prestosql.plugin.hive.util.HiveWriteUtils.initializeSerializer;
import static io.prestosql.plugin.hive.util.HiveWriteUtils.isS3FileSystem;
import static io.prestosql.plugin.hive.util.HiveWriteUtils.isWritableType;
import static io.prestosql.plugin.hive.util.Statistics.ReduceOperator.ADD;
import static io.prestosql.plugin.hive.util.Statistics.createComputedStatisticsToPartitionMap;
import static io.prestosql.plugin.hive.util.Statistics.createEmptyPartitionStatistics;
import static io.prestosql.plugin.hive.util.Statistics.fromComputedStatistics;
import static io.prestosql.plugin.hive.util.Statistics.reduce;
import static io.prestosql.plugin.hive.util.SystemTables.createSystemTable;
import static io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY;
import static io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY;
import static io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY;
import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED;
import static io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY;
import static io.prestosql.spi.predicate.TupleDomain.withColumnDomains;
import static io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT;
import static io.prestosql.spi.type.BigintType.BIGINT;
import static io.prestosql.spi.type.TimestampType.TIMESTAMP_MILLIS;
import static io.prestosql.spi.type.TypeUtils.isFloatingPointNaN;
import static io.prestosql.spi.type.VarcharType.createUnboundedVarcharType;
import static java.lang.Boolean.parseBoolean;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toSet;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE;

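/**
 * Hive implementation of {@link TransactionalMetadata}: translates connector SPI calls into
 * Hive metastore and HDFS operations.
 *
 * <p>As a rough, hypothetical usage sketch (schema, table, and column names are invented; the
 * property names come from {@code HiveTableProperties}), tables managed through this class are
 * typically created with statements such as:
 *
 * <pre>
 * CREATE TABLE hive.web.page_views (
 *     view_time timestamp,
 *     user_id bigint,
 *     ds varchar
 * )
 * WITH (
 *     format = 'ORC',
 *     partitioned_by = ARRAY['ds']
 * )
 * </pre>
 */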
public class HiveMetadata
        implements TransactionalMetadata
{
    public static final String PRESTO_VERSION_NAME = "presto_version";
    public static final String PRESTO_QUERY_ID_NAME = "presto_query_id";
    public static final String BUCKETING_VERSION = "bucketing_version";
    public static final String TABLE_COMMENT = "comment";
    public static final String STORAGE_TABLE = "storage_table";
    private static final String TRANSACTIONAL = "transactional";

    private static final String ORC_BLOOM_FILTER_COLUMNS_KEY = "orc.bloom.filter.columns";
    private static final String ORC_BLOOM_FILTER_FPP_KEY = "orc.bloom.filter.fpp";

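    // Hive serde keys for header/footer line skipping; surfaced to users through the
    // skip_header_line_count and skip_footer_line_count table properties (see HiveTableProperties).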
    public static final String SKIP_HEADER_COUNT_KEY = serdeConstants.HEADER_COUNT;
    public static final String SKIP_FOOTER_COUNT_KEY = serdeConstants.FOOTER_COUNT;

    private static final String TEXT_FIELD_SEPARATOR_KEY = serdeConstants.FIELD_DELIM;
    private static final String TEXT_FIELD_SEPARATOR_ESCAPE_KEY = serdeConstants.ESCAPE_CHAR;
    private static final String NULL_FORMAT_KEY = serdeConstants.SERIALIZATION_NULL_FORMAT;

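    // Table parameter keys used to recognize Avro schemas supplied by URL and tables written by Spark;
    // a "spark.sql.sources.provider" value of "delta" marks a Delta Lake table, which getTableHandle rejects.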
    public static final String AVRO_SCHEMA_URL_KEY = "avro.schema.url";
    public static final String SPARK_TABLE_PROVIDER_KEY = "spark.sql.sources.provider";
    public static final String DELTA_LAKE_PROVIDER = "delta";

    private static final String CSV_SEPARATOR_KEY = OpenCSVSerde.SEPARATORCHAR;
    private static final String CSV_QUOTE_KEY = OpenCSVSerde.QUOTECHAR;
    private static final String CSV_ESCAPE_KEY = OpenCSVSerde.ESCAPECHAR;

    private final CatalogName catalogName;
    private final SemiTransactionalHiveMetastore metastore;
    private final HdfsEnvironment hdfsEnvironment;
    private final HivePartitionManager partitionManager;
    private final TypeManager typeManager;
    private final LocationService locationService;
    private final JsonCodec<PartitionUpdate> partitionUpdateCodec;
    private final boolean writesToNonManagedTablesEnabled;
    private final boolean createsOfNonManagedTablesEnabled;
    private final boolean translateHiveViews;
    private final String prestoVersion;
    private final HiveStatisticsProvider hiveStatisticsProvider;
    private final AccessControlMetadata accessControlMetadata;

    public HiveMetadata(
            CatalogName catalogName,
            SemiTransactionalHiveMetastore metastore,
            HdfsEnvironment hdfsEnvironment,
            HivePartitionManager partitionManager,
            boolean writesToNonManagedTablesEnabled,
            boolean createsOfNonManagedTablesEnabled,
            boolean translateHiveViews,
            TypeManager typeManager,
            LocationService locationService,
            JsonCodec<PartitionUpdate> partitionUpdateCodec,
            String prestoVersion,
            HiveStatisticsProvider hiveStatisticsProvider,
            AccessControlMetadata accessControlMetadata)
    {
        this.catalogName = requireNonNull(catalogName, "catalogName is null");
        this.metastore = requireNonNull(metastore, "metastore is null");
        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
        this.partitionManager = requireNonNull(partitionManager, "partitionManager is null");
        this.typeManager = requireNonNull(typeManager, "typeManager is null");
        this.locationService = requireNonNull(locationService, "locationService is null");
        this.partitionUpdateCodec = requireNonNull(partitionUpdateCodec, "partitionUpdateCodec is null");
        this.writesToNonManagedTablesEnabled = writesToNonManagedTablesEnabled;
        this.createsOfNonManagedTablesEnabled = createsOfNonManagedTablesEnabled;
        this.translateHiveViews = translateHiveViews;
        this.prestoVersion = requireNonNull(prestoVersion, "prestoVersion is null");
        this.hiveStatisticsProvider = requireNonNull(hiveStatisticsProvider, "hiveStatisticsProvider is null");
        this.accessControlMetadata = requireNonNull(accessControlMetadata, "accessControlMetadata is null");
    }

    public SemiTransactionalHiveMetastore getMetastore()
    {
        return metastore;
    }

    @Override
    public List<String> listSchemaNames(ConnectorSession session)
    {
        return metastore.getAllDatabases().stream()
                .filter(HiveMetadata::filterSchema)
                .collect(toImmutableList());
    }

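    // Resolves a table name to a handle: returns null for filtered schemas or missing tables, and
    // rejects Delta Lake tables as well as names that collide with the synthetic system tables.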
    @Override
    public HiveTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName)
    {
        requireNonNull(tableName, "tableName is null");
        if (!filterSchema(tableName.getSchemaName())) {
            return null;
        }
        Optional<Table> table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName());
        if (table.isEmpty()) {
            return null;
        }

        if (isDeltaLakeTable(table.get())) {
            throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, "Cannot query Delta Lake table");
        }

        // we must not allow system tables due to how permissions are checked in SystemTableAwareAccessControl
        if (getSourceTableNameFromSystemTable(tableName).isPresent()) {
            throw new PrestoException(HIVE_INVALID_METADATA, "Unexpected table present in Hive metastore: " + tableName);
        }

        verifyOnline(tableName, Optional.empty(), getProtectMode(table.get()), table.get().getParameters());

        return new HiveTableHandle(
                tableName.getSchemaName(),
                tableName.getTableName(),
                table.get().getParameters(),
                getPartitionKeyColumnHandles(table.get(), typeManager),
                getHiveBucketHandle(table.get(), typeManager));
    }

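    // ANALYZE entry point: validates the requested partitions and columns against the table's
    // partitioning and column set, then records them on the returned handle.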
    @Override
    public ConnectorTableHandle getTableHandleForStatisticsCollection(ConnectorSession session, SchemaTableName tableName, Map<String, Object> analyzeProperties)
    {
        HiveTableHandle handle = getTableHandle(session, tableName);
        if (handle == null) {
            return null;
        }
        Optional<List<List<String>>> partitionValuesList = getPartitionList(analyzeProperties);
        Optional<Set<String>> analyzeColumnNames = getColumnNames(analyzeProperties);
        ConnectorTableMetadata tableMetadata = getTableMetadata(session, handle.getSchemaTableName());

        List<String> partitionedBy = getPartitionedBy(tableMetadata.getProperties());

        if (partitionValuesList.isPresent()) {
            List<List<String>> list = partitionValuesList.get();

            if (partitionedBy.isEmpty()) {
                throw new PrestoException(INVALID_ANALYZE_PROPERTY, "Partition list provided but table is not partitioned");
            }
            for (List<String> values : list) {
                if (values.size() != partitionedBy.size()) {
                    throw new PrestoException(INVALID_ANALYZE_PROPERTY, "Partition value count does not match partition column count");
                }
            }

            handle = handle.withAnalyzePartitionValues(list);
            HivePartitionResult partitions = partitionManager.getPartitions(handle, list);
            handle = partitionManager.applyPartitionResult(handle, partitions, Optional.empty());
        }

        if (analyzeColumnNames.isPresent()) {
            Set<String> columnNames = analyzeColumnNames.get();
            Set<String> allColumnNames = tableMetadata.getColumns().stream()
                    .map(ColumnMetadata::getName)
                    .collect(toImmutableSet());
            if (!allColumnNames.containsAll(columnNames)) {
                throw new PrestoException(
                        INVALID_ANALYZE_PROPERTY,
                        format("Invalid columns specified for analysis: %s", Sets.difference(columnNames, allColumnNames)));
            }

            handle = handle.withAnalyzeColumnNames(columnNames);
        }

        return handle;
    }

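    // Serves the synthetic "$partitions" and "$properties" system tables, which expose a table's
    // partition values and raw metastore parameters, respectively.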
    @Override
    public Optional<SystemTable> getSystemTable(ConnectorSession session, SchemaTableName tableName)
    {
        if (SystemTableHandler.PARTITIONS.matches(tableName)) {
            return getPartitionsSystemTable(session, tableName, SystemTableHandler.PARTITIONS.getSourceTableName(tableName));
        }
        if (SystemTableHandler.PROPERTIES.matches(tableName)) {
            return getPropertiesSystemTable(session, tableName, SystemTableHandler.PROPERTIES.getSourceTableName(tableName));
        }
        return Optional.empty();
    }

    private Optional<SystemTable> getPropertiesSystemTable(ConnectorSession session, SchemaTableName tableName, SchemaTableName sourceTableName)
    {
        Optional<Table> table = metastore.getTable(new HiveIdentity(session), sourceTableName.getSchemaName(), sourceTableName.getTableName());
        if (table.isEmpty() || table.get().getTableType().equals(TableType.VIRTUAL_VIEW.name())) {
            throw new TableNotFoundException(tableName);
        }
        Map<String, String> sortedTableParameters = ImmutableSortedMap.copyOf(table.get().getParameters());
        List<ColumnMetadata> columns = sortedTableParameters.keySet().stream()
                .map(key -> new ColumnMetadata(key, VarcharType.VARCHAR))
                .collect(toImmutableList());
        List<Type> types = columns.stream()
                .map(ColumnMetadata::getType)
                .collect(toImmutableList());
        Iterable<List<Object>> propertyValues = ImmutableList.of(ImmutableList.copyOf(sortedTableParameters.values()));

        return Optional.of(createSystemTable(new ConnectorTableMetadata(sourceTableName, columns), constraint -> new InMemoryRecordSet(types, propertyValues).cursor()));
    }

    private Optional<SystemTable> getPartitionsSystemTable(ConnectorSession session, SchemaTableName tableName, SchemaTableName sourceTableName)
    {
        HiveTableHandle sourceTableHandle = getTableHandle(session, sourceTableName);

        if (sourceTableHandle == null) {
            return Optional.empty();
        }

        List<HiveColumnHandle> partitionColumns = sourceTableHandle.getPartitionColumns();
        if (partitionColumns.isEmpty()) {
            return Optional.empty();
        }

        List<Type> partitionColumnTypes = partitionColumns.stream()
                .map(HiveColumnHandle::getType)
                .collect(toImmutableList());

        List<ColumnMetadata> partitionSystemTableColumns = partitionColumns.stream()
                .map(column -> ColumnMetadata.builder()
                        .setName(column.getName())
                        .setType(column.getType())
                        .setComment(column.getComment())
                        .setHidden(column.isHidden())
                        .build())
                .collect(toImmutableList());

        Map<Integer, HiveColumnHandle> fieldIdToColumnHandle = IntStream.range(0, partitionColumns.size())
                .boxed()
                .collect(toImmutableMap(identity(), partitionColumns::get));

        return Optional.of(createSystemTable(
                new ConnectorTableMetadata(tableName, partitionSystemTableColumns),
                constraint -> {
                    TupleDomain<ColumnHandle> targetTupleDomain = constraint.transform(fieldIdToColumnHandle::get);
                    Predicate<Map<ColumnHandle, NullableValue>> targetPredicate = convertToPredicate(targetTupleDomain);
                    Constraint targetConstraint = new Constraint(targetTupleDomain, targetPredicate);
                    Iterable<List<Object>> records = () ->
                            stream(partitionManager.getPartitions(metastore, new HiveIdentity(session), sourceTableHandle, targetConstraint).getPartitions())
                                    .map(hivePartition ->
                                            IntStream.range(0, partitionColumns.size())
                                                    .mapToObj(fieldIdToColumnHandle::get)
                                                    .map(columnHandle -> hivePartition.getKeys().get(columnHandle).getValue())
                                                    .collect(toList())) // nullable
                                    .iterator();
                    return new InMemoryRecordSet(partitionColumnTypes, records).cursor();
                }));
    }

    @Override
    public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle tableHandle)
    {
        HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
        ConnectorTableMetadata tableMetadata = getTableMetadata(session, hiveTableHandle.getSchemaTableName());
        return hiveTableHandle.getAnalyzeColumnNames()
                .map(columnNames -> new ConnectorTableMetadata(
                        tableMetadata.getTable(),
                        tableMetadata.getColumns(),
                        ImmutableMap.<String, Object>builder()
                                .putAll(tableMetadata.getProperties())
                                // we use table properties as a vehicle to pass to the analyzer the subset of columns to be analyzed
                                .put(ANALYZE_COLUMNS_PROPERTY, columnNames)
                                .build(),
                        tableMetadata.getComment()))
                .orElse(tableMetadata);
    }

    private ConnectorTableMetadata getTableMetadata(ConnectorSession session, SchemaTableName tableName)
    {
        try {
            return doGetTableMetadata(session, tableName);
        }
        catch (PrestoException e) {
            throw e;
        }
        catch (RuntimeException e) {
            // Errors related to invalid or unsupported information in the Metastore should be handled explicitly (eg. as PrestoException(HIVE_INVALID_METADATA)).
            // This is just a catch-all solution so that we have any actionable information when eg. SELECT * FROM information_schema.columns fails.
            throw new RuntimeException("Failed to construct table metadata for table " + tableName, e);
        }
    }

    private ConnectorTableMetadata doGetTableMetadata(ConnectorSession session, SchemaTableName tableName)
    {
        Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new TableNotFoundException(tableName));

        if (!translateHiveViews && isHiveOrPrestoView(table)) {
            throw new TableNotFoundException(tableName);
        }

        Function<HiveColumnHandle, ColumnMetadata> metadataGetter = columnMetadataGetter(table);
        ImmutableList.Builder<ColumnMetadata> columns = ImmutableList.builder();
        for (HiveColumnHandle columnHandle : hiveColumnHandles(table, typeManager, getTimestampPrecision(session).getPrecision())) {
            columns.add(metadataGetter.apply(columnHandle));
        }

        // External location property
        ImmutableMap.Builder<String, Object> properties = ImmutableMap.builder();
        if (table.getTableType().equals(EXTERNAL_TABLE.name())) {
            properties.put(EXTERNAL_LOCATION_PROPERTY, table.getStorage().getLocation());
        }

        // Storage format property
        try {
            HiveStorageFormat format = extractHiveStorageFormat(table);
            properties.put(STORAGE_FORMAT_PROPERTY, format);
        }
        catch (PrestoException ignored) {
            // todo fail if format is not known
        }

        // Partitioning property
        List<String> partitionedBy = table.getPartitionColumns().stream()
                .map(Column::getName)
                .collect(toList());
        if (!partitionedBy.isEmpty()) {
            properties.put(PARTITIONED_BY_PROPERTY, partitionedBy);
        }

        // Bucket properties
        table.getStorage().getBucketProperty().ifPresent(property -> {
            properties.put(BUCKETING_VERSION, property.getBucketingVersion().getVersion());
            properties.put(BUCKET_COUNT_PROPERTY, property.getBucketCount());
            properties.put(BUCKETED_BY_PROPERTY, property.getBucketedBy());
            properties.put(SORTED_BY_PROPERTY, property.getSortedBy());
        });

        // Transactional properties
        String transactionalProperty = table.getParameters().get(HiveMetadata.TRANSACTIONAL);
        if (parseBoolean(transactionalProperty)) {
            properties.put(HiveTableProperties.TRANSACTIONAL, true);
        }

        // ORC format specific properties
        String orcBloomFilterColumns = table.getParameters().get(ORC_BLOOM_FILTER_COLUMNS_KEY);
        if (orcBloomFilterColumns != null) {
            properties.put(ORC_BLOOM_FILTER_COLUMNS, Splitter.on(',').trimResults().omitEmptyStrings().splitToList(orcBloomFilterColumns));
        }
        String orcBloomFilterFfp = table.getParameters().get(ORC_BLOOM_FILTER_FPP_KEY);
        if (orcBloomFilterFfp != null) {
            properties.put(ORC_BLOOM_FILTER_FPP, Double.parseDouble(orcBloomFilterFfp));
        }

        // Avro specific property
        String avroSchemaUrl = table.getParameters().get(AVRO_SCHEMA_URL_KEY);
        if (avroSchemaUrl != null) {
            properties.put(AVRO_SCHEMA_URL, avroSchemaUrl);
        }

        // Textfile and CSV specific properties
        getSerdeProperty(table, SKIP_HEADER_COUNT_KEY)
                .ifPresent(skipHeaderCount -> properties.put(SKIP_HEADER_LINE_COUNT, Integer.valueOf(skipHeaderCount)));
        getSerdeProperty(table, SKIP_FOOTER_COUNT_KEY)
                .ifPresent(skipFooterCount -> properties.put(SKIP_FOOTER_LINE_COUNT, Integer.valueOf(skipFooterCount)));

        // Multi-format property
        getSerdeProperty(table, NULL_FORMAT_KEY)
                .ifPresent(nullFormat -> properties.put(NULL_FORMAT_PROPERTY, nullFormat));

        // Textfile specific properties
        getSerdeProperty(table, TEXT_FIELD_SEPARATOR_KEY)
                .ifPresent(fieldSeparator -> properties.put(TEXTFILE_FIELD_SEPARATOR, fieldSeparator));
        getSerdeProperty(table, TEXT_FIELD_SEPARATOR_ESCAPE_KEY)
                .ifPresent(fieldEscape -> properties.put(TEXTFILE_FIELD_SEPARATOR_ESCAPE, fieldEscape));

        // CSV specific properties
        getCsvSerdeProperty(table, CSV_SEPARATOR_KEY)
                .ifPresent(csvSeparator -> properties.put(CSV_SEPARATOR, csvSeparator));
        getCsvSerdeProperty(table, CSV_QUOTE_KEY)
                .ifPresent(csvQuote -> properties.put(CSV_QUOTE, csvQuote));
        getCsvSerdeProperty(table, CSV_ESCAPE_KEY)
                .ifPresent(csvEscape -> properties.put(CSV_ESCAPE, csvEscape));

        Optional<String> comment = Optional.ofNullable(table.getParameters().get(TABLE_COMMENT));

        return new ConnectorTableMetadata(tableName, columns.build(), properties.build(), comment);
    }

    private static Optional<String> getCsvSerdeProperty(Table table, String key)
    {
        return getSerdeProperty(table, key).map(csvSerdeProperty -> csvSerdeProperty.substring(0, 1));
    }

    private static Optional<String> getSerdeProperty(Table table, String key)
    {
        String serdePropertyValue = table.getStorage().getSerdeParameters().get(key);
        String tablePropertyValue = table.getParameters().get(key);
        if (serdePropertyValue != null && tablePropertyValue != null && !tablePropertyValue.equals(serdePropertyValue)) {
            // in Hive one can set conflicting values for the same property, in such case it looks like table properties are used
            throw new PrestoException(
                    HIVE_INVALID_METADATA,
                    format("Different values for '%s' set in serde properties and table properties: '%s' and '%s'", key, serdePropertyValue, tablePropertyValue));
        }
        return firstNonNullable(tablePropertyValue, serdePropertyValue);
    }

    @Override
    public Optional<Object> getInfo(ConnectorTableHandle table)
    {
        return ((HiveTableHandle) table).getPartitions()
                .map(partitions -> new HiveInputInfo(
                        partitions.stream()
                                .map(HivePartition::getPartitionId)
                                .collect(toImmutableList()),
                        false));
    }

    @Override
    public List<SchemaTableName> listTables(ConnectorSession session, Optional<String> optionalSchemaName)
    {
        ImmutableList.Builder<SchemaTableName> tableNames = ImmutableList.builder();
        for (String schemaName : listSchemas(session, optionalSchemaName)) {
            for (String tableName : metastore.getAllTables(schemaName)) {
                tableNames.add(new SchemaTableName(schemaName, tableName));
            }
        }
        return tableNames.build();
    }

    private List<String> listSchemas(ConnectorSession session, Optional<String> schemaName)
    {
        if (schemaName.isPresent()) {
            if (!filterSchema(schemaName.get())) {
                return ImmutableList.of();
            }
            return ImmutableList.of(schemaName.get());
        }
        return listSchemaNames(session);
    }

    @Override
    public Map<String, ColumnHandle> getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle)
    {
        SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
        Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new TableNotFoundException(tableName));
        return hiveColumnHandles(table, typeManager, getTimestampPrecision(session).getPrecision()).stream()
                .collect(toImmutableMap(HiveColumnHandle::getName, identity()));
    }

    @SuppressWarnings("TryWithIdenticalCatches")
    @Override
    public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
    {
        requireNonNull(prefix, "prefix is null");
        ImmutableMap.Builder<SchemaTableName, List<ColumnMetadata>> columns = ImmutableMap.builder();
        for (SchemaTableName tableName : listTables(session, prefix)) {
            try {
                columns.put(tableName, getTableMetadata(session, tableName).getColumns());
            }
            catch (HiveViewNotSupportedException e) {
                // view is not supported
            }
            catch (TableNotFoundException e) {
                // table disappeared during listing operation
            }
            catch (PrestoException e) {
                // Skip this table if there's a failure due to Hive, a bad Serde, or bad metadata
                if (!e.getErrorCode().getType().equals(ErrorType.EXTERNAL)) {
                    throw e;
                }
            }
        }
        return columns.build();
    }

    @Override
    public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint constraint)
    {
        if (!isStatisticsEnabled(session)) {
            return TableStatistics.empty();
        }
        Map<String, ColumnHandle> columns = getColumnHandles(session, tableHandle)
                .entrySet().stream()
                .filter(entry -> !((HiveColumnHandle) entry.getValue()).isHidden())
                .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));
        Map<String, Type> columnTypes = columns.entrySet().stream()
                .collect(toImmutableMap(Map.Entry::getKey, entry -> getColumnMetadata(session, tableHandle, entry.getValue()).getType()));
        HivePartitionResult partitionResult = partitionManager.getPartitions(metastore, new HiveIdentity(session), tableHandle, constraint);
        List<HivePartition> partitions = partitionManager.getPartitionsAsList(partitionResult);
        return hiveStatisticsProvider.getTableStatistics(session, ((HiveTableHandle) tableHandle).getSchemaTableName(), columns, columnTypes, partitions);
    }

    private List<SchemaTableName> listTables(ConnectorSession session, SchemaTablePrefix prefix)
    {
        if (prefix.getTable().isEmpty()) {
            return listTables(session, prefix.getSchema());
        }
        SchemaTableName tableName = prefix.toSchemaTableName();
        if (!filterSchema(tableName.getSchemaName())) {
            return ImmutableList.of();
        }
        try {
            if (metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).isEmpty()) {
                return ImmutableList.of();
            }
        }
        catch (HiveViewNotSupportedException e) {
            // exists, would be returned by listTables from schema
        }
        return ImmutableList.of(tableName);
    }

    /**
     * NOTE: This method does not return column comment
     */
    @Override
    public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle)
    {
        return ((HiveColumnHandle) columnHandle).getColumnMetadata();
    }

    @Override
    public void createSchema(ConnectorSession session, String schemaName, Map<String, Object> properties, PrestoPrincipal owner)
    {
        Optional<String> location = HiveSchemaProperties.getLocation(properties).map(locationUri -> {
            try {
                hdfsEnvironment.getFileSystem(new HdfsContext(session, schemaName), new Path(locationUri));
            }
            catch (IOException e) {
                throw new PrestoException(INVALID_SCHEMA_PROPERTY, "Invalid location URI: " + locationUri, e);
            }
            return locationUri;
        });

        Database database = Database.builder()
                .setDatabaseName(schemaName)
                .setLocation(location)
                .setOwnerType(owner.getType())
                .setOwnerName(owner.getName())
                .build();

        metastore.createDatabase(new HiveIdentity(session), database);
    }

    @Override
    public void dropSchema(ConnectorSession session, String schemaName)
    {
        // basic sanity check to provide a better error message
        if (!listTables(session, Optional.of(schemaName)).isEmpty() || !listViews(session, Optional.of(schemaName)).isEmpty()) {
            throw new PrestoException(SCHEMA_NOT_EMPTY, "Schema not empty: " + schemaName);
        }
        metastore.dropDatabase(new HiveIdentity(session), schemaName);
    }

    @Override
    public void renameSchema(ConnectorSession session, String source, String target)
    {
        metastore.renameDatabase(new HiveIdentity(session), source, target);
    }

    @Override
    public void setSchemaAuthorization(ConnectorSession session, String source, PrestoPrincipal principal)
    {
        metastore.setDatabaseOwner(new HiveIdentity(session), source, HivePrincipal.from(principal));
    }

    @Override
    public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting)
    {
        SchemaTableName schemaTableName = tableMetadata.getTable();
        String schemaName = schemaTableName.getSchemaName();
        String tableName = schemaTableName.getTableName();
        List<String> partitionedBy = getPartitionedBy(tableMetadata.getProperties());
        Optional<HiveBucketProperty> bucketProperty = getBucketProperty(tableMetadata.getProperties());

        if ((bucketProperty.isPresent() || !partitionedBy.isEmpty()) && getAvroSchemaUrl(tableMetadata.getProperties()) != null) {
            throw new PrestoException(NOT_SUPPORTED, "Bucketing/Partitioning columns not supported when Avro schema url is set");
        }

        validateTimestampColumns(tableMetadata.getColumns());
        List<HiveColumnHandle> columnHandles = getColumnHandles(tableMetadata, ImmutableSet.copyOf(partitionedBy));
        HiveStorageFormat hiveStorageFormat = getHiveStorageFormat(tableMetadata.getProperties());
        Map<String, String> tableProperties = getEmptyTableProperties(tableMetadata, bucketProperty, new HdfsContext(session, schemaName, tableName));

        hiveStorageFormat.validateColumns(columnHandles);

        Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
        List<Column> partitionColumns = partitionedBy.stream()
                .map(columnHandlesByName::get)
                .map(column -> new Column(column.getName(), column.getHiveType(), column.getComment()))
                .collect(toList());
        checkPartitionTypesSupported(partitionColumns);

        Path targetPath;
        boolean external;
        String externalLocation = getExternalLocation(tableMetadata.getProperties());
        if (externalLocation != null) {
            if (!createsOfNonManagedTablesEnabled) {
                throw new PrestoException(NOT_SUPPORTED, "Cannot create non-managed Hive table");
            }
            external = true;
            targetPath = getExternalLocationAsPath(externalLocation);
            checkExternalPath(new HdfsContext(session, schemaName, tableName), targetPath);
        }
        else {
            external = false;
            LocationHandle locationHandle = locationService.forNewTable(metastore, session, schemaName, tableName, Optional.empty());
            targetPath = locationService.getQueryWriteInfo(locationHandle).getTargetPath();
        }

        Table table = buildTableObject(
                session.getQueryId(),
                schemaName,
                tableName,
                session.getUser(),
                columnHandles,
                hiveStorageFormat,
                partitionedBy,
                bucketProperty,
                tableProperties,
                targetPath,
                external,
                prestoVersion);
        PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner());
        HiveBasicStatistics basicStatistics = (!external && table.getPartitionColumns().isEmpty()) ? createZeroStatistics() : createEmptyStatistics();
        metastore.createTable(
                session,
                table,
                principalPrivileges,
                Optional.empty(),
                ignoreExisting,
                new PartitionStatistics(basicStatistics, ImmutableMap.of()));
    }

    private Map<String, String> getEmptyTableProperties(ConnectorTableMetadata tableMetadata, Optional<HiveBucketProperty> bucketProperty, HdfsContext hdfsContext)
    {
        HiveStorageFormat hiveStorageFormat = getHiveStorageFormat(tableMetadata.getProperties());
        ImmutableMap.Builder<String, String> tableProperties = ImmutableMap.builder();

        // When metastore is configured with metastore.create.as.acid=true, it will also change Presto-created tables
        // behind the scenes. In particular, this won't work with CTAS.
        // TODO (https://github.com/prestosql/presto/issues/1956) convert this into normal table property
        boolean transactional = HiveTableProperties.isTransactional(tableMetadata.getProperties()).orElse(false);
        tableProperties.put(TRANSACTIONAL, String.valueOf(transactional));

        bucketProperty.ifPresent(hiveBucketProperty ->
                tableProperties.put(BUCKETING_VERSION, Integer.toString(hiveBucketProperty.getBucketingVersion().getVersion())));

        // ORC format specific properties
        List<String> columns = getOrcBloomFilterColumns(tableMetadata.getProperties());
        if (columns != null && !columns.isEmpty()) {
            checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.ORC, ORC_BLOOM_FILTER_COLUMNS);
            tableProperties.put(ORC_BLOOM_FILTER_COLUMNS_KEY, Joiner.on(",").join(columns));
            tableProperties.put(ORC_BLOOM_FILTER_FPP_KEY, String.valueOf(getOrcBloomFilterFpp(tableMetadata.getProperties())));
        }

        // Avro specific properties
        String avroSchemaUrl = getAvroSchemaUrl(tableMetadata.getProperties());
        if (avroSchemaUrl != null) {
            checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.AVRO, AVRO_SCHEMA_URL);
            tableProperties.put(AVRO_SCHEMA_URL_KEY, validateAndNormalizeAvroSchemaUrl(avroSchemaUrl, hdfsContext));
        }

        // Textfile and CSV specific properties
        Set<HiveStorageFormat> csvAndTextFile = ImmutableSet.of(HiveStorageFormat.TEXTFILE, HiveStorageFormat.CSV);
        getHeaderSkipCount(tableMetadata.getProperties()).ifPresent(headerSkipCount -> {
            if (headerSkipCount > 0) {
                checkFormatForProperty(hiveStorageFormat, csvAndTextFile, SKIP_HEADER_LINE_COUNT);
                tableProperties.put(SKIP_HEADER_COUNT_KEY, String.valueOf(headerSkipCount));
            }
            if (headerSkipCount < 0) {
                throw new PrestoException(HIVE_INVALID_METADATA, format("Invalid value for %s property: %s", SKIP_HEADER_LINE_COUNT, headerSkipCount));
            }
        });

        getFooterSkipCount(tableMetadata.getProperties()).ifPresent(footerSkipCount -> {
            if (footerSkipCount > 0) {
                checkFormatForProperty(hiveStorageFormat, csvAndTextFile, SKIP_FOOTER_LINE_COUNT);
                tableProperties.put(SKIP_FOOTER_COUNT_KEY, String.valueOf(footerSkipCount));
            }
            if (footerSkipCount < 0) {
                throw new PrestoException(HIVE_INVALID_METADATA, format("Invalid value for %s property: %s", SKIP_FOOTER_LINE_COUNT, footerSkipCount));
            }
        });

        // null_format is allowed in textfile, rctext, and sequencefile
        Set<HiveStorageFormat> allowsNullFormat = ImmutableSet.of(
                HiveStorageFormat.TEXTFILE, HiveStorageFormat.RCTEXT, HiveStorageFormat.SEQUENCEFILE);
        getNullFormat(tableMetadata.getProperties())
                .ifPresent(format -> {
                    checkFormatForProperty(hiveStorageFormat, allowsNullFormat, NULL_FORMAT_PROPERTY);
                    tableProperties.put(NULL_FORMAT_KEY, format.toString());
                });

        // Textfile-specific properties
        getSingleCharacterProperty(tableMetadata.getProperties(), TEXTFILE_FIELD_SEPARATOR)
                .ifPresent(separator -> {
                    checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.TEXTFILE, TEXT_FIELD_SEPARATOR_KEY);
                    tableProperties.put(TEXT_FIELD_SEPARATOR_KEY, separator.toString());
                });

        getSingleCharacterProperty(tableMetadata.getProperties(), TEXTFILE_FIELD_SEPARATOR_ESCAPE)
                .ifPresent(escape -> {
                    checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.TEXTFILE, TEXT_FIELD_SEPARATOR_ESCAPE_KEY);
                    tableProperties.put(TEXT_FIELD_SEPARATOR_ESCAPE_KEY, escape.toString());
                });

        // CSV specific properties
        getSingleCharacterProperty(tableMetadata.getProperties(), CSV_ESCAPE)
                .ifPresent(escape -> {
                    checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.CSV, CSV_ESCAPE);
                    tableProperties.put(CSV_ESCAPE_KEY, escape.toString());
                });

        getSingleCharacterProperty(tableMetadata.getProperties(), CSV_QUOTE)
                .ifPresent(quote -> {
                    checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.CSV, CSV_QUOTE);
                    tableProperties.put(CSV_QUOTE_KEY, quote.toString());
                });

        getSingleCharacterProperty(tableMetadata.getProperties(), CSV_SEPARATOR)
                .ifPresent(separator -> {
                    checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.CSV, CSV_SEPARATOR);
                    tableProperties.put(CSV_SEPARATOR_KEY, separator.toString());
                });

        // Set bogus table stats to prevent Hive 2.x from gathering these stats at table creation.
        // These stats are not useful by themselves and can take very long time to collect when creating an
        // external table over large data set.
        tableProperties.put("numFiles", "-1");
        tableProperties.put("totalSize", "-1");

        // Table comment property
        tableMetadata.getComment().ifPresent(value -> tableProperties.put(TABLE_COMMENT, value));

        return tableProperties.build();
    }

    private static void checkFormatForProperty(HiveStorageFormat actualStorageFormat, HiveStorageFormat expectedStorageFormat, String propertyName)
    {
        if (actualStorageFormat != expectedStorageFormat) {
            throw new PrestoException(INVALID_TABLE_PROPERTY, format("Cannot specify %s table property for storage format: %s", propertyName, actualStorageFormat));
        }
    }

    private static void checkFormatForProperty(HiveStorageFormat actualStorageFormat, Set<HiveStorageFormat> expectedStorageFormats, String propertyName)
    {
        if (!expectedStorageFormats.contains(actualStorageFormat)) {
            throw new PrestoException(INVALID_TABLE_PROPERTY, format("Cannot specify %s table property for storage format: %s", propertyName, actualStorageFormat));
        }
    }

    private String validateAndNormalizeAvroSchemaUrl(String url, HdfsContext context)
    {
        try {
            new URL(url).openStream().close();
            return url;
        }
        catch (MalformedURLException e) {
            // try locally
            if (new File(url).exists()) {
                // hive needs url to have a protocol
                return new File(url).toURI().toString();
            }
            // try hdfs
            try {
                if (!hdfsEnvironment.getFileSystem(context, new Path(url)).exists(new Path(url))) {
                    throw new PrestoException(INVALID_TABLE_PROPERTY, "Cannot locate Avro schema file: " + url);
                }
                return url;
            }
            catch (IOException ex) {
                throw new PrestoException(INVALID_TABLE_PROPERTY, "Avro schema file is not a valid file system URI: " + url, ex);
            }
        }
        catch (IOException e) {
            throw new PrestoException(INVALID_TABLE_PROPERTY, "Cannot open Avro schema file: " + url, e);
        }
    }

    private static Path getExternalLocationAsPath(String location)
    {
        try {
            return new Path(location);
        }
        catch (IllegalArgumentException e) {
            throw new PrestoException(INVALID_TABLE_PROPERTY, "External location is not a valid file system URI: " + location, e);
        }
    }

    private void checkExternalPath(HdfsContext context, Path path)
    {
        try {
            if (!isS3FileSystem(context, hdfsEnvironment, path)) {
                if (!hdfsEnvironment.getFileSystem(context, path).isDirectory(path)) {
                    throw new PrestoException(INVALID_TABLE_PROPERTY, "External location must be a directory: " + path);
                }
            }
        }
        catch (IOException e) {
            throw new PrestoException(INVALID_TABLE_PROPERTY, "External location is not a valid file system URI: " + path, e);
        }
    }

    private void checkPartitionTypesSupported(List<Column> partitionColumns)
    {
        for (Column partitionColumn : partitionColumns) {
            Type partitionType = typeManager.getType(partitionColumn.getType().getTypeSignature());
            verifyPartitionTypeSupported(partitionColumn.getName(), partitionType);
        }
    }

    private static Table buildTableObject(
            String queryId,
            String schemaName,
            String tableName,
            String tableOwner,
            List<HiveColumnHandle> columnHandles,
            HiveStorageFormat hiveStorageFormat,
            List<String> partitionedBy,
            Optional<HiveBucketProperty> bucketProperty,
            Map<String, String> additionalTableParameters,
            Path targetPath,
            boolean external,
            String prestoVersion)
    {
        Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
        List<Column> partitionColumns = partitionedBy.stream()
                .map(columnHandlesByName::get)
                .map(column -> new Column(column.getName(), column.getHiveType(), column.getComment()))
                .collect(toList());

        Set<String> partitionColumnNames = ImmutableSet.copyOf(partitionedBy);

        ImmutableList.Builder<Column> columns = ImmutableList.builder();
        for (HiveColumnHandle columnHandle : columnHandles) {
            String name = columnHandle.getName();
            HiveType type = columnHandle.getHiveType();
            if (!partitionColumnNames.contains(name)) {
                verify(!columnHandle.isPartitionKey(), "Column handles are not consistent with partitioned by property");
                columns.add(new Column(name, type, columnHandle.getComment()));
            }
            else {
                verify(columnHandle.isPartitionKey(), "Column handles are not consistent with partitioned by property");
            }
        }

        ImmutableMap.Builder<String, String> tableParameters = ImmutableMap.<String, String>builder()
                .put(PRESTO_VERSION_NAME, prestoVersion)
                .put(PRESTO_QUERY_ID_NAME, queryId)
                .putAll(additionalTableParameters);

        if (external) {
            tableParameters.put("EXTERNAL", "TRUE");
        }

        Table.Builder tableBuilder = Table.builder()
                .setDatabaseName(schemaName)
                .setTableName(tableName)
                .setOwner(tableOwner)
                .setTableType((external ? EXTERNAL_TABLE : MANAGED_TABLE).name())
                .setDataColumns(columns.build())
                .setPartitionColumns(partitionColumns)
                .setParameters(tableParameters.build());

        tableBuilder.getStorageBuilder()
                .setStorageFormat(fromHiveStorageFormat(hiveStorageFormat))
                .setBucketProperty(bucketProperty)
                .setLocation(targetPath.toString());

        return tableBuilder.build();
    }

    @Override
    public void addColumn(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnMetadata column)
    {
        HiveTableHandle handle = (HiveTableHandle) tableHandle;
        failIfAvroSchemaIsSet(session, handle);

        metastore.addColumn(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName(), column.getName(), toHiveType(column.getType()), column.getComment());
    }

    @Override
    public void renameColumn(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle source, String target)
    {
        HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
        failIfAvroSchemaIsSet(session, hiveTableHandle);
        HiveColumnHandle sourceHandle = (HiveColumnHandle) source;

        metastore.renameColumn(new HiveIdentity(session), hiveTableHandle.getSchemaName(), hiveTableHandle.getTableName(), sourceHandle.getName(), target);
    }

    @Override
    public void dropColumn(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle column)
    {
        HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
        failIfAvroSchemaIsSet(session, hiveTableHandle);
        HiveColumnHandle columnHandle = (HiveColumnHandle) column;

        metastore.dropColumn(new HiveIdentity(session), hiveTableHandle.getSchemaName(), hiveTableHandle.getTableName(), columnHandle.getName());
    }

    private void failIfAvroSchemaIsSet(ConnectorSession session, HiveTableHandle handle)
    {
        Table table = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName())
                .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
        if (table.getParameters().containsKey(AVRO_SCHEMA_URL_KEY) || table.getStorage().getSerdeParameters().containsKey(AVRO_SCHEMA_URL_KEY)) {
            throw new PrestoException(NOT_SUPPORTED, "ALTER TABLE not supported when Avro schema url is set");
        }
    }

    @Override
    public void renameTable(ConnectorSession session, ConnectorTableHandle tableHandle, SchemaTableName newTableName)
    {
        HiveTableHandle handle = (HiveTableHandle) tableHandle;
        metastore.renameTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName(), newTableName.getSchemaName(), newTableName.getTableName());
    }

    @Override
    public void setTableComment(ConnectorSession session, ConnectorTableHandle tableHandle, Optional<String> comment)
    {
        HiveTableHandle handle = (HiveTableHandle) tableHandle;
        metastore.commentTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName(), comment);
    }

    @Override
    public void setColumnComment(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle column, Optional<String> comment)
    {
        HiveTableHandle handle = (HiveTableHandle) tableHandle;
        HiveColumnHandle columnHandle = (HiveColumnHandle) column;
        metastore.commentColumn(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName(), columnHandle.getName(), comment);
    }

    @Override
    public void dropTable(ConnectorSession session, ConnectorTableHandle tableHandle)
    {
        HiveTableHandle handle = (HiveTableHandle) tableHandle;
        Optional<Table> target = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName());
        if (target.isEmpty()) {
            throw new TableNotFoundException(handle.getSchemaTableName());
        }
        metastore.dropTable(session, handle.getSchemaName(), handle.getTableName());
    }

    @Override
    public ConnectorTableHandle beginStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle)
    {
        SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
        metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new TableNotFoundException(tableName));
        return tableHandle;
    }

    @Override
    public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics)
    {
        HiveIdentity identity = new HiveIdentity(session);
        HiveTableHandle handle = (HiveTableHandle) tableHandle;
        SchemaTableName tableName = handle.getSchemaTableName();
        Table table = metastore.getTable(identity, tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));

        List<Column> partitionColumns = table.getPartitionColumns();
        List<String> partitionColumnNames = partitionColumns.stream()
                .map(Column::getName)
                .collect(toImmutableList());
        // TODO: revisit when handling write path
        List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table, typeManager, TimestampType.DEFAULT_PRECISION);
        Map<String, Type> columnTypes = hiveColumnHandles.stream()
                .filter(columnHandle -> !columnHandle.isHidden())
                .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));

        Map<List<String>, ComputedStatistics> computedStatisticsMap = createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);

        if (partitionColumns.isEmpty()) {
            // commit analyze to unpartitioned table
            metastore.setTableStatistics(identity, table, createPartitionStatistics(session, columnTypes, computedStatisticsMap.get(ImmutableList.of())));
        }
        else {
            List<List<String>> partitionValuesList;
            if (handle.getAnalyzePartitionValues().isPresent()) {
                partitionValuesList = handle.getAnalyzePartitionValues().get();
            }
            else {
                partitionValuesList = metastore.getPartitionNames(identity, handle.getSchemaName(), handle.getTableName())
                        .orElseThrow(() -> new TableNotFoundException(((HiveTableHandle) tableHandle).getSchemaTableName()))
                        .stream()
                        .map(HiveUtil::toPartitionValues)
                        .collect(toImmutableList());
            }

            ImmutableMap.Builder<List<String>, PartitionStatistics> partitionStatistics = ImmutableMap.builder();
            Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream()
                    .filter(columnHandle -> !partitionColumnNames.contains(columnHandle.getName()))
                    .filter(column -> !column.isHidden())
                    .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(metastore.getSupportedColumnStatistics(column.getType()))));
            Supplier<PartitionStatistics> emptyPartitionStatistics = Suppliers.memoize(() -> createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));

            int usedComputedStatistics = 0;
            for (List<String> partitionValues : partitionValuesList) {
                ComputedStatistics collectedStatistics = computedStatisticsMap.get(partitionValues);
                if (collectedStatistics == null) {
                    partitionStatistics.put(partitionValues, emptyPartitionStatistics.get());
                }
                else {
                    usedComputedStatistics++;
                    partitionStatistics.put(partitionValues, createPartitionStatistics(session, columnTypes, collectedStatistics));
                }
            }
            verify(usedComputedStatistics == computedStatistics.size(), "All computed statistics must be used");
            metastore.setPartitionStatistics(identity, table, partitionStatistics.build());
        }
    }

    @Override
    public HiveOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout)
    {
        Optional<Path> externalLocation = Optional.ofNullable(getExternalLocation(tableMetadata.getProperties()))
                .map(HiveMetadata::getExternalLocationAsPath);
        if (!createsOfNonManagedTablesEnabled && externalLocation.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Creating non-managed Hive tables is disabled");
        }

        if (!writesToNonManagedTablesEnabled && externalLocation.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Writes to non-managed Hive tables is disabled");
        }

        if (getAvroSchemaUrl(tableMetadata.getProperties()) != null) {
            throw new PrestoException(NOT_SUPPORTED, "CREATE TABLE AS not supported when Avro schema url is set");
        }

        HiveStorageFormat tableStorageFormat = getHiveStorageFormat(tableMetadata.getProperties());
        List<String> partitionedBy = getPartitionedBy(tableMetadata.getProperties());
        Optional<HiveBucketProperty> bucketProperty = getBucketProperty(tableMetadata.getProperties());
        boolean transactional = isTransactional(tableMetadata.getProperties()).orElse(false);

        // get the root directory for the database
        SchemaTableName schemaTableName = tableMetadata.getTable();
        String schemaName = schemaTableName.getSchemaName();
        String tableName = schemaTableName.getTableName();

        Map<String, String> tableProperties = getEmptyTableProperties(tableMetadata, bucketProperty, new HdfsContext(session, schemaName, tableName));
        List<HiveColumnHandle> columnHandles = getColumnHandles(tableMetadata, ImmutableSet.copyOf(partitionedBy));
        HiveStorageFormat partitionStorageFormat = isRespectTableFormat(session) ? tableStorageFormat : getHiveStorageFormat(session);

        // unpartitioned tables ignore the partition storage format
        HiveStorageFormat actualStorageFormat = partitionedBy.isEmpty() ? tableStorageFormat : partitionStorageFormat;
        actualStorageFormat.validateColumns(columnHandles);

        Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
        List<Column> partitionColumns = partitionedBy.stream()
                .map(columnHandlesByName::get)
                .map(column -> new Column(column.getName(), column.getHiveType(), column.getComment()))
                .collect(toList());
        checkPartitionTypesSupported(partitionColumns);

        LocationHandle locationHandle = locationService.forNewTable(metastore, session, schemaName, tableName, externalLocation);
        HiveOutputTableHandle result = new HiveOutputTableHandle(
                schemaName,
                tableName,
                columnHandles,
                metastore.generatePageSinkMetadata(new HiveIdentity(session), schemaTableName),
                locationHandle,
                tableStorageFormat,
                partitionStorageFormat,
                partitionedBy,
                bucketProperty,
                session.getUser(),
                tableProperties,
                transactional,
                externalLocation.isPresent());

        WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
        metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), schemaTableName);

        return result;
    }

    @Override
    public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
    {
        HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
        List<PartitionUpdate> partitionUpdates = fragments.stream()
                .map(Slice::getBytes)
                .map(partitionUpdateCodec::fromJson)
                .collect(toList());

        WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
        Table table = buildTableObject(
                session.getQueryId(),
                handle.getSchemaName(),
                handle.getTableName(),
                handle.getTableOwner(),
                handle.getInputColumns(),
                handle.getTableStorageFormat(),
                handle.getPartitionedBy(),
                handle.getBucketProperty(),
                handle.getAdditionalTableParameters(),
                writeInfo.getTargetPath(),
                handle.isExternal(),
                prestoVersion);
        PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(handle.getTableOwner());

        partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);

        if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
            List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, true, partitionUpdates);
            // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
            partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
            for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
                Optional<Partition> partition = table.getPartitionColumns().isEmpty() ?
Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate)); createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames()); } } Map columnTypes = handle.getInputColumns().stream() .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager))); Map, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes); PartitionStatistics tableStatistics; if (table.getPartitionColumns().isEmpty()) { HiveBasicStatistics basicStatistics = partitionUpdates.stream() .map(PartitionUpdate::getStatistics) .reduce((first, second) -> reduce(first, second, ADD)) .orElse(createZeroStatistics()); tableStatistics = createPartitionStatistics(session, basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of())); } else { tableStatistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()); } metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), false, tableStatistics); if (!handle.getPartitionedBy().isEmpty()) { if (isRespectTableFormat(session)) { verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat()); } for (PartitionUpdate update : partitionUpdates) { Partition partition = buildPartitionObject(session, table, update); PartitionStatistics partitionStatistics = createPartitionStatistics( session, update.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues())); metastore.addPartition( session, handle.getSchemaName(), handle.getTableName(), buildPartitionObject(session, table, update), update.getWritePath(), partitionStatistics); } } return Optional.of(new HiveWrittenPartitions( partitionUpdates.stream() .map(PartitionUpdate::getName) .collect(toImmutableList()))); } private List computePartitionUpdatesForMissingBuckets( ConnectorSession session, HiveWritableTableHandle handle, Table table, boolean isCreateTable, List partitionUpdates) { ImmutableList.Builder partitionUpdatesForMissingBucketsBuilder = ImmutableList.builder(); HiveStorageFormat storageFormat = table.getPartitionColumns().isEmpty() ? 
handle.getTableStorageFormat() : handle.getPartitionStorageFormat(); for (PartitionUpdate partitionUpdate : partitionUpdates) { int bucketCount = handle.getBucketProperty().get().getBucketCount(); List fileNamesForMissingBuckets = computeFileNamesForMissingBuckets( session, table, storageFormat, partitionUpdate.getTargetPath(), bucketCount, isCreateTable && handle.isTransactional(), partitionUpdate); partitionUpdatesForMissingBucketsBuilder.add(new PartitionUpdate( partitionUpdate.getName(), partitionUpdate.getUpdateMode(), partitionUpdate.getWritePath(), partitionUpdate.getTargetPath(), fileNamesForMissingBuckets, 0, 0, 0)); } return partitionUpdatesForMissingBucketsBuilder.build(); } private List computeFileNamesForMissingBuckets( ConnectorSession session, Table table, HiveStorageFormat storageFormat, Path targetPath, int bucketCount, boolean transactionalCreateTable, PartitionUpdate partitionUpdate) { if (partitionUpdate.getFileNames().size() == bucketCount) { // fast path for common case return ImmutableList.of(); } HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName()); JobConf conf = toJobConf(hdfsEnvironment.getConfiguration(hdfsContext, targetPath)); configureCompression(conf, getCompressionCodec(session)); String fileExtension = HiveWriterFactory.getFileExtension(conf, fromHiveStorageFormat(storageFormat)); Set fileNames = ImmutableSet.copyOf(partitionUpdate.getFileNames()); ImmutableList.Builder missingFileNamesBuilder = ImmutableList.builder(); for (int i = 0; i < bucketCount; i++) { String fileName; if (transactionalCreateTable) { fileName = computeBucketedFileName(Optional.empty(), i) + fileExtension; } else { fileName = computeBucketedFileName(Optional.of(session.getQueryId()), i) + fileExtension; } if (!fileNames.contains(fileName)) { missingFileNamesBuilder.add(fileName); } } List missingFileNames = missingFileNamesBuilder.build(); verify(fileNames.size() + missingFileNames.size() == bucketCount); return missingFileNames; } private void createEmptyFiles(ConnectorSession session, Path path, Table table, Optional partition, List fileNames) { JobConf conf = toJobConf(hdfsEnvironment.getConfiguration(new HdfsContext(session, table.getDatabaseName(), table.getTableName()), path)); configureCompression(conf, getCompressionCodec(session)); Properties schema; StorageFormat format; if (partition.isPresent()) { schema = getHiveSchema(partition.get(), table); format = partition.get().getStorage().getStorageFormat(); } else { schema = getHiveSchema(table); format = table.getStorage().getStorageFormat(); } hdfsEnvironment.doAs(session.getUser(), () -> { for (String fileName : fileNames) { writeEmptyFile(session, new Path(path, fileName), conf, schema, format.getSerDe(), format.getOutputFormat()); } }); } private static void writeEmptyFile(ConnectorSession session, Path target, JobConf conf, Properties properties, String serDe, String outputFormatName) { // Some serializers such as Avro set a property in the schema. initializeSerializer(conf, properties, serDe); // The code below is not a try with resources because RecordWriter is not Closeable. 
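/*
 * Illustrative sketch (standalone, not part of the upstream class; the file-name format below is a
 * hypothetical stand-in): the empty-bucket handling above relies on every bucket having a deterministic
 * file name, so the expected names for buckets 0..bucketCount-1 can be enumerated and diffed against the
 * files actually written. The same idea in isolation:
 *
 *   import java.util.ArrayList;
 *   import java.util.List;
 *   import java.util.Set;
 *
 *   static List<String> missingBucketFiles(Set<String> writtenFiles, int bucketCount, String extension)
 *   {
 *       List<String> missing = new ArrayList<>();
 *       for (int bucket = 0; bucket < bucketCount; bucket++) {
 *           // hypothetical stand-in for computeBucketedFileName(...) + fileExtension
 *           String expected = String.format("%05d_0%s", bucket, extension);
 *           if (!writtenFiles.contains(expected)) {
 *               missing.add(expected);
 *           }
 *       }
 *       return missing;
 *   }
 */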
        FileSinkOperator.RecordWriter recordWriter = HiveWriteUtils.createRecordWriter(target, conf, properties, outputFormatName, session);
        try {
            recordWriter.close(false);
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_WRITER_CLOSE_ERROR, "Error writing empty file to Hive", e);
        }
    }

    @Override
    public HiveInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle)
    {
        HiveIdentity identity = new HiveIdentity(session);
        SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
        Table table = metastore.getTable(identity, tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new TableNotFoundException(tableName));
        checkTableIsWritable(table, writesToNonManagedTablesEnabled);

        for (Column column : table.getDataColumns()) {
            if (!isWritableType(column.getType())) {
                throw new PrestoException(NOT_SUPPORTED, format("Inserting into Hive table %s with column type %s not supported", tableName, column.getType()));
            }
        }

        List<HiveColumnHandle> handles = hiveColumnHandles(table, typeManager, getTimestampPrecision(session).getPrecision()).stream()
                .filter(columnHandle -> !columnHandle.isHidden())
                .collect(toList());

        HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table);
        if (table.getParameters().containsKey(SKIP_HEADER_COUNT_KEY)) {
            throw new PrestoException(NOT_SUPPORTED, format("Inserting into Hive table with %s property not supported", SKIP_HEADER_COUNT_KEY));
        }
        if (table.getParameters().containsKey(SKIP_FOOTER_COUNT_KEY)) {
            throw new PrestoException(NOT_SUPPORTED, format("Inserting into Hive table with %s property not supported", SKIP_FOOTER_COUNT_KEY));
        }
        LocationHandle locationHandle = locationService.forExistingTable(metastore, session, table);

        HiveInsertTableHandle result = new HiveInsertTableHandle(
                tableName.getSchemaName(),
                tableName.getTableName(),
                handles,
                metastore.generatePageSinkMetadata(identity, tableName),
                locationHandle,
                table.getStorage().getBucketProperty(),
                tableStorageFormat,
                isRespectTableFormat(session) ?
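/*
 * Descriptive note (inferred from the surrounding code, not an upstream comment): the ternary continued
 * below keeps the table's existing storage format for newly written data when isRespectTableFormat(session)
 * is true, and otherwise falls back to the session's configured default Hive storage format.
 */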
tableStorageFormat : getHiveStorageFormat(session)); WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle); metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), tableName); return result; } @Override public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, Collection computedStatistics) { HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle; List partitionUpdates = fragments.stream() .map(Slice::getBytes) .map(partitionUpdateCodec::fromJson) .collect(toList()); HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat(); partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates); Table table = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName()) .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName())); if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && isRespectTableFormat(session)) { throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert"); } if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) { List partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, false, partitionUpdates); // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets)); for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) { Optional partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate)); createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames()); } } List partitionedBy = table.getPartitionColumns().stream() .map(Column::getName) .collect(toImmutableList()); Map columnTypes = handle.getInputColumns().stream() .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager))); Map, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes); for (PartitionUpdate partitionUpdate : partitionUpdates) { if (partitionUpdate.getName().isEmpty()) { // insert into unpartitioned table if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) { throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert"); } PartitionStatistics partitionStatistics = createPartitionStatistics( session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of())); if (partitionUpdate.getUpdateMode() == OVERWRITE) { // get privileges from existing table PrincipalPrivileges principalPrivileges = fromHivePrivilegeInfos(metastore.listTablePrivileges(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName(), Optional.empty())); // first drop it metastore.dropTable(session, handle.getSchemaName(), handle.getTableName()); // create the table with the new location metastore.createTable(session, table, principalPrivileges, Optional.of(partitionUpdate.getWritePath()), false, partitionStatistics); } else if 
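/*
 * Descriptive note (inferred from the surrounding code, not an upstream comment): for an unpartitioned
 * target, OVERWRITE is implemented above as "drop the table, then recreate it at the new write path while
 * preserving its privileges", whereas NEW and APPEND (handled just below) simply register the newly written
 * files against the existing table. Partitioned targets are handled further down: APPEND adds files to an
 * existing partition, NEW registers a new partition, and OVERWRITE drops the old partition before adding
 * the rewritten one.
 */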
(partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == APPEND) { // insert into unpartitioned table metastore.finishInsertIntoExistingTable( session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics); } else { throw new IllegalArgumentException("Unsupported update mode: " + partitionUpdate.getUpdateMode()); } } else if (partitionUpdate.getUpdateMode() == APPEND) { // insert into existing partition List partitionValues = toPartitionValues(partitionUpdate.getName()); PartitionStatistics partitionStatistics = createPartitionStatistics( session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues)); metastore.finishInsertIntoExistingPartition( session, handle.getSchemaName(), handle.getTableName(), partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics); } else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) { // insert into new partition or overwrite existing partition Partition partition = buildPartitionObject(session, table, partitionUpdate); if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) { throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert"); } if (partitionUpdate.getUpdateMode() == OVERWRITE) { metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), partition.getValues(), true); } PartitionStatistics partitionStatistics = createPartitionStatistics( session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues())); metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, partitionUpdate.getWritePath(), partitionStatistics); } else { throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode())); } } return Optional.of(new HiveWrittenPartitions( partitionUpdates.stream() .map(PartitionUpdate::getName) .collect(toImmutableList()))); } private Partition buildPartitionObject(ConnectorSession session, Table table, PartitionUpdate partitionUpdate) { return Partition.builder() .setDatabaseName(table.getDatabaseName()) .setTableName(table.getTableName()) .setColumns(table.getDataColumns()) .setValues(extractPartitionValues(partitionUpdate.getName())) .setParameters(ImmutableMap.builder() .put(PRESTO_VERSION_NAME, prestoVersion) .put(PRESTO_QUERY_ID_NAME, session.getQueryId()) .build()) .withStorage(storage -> storage .setStorageFormat(isRespectTableFormat(session) ? 
table.getStorage().getStorageFormat() : fromHiveStorageFormat(getHiveStorageFormat(session))) .setLocation(partitionUpdate.getTargetPath().toString()) .setBucketProperty(table.getStorage().getBucketProperty()) .setSerdeParameters(table.getStorage().getSerdeParameters())) .build(); } private PartitionStatistics createPartitionStatistics( ConnectorSession session, Map columnTypes, ComputedStatistics computedStatistics) { Map computedColumnStatistics = computedStatistics.getColumnStatistics(); Block rowCountBlock = Optional.ofNullable(computedStatistics.getTableStatistics().get(ROW_COUNT)) .orElseThrow(() -> new VerifyException("rowCount not present")); verify(!rowCountBlock.isNull(0), "rowCount must never be null"); long rowCount = BIGINT.getLong(rowCountBlock, 0); HiveBasicStatistics rowCountOnlyBasicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(rowCount), OptionalLong.empty(), OptionalLong.empty()); return createPartitionStatistics(session, rowCountOnlyBasicStatistics, columnTypes, computedColumnStatistics); } private PartitionStatistics createPartitionStatistics( ConnectorSession session, HiveBasicStatistics basicStatistics, Map columnTypes, Map computedColumnStatistics) { long rowCount = basicStatistics.getRowCount().orElseThrow(() -> new IllegalArgumentException("rowCount not present")); Map columnStatistics = fromComputedStatistics( session, computedColumnStatistics, columnTypes, rowCount); return new PartitionStatistics(basicStatistics, columnStatistics); } private static Map getColumnStatistics(Map, ComputedStatistics> statistics, List partitionValues) { return Optional.ofNullable(statistics.get(partitionValues)) .map(ComputedStatistics::getColumnStatistics) .orElse(ImmutableMap.of()); } @Override public void createView(ConnectorSession session, SchemaTableName viewName, ConnectorViewDefinition definition, boolean replace) { HiveIdentity identity = new HiveIdentity(session); Map properties = ImmutableMap.builder() .put(TABLE_COMMENT, "Presto View") .put(PRESTO_VIEW_FLAG, "true") .put(PRESTO_VERSION_NAME, prestoVersion) .put(PRESTO_QUERY_ID_NAME, session.getQueryId()) .build(); Column dummyColumn = new Column("dummy", HIVE_STRING, Optional.empty()); Table.Builder tableBuilder = Table.builder() .setDatabaseName(viewName.getSchemaName()) .setTableName(viewName.getTableName()) .setOwner(session.getUser()) .setTableType(TableType.VIRTUAL_VIEW.name()) .setDataColumns(ImmutableList.of(dummyColumn)) .setPartitionColumns(ImmutableList.of()) .setParameters(properties) .setViewOriginalText(Optional.of(encodeViewData(definition))) .setViewExpandedText(Optional.of("/* Presto View */")); tableBuilder.getStorageBuilder() .setStorageFormat(VIEW_STORAGE_FORMAT) .setLocation(""); Table table = tableBuilder.build(); PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(session.getUser()); Optional
existing = metastore.getTable(identity, viewName.getSchemaName(), viewName.getTableName()); if (existing.isPresent()) { if (!replace || !HiveUtil.isPrestoView(existing.get())) { throw new ViewAlreadyExistsException(viewName); } metastore.replaceTable(identity, viewName.getSchemaName(), viewName.getTableName(), table, principalPrivileges); return; } try { metastore.createTable(session, table, principalPrivileges, Optional.empty(), false, new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of())); } catch (TableAlreadyExistsException e) { throw new ViewAlreadyExistsException(e.getTableName()); } } @Override public void renameView(ConnectorSession session, SchemaTableName source, SchemaTableName target) { metastore.renameTable(new HiveIdentity(session), source.getSchemaName(), source.getTableName(), target.getSchemaName(), target.getTableName()); } @Override public void dropView(ConnectorSession session, SchemaTableName viewName) { if (getView(session, viewName).isEmpty()) { throw new ViewNotFoundException(viewName); } try { metastore.dropTable(session, viewName.getSchemaName(), viewName.getTableName()); } catch (TableNotFoundException e) { throw new ViewNotFoundException(e.getTableName()); } } @Override public List listViews(ConnectorSession session, Optional optionalSchemaName) { ImmutableList.Builder tableNames = ImmutableList.builder(); for (String schemaName : listSchemas(session, optionalSchemaName)) { for (String tableName : metastore.getAllViews(schemaName)) { tableNames.add(new SchemaTableName(schemaName, tableName)); } } return tableNames.build(); } @Override public Map getSchemaProperties(ConnectorSession session, CatalogSchemaName schemaName) { checkState(filterSchema(schemaName.getSchemaName()), "Schema is not accessible: %s", schemaName); Optional db = metastore.getDatabase(schemaName.getSchemaName()); if (db.isPresent()) { return HiveSchemaProperties.fromDatabase(db.get()); } throw new SchemaNotFoundException(schemaName.getSchemaName()); } @Override public Optional getSchemaOwner(ConnectorSession session, CatalogSchemaName schemaName) { checkState(filterSchema(schemaName.getSchemaName()), "Schema is not accessible: %s", schemaName); Optional database = metastore.getDatabase(schemaName.getSchemaName()); if (database.isPresent()) { return database.flatMap(db -> Optional.of(new PrestoPrincipal(db.getOwnerType(), db.getOwnerName()))); } throw new SchemaNotFoundException(schemaName.getSchemaName()); } @Override public Optional getView(ConnectorSession session, SchemaTableName viewName) { if (!filterSchema(viewName.getSchemaName())) { return Optional.empty(); } return metastore.getTable(new HiveIdentity(session), viewName.getSchemaName(), viewName.getTableName()) .flatMap(view -> { if (isPrestoView(view)) { ConnectorViewDefinition definition = decodeViewData(view.getViewOriginalText() .orElseThrow(() -> new PrestoException(HIVE_INVALID_METADATA, "No view original text: " + viewName))); // use owner from table metadata if it exists if (view.getOwner() != null && !definition.isRunAsInvoker()) { definition = new ConnectorViewDefinition( definition.getOriginalSql(), definition.getCatalog(), definition.getSchema(), definition.getColumns(), definition.getComment(), Optional.of(view.getOwner()), false); } return Optional.of(definition); } if (translateHiveViews && isHiveOrPrestoView(view)) { return Optional.of(buildHiveViewConnectorDefinition(catalogName, view)); } return Optional.empty(); }); } private boolean isHiveOrPrestoView(Table table) { return 
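/*
 * Illustrative sketch (not part of the upstream class; the "presto_view" key is an assumption about what
 * PRESTO_VIEW_FLAG resolves to): views are persisted as VIRTUAL_VIEW tables whose parameters carry a marker
 * flag, and the view definition is serialized into the view-original-text field via encodeViewData and read
 * back in getView via decodeViewData. A minimal marker check in that spirit:
 *
 *   static boolean looksLikePrestoView(java.util.Map<String, String> tableParameters)
 *   {
 *       // assumed marker key; the original code uses the PRESTO_VIEW_FLAG constant
 *       return "true".equalsIgnoreCase(tableParameters.get("presto_view"));
 *   }
 */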
table.getTableType().equals(TableType.VIRTUAL_VIEW.name());
    }

    private static boolean filterSchema(String schemaName)
    {
        if ("information_schema".equals(schemaName)) {
            // For things like listing columns in information_schema.columns table, we need to explicitly filter out Hive's own information_schema.
            // TODO https://github.com/prestosql/presto/issues/1559 this should be filtered out in engine.
            return false;
        }
        if ("sys".equals(schemaName)) {
            // Hive 3's `sys` schema contains no objects we can handle, so there is no point in exposing it.
            // Also, exposing it may require proper handling in access control.
            return false;
        }
        return true;
    }

    @Override
    public ConnectorTableHandle beginDelete(ConnectorSession session, ConnectorTableHandle tableHandle)
    {
        throw new PrestoException(NOT_SUPPORTED, "This connector only supports delete where one or more partitions are deleted entirely");
    }

    @Override
    public ColumnHandle getUpdateRowIdColumnHandle(ConnectorSession session, ConnectorTableHandle tableHandle)
    {
        return updateRowIdHandle();
    }

    @Override
    public Optional<ConnectorTableHandle> applyDelete(ConnectorSession session, ConnectorTableHandle handle)
    {
        return Optional.of(handle);
    }

    @Override
    public OptionalLong executeDelete(ConnectorSession session, ConnectorTableHandle deleteHandle)
    {
        HiveTableHandle handle = (HiveTableHandle) deleteHandle;

        Optional<Table>
table = metastore.getTable(new HiveIdentity(session), handle.getSchemaName(), handle.getTableName()); if (table.isEmpty()) { throw new TableNotFoundException(handle.getSchemaTableName()); } if (table.get().getPartitionColumns().isEmpty()) { metastore.truncateUnpartitionedTable(session, handle.getSchemaName(), handle.getTableName()); } else { for (HivePartition hivePartition : partitionManager.getOrLoadPartitions(metastore, new HiveIdentity(session), handle)) { metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), toPartitionValues(hivePartition.getPartitionId()), true); } } // it is too expensive to determine the exact number of deleted rows return OptionalLong.empty(); } @VisibleForTesting static Predicate> convertToPredicate(TupleDomain tupleDomain) { return bindings -> tupleDomain.contains(TupleDomain.fromFixedValues(bindings)); } @Override public boolean usesLegacyTableLayouts() { return false; } @Override public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) { HiveTableHandle hiveTable = (HiveTableHandle) table; List partitionColumns = ImmutableList.copyOf(hiveTable.getPartitionColumns()); List partitions = partitionManager.getOrLoadPartitions(metastore, new HiveIdentity(session), hiveTable); TupleDomain predicate = createPredicate(partitionColumns, partitions); Optional discretePredicates = Optional.empty(); if (!partitionColumns.isEmpty()) { // Do not create tuple domains for every partition at the same time! // There can be a huge number of partitions so use an iterable so // all domains do not need to be in memory at the same time. Iterable> partitionDomains = Iterables.transform(partitions, (hivePartition) -> TupleDomain.fromFixedValues(hivePartition.getKeys())); discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, partitionDomains)); } Optional tablePartitioning = Optional.empty(); if (isBucketExecutionEnabled(session) && hiveTable.getBucketHandle().isPresent()) { tablePartitioning = hiveTable.getBucketHandle().map(bucketing -> new ConnectorTablePartitioning( new HivePartitioningHandle( bucketing.getBucketingVersion(), bucketing.getReadBucketCount(), bucketing.getColumns().stream() .map(HiveColumnHandle::getHiveType) .collect(toImmutableList()), OptionalInt.empty()), bucketing.getColumns().stream() .map(ColumnHandle.class::cast) .collect(toImmutableList()))); } return new ConnectorTableProperties( predicate, tablePartitioning, Optional.empty(), discretePredicates, ImmutableList.of()); } @Override public Optional> applyFilter(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint constraint) { HiveTableHandle handle = (HiveTableHandle) tableHandle; checkArgument(handle.getAnalyzePartitionValues().isEmpty() || constraint.getSummary().isAll(), "Analyze should not have a constraint"); HivePartitionResult partitionResult = partitionManager.getPartitions(metastore, new HiveIdentity(session), handle, constraint); HiveTableHandle newHandle = partitionManager.applyPartitionResult(handle, partitionResult, constraint.getPredicateColumns()); if (handle.getPartitions().equals(newHandle.getPartitions()) && handle.getCompactEffectivePredicate().equals(newHandle.getCompactEffectivePredicate()) && handle.getBucketFilter().equals(newHandle.getBucketFilter()) && handle.getConstraintColumns().equals(newHandle.getConstraintColumns())) { return Optional.empty(); } return Optional.of(new ConstraintApplicationResult<>(newHandle, partitionResult.getUnenforcedConstraint())); } @Override 
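/*
 * Descriptive note on applyFilter above (the contract as understood here, not an upstream comment):
 * returning Optional.empty() when the recomputed handle is identical to the incoming one signals that no
 * additional predicate could be pushed down, which keeps the optimizer from re-applying the same constraint
 * indefinitely. That is why the comparison covers exactly the pushdown-relevant fields: partitions, the
 * compact effective predicate, the bucket filter, and the constraint columns.
 */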
public void validateScan(ConnectorSession session, ConnectorTableHandle tableHandle) { HiveTableHandle handle = (HiveTableHandle) tableHandle; if (HiveSessionProperties.isQueryPartitionFilterRequired(session) && handle.getAnalyzePartitionValues().isEmpty() && handle.getEnforcedConstraint().isAll()) { List partitionColumns = handle.getPartitionColumns(); if (!partitionColumns.isEmpty()) { Optional> referencedColumns = handle.getConstraintColumns(); if (referencedColumns.isEmpty() || Collections.disjoint(referencedColumns.get(), partitionColumns)) { String partitionColumnNames = partitionColumns.stream() .map(HiveColumnHandle::getName) .collect(Collectors.joining(",")); throw new PrestoException( StandardErrorCode.QUERY_REJECTED, String.format("Filter required on %s.%s for at least one partition column: %s ", handle.getSchemaName(), handle.getTableName(), partitionColumnNames)); } } } } @Override public Optional> applyProjection( ConnectorSession session, ConnectorTableHandle handle, List projections, Map assignments) { if (!isProjectionPushdownEnabled(session)) { return Optional.empty(); } // Create projected column representations for supported sub expressions. Simple column references and chain of // dereferences on a variable are supported right now. Set projectedExpressions = projections.stream() .flatMap(expression -> extractSupportedProjectedColumns(expression).stream()) .collect(toImmutableSet()); Map columnProjections = projectedExpressions.stream() .collect(toImmutableMap(Function.identity(), HiveApplyProjectionUtil::createProjectedColumnRepresentation)); // No pushdown required if all references are simple variables if (columnProjections.values().stream().allMatch(ProjectedColumnRepresentation::isVariable)) { return Optional.empty(); } Map newAssignments = new HashMap<>(); ImmutableMap.Builder expressionToVariableMappings = ImmutableMap.builder(); for (Map.Entry entry : columnProjections.entrySet()) { ConnectorExpression expression = entry.getKey(); ProjectedColumnRepresentation projectedColumn = entry.getValue(); ColumnHandle projectedColumnHandle; String projectedColumnName; // See if input already contains a columnhandle for this projected column, avoid creating duplicates. 
Optional existingColumn = find(assignments, projectedColumn); if (existingColumn.isPresent()) { projectedColumnName = existingColumn.get(); projectedColumnHandle = assignments.get(projectedColumnName); } else { // Create a new column handle HiveColumnHandle oldColumnHandle = (HiveColumnHandle) assignments.get(projectedColumn.getVariable().getName()); projectedColumnHandle = createProjectedColumnHandle(oldColumnHandle, projectedColumn.getDereferenceIndices()); projectedColumnName = ((HiveColumnHandle) projectedColumnHandle).getName(); } Variable projectedColumnVariable = new Variable(projectedColumnName, expression.getType()); Assignment newAssignment = new Assignment(projectedColumnName, projectedColumnHandle, expression.getType()); newAssignments.put(projectedColumnName, newAssignment); expressionToVariableMappings.put(expression, projectedColumnVariable); } // Modify projections to refer to new variables List newProjections = projections.stream() .map(expression -> replaceWithNewVariables(expression, expressionToVariableMappings.build())) .collect(toImmutableList()); List outputAssignments = newAssignments.values().stream().collect(toImmutableList()); return Optional.of(new ProjectionApplicationResult<>(handle, newProjections, outputAssignments)); } private HiveColumnHandle createProjectedColumnHandle(HiveColumnHandle column, List indices) { HiveType oldHiveType = column.getHiveType(); HiveType newHiveType = oldHiveType.getHiveTypeForDereferences(indices).get(); HiveColumnProjectionInfo columnProjectionInfo = new HiveColumnProjectionInfo( // Merge indices ImmutableList.builder() .addAll(column.getHiveColumnProjectionInfo() .map(HiveColumnProjectionInfo::getDereferenceIndices) .orElse(ImmutableList.of())) .addAll(indices) .build(), // Merge names ImmutableList.builder() .addAll(column.getHiveColumnProjectionInfo() .map(HiveColumnProjectionInfo::getDereferenceNames) .orElse(ImmutableList.of())) .addAll(oldHiveType.getHiveDereferenceNames(indices)) .build(), newHiveType, newHiveType.getType(typeManager)); return new HiveColumnHandle( column.getBaseColumnName(), column.getBaseHiveColumnIndex(), column.getBaseHiveType(), column.getBaseType(), Optional.of(columnProjectionInfo), column.getColumnType(), column.getComment()); } @Override public Optional getCommonPartitioningHandle(ConnectorSession session, ConnectorPartitioningHandle left, ConnectorPartitioningHandle right) { HivePartitioningHandle leftHandle = (HivePartitioningHandle) left; HivePartitioningHandle rightHandle = (HivePartitioningHandle) right; if (!leftHandle.getHiveTypes().equals(rightHandle.getHiveTypes())) { return Optional.empty(); } if (leftHandle.getBucketingVersion() != rightHandle.getBucketingVersion()) { return Optional.empty(); } if (leftHandle.getBucketCount() == rightHandle.getBucketCount()) { return Optional.of(leftHandle); } if (!isOptimizedMismatchedBucketCount(session)) { return Optional.empty(); } int largerBucketCount = Math.max(leftHandle.getBucketCount(), rightHandle.getBucketCount()); int smallerBucketCount = Math.min(leftHandle.getBucketCount(), rightHandle.getBucketCount()); if (largerBucketCount % smallerBucketCount != 0) { // must be evenly divisible return Optional.empty(); } if (Integer.bitCount(largerBucketCount / smallerBucketCount) != 1) { // ratio must be power of two return Optional.empty(); } OptionalInt maxCompatibleBucketCount = min(leftHandle.getMaxCompatibleBucketCount(), rightHandle.getMaxCompatibleBucketCount()); if (maxCompatibleBucketCount.isPresent() && maxCompatibleBucketCount.getAsInt() 
< smallerBucketCount) { // maxCompatibleBucketCount must be larger than or equal to smallerBucketCount // because the current code uses the smallerBucketCount as the common partitioning handle. return Optional.empty(); } return Optional.of(new HivePartitioningHandle( leftHandle.getBucketingVersion(), // same as rightHandle.getBucketingVersion() smallerBucketCount, leftHandle.getHiveTypes(), maxCompatibleBucketCount)); } private static OptionalInt min(OptionalInt left, OptionalInt right) { if (left.isEmpty()) { return right; } if (right.isEmpty()) { return left; } return OptionalInt.of(Math.min(left.getAsInt(), right.getAsInt())); } @Override public ConnectorTableHandle makeCompatiblePartitioning(ConnectorSession session, ConnectorTableHandle tableHandle, ConnectorPartitioningHandle partitioningHandle) { HiveTableHandle hiveTable = (HiveTableHandle) tableHandle; HivePartitioningHandle hivePartitioningHandle = (HivePartitioningHandle) partitioningHandle; checkArgument(hiveTable.getBucketHandle().isPresent(), "Hive connector only provides alternative layout for bucketed table"); HiveBucketHandle bucketHandle = hiveTable.getBucketHandle().get(); ImmutableList bucketTypes = bucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()); checkArgument( hivePartitioningHandle.getHiveTypes().equals(bucketTypes), "Types from the new PartitioningHandle (%s) does not match the TableHandle (%s)", hivePartitioningHandle.getHiveTypes(), bucketTypes); int largerBucketCount = Math.max(bucketHandle.getTableBucketCount(), hivePartitioningHandle.getBucketCount()); int smallerBucketCount = Math.min(bucketHandle.getTableBucketCount(), hivePartitioningHandle.getBucketCount()); checkArgument( largerBucketCount % smallerBucketCount == 0 && Integer.bitCount(largerBucketCount / smallerBucketCount) == 1, "The requested partitioning is not a valid alternative for the table layout"); return new HiveTableHandle( hiveTable.getSchemaName(), hiveTable.getTableName(), hiveTable.getTableParameters(), hiveTable.getPartitionColumns(), hiveTable.getPartitions(), hiveTable.getCompactEffectivePredicate(), hiveTable.getEnforcedConstraint(), Optional.of(new HiveBucketHandle( bucketHandle.getColumns(), bucketHandle.getBucketingVersion(), bucketHandle.getTableBucketCount(), hivePartitioningHandle.getBucketCount())), hiveTable.getBucketFilter(), hiveTable.getAnalyzePartitionValues(), hiveTable.getAnalyzeColumnNames(), Optional.empty()); } @VisibleForTesting static TupleDomain createPredicate(List partitionColumns, List partitions) { if (partitions.isEmpty()) { return TupleDomain.none(); } return withColumnDomains( partitionColumns.stream() .collect(toMap(identity(), column -> buildColumnDomain(column, partitions)))); } private static Domain buildColumnDomain(ColumnHandle column, List partitions) { checkArgument(!partitions.isEmpty(), "partitions cannot be empty"); boolean hasNull = false; boolean hasNaN = false; List nonNullValues = new ArrayList<>(); Type type = ((HiveColumnHandle) column).getType(); for (HivePartition partition : partitions) { NullableValue value = partition.getKeys().get(column); if (value == null) { throw new PrestoException(HIVE_UNKNOWN_ERROR, format("Partition %s does not have a value for partition column %s", partition, column)); } if (value.isNull()) { hasNull = true; } else { if (isFloatingPointNaN(type, value.getValue())) { hasNaN = true; } nonNullValues.add(value.getValue()); } } Domain domain; if (nonNullValues.isEmpty()) { domain = Domain.none(type); } else if (hasNaN) { 
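/*
 * Descriptive note (reasoning inferred from the surrounding code, not an upstream comment): when any
 * partition key holds a floating-point NaN, value-based domains are avoided because NaN does not obey the
 * usual ordering and equality rules, so the domain is widened to "not null" instead of enumerating the
 * observed values.
 */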
domain = Domain.notNull(type); } else { domain = Domain.multipleValues(type, nonNullValues); } if (hasNull) { domain = domain.union(Domain.onlyNull(type)); } return domain; } @Override public Optional getInsertLayout(ConnectorSession session, ConnectorTableHandle tableHandle) { HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle; SchemaTableName tableName = hiveTableHandle.getSchemaTableName(); Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()) .orElseThrow(() -> new TableNotFoundException(tableName)); if (table.getStorage().getBucketProperty().isPresent()) { if (bucketedOnTimestamp(table.getStorage().getBucketProperty().get(), table)) { throw new PrestoException(NOT_SUPPORTED, "Writing to tables bucketed on timestamp not supported"); } } Optional hiveBucketHandle = getHiveBucketHandle(table, typeManager); if (hiveBucketHandle.isEmpty()) { // return preferred layout which is partitioned by partition columns List partitionColumns = table.getPartitionColumns(); if (partitionColumns.isEmpty()) { return Optional.empty(); } return Optional.of(new ConnectorNewTableLayout( partitionColumns.stream() .map(Column::getName) .collect(toImmutableList()))); } HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty() .orElseThrow(() -> new NoSuchElementException("Bucket property should be set")); if (!bucketProperty.getSortedBy().isEmpty() && !isSortedWritingEnabled(session)) { throw new PrestoException(NOT_SUPPORTED, "Writing to bucketed sorted Hive tables is disabled"); } HivePartitioningHandle partitioningHandle = new HivePartitioningHandle( hiveBucketHandle.get().getBucketingVersion(), hiveBucketHandle.get().getTableBucketCount(), hiveBucketHandle.get().getColumns().stream() .map(HiveColumnHandle::getHiveType) .collect(toList()), OptionalInt.of(hiveBucketHandle.get().getTableBucketCount())); List partitionColumns = hiveBucketHandle.get().getColumns().stream() .map(HiveColumnHandle::getName) .collect(toList()); return Optional.of(new ConnectorNewTableLayout(partitioningHandle, partitionColumns)); } @Override public Optional getNewTableLayout(ConnectorSession session, ConnectorTableMetadata tableMetadata) { validateTimestampColumns(tableMetadata.getColumns()); validatePartitionColumns(tableMetadata); validateBucketColumns(tableMetadata); validateColumns(tableMetadata); Optional bucketProperty = getBucketProperty(tableMetadata.getProperties()); if (bucketProperty.isEmpty()) { // return preferred layout which is partitioned by partition columns List partitionedBy = getPartitionedBy(tableMetadata.getProperties()); if (partitionedBy.isEmpty()) { return Optional.empty(); } return Optional.of(new ConnectorNewTableLayout(partitionedBy)); } if (!bucketProperty.get().getSortedBy().isEmpty() && !isSortedWritingEnabled(session)) { throw new PrestoException(NOT_SUPPORTED, "Writing to bucketed sorted Hive tables is disabled"); } List bucketedBy = bucketProperty.get().getBucketedBy(); Map hiveTypeMap = tableMetadata.getColumns().stream() .collect(toMap(ColumnMetadata::getName, column -> toHiveType(column.getType()))); return Optional.of(new ConnectorNewTableLayout( new HivePartitioningHandle( bucketProperty.get().getBucketingVersion(), bucketProperty.get().getBucketCount(), bucketedBy.stream() .map(hiveTypeMap::get) .collect(toList()), OptionalInt.of(bucketProperty.get().getBucketCount())), bucketedBy)); } @Override public TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(ConnectorSession session, 
ConnectorTableMetadata tableMetadata)
    {
        if (!isCollectColumnStatisticsOnWrite(session)) {
            return TableStatisticsMetadata.empty();
        }
        if (isTransactional(tableMetadata.getProperties()).orElse(false)) {
            // TODO(https://github.com/prestosql/presto/issues/1956) updating table statistics for transactional tables is not supported right now.
            return TableStatisticsMetadata.empty();
        }
        List<String> partitionedBy = firstNonNull(getPartitionedBy(tableMetadata.getProperties()), ImmutableList.of());
        return getStatisticsCollectionMetadata(tableMetadata.getColumns(), partitionedBy, Optional.empty(), false);
    }

    @Override
    public TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata)
    {
        List<String> partitionedBy = firstNonNull(getPartitionedBy(tableMetadata.getProperties()), ImmutableList.of());
        return getStatisticsCollectionMetadata(tableMetadata.getColumns(), partitionedBy, getAnalyzeColumns(tableMetadata.getProperties()), true);
    }

    private TableStatisticsMetadata getStatisticsCollectionMetadata(List<ColumnMetadata> columns, List<String> partitionedBy, Optional<Set<String>> analyzeColumns, boolean includeRowCount)
    {
        validateTimestampColumns(columns);
        Set<ColumnStatisticMetadata> columnStatistics = columns.stream()
                .filter(column -> !partitionedBy.contains(column.getName()))
                .filter(column -> !column.isHidden())
                .filter(column -> analyzeColumns.isEmpty() || analyzeColumns.get().contains(column.getName()))
                .map(this::getColumnStatisticMetadata)
                .flatMap(List::stream)
                .collect(toImmutableSet());

        Set<TableStatisticType> tableStatistics = includeRowCount ? ImmutableSet.of(ROW_COUNT) : ImmutableSet.of();
        return new TableStatisticsMetadata(columnStatistics, tableStatistics, partitionedBy);
    }

    private List<ColumnStatisticMetadata> getColumnStatisticMetadata(ColumnMetadata columnMetadata)
    {
        return getColumnStatisticMetadata(columnMetadata.getName(), metastore.getSupportedColumnStatistics(columnMetadata.getType()));
    }

    private List<ColumnStatisticMetadata> getColumnStatisticMetadata(String columnName, Set<ColumnStatisticType> statisticTypes)
    {
        return statisticTypes.stream()
                .map(type -> new ColumnStatisticMetadata(columnName, type))
                .collect(toImmutableList());
    }

    @Override
    public void createRole(ConnectorSession session, String role, Optional<PrestoPrincipal> grantor)
    {
        accessControlMetadata.createRole(session, role, grantor.map(HivePrincipal::from));
    }

    @Override
    public void dropRole(ConnectorSession session, String role)
    {
        accessControlMetadata.dropRole(session, role);
    }

    @Override
    public Set<String> listRoles(ConnectorSession session)
    {
        return accessControlMetadata.listRoles(session);
    }

    @Override
    public Set<RoleGrant> listAllRoleGrants(ConnectorSession session, Optional<Set<String>> roles, Optional<Set<String>> grantees, OptionalLong limit)
    {
        return ImmutableSet.copyOf(accessControlMetadata.listAllRoleGrants(session, roles, grantees, limit));
    }

    @Override
    public Set<RoleGrant> listRoleGrants(ConnectorSession session, PrestoPrincipal principal)
    {
        return ImmutableSet.copyOf(accessControlMetadata.listRoleGrants(session, HivePrincipal.from(principal)));
    }

    @Override
    public void grantRoles(ConnectorSession session, Set<String> roles, Set<PrestoPrincipal> grantees, boolean adminOption, Optional<PrestoPrincipal> grantor)
    {
        accessControlMetadata.grantRoles(session, roles, HivePrincipal.from(grantees), adminOption, grantor.map(HivePrincipal::from));
    }

    @Override
    public void revokeRoles(ConnectorSession session, Set<String> roles, Set<PrestoPrincipal> grantees, boolean adminOption, Optional<PrestoPrincipal> grantor)
    {
        accessControlMetadata.revokeRoles(session, roles, HivePrincipal.from(grantees), adminOption, grantor.map(HivePrincipal::from));
    }

    @Override
    public Set<RoleGrant> listApplicableRoles(ConnectorSession session, PrestoPrincipal principal)
    {
        return accessControlMetadata.listApplicableRoles(session,
HivePrincipal.from(principal)); } @Override public Set listEnabledRoles(ConnectorSession session) { return accessControlMetadata.listEnabledRoles(session); } @Override public void grantTablePrivileges(ConnectorSession session, SchemaTableName schemaTableName, Set privileges, PrestoPrincipal grantee, boolean grantOption) { accessControlMetadata.grantTablePrivileges(session, schemaTableName, privileges, HivePrincipal.from(grantee), grantOption); } @Override public void revokeTablePrivileges(ConnectorSession session, SchemaTableName schemaTableName, Set privileges, PrestoPrincipal grantee, boolean grantOption) { accessControlMetadata.revokeTablePrivileges(session, schemaTableName, privileges, HivePrincipal.from(grantee), grantOption); } @Override public List listTablePrivileges(ConnectorSession session, SchemaTablePrefix schemaTablePrefix) { return accessControlMetadata.listTablePrivileges(session, listTables(session, schemaTablePrefix)); } private static HiveStorageFormat extractHiveStorageFormat(Table table) { StorageFormat storageFormat = table.getStorage().getStorageFormat(); String outputFormat = storageFormat.getOutputFormat(); String serde = storageFormat.getSerDe(); for (HiveStorageFormat format : HiveStorageFormat.values()) { if (format.getOutputFormat().equals(outputFormat) && format.getSerDe().equals(serde)) { return format; } } throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, format("Output format %s with SerDe %s is not supported", outputFormat, serde)); } private static void validateBucketColumns(ConnectorTableMetadata tableMetadata) { Optional bucketProperty = getBucketProperty(tableMetadata.getProperties()); if (bucketProperty.isEmpty()) { return; } Set allColumns = tableMetadata.getColumns().stream() .map(ColumnMetadata::getName) .collect(toSet()); List bucketedBy = bucketProperty.get().getBucketedBy(); if (!allColumns.containsAll(bucketedBy)) { throw new PrestoException(INVALID_TABLE_PROPERTY, format("Bucketing columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(bucketedBy), ImmutableSet.copyOf(allColumns)))); } List sortedBy = bucketProperty.get().getSortedBy().stream() .map(SortingColumn::getColumnName) .collect(toImmutableList()); if (!allColumns.containsAll(sortedBy)) { throw new PrestoException(INVALID_TABLE_PROPERTY, format("Sorting columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(sortedBy), ImmutableSet.copyOf(allColumns)))); } } private static boolean isDeltaLakeTable(Table table) { return table.getParameters().containsKey(SPARK_TABLE_PROVIDER_KEY) && table.getParameters().get(SPARK_TABLE_PROVIDER_KEY).toLowerCase(ENGLISH).equals(DELTA_LAKE_PROVIDER); } private static void validatePartitionColumns(ConnectorTableMetadata tableMetadata) { List partitionedBy = getPartitionedBy(tableMetadata.getProperties()); List allColumns = tableMetadata.getColumns().stream() .map(ColumnMetadata::getName) .collect(toList()); if (!allColumns.containsAll(partitionedBy)) { throw new PrestoException(INVALID_TABLE_PROPERTY, format("Partition columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(partitionedBy), ImmutableSet.copyOf(allColumns)))); } if (allColumns.size() == partitionedBy.size()) { throw new PrestoException(INVALID_TABLE_PROPERTY, "Table contains only partition columns"); } if (!allColumns.subList(allColumns.size() - partitionedBy.size(), allColumns.size()).equals(partitionedBy)) { throw new PrestoException(HIVE_COLUMN_ORDER_MISMATCH, "Partition keys must be the last columns in the table and in the same order as the 
table properties: " + partitionedBy); } } private static List getColumnHandles(ConnectorTableMetadata tableMetadata, Set partitionColumnNames) { validatePartitionColumns(tableMetadata); validateBucketColumns(tableMetadata); validateColumns(tableMetadata); ImmutableList.Builder columnHandles = ImmutableList.builder(); int ordinal = 0; for (ColumnMetadata column : tableMetadata.getColumns()) { HiveColumnHandle.ColumnType columnType; if (partitionColumnNames.contains(column.getName())) { columnType = PARTITION_KEY; } else if (column.isHidden()) { columnType = SYNTHESIZED; } else { columnType = REGULAR; } columnHandles.add(createBaseColumn( column.getName(), ordinal, toHiveType(column.getType()), column.getType(), columnType, Optional.ofNullable(column.getComment()))); ordinal++; } return columnHandles.build(); } private static void validateColumns(ConnectorTableMetadata tableMetadata) { // Validate types are supported for (ColumnMetadata column : tableMetadata.getColumns()) { toHiveType(column.getType()); } if (getHiveStorageFormat(tableMetadata.getProperties()) != HiveStorageFormat.CSV) { return; } Set partitionedBy = ImmutableSet.copyOf(getPartitionedBy(tableMetadata.getProperties())); List unsupportedColumns = tableMetadata.getColumns().stream() .filter(columnMetadata -> !partitionedBy.contains(columnMetadata.getName())) .filter(columnMetadata -> !columnMetadata.getType().equals(createUnboundedVarcharType())) .collect(toImmutableList()); if (!unsupportedColumns.isEmpty()) { String joinedUnsupportedColumns = unsupportedColumns.stream() .map(columnMetadata -> format("%s %s", columnMetadata.getName(), columnMetadata.getType())) .collect(joining(", ")); throw new PrestoException(NOT_SUPPORTED, "Hive CSV storage format only supports VARCHAR (unbounded). 
Unsupported columns: " + joinedUnsupportedColumns); } } // temporary, until variable precision timestamps are supported on write private static void validateTimestampColumns(List columns) { for (ColumnMetadata column : columns) { Type type = column.getType(); if (type instanceof TimestampType) { if (type != TIMESTAMP_MILLIS) { throw new PrestoException(NOT_SUPPORTED, "CREATE TABLE, INSERT and ANALYZE are not supported with requested timestamp precision: " + type); } } } } private static Function columnMetadataGetter(Table table) { ImmutableList.Builder columnNames = ImmutableList.builder(); table.getPartitionColumns().stream().map(Column::getName).forEach(columnNames::add); table.getDataColumns().stream().map(Column::getName).forEach(columnNames::add); List allColumnNames = columnNames.build(); if (allColumnNames.size() > Sets.newHashSet(allColumnNames).size()) { throw new PrestoException(HIVE_INVALID_METADATA, format("Hive metadata for table %s is invalid: Table descriptor contains duplicate columns", table.getTableName())); } List tableColumns = table.getDataColumns(); ImmutableMap.Builder> builder = ImmutableMap.builder(); for (Column field : concat(tableColumns, table.getPartitionColumns())) { if (field.getComment().isPresent() && !field.getComment().get().equals("from deserializer")) { builder.put(field.getName(), field.getComment()); } else { builder.put(field.getName(), Optional.empty()); } } // add hidden columns builder.put(PATH_COLUMN_NAME, Optional.empty()); if (table.getStorage().getBucketProperty().isPresent()) { builder.put(BUCKET_COLUMN_NAME, Optional.empty()); } builder.put(FILE_SIZE_COLUMN_NAME, Optional.empty()); builder.put(FILE_MODIFIED_TIME_COLUMN_NAME, Optional.empty()); if (!table.getPartitionColumns().isEmpty()) { builder.put(PARTITION_COLUMN_NAME, Optional.empty()); } Map> columnComment = builder.build(); return handle -> ColumnMetadata.builder() .setName(handle.getName()) .setType(handle.getType()) .setComment(columnComment.get(handle.getName())) .setExtraInfo(Optional.ofNullable(columnExtraInfo(handle.isPartitionKey()))) .setHidden(handle.isHidden()) .build(); } @Override public void rollback() { metastore.rollback(); } @Override public void commit() { metastore.commit(); } @Override public void beginQuery(ConnectorSession session) { metastore.beginQuery(session); } @Override public void cleanupQuery(ConnectorSession session) { metastore.cleanupQuery(session); } public static Optional getSourceTableNameFromSystemTable(SchemaTableName tableName) { return Stream.of(SystemTableHandler.values()) .filter(handler -> handler.matches(tableName)) .map(handler -> handler.getSourceTableName(tableName)) .findAny(); } private enum SystemTableHandler { PARTITIONS, PROPERTIES; private final String suffix; SystemTableHandler() { this.suffix = "$" + name().toLowerCase(ENGLISH); } boolean matches(SchemaTableName table) { return table.getTableName().endsWith(suffix) && (table.getTableName().length() > suffix.length()); } SchemaTableName getSourceTableName(SchemaTableName table) { return new SchemaTableName( table.getSchemaName(), table.getTableName().substring(0, table.getTableName().length() - suffix.length())); } } @SafeVarargs private static Optional firstNonNullable(T... values) { for (T value : values) { if (value != null) { return Optional.of(value); } } return Optional.empty(); } }
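/*
 * Illustrative sketch (appended for clarity; not part of the generated listing): SystemTableHandler maps
 * names such as "orders$partitions" back to the source table by stripping the "$partitions" or
 * "$properties" suffix. The same idea in isolation:
 *
 *   static java.util.Optional<String> sourceTableName(String tableName, String suffix)
 *   {
 *       // suffix is expected to include the '$', e.g. "$partitions"
 *       if (tableName.endsWith(suffix) && tableName.length() > suffix.length()) {
 *           return java.util.Optional.of(tableName.substring(0, tableName.length() - suffix.length()));
 *       }
 *       return java.util.Optional.empty();
 *   }
 *
 *   // sourceTableName("orders$partitions", "$partitions") -> Optional[orders]
 */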