io.prestosql.plugin.hive.metastore.glue.GlueHiveMetastore

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.hive.metastore.glue;

import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
import com.amazonaws.handlers.RequestHandler2;
import com.amazonaws.services.glue.AWSGlueAsync;
import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder;
import com.amazonaws.services.glue.model.AlreadyExistsException;
import com.amazonaws.services.glue.model.BatchCreatePartitionRequest;
import com.amazonaws.services.glue.model.BatchCreatePartitionResult;
import com.amazonaws.services.glue.model.BatchGetPartitionRequest;
import com.amazonaws.services.glue.model.BatchGetPartitionResult;
import com.amazonaws.services.glue.model.CreateDatabaseRequest;
import com.amazonaws.services.glue.model.CreateTableRequest;
import com.amazonaws.services.glue.model.DatabaseInput;
import com.amazonaws.services.glue.model.DeleteDatabaseRequest;
import com.amazonaws.services.glue.model.DeletePartitionRequest;
import com.amazonaws.services.glue.model.DeleteTableRequest;
import com.amazonaws.services.glue.model.EntityNotFoundException;
import com.amazonaws.services.glue.model.ErrorDetail;
import com.amazonaws.services.glue.model.GetDatabaseRequest;
import com.amazonaws.services.glue.model.GetDatabaseResult;
import com.amazonaws.services.glue.model.GetDatabasesRequest;
import com.amazonaws.services.glue.model.GetDatabasesResult;
import com.amazonaws.services.glue.model.GetPartitionRequest;
import com.amazonaws.services.glue.model.GetPartitionResult;
import com.amazonaws.services.glue.model.GetPartitionsRequest;
import com.amazonaws.services.glue.model.GetPartitionsResult;
import com.amazonaws.services.glue.model.GetTableRequest;
import com.amazonaws.services.glue.model.GetTableResult;
import com.amazonaws.services.glue.model.GetTablesRequest;
import com.amazonaws.services.glue.model.GetTablesResult;
import com.amazonaws.services.glue.model.PartitionError;
import com.amazonaws.services.glue.model.PartitionInput;
import com.amazonaws.services.glue.model.PartitionValueList;
import com.amazonaws.services.glue.model.Segment;
import com.amazonaws.services.glue.model.TableInput;
import com.amazonaws.services.glue.model.UpdateDatabaseRequest;
import com.amazonaws.services.glue.model.UpdatePartitionRequest;
import com.amazonaws.services.glue.model.UpdateTableRequest;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import io.airlift.log.Logger;
import io.prestosql.plugin.hive.HdfsEnvironment;
import io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext;
import io.prestosql.plugin.hive.HiveType;
import io.prestosql.plugin.hive.PartitionNotFoundException;
import io.prestosql.plugin.hive.PartitionStatistics;
import io.prestosql.plugin.hive.SchemaAlreadyExistsException;
import io.prestosql.plugin.hive.TableAlreadyExistsException;
import io.prestosql.plugin.hive.authentication.HiveIdentity;
import io.prestosql.plugin.hive.metastore.Column;
import io.prestosql.plugin.hive.metastore.Database;
import io.prestosql.plugin.hive.metastore.HiveMetastore;
import io.prestosql.plugin.hive.metastore.HivePrincipal;
import io.prestosql.plugin.hive.metastore.HivePrivilegeInfo;
import io.prestosql.plugin.hive.metastore.MetastoreUtil;
import io.prestosql.plugin.hive.metastore.Partition;
import io.prestosql.plugin.hive.metastore.PartitionWithStatistics;
import io.prestosql.plugin.hive.metastore.PrincipalPrivileges;
import io.prestosql.plugin.hive.metastore.Table;
import io.prestosql.plugin.hive.metastore.glue.converter.GlueInputConverter;
import io.prestosql.plugin.hive.metastore.glue.converter.GlueToPrestoConverter;
import io.prestosql.plugin.hive.util.HiveUtil;
import io.prestosql.plugin.hive.util.HiveWriteUtils;
import io.prestosql.spi.PrestoException;
import io.prestosql.spi.connector.ColumnNotFoundException;
import io.prestosql.spi.connector.SchemaNotFoundException;
import io.prestosql.spi.connector.SchemaTableName;
import io.prestosql.spi.connector.TableNotFoundException;
import io.prestosql.spi.predicate.TupleDomain;
import io.prestosql.spi.security.ConnectorIdentity;
import io.prestosql.spi.security.RoleGrant;
import io.prestosql.spi.statistics.ColumnStatisticType;
import io.prestosql.spi.type.Type;
import org.apache.hadoop.fs.Path;
import org.weakref.jmx.Flatten;
import org.weakref.jmx.Managed;

import javax.annotation.Nullable;
import javax.inject.Inject;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
import java.util.function.Function;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.collect.Comparators.lexicographical;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR;
import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY;
import static io.prestosql.plugin.hive.aws.AwsCurrentRegionHolder.getCurrentRegionFromEC2Metadata;
import static io.prestosql.plugin.hive.metastore.MetastoreUtil.makePartitionName;
import static io.prestosql.plugin.hive.metastore.MetastoreUtil.verifyCanDropColumn;
import static io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics;
import static io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters;
import static io.prestosql.plugin.hive.util.HiveUtil.toPartitionValues;
import static io.prestosql.spi.StandardErrorCode.ALREADY_EXISTS;
import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED;
import static io.prestosql.spi.security.PrincipalType.USER;
import static java.lang.String.format;
import static java.util.Comparator.comparing;
import static java.util.Objects.requireNonNull;
import static java.util.function.UnaryOperator.identity;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE;
import static org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW;

public class GlueHiveMetastore
        implements HiveMetastore
{
    private static final Logger log = Logger.get(GlueHiveMetastore.class);

    private static final String PUBLIC_ROLE_NAME = "public";
    private static final String DEFAULT_METASTORE_USER = "presto";
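    // Page sizes used when batching requests to the corresponding Glue APIs
    // (BatchGetPartition, BatchCreatePartition, and GetPartitions)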
    private static final int BATCH_GET_PARTITION_MAX_PAGE_SIZE = 1000;
    private static final int BATCH_CREATE_PARTITION_MAX_PAGE_SIZE = 100;
    private static final int AWS_GLUE_GET_PARTITIONS_MAX_RESULTS = 1000;
    private static final Comparator<Partition> PARTITION_COMPARATOR =
            comparing(Partition::getValues, lexicographical(String.CASE_INSENSITIVE_ORDER));

    private final HdfsEnvironment hdfsEnvironment;
    private final HdfsContext hdfsContext;
    private final AWSGlueAsync glueClient;
    private final Optional<String> defaultDir;
    private final String catalogId;
    private final int partitionSegments;
    private final Executor executor;
    private final GlueMetastoreStats stats = new GlueMetastoreStats();
    private final GlueColumnStatisticsProvider columnStatisticsProvider;
    private final boolean assumeCanonicalPartitionKeys;

    @Inject
    public GlueHiveMetastore(
            HdfsEnvironment hdfsEnvironment,
            GlueHiveMetastoreConfig glueConfig,
            GlueColumnStatisticsProvider columnStatisticsProvider,
            @ForGlueHiveMetastore Executor executor,
            @ForGlueHiveMetastore Optional<RequestHandler2> requestHandler)
    {
        requireNonNull(glueConfig, "glueConfig is null");
        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
        this.hdfsContext = new HdfsContext(ConnectorIdentity.ofUser(DEFAULT_METASTORE_USER));
        this.glueClient = createAsyncGlueClient(glueConfig, requestHandler);
        this.defaultDir = glueConfig.getDefaultWarehouseDir();
        this.catalogId = glueConfig.getCatalogId().orElse(null);
        this.partitionSegments = glueConfig.getPartitionSegments();
        this.executor = requireNonNull(executor, "executor is null");
        this.columnStatisticsProvider = requireNonNull(columnStatisticsProvider, "columnStatisticsProvider is null");
        this.assumeCanonicalPartitionKeys = glueConfig.isAssumeCanonicalPartitionKeys();
    }

    private static AWSGlueAsync createAsyncGlueClient(GlueHiveMetastoreConfig config, Optional<RequestHandler2> requestHandler)
    {
        ClientConfiguration clientConfig = new ClientConfiguration()
                .withMaxConnections(config.getMaxGlueConnections())
                .withMaxErrorRetry(config.getMaxGlueErrorRetries());
        AWSGlueAsyncClientBuilder asyncGlueClientBuilder = AWSGlueAsyncClientBuilder.standard()
                .withClientConfiguration(clientConfig);

        requestHandler.ifPresent(asyncGlueClientBuilder::setRequestHandlers);

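        // Endpoint/region resolution order: an explicit endpoint (which requires a region),
        // then an explicit region, then the current EC2 region when pinning is enabled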
        if (config.getGlueEndpointUrl().isPresent()) {
            checkArgument(config.getGlueRegion().isPresent(), "Glue region must be set when Glue endpoint URL is set");
            asyncGlueClientBuilder.setEndpointConfiguration(new EndpointConfiguration(
                    config.getGlueEndpointUrl().get(),
                    config.getGlueRegion().get()));
        }
        else if (config.getGlueRegion().isPresent()) {
            asyncGlueClientBuilder.setRegion(config.getGlueRegion().get());
        }
        else if (config.getPinGlueClientToCurrentRegion()) {
            asyncGlueClientBuilder.setRegion(getCurrentRegionFromEC2Metadata().getName());
        }

        asyncGlueClientBuilder.setCredentials(getAwsCredentialsProvider(config));

        return asyncGlueClientBuilder.build();
    }

    private static AWSCredentialsProvider getAwsCredentialsProvider(GlueHiveMetastoreConfig config)
    {
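        // Credential resolution order: static access/secret keys, then an IAM role assumed via STS,
        // then a custom provider class, falling back to the default AWS credentials provider chain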
        if (config.getAwsAccessKey().isPresent() && config.getAwsSecretKey().isPresent()) {
            return new AWSStaticCredentialsProvider(
                    new BasicAWSCredentials(config.getAwsAccessKey().get(), config.getAwsSecretKey().get()));
        }
        if (config.getIamRole().isPresent()) {
            return new STSAssumeRoleSessionCredentialsProvider
                    .Builder(config.getIamRole().get(), "presto-session")
                    .withExternalId(config.getExternalId().orElse(null))
                    .build();
        }
        if (config.getAwsCredentialsProvider().isPresent()) {
            return getCustomAWSCredentialsProvider(config.getAwsCredentialsProvider().get());
        }
        return DefaultAWSCredentialsProviderChain.getInstance();
    }

    private static AWSCredentialsProvider getCustomAWSCredentialsProvider(String providerClass)
    {
        try {
            Object instance = Class.forName(providerClass).getConstructor().newInstance();
            if (!(instance instanceof AWSCredentialsProvider)) {
                throw new RuntimeException("Invalid credentials provider class: " + instance.getClass().getName());
            }
            return (AWSCredentialsProvider) instance;
        }
        catch (ReflectiveOperationException e) {
            throw new RuntimeException(format("Error creating an instance of %s", providerClass), e);
        }
    }

    @Managed
    @Flatten
    public GlueMetastoreStats getStats()
    {
        return stats;
    }

    @Override
    public Optional<Database> getDatabase(String databaseName)
    {
        try {
            GetDatabaseResult result = stats.getGetDatabase().call(() ->
                    glueClient.getDatabase(new GetDatabaseRequest().withCatalogId(catalogId).withName(databaseName)));
            return Optional.of(GlueToPrestoConverter.convertDatabase(result.getDatabase()));
        }
        catch (EntityNotFoundException e) {
            return Optional.empty();
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public List<String> getAllDatabases()
    {
        try {
            return stats.getGetAllDatabases().call(() -> {
                List<String> databaseNames = new ArrayList<>();
                String nextToken = null;

                do {
                    GetDatabasesResult result = glueClient.getDatabases(new GetDatabasesRequest().withCatalogId(catalogId).withNextToken(nextToken));
                    nextToken = result.getNextToken();
                    result.getDatabaseList().forEach(database -> databaseNames.add(database.getName()));
                }
                while (nextToken != null);

                return databaseNames;
            });
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public Optional<Table> getTable(HiveIdentity identity, String databaseName, String tableName)
    {
        try {
            GetTableResult result = stats.getGetTable().call(() ->
                    glueClient.getTable(new GetTableRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(databaseName)
                            .withName(tableName)));
            return Optional.of(GlueToPrestoConverter.convertTable(result.getTable(), databaseName));
        }
        catch (EntityNotFoundException e) {
            return Optional.empty();
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public Set<ColumnStatisticType> getSupportedColumnStatistics(Type type)
    {
        return columnStatisticsProvider.getSupportedColumnStatistics(type);
    }

    private Table getExistingTable(HiveIdentity identity, String databaseName, String tableName)
    {
        return getTable(identity, databaseName, tableName)
                .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    }

    @Override
    public PartitionStatistics getTableStatistics(HiveIdentity identity, Table table)
    {
        return new PartitionStatistics(getHiveBasicStatistics(table.getParameters()), columnStatisticsProvider.getTableColumnStatistics(table));
    }

    @Override
    public Map<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, Table table, List<Partition> partitions)
    {
        return partitions.stream().collect(toImmutableMap(partition -> makePartitionName(table, partition), this::getPartitionStatistics));
    }

    private PartitionStatistics getPartitionStatistics(Partition partition)
    {
        return new PartitionStatistics(getHiveBasicStatistics(partition.getParameters()), columnStatisticsProvider.getPartitionColumnStatistics(partition));
    }

    @Override
    public void updateTableStatistics(HiveIdentity identity, String databaseName, String tableName, Function<PartitionStatistics, PartitionStatistics> update)
    {
        Table table = getExistingTable(identity, databaseName, tableName);
        PartitionStatistics currentStatistics = getTableStatistics(identity, table);
        PartitionStatistics updatedStatistics = update.apply(currentStatistics);

        try {
            TableInput tableInput = GlueInputConverter.convertTable(table);
            tableInput.setParameters(updateStatisticsParameters(table.getParameters(), updatedStatistics.getBasicStatistics()));
            columnStatisticsProvider.updateTableColumnStatistics(tableInput, updatedStatistics.getColumnStatistics());
            glueClient.updateTable(new UpdateTableRequest()
                    .withCatalogId(catalogId)
                    .withDatabaseName(databaseName)
                    .withTableInput(tableInput));
        }
        catch (EntityNotFoundException e) {
            throw new TableNotFoundException(new SchemaTableName(databaseName, tableName));
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public void updatePartitionStatistics(HiveIdentity identity, Table table, String partitionName, Function<PartitionStatistics, PartitionStatistics> update)
    {
        List<String> partitionValues = toPartitionValues(partitionName);
        Partition partition = getPartition(identity, table, partitionValues)
                .orElseThrow(() -> new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Statistics result does not contain entry for partition: " + partitionName));

        PartitionStatistics currentStatistics = getPartitionStatistics(partition);
        PartitionStatistics updatedStatistics = update.apply(currentStatistics);

        try {
            PartitionInput partitionInput = GlueInputConverter.convertPartition(partition);
            partitionInput.setParameters(updateStatisticsParameters(partition.getParameters(), updatedStatistics.getBasicStatistics()));
            columnStatisticsProvider.updatePartitionStatistics(partitionInput, updatedStatistics.getColumnStatistics());
            glueClient.updatePartition(new UpdatePartitionRequest()
                    .withCatalogId(catalogId)
                    .withDatabaseName(table.getDatabaseName())
                    .withTableName(table.getTableName())
                    .withPartitionValueList(partition.getValues())
                    .withPartitionInput(partitionInput));
        }
        catch (EntityNotFoundException e) {
            throw new PartitionNotFoundException(new SchemaTableName(table.getDatabaseName(), table.getTableName()), partitionValues);
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public List<String> getAllTables(String databaseName)
    {
        try {
            return stats.getGetAllTables().call(() -> {
                List<String> tableNames = new ArrayList<>();
                String nextToken = null;

                do {
                    GetTablesResult result = glueClient.getTables(new GetTablesRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(databaseName)
                            .withNextToken(nextToken));
                    result.getTableList().forEach(table -> tableNames.add(table.getName()));
                    nextToken = result.getNextToken();
                }
                while (nextToken != null);

                return tableNames;
            });
        }
        catch (EntityNotFoundException e) {
            // database does not exist
            return ImmutableList.of();
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public synchronized List<String> getTablesWithParameter(String databaseName, String parameterKey, String parameterValue)
    {
        // TODO
        throw new UnsupportedOperationException("getTablesWithParameter for GlueHiveMetastore is not implemented");
    }

    @Override
    public List<String> getAllViews(String databaseName)
    {
        try {
            return stats.getGetAllViews().call(() -> {
                List<String> views = new ArrayList<>();
                String nextToken = null;

                do {
                    GetTablesResult result = glueClient.getTables(new GetTablesRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(databaseName)
                            .withNextToken(nextToken));
                    result.getTableList().stream()
                            .filter(table -> VIRTUAL_VIEW.name().equals(table.getTableType()))
                            .forEach(table -> views.add(table.getName()));
                    nextToken = result.getNextToken();
                }
                while (nextToken != null);

                return views;
            });
        }
        catch (EntityNotFoundException e) {
            // database does not exist
            return ImmutableList.of();
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public void createDatabase(HiveIdentity identity, Database database)
    {
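        // If the database does not specify a location, place it under the configured default warehouse directory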
        if (database.getLocation().isEmpty() && defaultDir.isPresent()) {
            String databaseLocation = new Path(defaultDir.get(), database.getDatabaseName()).toString();
            database = Database.builder(database)
                    .setLocation(Optional.of(databaseLocation))
                    .build();
        }

        try {
            DatabaseInput databaseInput = GlueInputConverter.convertDatabase(database);
            stats.getCreateDatabase().call(() ->
                    glueClient.createDatabase(new CreateDatabaseRequest().withCatalogId(catalogId).withDatabaseInput(databaseInput)));
        }
        catch (AlreadyExistsException e) {
            throw new SchemaAlreadyExistsException(database.getDatabaseName());
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }

        if (database.getLocation().isPresent()) {
            HiveWriteUtils.createDirectory(hdfsContext, hdfsEnvironment, new Path(database.getLocation().get()));
        }
    }

    @Override
    public void dropDatabase(HiveIdentity identity, String databaseName)
    {
        try {
            stats.getDropDatabase().call(() ->
                    glueClient.deleteDatabase(new DeleteDatabaseRequest().withCatalogId(catalogId).withName(databaseName)));
        }
        catch (EntityNotFoundException e) {
            throw new SchemaNotFoundException(databaseName);
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public void renameDatabase(HiveIdentity identity, String databaseName, String newDatabaseName)
    {
        try {
            Database database = getDatabase(databaseName).orElseThrow(() -> new SchemaNotFoundException(databaseName));
            DatabaseInput renamedDatabase = GlueInputConverter.convertDatabase(database).withName(newDatabaseName);
            stats.getRenameDatabase().call(() ->
                    glueClient.updateDatabase(new UpdateDatabaseRequest()
                            .withCatalogId(catalogId)
                            .withName(databaseName)
                            .withDatabaseInput(renamedDatabase)));
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public void setDatabaseOwner(HiveIdentity identity, String databaseName, HivePrincipal principal)
    {
        throw new PrestoException(NOT_SUPPORTED, "setting the database owner is not supported by Glue");
    }

    @Override
    public void createTable(HiveIdentity identity, Table table, PrincipalPrivileges principalPrivileges)
    {
        try {
            TableInput input = GlueInputConverter.convertTable(table);
            stats.getCreateTable().call(() ->
                    glueClient.createTable(new CreateTableRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(table.getDatabaseName())
                            .withTableInput(input)));
        }
        catch (AlreadyExistsException e) {
            throw new TableAlreadyExistsException(new SchemaTableName(table.getDatabaseName(), table.getTableName()));
        }
        catch (EntityNotFoundException e) {
            throw new SchemaNotFoundException(table.getDatabaseName());
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public void dropTable(HiveIdentity identity, String databaseName, String tableName, boolean deleteData)
    {
        Table table = getExistingTable(identity, databaseName, tableName);

        try {
            stats.getDropTable().call(() ->
                    glueClient.deleteTable(new DeleteTableRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(databaseName)
                            .withName(tableName)));
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }

        String tableLocation = table.getStorage().getLocation();
        if (deleteData && isManagedTable(table) && !isNullOrEmpty(tableLocation)) {
            deleteDir(hdfsContext, hdfsEnvironment, new Path(tableLocation), true);
        }
    }

    private static boolean isManagedTable(Table table)
    {
        return table.getTableType().equals(MANAGED_TABLE.name());
    }

    private static void deleteDir(HdfsContext context, HdfsEnvironment hdfsEnvironment, Path path, boolean recursive)
    {
        try {
            hdfsEnvironment.getFileSystem(context, path).delete(path, recursive);
        }
        catch (Exception e) {
            // don't fail if unable to delete path
            log.warn(e, "Failed to delete path: " + path.toString());
        }
    }

    @Override
    public void replaceTable(HiveIdentity identity, String databaseName, String tableName, Table newTable, PrincipalPrivileges principalPrivileges)
    {
        try {
            TableInput newTableInput = GlueInputConverter.convertTable(newTable);
            stats.getReplaceTable().call(() ->
                    glueClient.updateTable(new UpdateTableRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(databaseName)
                            .withTableInput(newTableInput)));
        }
        catch (EntityNotFoundException e) {
            throw new TableNotFoundException(new SchemaTableName(databaseName, tableName));
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public void renameTable(HiveIdentity identity, String databaseName, String tableName, String newDatabaseName, String newTableName)
    {
        throw new PrestoException(NOT_SUPPORTED, "Table rename is not yet supported by Glue service");
    }

    @Override
    public void commentTable(HiveIdentity identity, String databaseName, String tableName, Optional<String> comment)
    {
        throw new PrestoException(NOT_SUPPORTED, "Table comment is not yet supported by Glue service");
    }

    @Override
    public void commentColumn(HiveIdentity identity, String databaseName, String tableName, String columnName, Optional<String> comment)
    {
        throw new PrestoException(NOT_SUPPORTED, "Column comment is not yet supported by Glue service");
    }

    @Override
    public void addColumn(HiveIdentity identity, String databaseName, String tableName, String columnName, HiveType columnType, String columnComment)
    {
        Table oldTable = getExistingTable(identity, databaseName, tableName);
        Table newTable = Table.builder(oldTable)
                .addDataColumn(new Column(columnName, columnType, Optional.ofNullable(columnComment)))
                .build();
        replaceTable(identity, databaseName, tableName, newTable, null);
    }

    @Override
    public void renameColumn(HiveIdentity identity, String databaseName, String tableName, String oldColumnName, String newColumnName)
    {
        Table oldTable = getExistingTable(identity, databaseName, tableName);
        if (oldTable.getPartitionColumns().stream().anyMatch(c -> c.getName().equals(oldColumnName))) {
            throw new PrestoException(NOT_SUPPORTED, "Renaming partition columns is not supported");
        }

        ImmutableList.Builder<Column> newDataColumns = ImmutableList.builder();
        for (Column column : oldTable.getDataColumns()) {
            if (column.getName().equals(oldColumnName)) {
                newDataColumns.add(new Column(newColumnName, column.getType(), column.getComment()));
            }
            else {
                newDataColumns.add(column);
            }
        }

        Table newTable = Table.builder(oldTable)
                .setDataColumns(newDataColumns.build())
                .build();
        replaceTable(identity, databaseName, tableName, newTable, null);
    }

    @Override
    public void dropColumn(HiveIdentity identity, String databaseName, String tableName, String columnName)
    {
        verifyCanDropColumn(this, identity, databaseName, tableName, columnName);
        Table oldTable = getExistingTable(identity, databaseName, tableName);

        if (oldTable.getColumn(columnName).isEmpty()) {
            SchemaTableName name = new SchemaTableName(databaseName, tableName);
            throw new ColumnNotFoundException(name, columnName);
        }

        ImmutableList.Builder<Column> newDataColumns = ImmutableList.builder();
        oldTable.getDataColumns().stream()
                .filter(fieldSchema -> !fieldSchema.getName().equals(columnName))
                .forEach(newDataColumns::add);

        Table newTable = Table.builder(oldTable)
                .setDataColumns(newDataColumns.build())
                .build();
        replaceTable(identity, databaseName, tableName, newTable, null);
    }

    @Override
    public Optional<Partition> getPartition(HiveIdentity identity, Table table, List<String> partitionValues)
    {
        try {
            GetPartitionResult result = stats.getGetPartition().call(() ->
                    glueClient.getPartition(new GetPartitionRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(table.getDatabaseName())
                            .withTableName(table.getTableName())
                            .withPartitionValues(partitionValues)));
            return Optional.of(GlueToPrestoConverter.convertPartition(result.getPartition(), table.getParameters()));
        }
        catch (EntityNotFoundException e) {
            return Optional.empty();
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public Optional<List<String>> getPartitionNamesByFilter(
            HiveIdentity identity,
            String databaseName,
            String tableName,
            List<String> columnNames,
            TupleDomain<String> partitionKeysFilter)
    {
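        // A "none" or always-false partition key filter cannot match any partition, so short-circuit with an empty result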
        if (partitionKeysFilter.isNone()) {
            return Optional.of(ImmutableList.of());
        }
        if (MetastoreUtil.isPartitionKeyFilterFalse(partitionKeysFilter)) {
            return Optional.of(ImmutableList.of());
        }

        Table table = getExistingTable(identity, databaseName, tableName);
        String expression = GlueExpressionUtil.buildGlueExpression(columnNames, partitionKeysFilter, assumeCanonicalPartitionKeys);
        List<Partition> partitions = getPartitions(table, expression);
        return Optional.of(buildPartitionNames(table.getPartitionColumns(), partitions));
    }

    private List<Partition> getPartitions(Table table, String expression)
    {
        if (partitionSegments == 1) {
            return getPartitions(table, expression, null);
        }

        // Do parallel partition fetch.
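        // Each Glue Segment is fetched independently on the executor; the combined result is sorted below
        // so that the overall partition order is deterministic.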
        CompletionService<List<Partition>> completionService = new ExecutorCompletionService<>(executor);
        for (int i = 0; i < partitionSegments; i++) {
            Segment segment = new Segment().withSegmentNumber(i).withTotalSegments(partitionSegments);
            completionService.submit(() -> getPartitions(table, expression, segment));
        }

        List<Partition> partitions = new ArrayList<>();
        try {
            for (int i = 0; i < partitionSegments; i++) {
                Future<List<Partition>> futurePartitions = completionService.take();
                partitions.addAll(futurePartitions.get());
            }
        }
        catch (ExecutionException | InterruptedException e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            throw new PrestoException(HIVE_METASTORE_ERROR, "Failed to fetch partitions from Glue Data Catalog", e);
        }

        partitions.sort(PARTITION_COMPARATOR);
        return partitions;
    }

    private List<Partition> getPartitions(Table table, String expression, @Nullable Segment segment)
    {
        try {
            return stats.getGetPartitions().call(() -> {
                List<Partition> partitions = new ArrayList<>();
                String nextToken = null;

                do {
                    GetPartitionsResult result = glueClient.getPartitions(new GetPartitionsRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(table.getDatabaseName())
                            .withTableName(table.getTableName())
                            .withExpression(expression)
                            .withSegment(segment)
                            .withNextToken(nextToken)
                            .withMaxResults(AWS_GLUE_GET_PARTITIONS_MAX_RESULTS));
                    result.getPartitions()
                            .forEach(partition -> partitions.add(GlueToPrestoConverter.convertPartition(partition, table.getParameters())));
                    nextToken = result.getNextToken();
                }
                while (nextToken != null);

                return partitions;
            });
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    private static List<String> buildPartitionNames(List<Column> partitionColumns, List<Partition> partitions)
    {
        return partitions.stream()
                .map(partition -> makePartitionName(partitionColumns, partition.getValues()))
                .collect(toList());
    }

    /**
     * <pre>
     * Ex: Partition keys = ['a', 'b']
     *     Partition names = ['a=1/b=2', 'a=2/b=2']
     * </pre>
     *
     * @param partitionNames List of full partition names
     * @return Mapping of partition name to partition object
     */
    @Override
    public Map<String, Optional<Partition>> getPartitionsByNames(HiveIdentity identity, Table table, List<String> partitionNames)
    {
        return stats.getGetPartitionByName().call(() -> getPartitionsByNames(table, partitionNames));
    }

    private Map<String, Optional<Partition>> getPartitionsByNames(Table table, List<String> partitionNames)
    {
        requireNonNull(partitionNames, "partitionNames is null");
        if (partitionNames.isEmpty()) {
            return ImmutableMap.of();
        }

        List<Partition> partitions = batchGetPartition(table, partitionNames);

        Map<String, List<String>> partitionNameToPartitionValuesMap = partitionNames.stream()
                .collect(toMap(identity(), HiveUtil::toPartitionValues));
        Map<List<String>, Partition> partitionValuesToPartitionMap = partitions.stream()
                .collect(toMap(Partition::getValues, identity()));

        ImmutableMap.Builder<String, Optional<Partition>> resultBuilder = ImmutableMap.builder();
        for (Entry<String, List<String>> entry : partitionNameToPartitionValuesMap.entrySet()) {
            Partition partition = partitionValuesToPartitionMap.get(entry.getValue());
            resultBuilder.put(entry.getKey(), Optional.ofNullable(partition));
        }
        return resultBuilder.build();
    }

    private List<Partition> batchGetPartition(Table table, List<String> partitionNames)
    {
        try {
            List<PartitionValueList> partitionValueLists = partitionNames.stream()
                    .map(partitionName -> new PartitionValueList().withValues(toPartitionValues(partitionName)))
                    .collect(toList());

            List<List<PartitionValueList>> batchedPartitionValueLists = Lists.partition(partitionValueLists, BATCH_GET_PARTITION_MAX_PAGE_SIZE);
            List<Future<BatchGetPartitionResult>> batchGetPartitionFutures = new ArrayList<>();
            List<Partition> result = new ArrayList<>();

            for (List<PartitionValueList> partitions : batchedPartitionValueLists) {
                batchGetPartitionFutures.add(glueClient.batchGetPartitionAsync(new BatchGetPartitionRequest()
                        .withCatalogId(catalogId)
                        .withDatabaseName(table.getDatabaseName())
                        .withTableName(table.getTableName())
                        .withPartitionsToGet(partitions)));
            }

            for (Future<BatchGetPartitionResult> future : batchGetPartitionFutures) {
                future.get().getPartitions()
                        .forEach(partition -> result.add(GlueToPrestoConverter.convertPartition(partition, table.getParameters())));
            }

            return result;
        }
        catch (AmazonServiceException | InterruptedException | ExecutionException e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public void addPartitions(HiveIdentity identity, String databaseName, String tableName, List<PartitionWithStatistics> partitions)
    {
        try {
            stats.getAddPartitions().call(() -> {
                List<List<PartitionWithStatistics>> batchedPartitions = Lists.partition(partitions, BATCH_CREATE_PARTITION_MAX_PAGE_SIZE);
                List<Future<BatchCreatePartitionResult>> futures = new ArrayList<>();

                for (List<PartitionWithStatistics> partitionBatch : batchedPartitions) {
                    List<PartitionInput> partitionInputs = partitionBatch.stream()
                            .map(partition -> GlueInputConverter.convertPartition(partition, columnStatisticsProvider))
                            .collect(toList());
                    futures.add(glueClient.batchCreatePartitionAsync(new BatchCreatePartitionRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(databaseName)
                            .withTableName(tableName)
                            .withPartitionInputList(partitionInputs)));
                }

                for (Future<BatchCreatePartitionResult> future : futures) {
                    try {
                        BatchCreatePartitionResult result = future.get();
                        propagatePartitionErrorToPrestoException(databaseName, tableName, result.getErrors());
                    }
                    catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        throw new PrestoException(HIVE_METASTORE_ERROR, e);
                    }
                }

                return null;
            });
        }
        catch (AmazonServiceException | ExecutionException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    private static void propagatePartitionErrorToPrestoException(String databaseName, String tableName, List<PartitionError> partitionErrors)
    {
        if (partitionErrors != null && !partitionErrors.isEmpty()) {
            ErrorDetail errorDetail = partitionErrors.get(0).getErrorDetail();
            String glueExceptionCode = errorDetail.getErrorCode();

            switch (glueExceptionCode) {
                case "AlreadyExistsException":
                    throw new PrestoException(ALREADY_EXISTS, errorDetail.getErrorMessage());
                case "EntityNotFoundException":
                    throw new TableNotFoundException(new SchemaTableName(databaseName, tableName), errorDetail.getErrorMessage());
                default:
                    throw new PrestoException(HIVE_METASTORE_ERROR, errorDetail.getErrorCode() + ": " + errorDetail.getErrorMessage());
            }
        }
    }

    @Override
    public void dropPartition(HiveIdentity identity, String databaseName, String tableName, List<String> parts, boolean deleteData)
    {
        Table table = getExistingTable(identity, databaseName, tableName);
        Partition partition = getPartition(identity, table, parts)
                .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), parts));

        try {
            stats.getDropPartition().call(() ->
                    glueClient.deletePartition(new DeletePartitionRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(databaseName)
                            .withTableName(tableName)
                            .withPartitionValues(parts)));
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }

        String partLocation = partition.getStorage().getLocation();
        if (deleteData && isManagedTable(table) && !isNullOrEmpty(partLocation)) {
            deleteDir(hdfsContext, hdfsEnvironment, new Path(partLocation), true);
        }
    }

    @Override
    public void alterPartition(HiveIdentity identity, String databaseName, String tableName, PartitionWithStatistics partition)
    {
        try {
            PartitionInput newPartition = GlueInputConverter.convertPartition(partition, columnStatisticsProvider);
            stats.getAlterPartition().call(() ->
                    glueClient.updatePartition(new UpdatePartitionRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(databaseName)
                            .withTableName(tableName)
                            .withPartitionInput(newPartition)
                            .withPartitionValueList(partition.getPartition().getValues())));
        }
        catch (EntityNotFoundException e) {
            throw new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partition.getPartition().getValues());
        }
        catch (AmazonServiceException e) {
            throw new PrestoException(HIVE_METASTORE_ERROR, e);
        }
    }

    @Override
    public void createRole(String role, String grantor)
    {
        throw new PrestoException(NOT_SUPPORTED, "createRole is not supported by Glue");
    }

    @Override
    public void dropRole(String role)
    {
        throw new PrestoException(NOT_SUPPORTED, "dropRole is not supported by Glue");
    }

    @Override
    public Set<String> listRoles()
    {
        return ImmutableSet.of(PUBLIC_ROLE_NAME);
    }

    @Override
    public void grantRoles(Set<String> roles, Set<HivePrincipal> grantees, boolean adminOption, HivePrincipal grantor)
    {
        throw new PrestoException(NOT_SUPPORTED, "grantRoles is not supported by Glue");
    }

    @Override
    public void revokeRoles(Set<String> roles, Set<HivePrincipal> grantees, boolean adminOption, HivePrincipal grantor)
    {
        throw new PrestoException(NOT_SUPPORTED, "revokeRoles is not supported by Glue");
    }

    @Override
    public Set<RoleGrant> listGrantedPrincipals(String role)
    {
        throw new PrestoException(NOT_SUPPORTED, "listPrincipals is not supported by Glue");
    }

    @Override
    public Set<RoleGrant> listRoleGrants(HivePrincipal principal)
    {
        if (principal.getType() == USER) {
            return ImmutableSet.of(new RoleGrant(principal.toPrestoPrincipal(), PUBLIC_ROLE_NAME, false));
        }
        return ImmutableSet.of();
    }

    @Override
    public void grantTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, Set<HivePrivilegeInfo> privileges)
    {
        throw new PrestoException(NOT_SUPPORTED, "grantTablePrivileges is not supported by Glue");
    }

    @Override
    public void revokeTablePrivileges(String databaseName, String tableName, String tableOwner, HivePrincipal grantee, Set<HivePrivilegeInfo> privileges)
    {
        throw new PrestoException(NOT_SUPPORTED, "revokeTablePrivileges is not supported by Glue");
    }

    @Override
    public Set<HivePrivilegeInfo> listTablePrivileges(String databaseName, String tableName, String tableOwner, Optional<HivePrincipal> principal)
    {
        return ImmutableSet.of();
    }

    @Override
    public boolean isImpersonationEnabled()
    {
        return false;
    }
}