All downloads are free. Search and download functionality uses the official Maven repository.

io.prestosql.plugin.hive.metastore.glue.converter.GlueToPrestoConverter Maven / Gradle / Ivy

There is a newer version: 350
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.prestosql.plugin.hive.metastore.glue.converter;

import com.amazonaws.services.glue.model.SerDeInfo;
import com.amazonaws.services.glue.model.StorageDescriptor;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.prestosql.plugin.hive.HiveBucketProperty;
import io.prestosql.plugin.hive.HiveType;
import io.prestosql.plugin.hive.metastore.Column;
import io.prestosql.plugin.hive.metastore.Database;
import io.prestosql.plugin.hive.metastore.Partition;
import io.prestosql.plugin.hive.metastore.SortingColumn;
import io.prestosql.plugin.hive.metastore.SortingColumn.Order;
import io.prestosql.plugin.hive.metastore.Storage;
import io.prestosql.plugin.hive.metastore.StorageFormat;
import io.prestosql.plugin.hive.metastore.Table;
import io.prestosql.plugin.hive.util.HiveBucketing;
import io.prestosql.plugin.hive.util.HiveBucketing.BucketingVersion;
import io.prestosql.spi.PrestoException;
import io.prestosql.spi.security.PrincipalType;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;

import static com.google.common.base.MoreObjects.firstNonNull;
import static com.google.common.base.Strings.nullToEmpty;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;

/**
 * Static helpers that translate AWS Glue Data Catalog model objects
 * (databases, tables, partitions, columns) into the Presto Hive metastore
 * model.
 *
 * <p>This class is a non-instantiable utility holder.
 */
public final class GlueToPrestoConverter
{
    // Every converted database is reported as owned by this role.
    private static final String PUBLIC_OWNER = "PUBLIC";

    private GlueToPrestoConverter() {}

    /**
     * Converts a Glue database into a metastore {@link Database}.
     *
     * <p>The location URI and description are optional; a {@code null}
     * parameter map is replaced by an empty map. The owner is always the
     * {@code PUBLIC} role.
     *
     * @param glueDb the Glue database to convert
     * @return the equivalent metastore database
     */
    public static Database convertDatabase(com.amazonaws.services.glue.model.Database glueDb)
    {
        return Database.builder()
                .setDatabaseName(glueDb.getName())
                .setLocation(Optional.ofNullable(glueDb.getLocationUri()))
                .setComment(Optional.ofNullable(glueDb.getDescription()))
                .setParameters(firstNonNull(glueDb.getParameters(), ImmutableMap.of()))
                .setOwnerName(PUBLIC_OWNER)
                .setOwnerType(PrincipalType.ROLE)
                .build();
    }

    /**
     * Converts a Glue table into a metastore {@link Table}.
     *
     * <p>A {@code null} owner becomes the empty string, a {@code null} table
     * type becomes {@code EXTERNAL_TABLE}, a {@code null} parameter map
     * becomes an empty map and {@code null} partition keys become an empty
     * partition column list.
     *
     * @param glueTable the Glue table to convert
     * @param dbName the database name to record on the converted table
     * @return the equivalent metastore table
     * @throws NullPointerException if the table has no storage descriptor or
     *         the storage descriptor has no SerDe info
     * @throws PrestoException with {@code HIVE_INVALID_METADATA} if a bucket
     *         count is set but no bucket columns are present
     */
    public static Table convertTable(com.amazonaws.services.glue.model.Table glueTable, String dbName)
    {
        StorageDescriptor sd = requireNonNull(glueTable.getStorageDescriptor(), "Table StorageDescriptor is null");
        Map<String, String> tableParameters = firstNonNull(glueTable.getParameters(), ImmutableMap.of());

        Table.Builder tableBuilder = Table.builder()
                .setDatabaseName(dbName)
                .setTableName(glueTable.getName())
                .setOwner(nullToEmpty(glueTable.getOwner()))
                // Athena treats missing table type as EXTERNAL_TABLE.
                .setTableType(firstNonNull(glueTable.getTableType(), EXTERNAL_TABLE.name()))
                .setDataColumns(convertColumns(sd.getColumns()))
                .setParameters(tableParameters)
                .setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText()))
                .setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText()));

        if (glueTable.getPartitionKeys() != null) {
            tableBuilder.setPartitionColumns(convertColumns(glueTable.getPartitionKeys()));
        }
        else {
            tableBuilder.setPartitionColumns(new ArrayList<>());
        }

        setStorageBuilder(sd, tableBuilder.getStorageBuilder(), tableParameters);
        return tableBuilder.build();
    }

    /**
     * Populates {@code storageBuilder} from a Glue storage descriptor.
     *
     * <p>The builder is owned by the enclosing table or partition builder, so
     * it is only configured here and not built.
     *
     * @param sd the Glue storage descriptor to read from
     * @param storageBuilder the builder to configure
     * @param tableParameters table parameters, used to determine the bucketing version
     * @throws NullPointerException if {@code sd} has no SerDe info
     */
    private static void setStorageBuilder(StorageDescriptor sd, Storage.Builder storageBuilder, Map<String, String> tableParameters)
    {
        SerDeInfo serdeInfo = requireNonNull(sd.getSerdeInfo(), "StorageDescriptor SerDeInfo is null");

        boolean skewed = sd.getSkewedInfo() != null && !isNullOrEmpty(sd.getSkewedInfo().getSkewedColumnNames());

        storageBuilder.setStorageFormat(StorageFormat.createNullable(serdeInfo.getSerializationLibrary(), sd.getInputFormat(), sd.getOutputFormat()))
                .setLocation(nullToEmpty(sd.getLocation()))
                .setBucketProperty(createBucketProperty(sd, tableParameters))
                .setSkewed(skewed)
                .setSerdeParameters(firstNonNull(serdeInfo.getParameters(), ImmutableMap.of()));
    }

    /**
     * Derives the bucketing description from a Glue storage descriptor.
     *
     * @param sd the Glue storage descriptor to read from
     * @param tableParameters table parameters, used to determine the bucketing version
     * @return the bucket property, or empty when the bucket count is missing or not positive
     * @throws PrestoException with {@code HIVE_INVALID_METADATA} if a positive
     *         bucket count is set but no bucket columns are present
     */
    private static Optional<HiveBucketProperty> createBucketProperty(StorageDescriptor sd, Map<String, String> tableParameters)
    {
        // The Glue model exposes the bucket count as a boxed Integer; guard against
        // null so that a missing count means "not bucketed" instead of an NPE on unboxing.
        Integer bucketCount = sd.getNumberOfBuckets();
        if (bucketCount == null || bucketCount <= 0) {
            return Optional.empty();
        }

        if (isNullOrEmpty(sd.getBucketColumns())) {
            throw new PrestoException(HIVE_INVALID_METADATA, "Table/partition metadata has 'numBuckets' set, but 'bucketCols' is not set");
        }

        List<SortingColumn> sortedBy = ImmutableList.of();
        if (!isNullOrEmpty(sd.getSortColumns())) {
            sortedBy = sd.getSortColumns().stream()
                    .map(column -> new SortingColumn(
                            column.getColumn(),
                            Order.fromMetastoreApiOrder(column.getSortOrder(), "unknown")))
                    .collect(toImmutableList());
        }

        BucketingVersion bucketingVersion = HiveBucketing.getBucketingVersion(tableParameters);
        return Optional.of(new HiveBucketProperty(sd.getBucketColumns(), bucketingVersion, bucketCount, sortedBy));
    }

    /**
     * Converts a list of Glue columns into an immutable list of metastore columns,
     * preserving order.
     *
     * @param glueColumns the Glue columns to convert; must not be {@code null}
     * @return the converted columns
     */
    private static List<Column> convertColumns(List<com.amazonaws.services.glue.model.Column> glueColumns)
    {
        return glueColumns.stream()
                .map(GlueToPrestoConverter::convertColumn)
                .collect(toImmutableList());
    }

    /**
     * Converts a single Glue column. The Glue type string is lower-cased
     * before being parsed into a {@link HiveType}; the comment is optional.
     *
     * @param glueColumn the Glue column to convert
     * @return the equivalent metastore column
     */
    private static Column convertColumn(com.amazonaws.services.glue.model.Column glueColumn)
    {
        return new Column(glueColumn.getName(), HiveType.valueOf(glueColumn.getType().toLowerCase(Locale.ENGLISH)), Optional.ofNullable(glueColumn.getComment()));
    }

    /**
     * Converts a Glue partition into a metastore {@link Partition}.
     *
     * <p>A {@code null} partition parameter map is replaced by an empty map.
     *
     * @param gluePartition the Glue partition to convert
     * @param tableParameters parameters of the owning table, used to determine
     *        the bucketing version of the partition's storage
     * @return the equivalent metastore partition
     * @throws NullPointerException if the partition has no storage descriptor
     *         or the storage descriptor has no SerDe info
     * @throws PrestoException with {@code HIVE_INVALID_METADATA} if a bucket
     *         count is set but no bucket columns are present
     */
    public static Partition convertPartition(com.amazonaws.services.glue.model.Partition gluePartition, Map<String, String> tableParameters)
    {
        StorageDescriptor sd = requireNonNull(gluePartition.getStorageDescriptor(), "Partition StorageDescriptor is null");

        Partition.Builder partitionBuilder = Partition.builder()
                .setDatabaseName(gluePartition.getDatabaseName())
                .setTableName(gluePartition.getTableName())
                .setValues(gluePartition.getValues())
                .setColumns(convertColumns(sd.getColumns()))
                .setParameters(firstNonNull(gluePartition.getParameters(), ImmutableMap.of()));

        setStorageBuilder(sd, partitionBuilder.getStorageBuilder(), tableParameters);
        return partitionBuilder.build();
    }

    /**
     * Returns {@code true} when {@code list} is {@code null} or has no elements.
     */
    private static boolean isNullOrEmpty(List<?> list)
    {
        return list == null || list.isEmpty();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy