io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.hive.metastore.glue.converter;
import com.amazonaws.services.glue.model.SerDeInfo;
import com.amazonaws.services.glue.model.StorageDescriptor;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.trino.plugin.hive.HiveBucketProperty;
import io.trino.plugin.hive.HiveStorageFormat;
import io.trino.plugin.hive.HiveType;
import io.trino.plugin.hive.metastore.Column;
import io.trino.plugin.hive.metastore.Database;
import io.trino.plugin.hive.metastore.Partition;
import io.trino.plugin.hive.metastore.SortingColumn;
import io.trino.plugin.hive.metastore.SortingColumn.Order;
import io.trino.plugin.hive.metastore.Storage;
import io.trino.plugin.hive.metastore.StorageFormat;
import io.trino.plugin.hive.metastore.Table;
import io.trino.plugin.hive.util.HiveBucketing;
import io.trino.plugin.hive.util.HiveBucketing.BucketingVersion;
import io.trino.spi.TrinoException;
import io.trino.spi.security.PrincipalType;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import static com.google.common.base.MoreObjects.firstNonNull;
import static com.google.common.base.Strings.nullToEmpty;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT;
import static io.trino.plugin.hive.HiveType.HIVE_INT;
import static io.trino.plugin.hive.metastore.util.Memoizers.memoizeLast;
import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable;
import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
public final class GlueToTrinoConverter
{
// Owner name that convertDatabase assigns to every database it converts from Glue.
private static final String PUBLIC_OWNER = "PUBLIC";
// Static utility holder: the private constructor prevents instantiation.
private GlueToTrinoConverter() {}
/**
 * Builds a Trino metastore {@link Database} from a Glue database.
 * <p>
 * A missing location or description becomes an empty {@link Optional}, and missing
 * parameters become an empty map. The owner is always set to the {@code PUBLIC} role.
 *
 * @param glueDb the Glue database to convert
 * @return the equivalent Trino metastore database
 */
public static Database convertDatabase(com.amazonaws.services.glue.model.Database glueDb)
{
    // Normalize the nullable Glue fields up front so the builder chain below stays flat.
    Optional<String> location = Optional.ofNullable(glueDb.getLocationUri());
    Optional<String> description = Optional.ofNullable(glueDb.getDescription());
    Map<String, String> parameters = firstNonNull(glueDb.getParameters(), ImmutableMap.of());

    return Database.builder()
            .setDatabaseName(glueDb.getName())
            .setLocation(location)
            .setComment(description)
            .setParameters(parameters)
            .setOwnerName(Optional.of(PUBLIC_OWNER))
            .setOwnerType(Optional.of(PrincipalType.ROLE))
            .build();
}
/**
 * Converts a Glue table into a Trino metastore {@link Table}.
 * <p>
 * Iceberg tables, and Delta Lake tables without a StorageDescriptor, get a single dummy
 * data column and a Parquet storage format. Every other table has its data columns,
 * partition columns and storage populated from the Glue StorageDescriptor.
 *
 * @param glueTable the Glue table to convert
 * @param dbName name of the database the table belongs to
 * @return the equivalent Trino metastore table
 * @throws TrinoException with {@code HIVE_UNSUPPORTED_FORMAT} if the StorageDescriptor is
 *         null and the table is neither an Iceberg nor a Delta Lake table
 * @throws NullPointerException if the StorageDescriptor has no SerDeInfo
 */
public static Table convertTable(com.amazonaws.services.glue.model.Table glueTable, String dbName)
{
    Map<String, String> tableParameters = convertParameters(glueTable.getParameters());

    Table.Builder tableBuilder = Table.builder()
            .setDatabaseName(dbName)
            .setTableName(glueTable.getName())
            .setOwner(Optional.ofNullable(glueTable.getOwner()))
            // Athena treats missing table type as EXTERNAL_TABLE.
            .setTableType(firstNonNull(glueTable.getTableType(), EXTERNAL_TABLE.name()))
            .setParameters(tableParameters)
            .setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText()))
            .setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText()));

    StorageDescriptor sd = glueTable.getStorageDescriptor();
    if (isIcebergTable(tableParameters) || (sd == null && isDeltaLakeTable(tableParameters))) {
        // Iceberg tables do not need to read the StorageDescriptor field, but we still need to return dummy properties for compatibility.
        // Delta Lake tables only need dummy properties if a StorageDescriptor was not explicitly configured.
        tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty())));
        tableBuilder.getStorageBuilder().setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.PARQUET));
        return tableBuilder.build();
    }

    if (sd == null) {
        throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Table StorageDescriptor is null for table %s.%s (%s)", dbName, glueTable.getName(), glueTable));
    }

    // Resolve the serialization library once; fail with a descriptive message instead of a bare NPE
    // when Glue returns a StorageDescriptor without SerDeInfo.
    String serde = requireNonNull(
            sd.getSerdeInfo(),
            () -> format("Table SerDeInfo is null for table %s.%s", dbName, glueTable.getName()))
            .getSerializationLibrary();
    tableBuilder.setDataColumns(convertColumns(sd.getColumns(), serde));

    if (glueTable.getPartitionKeys() != null) {
        // Partition values are not deserialized by the SerDe, so partition keys must keep their
        // declared types even for OpenCSVSerde tables. A null serde never matches the CSV serde in
        // convertColumn, which therefore uses the type declared in Glue.
        String partitionKeySerde = null;
        tableBuilder.setPartitionColumns(convertColumns(glueTable.getPartitionKeys(), partitionKeySerde));
    }
    else {
        tableBuilder.setPartitionColumns(ImmutableList.of());
    }

    // No benefit to memoizing here, just reusing the implementation
    new StorageConverter().setStorageBuilder(sd, tableBuilder.getStorageBuilder(), tableParameters);
    return tableBuilder.build();
}
/**
 * Converts a single Glue column into a Trino metastore {@link Column}.
 *
 * @param glueColumn the Glue column to convert
 * @param serde serialization library of the owning storage descriptor; when it equals the
 *        CSV serde the column is typed as string regardless of its declared type
 * @return the converted column, carrying the Glue column's name and optional comment
 */
private static Column convertColumn(com.amazonaws.services.glue.model.Column glueColumn, String serde)
{
    // OpenCSVSerde deserializes columns from csv file into strings, so we set the column type from the metastore
    // to string to avoid cast exceptions.
    //TODO(https://github.com/trinodb/trino/issues/7240) Add tests
    boolean csvSerde = HiveStorageFormat.CSV.getSerde().equals(serde);

    // The declared type is only parsed for non-CSV serdes; Glue type names are lower-cased before parsing.
    HiveType columnType = csvSerde
            ? HiveType.HIVE_STRING
            : HiveType.valueOf(glueColumn.getType().toLowerCase(Locale.ENGLISH));

    return new Column(glueColumn.getName(), columnType, Optional.ofNullable(glueColumn.getComment()));
}
/**
 * Converts a list of Glue columns into Trino metastore columns by applying
 * {@code convertColumn} to each element through {@code mappedCopy} (defined elsewhere in this class).
 *
 * @param glueColumns the Glue columns to convert
 * @param serde serialization library passed through to {@code convertColumn} for every column
 * @return the converted columns
 */
private static List<Column> convertColumns(List<com.amazonaws.services.glue.model.Column> glueColumns, String serde)
{
    // Element types are spelled out so the lambda parameter is a Glue column rather than Object.
    return mappedCopy(glueColumns, glueColumn -> convertColumn(glueColumn, serde));
}
/**
 * Returns an immutable copy of a Glue parameter map.
 *
 * @param parameters the Glue parameters; may be {@code null}
 * @return an empty immutable map when {@code parameters} is {@code null} or empty,
 *         otherwise an immutable copy of {@code parameters}
 */
private static Map<String, String> convertParameters(Map<String, String> parameters)
{
    if (parameters == null || parameters.isEmpty()) {
        return ImmutableMap.of();
    }
    return ImmutableMap.copyOf(parameters);
}
private static Function
© 2015 - 2025 Weber Informatics LLC | Privacy Policy