Provenance note: this file is io.trino.plugin.hudi.HudiMetadata, obtained from the official Maven repository.
A newer version of the artifact (465) is available upstream.
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hudi;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystemFactory;
import io.trino.metastore.Column;
import io.trino.metastore.HiveMetastore;
import io.trino.metastore.Table;
import io.trino.metastore.TableInfo;
import io.trino.plugin.base.classloader.ClassLoaderSafeSystemTable;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ColumnMetadata;
import io.trino.spi.connector.ConnectorMetadata;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTableMetadata;
import io.trino.spi.connector.ConnectorTableVersion;
import io.trino.spi.connector.Constraint;
import io.trino.spi.connector.ConstraintApplicationResult;
import io.trino.spi.connector.RelationColumnsMetadata;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.connector.SchemaTablePrefix;
import io.trino.spi.connector.SystemTable;
import io.trino.spi.connector.TableNotFoundException;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.TypeManager;

import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import java.util.stream.Stream;

import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.trino.metastore.Table.TABLE_COMMENT;
import static io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS;
import static io.trino.plugin.hive.util.HiveUtil.columnMetadataGetter;
import static io.trino.plugin.hive.util.HiveUtil.getPartitionKeyColumnHandles;
import static io.trino.plugin.hive.util.HiveUtil.hiveColumnHandles;
import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema;
import static io.trino.plugin.hive.util.HiveUtil.isHudiTable;
import static io.trino.plugin.hudi.HudiErrorCode.HUDI_BAD_DATA;
import static io.trino.plugin.hudi.HudiSessionProperties.getColumnsToHide;
import static io.trino.plugin.hudi.HudiSessionProperties.isQueryPartitionFilterRequired;
import static io.trino.plugin.hudi.HudiTableProperties.LOCATION_PROPERTY;
import static io.trino.plugin.hudi.HudiTableProperties.PARTITIONED_BY_PROPERTY;
import static io.trino.plugin.hudi.HudiUtil.hudiMetadataExists;
import static io.trino.plugin.hudi.model.HudiTableType.COPY_ON_WRITE;
import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED;
import static io.trino.spi.StandardErrorCode.QUERY_REJECTED;
import static io.trino.spi.StandardErrorCode.UNSUPPORTED_TABLE_TYPE;
import static io.trino.spi.connector.SchemaTableName.schemaTableName;
import static java.lang.String.format;
import static java.util.Collections.singletonList;
import static java.util.Objects.requireNonNull;
import static java.util.function.Function.identity;

/**
 * {@link ConnectorMetadata} implementation for the Hudi connector.
 * <p>
 * Resolves schemas, tables and columns through a Hive metastore, and validates
 * that resolved locations actually contain Hudi table metadata on the file system.
 */
public class HudiMetadata
        implements ConnectorMetadata
{
    // Hive metastore used to resolve schemas, tables and column definitions
    private final HiveMetastore metastore;
    // Creates per-session file systems; used to check Hudi metadata at table locations
    private final TrinoFileSystemFactory fileSystemFactory;
    // Maps Hive column types to Trino types when building column handles
    private final TypeManager typeManager;

    public HudiMetadata(HiveMetastore metastore, TrinoFileSystemFactory fileSystemFactory, TypeManager typeManager)
    {
        this.metastore = requireNonNull(metastore, "metastore is null");
        this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
        this.typeManager = requireNonNull(typeManager, "typeManager is null");
    }

    /**
     * Lists all schemas (databases) known to the metastore, excluding Hive system schemas.
     *
     * @return immutable list of visible schema names
     */
    @Override
    public List<String> listSchemaNames(ConnectorSession session)
    {
        // Raw List restored to List<String> to match the ConnectorMetadata contract
        return metastore.getAllDatabases().stream()
                .filter(schemaName -> !isHiveSystemSchema(schemaName))
                .collect(toImmutableList());
    }

    /**
     * Resolves a table name to a {@link HudiTableHandle}.
     * <p>
     * Returns {@code null} for Hive system schemas and for tables missing from the
     * metastore. Fails if the table is not a Hudi table, or if its storage location
     * does not contain Hudi table metadata.
     *
     * @throws TrinoException with {@code NOT_SUPPORTED} if a version is requested,
     *         {@code UNSUPPORTED_TABLE_TYPE} for non-Hudi tables, or
     *         {@code HUDI_BAD_DATA} when Hudi metadata is absent at the table location
     */
    @Override
    public HudiTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName, Optional<ConnectorTableVersion> startVersion, Optional<ConnectorTableVersion> endVersion)
    {
        // Time-travel / versioned reads are not supported by this connector
        if (startVersion.isPresent() || endVersion.isPresent()) {
            throw new TrinoException(NOT_SUPPORTED, "This connector does not support versioned tables");
        }

        if (isHiveSystemSchema(tableName.getSchemaName())) {
            return null;
        }
        Optional<Table> table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName());
        if (table.isEmpty()) {
            return null;
        }
        if (!isHudiTable(table.get())) {
            throw new TrinoException(UNSUPPORTED_TABLE_TYPE, format("Not a Hudi table: %s", tableName));
        }
        // Verify the location really holds Hudi metadata before handing out a handle
        Location location = Location.of(table.get().getStorage().getLocation());
        if (!hudiMetadataExists(fileSystemFactory.create(session), location)) {
            throw new TrinoException(HUDI_BAD_DATA, "Location of table %s does not contain Hudi table metadata: %s".formatted(tableName, location));
        }

        return new HudiTableHandle(
                tableName.getSchemaName(),
                tableName.getTableName(),
                table.get().getStorage().getLocation(),
                COPY_ON_WRITE,
                getPartitionKeyColumnHandles(table.get(), typeManager),
                TupleDomain.all(),
                TupleDomain.all());
    }

    /**
     * Returns the system table (e.g. {@code $timeline}) for the given name, if any,
     * wrapped to preserve the connector class loader across SPI calls.
     */
    @Override
    public Optional<SystemTable> getSystemTable(ConnectorSession session, SchemaTableName tableName)
    {
        return getRawSystemTable(tableName, session)
                .map(systemTable -> new ClassLoaderSafeSystemTable(systemTable, getClass().getClassLoader()));
    }

    private Optional getRawSystemTable(SchemaTableName tableName, ConnectorSession session)
    {
        HudiTableName name = HudiTableName.from(tableName.getTableName());
        if (name.getTableType() == TableType.DATA) {
            return Optional.empty();
        }

        Optional
tableOptional = metastore.getTable(tableName.getSchemaName(), name.getTableName()); if (tableOptional.isEmpty()) { return Optional.empty(); } if (!isHudiTable(tableOptional.get())) { return Optional.empty(); } return switch (name.getTableType()) { case DATA -> // TODO (https://github.com/trinodb/trino/issues/17973) remove DATA table type Optional.empty(); case TIMELINE -> { SchemaTableName systemTableName = new SchemaTableName(tableName.getSchemaName(), name.getTableNameWithType()); yield Optional.of(new TimelineTable(fileSystemFactory.create(session), systemTableName, tableOptional.get())); } }; } @Override public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) { HudiTableHandle hudiTableHandle = (HudiTableHandle) table; return getTableMetadata(hudiTableHandle.getSchemaTableName(), getColumnsToHide(session)); } @Override public Optional> applyFilter(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint constraint) { HudiTableHandle handle = (HudiTableHandle) tableHandle; HudiPredicates predicates = HudiPredicates.from(constraint.getSummary()); TupleDomain regularColumnPredicates = predicates.getRegularColumnPredicates(); TupleDomain partitionColumnPredicates = predicates.getPartitionColumnPredicates(); // TODO Since the constraint#predicate isn't utilized during split generation. So, // Let's not add constraint#predicateColumns to newConstraintColumns. 
Set newConstraintColumns = Stream.concat( Stream.concat( regularColumnPredicates.getDomains().stream() .map(Map::keySet) .flatMap(Collection::stream), partitionColumnPredicates.getDomains().stream() .map(Map::keySet) .flatMap(Collection::stream)), handle.getConstraintColumns().stream()) .collect(toImmutableSet()); HudiTableHandle newHudiTableHandle = handle.applyPredicates( newConstraintColumns, partitionColumnPredicates, regularColumnPredicates); if (handle.getPartitionPredicates().equals(newHudiTableHandle.getPartitionPredicates()) && handle.getRegularPredicates().equals(newHudiTableHandle.getRegularPredicates()) && handle.getConstraintColumns().equals(newHudiTableHandle.getConstraintColumns())) { return Optional.empty(); } return Optional.of(new ConstraintApplicationResult<>( newHudiTableHandle, newHudiTableHandle.getRegularPredicates().transformKeys(ColumnHandle.class::cast), constraint.getExpression(), false)); } @Override public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { HudiTableHandle hudiTableHandle = (HudiTableHandle) tableHandle; Table table = metastore.getTable(hudiTableHandle.getSchemaName(), hudiTableHandle.getTableName()) .orElseThrow(() -> new TableNotFoundException(schemaTableName(hudiTableHandle.getSchemaName(), hudiTableHandle.getTableName()))); return hiveColumnHandles(table, typeManager, NANOSECONDS).stream() .collect(toImmutableMap(HiveColumnHandle::getName, identity())); } @Override public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) { return ((HiveColumnHandle) columnHandle).getColumnMetadata(); } @Override public Optional getInfo(ConnectorTableHandle table) { return Optional.of(HudiTableInfo.from((HudiTableHandle) table)); } @Override public List listTables(ConnectorSession session, Optional optionalSchemaName) { ImmutableList.Builder tableNames = ImmutableList.builder(); for (String schemaName : listSchemas(session, 
optionalSchemaName)) { for (TableInfo tableInfo : metastore.getTables(schemaName)) { tableNames.add(tableInfo.tableName()); } } return tableNames.build(); } @Override public Iterator streamRelationColumns( ConnectorSession session, Optional schemaName, UnaryOperator> relationFilter) { SchemaTablePrefix prefix = schemaName.map(SchemaTablePrefix::new) .orElseGet(SchemaTablePrefix::new); List tables = prefix.getTable() .map(_ -> singletonList(prefix.toSchemaTableName())) .orElseGet(() -> listTables(session, prefix.getSchema())); Map relationColumns = tables.stream() .map(table -> getTableColumnMetadata(session, table)) .flatMap(Optional::stream) .collect(toImmutableMap(RelationColumnsMetadata::name, Function.identity())); return relationFilter.apply(relationColumns.keySet()).stream() .map(relationColumns::get) .iterator(); } @Override public void validateScan(ConnectorSession session, ConnectorTableHandle handle) { HudiTableHandle hudiTableHandle = (HudiTableHandle) handle; if (isQueryPartitionFilterRequired(session)) { if (!hudiTableHandle.getPartitionColumns().isEmpty()) { Set partitionColumns = hudiTableHandle.getPartitionColumns().stream() .map(HiveColumnHandle::getName) .collect(toImmutableSet()); Set constraintColumns = hudiTableHandle.getConstraintColumns().stream() .map(HiveColumnHandle::getBaseColumnName) .collect(toImmutableSet()); if (Collections.disjoint(constraintColumns, partitionColumns)) { throw new TrinoException( QUERY_REJECTED, format("Filter required on %s for at least one of the partition columns: %s", hudiTableHandle.getSchemaTableName(), String.join(", ", partitionColumns))); } } } } @Override public boolean allowSplittingReadIntoMultipleSubQueries(ConnectorSession session, ConnectorTableHandle tableHandle) { // hudi supports only a columnar (parquet) storage format return true; } HiveMetastore getMetastore() { return metastore; } private Optional getTableColumnMetadata(ConnectorSession session, SchemaTableName table) { try { List columns = 
getTableMetadata(table, getColumnsToHide(session)).getColumns(); return Optional.of(RelationColumnsMetadata.forTable(table, columns)); } catch (TableNotFoundException _) { return Optional.empty(); } } private ConnectorTableMetadata getTableMetadata(SchemaTableName tableName, Collection columnsToHide) { Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()) .orElseThrow(() -> new TableNotFoundException(tableName)); Function metadataGetter = columnMetadataGetter(table); List columns = hiveColumnHandles(table, typeManager, NANOSECONDS).stream() .filter(column -> !columnsToHide.contains(column.getName())) .map(metadataGetter) .collect(toImmutableList()); ImmutableMap.Builder properties = ImmutableMap.builder(); // Location property String location = table.getStorage().getLocation(); if (!isNullOrEmpty(location)) { properties.put(LOCATION_PROPERTY, location); } // Partitioning property List partitionedBy = table.getPartitionColumns().stream() .map(Column::getName) .collect(toImmutableList()); if (!partitionedBy.isEmpty()) { properties.put(PARTITIONED_BY_PROPERTY, partitionedBy); } Optional comment = Optional.ofNullable(table.getParameters().get(TABLE_COMMENT)); return new ConnectorTableMetadata(tableName, columns, properties.buildOrThrow(), comment); } private List listSchemas(ConnectorSession session, Optional schemaName) { return schemaName .filter(name -> !isHiveSystemSchema(name)) .map(Collections::singletonList) .orElseGet(() -> listSchemaNames(session)); } }