All downloads are free. The search and download functionality uses the official Maven repository.

io.trino.plugin.hive.projection.PartitionProjection Maven / Gradle / Ivy

There is a newer version: 468
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hive.projection;

import com.google.common.base.VerifyException;
import com.google.common.collect.ImmutableMap;
import io.trino.metastore.Column;
import io.trino.metastore.Partition;
import io.trino.metastore.Table;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.Sets.cartesianProduct;
import static io.trino.metastore.Partition.toPartitionValues;
import static io.trino.plugin.hive.projection.InvalidProjectionException.invalidProjectionMessage;
import static io.trino.plugin.hive.util.HiveUtil.escapePathName;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

public final class PartitionProjection
{
    private static final Pattern PROJECTION_LOCATION_TEMPLATE_PLACEHOLDER_PATTERN = Pattern.compile("(\\$\\{[^}]+\\})");

    private final Optional storageLocationTemplate;
    private final Map columnProjections;

    public PartitionProjection(Optional storageLocationTemplate, Map columnProjections)
    {
        this.storageLocationTemplate = requireNonNull(storageLocationTemplate, "storageLocationTemplate is null");
        this.columnProjections = ImmutableMap.copyOf(requireNonNull(columnProjections, "columnProjections is null"));
    }

    public Optional> getProjectedPartitionNamesByFilter(List columnNames, TupleDomain partitionKeysFilter)
    {
        if (partitionKeysFilter.isNone()) {
            return Optional.empty();
        }
        Map columnDomainMap = partitionKeysFilter.getDomains().orElseThrow(VerifyException::new);
        // Should not happen as we enforce defining partition projection for all partitioning columns.
        // But we leave a check as we might get wrong settings stored by 3rd party system.
        columnNames.forEach(columnName -> checkArgument(
                columnProjections.containsKey(columnName),
                invalidProjectionMessage(columnName, "Projection not defined for this column")));
        List> projectedPartitionValues = columnNames.stream()
                .map(columnName -> columnProjections.get(columnName)
                        .getProjectedValues(Optional.ofNullable(columnDomainMap.get(columnName)))
                        .stream()
                        // Partition names are effectively used as subfolder in underlining fs. So we need to escape illegal chars.
                        .map(projectedValue -> escapePathName(columnName) + "=" + escapePathName(projectedValue))
                        .collect(toImmutableSet()))
                .collect(toImmutableList());
        return Optional.of(cartesianProduct(projectedPartitionValues)
                .stream()
                .map(parts -> String.join("/", parts))
                .collect(toImmutableList()));
    }

    public Map> getProjectedPartitionsByNames(Table table, List partitionNames)
    {
        return partitionNames.stream()
                .collect(Collectors.toMap(
                        partitionName -> partitionName,
                        partitionName -> Optional.of(buildPartitionObject(table, partitionName))));
    }

    private Partition buildPartitionObject(Table table, String partitionName)
    {
        List partitionValues = toPartitionValues(partitionName);
        return Partition.builder()
                .setDatabaseName(table.getDatabaseName())
                .setTableName(table.getTableName())
                .setColumns(table.getDataColumns())
                .setValues(partitionValues)
                .setParameters(Map.of())
                .withStorage(storage -> storage
                        .setStorageFormat(table.getStorage().getStorageFormat())
                        .setLocation(storageLocationTemplate
                                .map(template -> expandStorageLocationTemplate(
                                        template,
                                        table.getPartitionColumns().stream()
                                                .map(Column::getName).collect(Collectors.toList()),
                                        partitionValues))
                                .orElseGet(() -> getPartitionLocation(table.getStorage().getLocation(), partitionName)))
                        .setBucketProperty(table.getStorage().getBucketProperty())
                        .setSerdeParameters(table.getStorage().getSerdeParameters()))
                .build();
    }

    private static String getPartitionLocation(String tableLocation, String partitionName)
    {
        if (tableLocation.endsWith("/")) {
            return format("%s%s/", tableLocation, partitionName);
        }
        return format("%s/%s/", tableLocation, partitionName);
    }

    private static String expandStorageLocationTemplate(String template, List partitionColumns, List partitionValues)
    {
        Matcher matcher = PROJECTION_LOCATION_TEMPLATE_PLACEHOLDER_PATTERN.matcher(template);
        StringBuilder location = new StringBuilder();
        while (matcher.find()) {
            String columnPlaceholder = matcher.group(1);
            String columnName = columnPlaceholder.substring(2, columnPlaceholder.length() - 1);
            matcher.appendReplacement(location, partitionValues.get(partitionColumns.indexOf(columnName)));
        }
        matcher.appendTail(location);
        return location.toString();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy