All downloads are free. Search and download functionality uses the official Maven repository.

com.facebook.presto.iceberg.PartitionFields Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.iceberg;

import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.Term;

import javax.annotation.Nullable;

import java.util.List;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.lang.Integer.parseInt;
import static java.lang.String.format;
import static org.apache.iceberg.expressions.Expressions.bucket;
import static org.apache.iceberg.expressions.Expressions.day;
import static org.apache.iceberg.expressions.Expressions.hour;
import static org.apache.iceberg.expressions.Expressions.month;
import static org.apache.iceberg.expressions.Expressions.ref;
import static org.apache.iceberg.expressions.Expressions.truncate;
import static org.apache.iceberg.expressions.Expressions.year;

/**
 * Static helpers that convert between textual partition field declarations
 * (for example {@code "ds"}, {@code "year(ts)"} or {@code "bucket(id, 16)"})
 * and Iceberg {@link PartitionSpec} / {@link PartitionField} objects.
 *
 * <p>The class is a non-instantiable utility holder.
 */
public final class PartitionFields
{
    // Building blocks for the declaration grammar: a lower-case identifier, a
    // single-argument call "(name)", and a two-argument call "(name, N)".
    private static final String NAME = "[a-z_][a-z0-9_]*";
    private static final String FUNCTION_NAME = "\\((" + NAME + ")\\)";
    private static final String FUNCTION_NAME_INT = "\\((" + NAME + "), *(\\d+)\\)";

    // Patterns for declarations accepted by parsePartitionField.
    private static final Pattern IDENTITY_PATTERN = Pattern.compile(NAME);
    private static final Pattern YEAR_PATTERN = Pattern.compile("year" + FUNCTION_NAME);
    private static final Pattern MONTH_PATTERN = Pattern.compile("month" + FUNCTION_NAME);
    private static final Pattern DAY_PATTERN = Pattern.compile("day" + FUNCTION_NAME);
    private static final Pattern HOUR_PATTERN = Pattern.compile("hour" + FUNCTION_NAME);
    private static final Pattern BUCKET_PATTERN = Pattern.compile("bucket" + FUNCTION_NAME_INT);
    private static final Pattern TRUNCATE_PATTERN = Pattern.compile("truncate" + FUNCTION_NAME_INT);

    // Transform names in parenthesised form, e.g. "bucket(16)"; used by
    // getPartitionColumnName and getTransformTerm.
    private static final Pattern COLUMN_BUCKET_PATTERN = Pattern.compile("bucket\\((\\d+)\\)");
    private static final Pattern COLUMN_TRUNCATE_PATTERN = Pattern.compile("truncate\\((\\d+)\\)");
    // Transform names in bracketed form, e.g. "bucket[16]"; matched against
    // Transform.toString() in toPartitionField.
    private static final Pattern ICEBERG_BUCKET_PATTERN = Pattern.compile("bucket\\[(\\d+)]");
    private static final Pattern ICEBERG_TRUNCATE_PATTERN = Pattern.compile("truncate\\[(\\d+)]");

    private PartitionFields() {}

    /**
     * Builds a partition spec for {@code schema} from textual field declarations,
     * without assigning an explicit spec id.
     *
     * @param schema the Iceberg schema the spec is built for
     * @param fields partition field declarations, applied in list order
     * @return the built partition spec
     * @throws IllegalArgumentException if a declaration matches none of the supported forms
     */
    public static PartitionSpec parsePartitionFields(Schema schema, List<String> fields)
    {
        return parsePartitionFields(schema, fields, null);
    }

    /**
     * Builds a partition spec for {@code schema} from textual field declarations.
     *
     * @param schema the Iceberg schema the spec is built for
     * @param fields partition field declarations, applied in list order
     * @param specId spec id to set on the builder, or {@code null} to keep the builder's default
     * @return the built partition spec
     * @throws IllegalArgumentException if a declaration matches none of the supported forms
     */
    public static PartitionSpec parsePartitionFields(Schema schema, List<String> fields, @Nullable Integer specId)
    {
        PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
        if (specId != null) {
            builder = builder.withSpecId(specId);
        }

        for (String field : fields) {
            parsePartitionField(builder, field);
        }
        return builder.build();
    }

    /**
     * Parses one declaration and registers the corresponding transform on {@code builder}.
     *
     * <p>Supported forms: {@code name}, {@code year(name)}, {@code month(name)},
     * {@code day(name)}, {@code hour(name)}, {@code bucket(name, N)} and
     * {@code truncate(name, N)}. The whole string must match one of them.
     *
     * @param builder the spec builder to add the field to
     * @param field the textual declaration
     * @throws IllegalArgumentException if {@code field} matches none of the supported forms
     */
    public static void parsePartitionField(PartitionSpec.Builder builder, String field)
    {
        // The leading "false ||" only exists so that every alternative lines up
        // identically; the chain short-circuits on the first pattern that matches.
        @SuppressWarnings("PointlessBooleanExpression")
        boolean matched = false ||
                tryMatch(field, IDENTITY_PATTERN, match -> builder.identity(match.group())) ||
                tryMatch(field, YEAR_PATTERN, match -> builder.year(match.group(1))) ||
                tryMatch(field, MONTH_PATTERN, match -> builder.month(match.group(1))) ||
                tryMatch(field, DAY_PATTERN, match -> builder.day(match.group(1))) ||
                tryMatch(field, HOUR_PATTERN, match -> builder.hour(match.group(1))) ||
                tryMatch(field, BUCKET_PATTERN, match -> builder.bucket(match.group(1), parseInt(match.group(2)))) ||
                tryMatch(field, TRUNCATE_PATTERN, match -> builder.truncate(match.group(1), parseInt(match.group(2))));
        if (!matched) {
            throw new IllegalArgumentException("Invalid partition field declaration: " + field);
        }
    }

    /**
     * Runs {@code match} on the match result if {@code pattern} matches the whole of {@code value}.
     *
     * @return {@code true} if the pattern matched and the consumer was invoked, {@code false} otherwise
     */
    private static boolean tryMatch(CharSequence value, Pattern pattern, Consumer<MatchResult> match)
    {
        Matcher matcher = pattern.matcher(value);
        if (matcher.matches()) {
            match.accept(matcher.toMatchResult());
            return true;
        }
        return false;
    }

    /**
     * Renders every field of {@code spec} as a textual declaration, in the order
     * returned by {@code spec.fields()}.
     *
     * @param spec the partition spec to render
     * @return an immutable list with one declaration per partition field
     * @throws UnsupportedOperationException if a field uses a transform that cannot be rendered
     */
    public static List<String> toPartitionFields(PartitionSpec spec)
    {
        return spec.fields().stream()
                .map(field -> toPartitionField(spec, field))
                .collect(toImmutableList());
    }

    /**
     * Derives the partition column name for a source column and a transform name.
     *
     * <p>{@code identity} yields the column name itself; {@code year}, {@code month},
     * {@code day} and {@code hour} append {@code _<transform>}; {@code bucket(N)} appends
     * {@code _bucket}; {@code truncate(N)} appends {@code _trunc}.
     *
     * @param columnName the source column name
     * @param transform the transform name, with parameters in parenthesised form
     * @return the derived partition column name
     * @throws UnsupportedOperationException if {@code transform} is not recognised
     */
    // Keep consistency with PartitionSpec.Builder
    protected static String getPartitionColumnName(String columnName, String transform)
    {
        switch (transform) {
            case "identity":
                return columnName;
            case "year":
            case "month":
            case "day":
            case "hour":
                return columnName + "_" + transform;
        }

        Matcher matcher = COLUMN_BUCKET_PATTERN.matcher(transform);
        if (matcher.matches()) {
            return columnName + "_bucket";
        }

        matcher = COLUMN_TRUNCATE_PATTERN.matcher(transform);
        if (matcher.matches()) {
            return columnName + "_trunc";
        }

        throw new UnsupportedOperationException("Unknown partition transform: " + transform);
    }

    /**
     * Builds the Iceberg expression {@link Term} for applying {@code transform} to {@code columnName}.
     *
     * @param columnName the source column name
     * @param transform the transform name, with parameters in parenthesised form
     *        (for example {@code bucket(16)})
     * @return the corresponding term
     * @throws UnsupportedOperationException if {@code transform} is not recognised
     */
    protected static Term getTransformTerm(String columnName, String transform)
    {
        switch (transform) {
            case "identity":
                return ref(columnName);
            case "year":
                return year(columnName);
            case "month":
                return month(columnName);
            case "day":
                return day(columnName);
            case "hour":
                return hour(columnName);
        }

        Matcher matcher = COLUMN_BUCKET_PATTERN.matcher(transform);
        if (matcher.matches()) {
            return bucket(columnName, parseInt(matcher.group(1)));
        }

        matcher = COLUMN_TRUNCATE_PATTERN.matcher(transform);
        if (matcher.matches()) {
            return truncate(columnName, parseInt(matcher.group(1)));
        }

        throw new UnsupportedOperationException("Unknown partition transform: " + transform);
    }

    /**
     * Renders a single partition field as a textual declaration.
     *
     * <p>The source column name is looked up in the spec's schema by the field's source id,
     * and the transform is identified by its {@code toString()} form; parameterized
     * transforms are matched in bracketed form ({@code bucket[N]}, {@code truncate[N]}).
     *
     * @throws UnsupportedOperationException if the transform string is not recognised
     */
    private static String toPartitionField(PartitionSpec spec, PartitionField field)
    {
        String name = spec.schema().findColumnName(field.sourceId());
        String transform = field.transform().toString();

        switch (transform) {
            case "identity":
                return name;
            case "year":
            case "month":
            case "day":
            case "hour":
                return format("%s(%s)", transform, name);
        }

        Matcher matcher = ICEBERG_BUCKET_PATTERN.matcher(transform);
        if (matcher.matches()) {
            return format("bucket(%s, %s)", name, matcher.group(1));
        }

        matcher = ICEBERG_TRUNCATE_PATTERN.matcher(transform);
        if (matcher.matches()) {
            return format("truncate(%s, %s)", name, matcher.group(1));
        }

        throw new UnsupportedOperationException("Unsupported partition transform: " + field);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy