All downloads are free. The search and download functionality uses the official Maven repository.

io.trino.plugin.hive.projection.DateProjection Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hive.projection;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import io.trino.spi.TrinoException;
import io.trino.spi.predicate.Domain;
import io.trino.spi.type.DateType;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.ZoneId;
import java.time.temporal.ChronoField;
import java.time.temporal.ChronoUnit;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.plugin.hive.projection.PartitionProjectionProperties.COLUMN_PROJECTION_FORMAT;
import static io.trino.plugin.hive.projection.PartitionProjectionProperties.COLUMN_PROJECTION_INTERVAL;
import static io.trino.plugin.hive.projection.PartitionProjectionProperties.COLUMN_PROJECTION_INTERVAL_UNIT;
import static io.trino.plugin.hive.projection.PartitionProjectionProperties.COLUMN_PROJECTION_RANGE;
import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getProjectionPropertyRequiredValue;
import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getProjectionPropertyValue;
import static io.trino.spi.predicate.Domain.singleValue;
import static java.lang.String.format;
import static java.time.temporal.ChronoUnit.DAYS;
import static java.time.temporal.ChronoUnit.HOURS;
import static java.time.temporal.ChronoUnit.MINUTES;
import static java.time.temporal.ChronoUnit.MONTHS;
import static java.time.temporal.ChronoUnit.SECONDS;
import static java.util.Objects.nonNull;
import static java.util.Objects.requireNonNull;
import static java.util.TimeZone.getTimeZone;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

/**
 * {@link Projection} that enumerates partition values for a date-like column.
 * <p>
 * Values are generated between two range bounds (both inclusive), advancing by
 * {@code interval} units of {@code intervalUnit}, and rendered with the configured
 * {@link SimpleDateFormat} pattern. All formatting, parsing and date arithmetic is done in UTC.
 * <p>
 * NOTE(review): the {@link SimpleDateFormat} instance held by this class is not synchronized;
 * confirm that a single projection instance is never used from several threads at once.
 */
final class DateProjection
        implements Projection
{
    private static final ZoneId UTC_TIME_ZONE_ID = ZoneId.of("UTC");
    // Limited to DAYS, HOURS, MINUTES and SECONDS only, because it is not fully clear how units above a day
    // are implemented in Athena. These are the interval units whose arithmetic is unambiguous.
    // Supporting the remaining units would require compatibility tests comparing results received
    // from Athena with those produced by Trino.
    private static final Set<ChronoUnit> DATE_PROJECTION_INTERVAL_UNITS = ImmutableSet.of(DAYS, HOURS, MINUTES, SECONDS);
    // Matches "NOW" optionally followed by a signed offset, e.g. "NOW", "NOW-3DAYS", "NOW + 1 HOUR".
    // Groups: 2 = sign, 3 = multiplier digits, 4 = unit (singular form).
    private static final Pattern DATE_RANGE_BOUND_EXPRESSION_PATTERN = Pattern.compile("^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$");

    private final String columnName;
    private final DateFormat dateFormat;
    private final Supplier<Instant> leftBound;
    private final Supplier<Instant> rightBound;
    private final int interval;
    private final ChronoUnit intervalUnit;

    /**
     * Builds the projection from the column's projection properties.
     *
     * @param columnName name of the projected column; used in error messages
     * @param columnType type of the projected column; must be varchar, date or a short timestamp
     * @param columnProperties projection properties of the column; the format and range properties are
     *         required, the interval (default 1) and interval unit (default derived from the format) are optional
     * @throws InvalidProjectionException if the column type is unsupported, the range does not consist of exactly
     *         two sequential bounds, the interval is not positive, or the interval unit is not supported
     * @throws IllegalArgumentException if the format property is not a valid {@link SimpleDateFormat} pattern
     */
    public DateProjection(String columnName, Type columnType, Map<String, Object> columnProperties)
    {
        if (!(columnType instanceof VarcharType) &&
                !(columnType instanceof DateType) &&
                !(columnType instanceof TimestampType timestampType && timestampType.isShort())) {
            throw new InvalidProjectionException(columnName, columnType);
        }

        this.columnName = requireNonNull(columnName, "columnName is null");

        String dateFormatPattern = getProjectionPropertyRequiredValue(
                columnName,
                columnProperties,
                COLUMN_PROJECTION_FORMAT,
                String::valueOf);

        List<String> range = getProjectionPropertyRequiredValue(
                columnName,
                columnProperties,
                COLUMN_PROJECTION_RANGE,
                value -> ((List<?>) value).stream()
                        .map(String.class::cast)
                        .collect(toImmutableList()));
        if (range.size() != 2) {
            throw invalidRangeProperty(columnName, dateFormatPattern, Optional.empty());
        }

        // Strict (non-lenient) parsing in UTC, so that bounds are interpreted independently of the JVM time zone
        SimpleDateFormat formatter = new SimpleDateFormat(dateFormatPattern);
        formatter.setLenient(false);
        formatter.setTimeZone(getTimeZone(UTC_TIME_ZONE_ID));
        this.dateFormat = formatter;

        leftBound = parseDateRangerBound(columnName, range.get(0), formatter);
        rightBound = parseDateRangerBound(columnName, range.get(1), formatter);
        if (!leftBound.get().isBefore(rightBound.get())) {
            throw invalidRangeProperty(columnName, dateFormatPattern, Optional.empty());
        }

        interval = getProjectionPropertyValue(columnProperties, COLUMN_PROJECTION_INTERVAL, Integer.class::cast).orElse(1);
        // A zero or negative step would never move past the right bound, so value generation would not terminate
        if (interval <= 0) {
            throw new InvalidProjectionException(
                    columnName,
                    format(
                            "Property: '%s' value '%s' is invalid. Interval must be a positive integer",
                            COLUMN_PROJECTION_INTERVAL,
                            interval));
        }

        intervalUnit = getProjectionPropertyValue(columnProperties, COLUMN_PROJECTION_INTERVAL_UNIT, ChronoUnit.class::cast)
                .orElseGet(() -> resolveDefaultChronoUnit(columnName, dateFormatPattern));

        if (!DATE_PROJECTION_INTERVAL_UNITS.contains(intervalUnit)) {
            throw new InvalidProjectionException(
                    columnName,
                    format(
                            "Property: '%s' value '%s' is invalid. Available options: %s",
                            COLUMN_PROJECTION_INTERVAL_UNIT,
                            intervalUnit,
                            DATE_PROJECTION_INTERVAL_UNITS));
        }
    }

    /**
     * Generates the formatted partition values from the left bound up to and including the right bound,
     * stepping by the configured interval, and keeps those accepted by the filter.
     *
     * @param partitionValueFilter optional domain restricting the values; an empty optional or an
     *         "all" domain accepts every generated value
     * @return the accepted values, formatted with the configured date format, in ascending order
     */
    @Override
    public List<String> getProjectedValues(Optional<Domain> partitionValueFilter)
    {
        ImmutableList.Builder<String> builder = ImmutableList.builder();

        // Bounds are re-evaluated on every call, because "NOW"-relative expressions may depend on the current time
        Instant leftBound = adjustBoundToDateFormat(this.leftBound.get());
        Instant rightBound = adjustBoundToDateFormat(this.rightBound.get());

        Instant currentValue = leftBound;
        while (!currentValue.isAfter(rightBound)) {
            String currentValueFormatted = formatValue(currentValue);
            if (isValueInDomain(partitionValueFilter, currentValue, currentValueFormatted)) {
                builder.add(currentValueFormatted);
            }
            currentValue = currentValue.atZone(UTC_TIME_ZONE_ID)
                    .plus(interval, intervalUnit)
                    .toInstant();
        }

        return builder.build();
    }

    /**
     * Reduces a bound to the precision of the configured date format by clearing its sub-second part,
     * formatting it and parsing the formatted text back.
     *
     * @throws InvalidProjectionException if the formatted value cannot be parsed back
     */
    private Instant adjustBoundToDateFormat(Instant value)
    {
        String formatted = formatValue(value.with(ChronoField.MILLI_OF_SECOND, 0));
        try {
            return dateFormat.parse(formatted).toInstant();
        }
        catch (ParseException e) {
            // Report against the column, like every other validation error of this projection
            throw new InvalidProjectionException(
                    columnName,
                    format("Unable to parse value '%s' produced by the date format: %s", formatted, e.getMessage()));
        }
    }

    /**
     * Renders the instant with the configured date format.
     */
    private String formatValue(Instant current)
    {
        return dateFormat.format(new Date(current.toEpochMilli()));
    }

    /**
     * Checks whether a generated value is accepted by the optional filter domain. The value is converted
     * to the representation matching the domain type: the formatted text for varchar, the epoch day
     * for date and epoch microseconds for short timestamps.
     *
     * @throws InvalidProjectionException if the domain type is none of the supported types
     */
    private boolean isValueInDomain(Optional<Domain> valueDomain, Instant value, String formattedValue)
    {
        if (valueDomain.isEmpty() || valueDomain.get().isAll()) {
            return true;
        }
        Domain domain = valueDomain.get();
        Type type = domain.getType();
        if (type instanceof VarcharType) {
            return domain.contains(singleValue(type, utf8Slice(formattedValue)));
        }
        if (type instanceof DateType) {
            // Calendar-based epoch day: unlike truncating division of epoch millis,
            // this is also correct for non-midnight instants before 1970-01-01
            return domain.contains(singleValue(type, value.atZone(UTC_TIME_ZONE_ID).toLocalDate().toEpochDay()));
        }
        if (type instanceof TimestampType timestampType && timestampType.isShort()) {
            return domain.contains(singleValue(type, MILLISECONDS.toMicros(value.toEpochMilli())));
        }
        throw new InvalidProjectionException(columnName, type);
    }

    /**
     * Derives the default interval unit from the date format pattern, ignoring quoted literal text:
     * {@code DAYS} when the pattern contains a day field ({@code d}), {@code MONTHS} otherwise.
     *
     * @throws InvalidProjectionException if the pattern contains an hour ({@code H}), minute ({@code m}),
     *         second ({@code s}) or millisecond ({@code S}) field, in which case the unit must be set explicitly
     */
    private static ChronoUnit resolveDefaultChronoUnit(String columnName, String dateFormatPattern)
    {
        String datePatternWithoutText = dateFormatPattern.replaceAll("'.*?'", "");
        if (datePatternWithoutText.contains("S") || datePatternWithoutText.contains("s")
                || datePatternWithoutText.contains("m") || datePatternWithoutText.contains("H")) {
            // The format is finer than single-day precision, so no default unit can be assumed
            throw new InvalidProjectionException(
                    columnName,
                    format(
                            "Property: '%s' needs to be set when provided '%s' is less that single-day precision. Interval defaults to 1 day or 1 month, respectively. Otherwise, interval is required",
                            COLUMN_PROJECTION_INTERVAL_UNIT,
                            COLUMN_PROJECTION_FORMAT));
        }
        if (datePatternWithoutText.contains("d")) {
            return DAYS;
        }
        return MONTHS;
    }

    /**
     * Parses one range bound, which is either a {@code NOW} expression (optionally with a signed offset)
     * or a date literal in the configured format.
     * <p>
     * A plain {@code NOW} is captured once, at parse time; {@code NOW} with an offset is
     * evaluated against the current time on every {@link Supplier#get()} call.
     *
     * @throws InvalidProjectionException if the value is neither a valid expression nor a parsable date
     */
    private static Supplier<Instant> parseDateRangerBound(String columnName, String value, SimpleDateFormat dateFormat)
    {
        Matcher matcher = DATE_RANGE_BOUND_EXPRESSION_PATTERN.matcher(value);
        if (matcher.matches()) {
            String operator = matcher.group(2);
            String multiplierString = matcher.group(3);
            String unitString = matcher.group(4);
            if (nonNull(operator) && nonNull(multiplierString) && nonNull(unitString)) {
                int multiplier;
                try {
                    multiplier = Integer.parseInt(multiplierString);
                }
                catch (NumberFormatException e) {
                    // The pattern accepts any number of digits, so the multiplier may not fit in an int
                    throw invalidRangeProperty(columnName, dateFormat.toPattern(), Optional.of("Invalid expression"));
                }
                // The pattern captures the singular unit name; ChronoUnit constants are plural
                return new DateExpressionBound(
                        multiplier,
                        ChronoUnit.valueOf(unitString.toUpperCase(Locale.ENGLISH) + "S"),
                        operator.charAt(0) == '+');
            }
            if (value.trim().equals("NOW")) {
                Instant now = Instant.now();
                return () -> now;
            }
            throw invalidRangeProperty(columnName, dateFormat.toPattern(), Optional.of("Invalid expression"));
        }

        Instant dateBound;
        try {
            dateBound = dateFormat.parse(value).toInstant();
        }
        catch (ParseException e) {
            throw invalidRangeProperty(columnName, dateFormat.toPattern(), Optional.of(e.getMessage()));
        }
        return () -> dateBound;
    }

    /**
     * Creates the exception describing an invalid range property; callers are expected to throw it.
     *
     * @param errorDetail optional detail appended to the message after a colon
     */
    private static TrinoException invalidRangeProperty(String columnName, String dateFormatPattern, Optional<String> errorDetail)
    {
        return new InvalidProjectionException(
                columnName,
                format(
                        "Property: '%s' needs to be a list of 2 valid dates formatted as '%s' or '%s' that are sequential%s",
                        COLUMN_PROJECTION_RANGE,
                        dateFormatPattern,
                        DATE_RANGE_BOUND_EXPRESSION_PATTERN.pattern(),
                        errorDetail.map(error -> ": " + error).orElse("")));
    }

    /**
     * Range bound expressed relative to the current time: now plus or minus {@code multiplier} units.
     * The current time is read on every {@link #get()} call.
     */
    private record DateExpressionBound(int multiplier, ChronoUnit unit, boolean increment)
            implements Supplier<Instant>
    {
        @Override
        public Instant get()
        {
            return Instant.now().plus(increment ? multiplier : -multiplier, unit);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy