// org.apache.iceberg.transforms.ProjectionUtil (from the iceberg-api artifact)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.transforms;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Set;
import org.apache.iceberg.expressions.BoundLiteralPredicate;
import org.apache.iceberg.expressions.BoundPredicate;
import org.apache.iceberg.expressions.BoundSetPredicate;
import org.apache.iceberg.expressions.BoundTransform;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import static org.apache.iceberg.expressions.Expressions.predicate;
class ProjectionUtil {
/** Not instantiable: every member of this class is a static helper. */
private ProjectionUtil() {
  // intentionally empty
}
/**
 * Rewrites a bound integer predicate into a predicate on the transformed value named {@code name}.
 *
 * <p>Open bounds are closed before the transform is applied: {@code < b} becomes
 * {@code <= transform(b - 1)} and {@code > b} becomes {@code >= transform(b + 1)}. Closed bounds and
 * equality are passed through with the transformed boundary.
 *
 * @param name name that the returned predicate refers to
 * @param pred bound predicate whose literal supplies the boundary
 * @param transform transform applied to the (possibly adjusted) boundary
 * @param <T> type produced by the transform
 * @return the rewritten predicate, or {@code null} for any operation other than LT, LT_EQ, GT,
 *     GT_EQ and EQ
 */
static <T> UnboundPredicate<T> truncateInteger(
    String name, BoundLiteralPredicate<Integer> pred, Transform<Integer, T> transform) {
  int boundary = pred.literal().value();
  switch (pred.op()) {
    case LT:
      // close the bound (< b is the same as <= b - 1), then transform
      return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary - 1));
    case LT_EQ:
      return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
    case GT:
      // close the bound (> b is the same as >= b + 1), then transform
      return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary + 1));
    case GT_EQ:
      return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
    case EQ:
      return predicate(pred.op(), name, transform.apply(boundary));
    default:
      return null;
  }
}
/**
 * Rewrites a bound integer predicate into a strict predicate on the transformed value named
 * {@code name}.
 *
 * <p>The result always uses an open bound: {@code <= b} becomes {@code < transform(b + 1)} and
 * {@code >= b} becomes {@code > transform(b - 1)}, while {@code < b}, {@code > b} and
 * {@code != b} keep their operation and use {@code transform(b)}.
 *
 * @param name name that the returned predicate refers to
 * @param pred bound predicate whose literal supplies the boundary
 * @param transform transform applied to the (possibly adjusted) boundary
 * @param <T> type produced by the transform
 * @return the rewritten predicate, or {@code null} for EQ and any other operation
 */
static <T> UnboundPredicate<T> truncateIntegerStrict(
    String name, BoundLiteralPredicate<Integer> pred, Transform<Integer, T> transform) {
  int boundary = pred.literal().value();
  switch (pred.op()) {
    case LT:
      return predicate(Expression.Operation.LT, name, transform.apply(boundary));
    case LT_EQ:
      // open the bound (<= b is the same as < b + 1), then transform
      return predicate(Expression.Operation.LT, name, transform.apply(boundary + 1));
    case GT:
      return predicate(Expression.Operation.GT, name, transform.apply(boundary));
    case GT_EQ:
      // open the bound (>= b is the same as > b - 1), then transform
      return predicate(Expression.Operation.GT, name, transform.apply(boundary - 1));
    case NOT_EQ:
      return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
    case EQ:
      // there is no predicate that guarantees equality because adjacent ints transform to the same value
    default:
      return null;
  }
}
/**
 * Rewrites a bound long predicate into a strict predicate on the transformed value named
 * {@code name}.
 *
 * <p>The result always uses an open bound: {@code <= b} becomes {@code < transform(b + 1)} and
 * {@code >= b} becomes {@code > transform(b - 1)}, while {@code < b}, {@code > b} and
 * {@code != b} keep their operation and use {@code transform(b)}.
 *
 * @param name name that the returned predicate refers to
 * @param pred bound predicate whose literal supplies the boundary
 * @param transform transform applied to the (possibly adjusted) boundary
 * @param <T> type produced by the transform
 * @return the rewritten predicate, or {@code null} for EQ and any other operation
 */
static <T> UnboundPredicate<T> truncateLongStrict(
    String name, BoundLiteralPredicate<Long> pred, Transform<Long, T> transform) {
  long boundary = pred.literal().value();
  switch (pred.op()) {
    case LT:
      return predicate(Expression.Operation.LT, name, transform.apply(boundary));
    case LT_EQ:
      // open the bound (<= b is the same as < b + 1), then transform
      return predicate(Expression.Operation.LT, name, transform.apply(boundary + 1L));
    case GT:
      return predicate(Expression.Operation.GT, name, transform.apply(boundary));
    case GT_EQ:
      // open the bound (>= b is the same as > b - 1), then transform
      return predicate(Expression.Operation.GT, name, transform.apply(boundary - 1L));
    case NOT_EQ:
      return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
    case EQ:
      // there is no predicate that guarantees equality because adjacent longs transform to the same value
    default:
      return null;
  }
}
/**
 * Rewrites a bound long predicate into a predicate on the transformed value named {@code name}.
 *
 * <p>Open bounds are closed before the transform is applied: {@code < b} becomes
 * {@code <= transform(b - 1)} and {@code > b} becomes {@code >= transform(b + 1)}. Closed bounds and
 * equality are passed through with the transformed boundary.
 *
 * @param name name that the returned predicate refers to
 * @param pred bound predicate whose literal supplies the boundary
 * @param transform transform applied to the (possibly adjusted) boundary
 * @param <T> type produced by the transform
 * @return the rewritten predicate, or {@code null} for any operation other than LT, LT_EQ, GT,
 *     GT_EQ and EQ
 */
static <T> UnboundPredicate<T> truncateLong(
    String name, BoundLiteralPredicate<Long> pred, Transform<Long, T> transform) {
  long boundary = pred.literal().value();
  switch (pred.op()) {
    case LT:
      // close the bound (< b is the same as <= b - 1), then transform
      return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary - 1L));
    case LT_EQ:
      return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
    case GT:
      // close the bound (> b is the same as >= b + 1), then transform
      return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary + 1L));
    case GT_EQ:
      return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
    case EQ:
      return predicate(pred.op(), name, transform.apply(boundary));
    default:
      return null;
  }
}
/**
 * Rewrites a bound decimal predicate into a predicate on the transformed value named {@code name}.
 *
 * <p>Open bounds are closed before the transform is applied by moving the boundary one unit in its
 * last decimal place: the unscaled value is decremented (for LT) or incremented (for GT) while the
 * scale is kept. Closed bounds and equality use the transformed boundary unchanged.
 *
 * @param name name that the returned predicate refers to
 * @param pred bound predicate whose literal supplies the boundary
 * @param transform transform applied to the (possibly adjusted) boundary
 * @param <T> type produced by the transform
 * @return the rewritten predicate, or {@code null} for any operation other than LT, LT_EQ, GT,
 *     GT_EQ and EQ
 */
static <T> UnboundPredicate<T> truncateDecimal(
    String name, BoundLiteralPredicate<BigDecimal> pred, Transform<BigDecimal, T> transform) {
  BigDecimal boundary = pred.literal().value();
  switch (pred.op()) {
    case LT: {
      // close the bound: step the unscaled value down by one, keep the scale, then transform
      BigDecimal minusOne = new BigDecimal(
          boundary.unscaledValue().subtract(BigInteger.ONE),
          boundary.scale());
      return predicate(Expression.Operation.LT_EQ, name, transform.apply(minusOne));
    }
    case LT_EQ:
      return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
    case GT: {
      // close the bound: step the unscaled value up by one, keep the scale, then transform
      BigDecimal plusOne = new BigDecimal(
          boundary.unscaledValue().add(BigInteger.ONE),
          boundary.scale());
      return predicate(Expression.Operation.GT_EQ, name, transform.apply(plusOne));
    }
    case GT_EQ:
      return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
    case EQ:
      return predicate(pred.op(), name, transform.apply(boundary));
    default:
      return null;
  }
}
/**
 * Rewrites a bound decimal predicate into a strict predicate on the transformed value named
 * {@code name}.
 *
 * <p>The result always uses an open bound. For LT_EQ the boundary is first moved up, and for GT_EQ
 * moved down, by one unit in its last decimal place (unscaled value +/- 1, same scale). LT, GT and
 * NOT_EQ keep their operation and use the transformed boundary unchanged.
 *
 * @param name name that the returned predicate refers to
 * @param pred bound predicate whose literal supplies the boundary
 * @param transform transform applied to the (possibly adjusted) boundary
 * @param <T> type produced by the transform
 * @return the rewritten predicate, or {@code null} for EQ and any other operation
 */
static <T> UnboundPredicate<T> truncateDecimalStrict(
    String name, BoundLiteralPredicate<BigDecimal> pred, Transform<BigDecimal, T> transform) {
  BigDecimal boundary = pred.literal().value();
  switch (pred.op()) {
    case LT:
      return predicate(Expression.Operation.LT, name, transform.apply(boundary));
    case LT_EQ: {
      // open the bound: step the unscaled value up by one, keep the scale, then transform
      BigDecimal plusOne = new BigDecimal(
          boundary.unscaledValue().add(BigInteger.ONE),
          boundary.scale());
      return predicate(Expression.Operation.LT, name, transform.apply(plusOne));
    }
    case GT:
      return predicate(Expression.Operation.GT, name, transform.apply(boundary));
    case GT_EQ: {
      // open the bound: step the unscaled value down by one, keep the scale, then transform
      BigDecimal minusOne = new BigDecimal(
          boundary.unscaledValue().subtract(BigInteger.ONE),
          boundary.scale());
      return predicate(Expression.Operation.GT, name, transform.apply(minusOne));
    }
    case NOT_EQ:
      return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
    case EQ:
      // there is no predicate that guarantees equality because adjacent decimals transform to the same value
    default:
      return null;
  }
}
/**
 * Rewrites a bound predicate into a predicate on the transformed value named {@code name}, without
 * adjusting the boundary.
 *
 * <p>Both LT and LT_EQ produce {@code <= transform(boundary)}, and both GT and GT_EQ produce
 * {@code >= transform(boundary)}. EQ and STARTS_WITH keep their operation.
 *
 * @param name name that the returned predicate refers to
 * @param pred bound predicate whose literal supplies the boundary
 * @param transform transform applied to the boundary
 * @param <S> source type of the boundary
 * @param <T> type produced by the transform
 * @return the rewritten predicate, or {@code null} for any other operation
 */
static <S, T> UnboundPredicate<T> truncateArray(
    String name, BoundLiteralPredicate<S> pred, Transform<S, T> transform) {
  S boundary = pred.literal().value();
  switch (pred.op()) {
    case LT:
    case LT_EQ:
      return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
    case GT:
    case GT_EQ:
      return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
    case EQ:
      return predicate(Expression.Operation.EQ, name, transform.apply(boundary));
    case STARTS_WITH:
      return predicate(Expression.Operation.STARTS_WITH, name, transform.apply(boundary));
    // TODO: IN is not handled by this method
    default:
      return null;
  }
}
/**
 * Rewrites a bound predicate into a strict predicate on the transformed value named {@code name},
 * without adjusting the boundary.
 *
 * <p>Both LT and LT_EQ produce {@code < transform(boundary)}, and both GT and GT_EQ produce
 * {@code > transform(boundary)}. NOT_EQ keeps its operation.
 *
 * @param name name that the returned predicate refers to
 * @param pred bound predicate whose literal supplies the boundary
 * @param transform transform applied to the boundary
 * @param <S> source type of the boundary
 * @param <T> type produced by the transform
 * @return the rewritten predicate, or {@code null} for EQ and any other operation
 */
static <S, T> UnboundPredicate<T> truncateArrayStrict(
    String name, BoundLiteralPredicate<S> pred, Transform<S, T> transform) {
  S boundary = pred.literal().value();
  switch (pred.op()) {
    case LT:
    case LT_EQ:
      return predicate(Expression.Operation.LT, name, transform.apply(boundary));
    case GT:
    case GT_EQ:
      return predicate(Expression.Operation.GT, name, transform.apply(boundary));
    case NOT_EQ:
      return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
    case EQ:
      // there is no predicate that guarantees equality because adjacent values transform to the same partition
    default:
      return null;
  }
}
/**
* If the predicate has a transformed child that matches the given transform, return a predicate.
*/
@SuppressWarnings("unchecked")
static UnboundPredicate projectTransformPredicate(Transform, T> transform,
String partitionName, BoundPredicate> pred) {
if (pred.term() instanceof BoundTransform && transform.equals(((BoundTransform, ?>) pred.term()).transform())) {
// the bound value must be a T because the transform matches
return (UnboundPredicate) removeTransform(partitionName, pred);
}
return null;
}
/**
 * Re-creates a bound predicate as an unbound predicate on {@code partitionName}, keeping its
 * operation and, where present, its literal or literal set.
 *
 * @param partitionName name that the returned predicate refers to
 * @param pred bound predicate to copy the operation and literal(s) from
 * @param <T> value type of the predicate
 * @return an unbound predicate with the same operation and literal(s)
 * @throws UnsupportedOperationException if the predicate is not unary, literal or set based
 */
private static <T> UnboundPredicate<T> removeTransform(String partitionName, BoundPredicate<T> pred) {
  if (pred.isUnaryPredicate()) {
    return Expressions.predicate(pred.op(), partitionName);
  }

  if (pred.isLiteralPredicate()) {
    return Expressions.predicate(pred.op(), partitionName, pred.asLiteralPredicate().literal());
  }

  if (pred.isSetPredicate()) {
    return Expressions.predicate(pred.op(), partitionName, pred.asSetPredicate().literalSet());
  }

  throw new UnsupportedOperationException("Cannot replace transform in unknown predicate: " + pred);
}
/**
 * Rewrites a bound set predicate into a predicate on {@code fieldName} whose values are the
 * original literal set passed element by element through {@code transform}.
 *
 * @param fieldName name that the returned predicate refers to
 * @param predicate bound set predicate that supplies the operation and the literal set
 * @param transform transform applied to each value of the literal set
 * @param <S> source type of the set values
 * @param <T> type produced by the transform
 * @return a predicate with the same operation over the transformed values
 */
static <S, T> UnboundPredicate<T> transformSet(
    String fieldName, BoundSetPredicate<S> predicate, Transform<S, T> transform) {
  return predicate(predicate.op(), fieldName,
      Iterables.transform(predicate.asSetPredicate().literalSet(), transform::apply));
}
/**
 * Fixes an inclusive projection to account for incorrectly transformed values.
 *
 * <p>A bug in 0.10.0 and earlier caused negative values to be incorrectly transformed by date and
 * timestamp transforms to 1 larger than the correct value. For example, day(1969-12-31 10:00:00)
 * produced 0 instead of -1. To read data written by versions with this bug, this method adjusts the
 * inclusive projection. The current inclusive projection is correct, so this modifies the "correct"
 * projection when needed. For example, {@code < day(1969-12-31 10:00:00)} will produce
 * {@code <= -1} (= 1969-12-31) and is adjusted to {@code <= 0} (= 1970-01-01) because the incorrect
 * transformed value was 0.
 *
 * @param projected an inclusive projection, may be {@code null}
 * @return {@code null} if {@code projected} is {@code null} or its operation is NOT_EQ or NOT_IN;
 *     a widened predicate if an upper bound or matched value is negative; otherwise
 *     {@code projected} itself
 */
static UnboundPredicate<Integer> fixInclusiveTimeProjection(UnboundPredicate<Integer> projected) {
  if (projected == null) {
    return null;
  }

  // adjust the predicate for values that were 1 larger than the correct transformed value
  switch (projected.op()) {
    case LT: {
      Integer upperBound = projected.literal().value();
      if (upperBound < 0) {
        return Expressions.lessThan(projected.term(), upperBound + 1);
      }
      return projected;
    }

    case LT_EQ: {
      Integer upperBound = projected.literal().value();
      if (upperBound < 0) {
        return Expressions.lessThanOrEqual(projected.term(), upperBound + 1);
      }
      return projected;
    }

    case GT:
    case GT_EQ:
      // incorrect projected values are already greater than the bound for GT, GT_EQ
      return projected;

    case EQ: {
      Integer projectedValue = projected.literal().value();
      if (projectedValue < 0) {
        // match either the incorrect value (projectedValue + 1) or the correct value (projectedValue)
        return Expressions.in(projected.term(), projectedValue, projectedValue + 1);
      }
      return projected;
    }

    case IN: {
      // keep every original value and add value + 1 next to each negative one
      Set<Integer> fixedSet = Sets.newHashSet();
      boolean hasNegativeValue = false;
      for (Literal<Integer> lit : projected.literals()) {
        Integer value = lit.value();
        fixedSet.add(value);
        if (value < 0) {
          hasNegativeValue = true;
          fixedSet.add(value + 1);
        }
      }

      if (hasNegativeValue) {
        return Expressions.in(projected.term(), fixedSet);
      }
      return projected;
    }

    case NOT_IN:
    case NOT_EQ:
      // there is no inclusive projection for NOT_EQ and NOT_IN
      return null;

    default:
      return projected;
  }
}
/**
 * Fixes a strict projection to account for incorrectly transformed values.
 *
 * <p>A bug in 0.10.0 and earlier caused negative values to be incorrectly transformed by date and
 * timestamp transforms to 1 larger than the correct value. For example, day(1969-12-31 10:00:00)
 * produced 0 instead of -1. To read data written by versions with this bug, this method adjusts the
 * strict projection.
 *
 * @param projected a strict projection, may be {@code null}
 * @return {@code null} if {@code projected} is {@code null} or its operation is EQ, IN or any
 *     operation not listed in the switch; a narrowed predicate if a lower bound is {@code <= 0} or
 *     an excluded value is negative; otherwise {@code projected} itself
 */
static UnboundPredicate<Integer> fixStrictTimeProjection(UnboundPredicate<Integer> projected) {
  if (projected == null) {
    return null;
  }

  switch (projected.op()) {
    case LT:
    case LT_EQ:
      // the correct bound is a correct strict projection for the incorrectly transformed values.
      return projected;

    case GT: {
      // GT and GT_EQ need to be adjusted because values that do not match the predicate may have been
      // transformed into partition values that match the projected predicate: a stored value may be 1 larger
      // than the correct one, so the lower bound is raised by 1 to use the more strict value.
      Integer lowerBound = projected.literal().value();
      if (lowerBound <= 0) {
        return Expressions.greaterThan(projected.term(), lowerBound + 1);
      }
      return projected;
    }

    case GT_EQ: {
      Integer lowerBound = projected.literal().value();
      if (lowerBound <= 0) {
        return Expressions.greaterThanOrEqual(projected.term(), lowerBound + 1);
      }
      return projected;
    }

    case EQ:
    case IN:
      // there is no strict projection for EQ and IN
      return null;

    case NOT_EQ: {
      Integer projectedValue = projected.literal().value();
      if (projectedValue < 0) {
        // exclude both the correct value (projectedValue) and the incorrect value (projectedValue + 1)
        return Expressions.notIn(projected.term(), projectedValue, projectedValue + 1);
      }
      return projected;
    }

    case NOT_IN: {
      // keep every original value and add value + 1 next to each negative one
      Set<Integer> fixedSet = Sets.newHashSet();
      boolean hasNegativeValue = false;
      for (Literal<Integer> lit : projected.literals()) {
        Integer value = lit.value();
        fixedSet.add(value);
        if (value < 0) {
          hasNegativeValue = true;
          fixedSet.add(value + 1);
        }
      }

      if (hasNegativeValue) {
        return Expressions.notIn(projected.term(), fixedSet);
      }
      return projected;
    }

    default:
      return null;
  }
}
}