All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.expressions.ResidualEvaluator Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.expressions;

import java.io.Serializable;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.expressions.ExpressionVisitors.BoundExpressionVisitor;
import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.util.NaNUtil;

/**
 * Finds the residuals for an {@link Expression} the partitions in the given {@link PartitionSpec}.
 * 

* A residual expression is made by partially evaluating an expression using partition values. For * example, if a table is partitioned by day(utc_timestamp) and is read with a filter expression * utc_timestamp >= a and utc_timestamp <= b, then there are 4 possible residuals expressions * for the partition data, d: *

    *
  • If d > day(a) and d < day(b), the residual is always true
  • *
  • If d == day(a) and d != day(b), the residual is utc_timestamp >= a
  • *
  • if d == day(b) and d != day(a), the residual is utc_timestamp <= b
  • *
  • If d == day(a) == day(b), the residual is utc_timestamp >= a and utc_timestamp <= b *
  • *
*

* Partition data is passed using {@link StructLike}. Residuals are returned by * {@link #residualFor(StructLike)}. *

* This class is thread-safe. */ public class ResidualEvaluator implements Serializable { private static class UnpartitionedResidualEvaluator extends ResidualEvaluator { private final Expression expr; UnpartitionedResidualEvaluator(Expression expr) { super(PartitionSpec.unpartitioned(), expr, false); this.expr = expr; } @Override public Expression residualFor(StructLike ignored) { return expr; } } /** * Return a residual evaluator for an unpartitioned {@link PartitionSpec spec}. * * @param expr an expression * @return a residual evaluator that always returns the expression */ public static ResidualEvaluator unpartitioned(Expression expr) { return new UnpartitionedResidualEvaluator(expr); } /** * Return a residual evaluator for a {@link PartitionSpec spec} and {@link Expression expression}. * * @param spec a partition spec * @param expr an expression * @return a residual evaluator for the expression */ public static ResidualEvaluator of(PartitionSpec spec, Expression expr, boolean caseSensitive) { if (spec.fields().size() > 0) { return new ResidualEvaluator(spec, expr, caseSensitive); } else { return unpartitioned(expr); } } private final PartitionSpec spec; private final Expression expr; private final boolean caseSensitive; private ResidualEvaluator(PartitionSpec spec, Expression expr, boolean caseSensitive) { this.spec = spec; this.expr = expr; this.caseSensitive = caseSensitive; } /** * Returns a residual expression for the given partition values. * * @param partitionData partition data values * @return the residual of this evaluator's expression from the partition values */ public Expression residualFor(StructLike partitionData) { return new ResidualVisitor().eval(partitionData); } private class ResidualVisitor extends BoundExpressionVisitor { private StructLike struct; private Expression eval(StructLike dataStruct) { this.struct = dataStruct; return ExpressionVisitors.visit(expr, this); } @Override public Expression alwaysTrue() { return Expressions.alwaysTrue(); } @Override public Expression alwaysFalse() { return Expressions.alwaysFalse(); } @Override public Expression not(Expression result) { return Expressions.not(result); } @Override public Expression and(Expression leftResult, Expression rightResult) { return Expressions.and(leftResult, rightResult); } @Override public Expression or(Expression leftResult, Expression rightResult) { return Expressions.or(leftResult, rightResult); } @Override public Expression isNull(BoundReference ref) { return (ref.eval(struct) == null) ? alwaysTrue() : alwaysFalse(); } @Override public Expression notNull(BoundReference ref) { return (ref.eval(struct) != null) ? alwaysTrue() : alwaysFalse(); } @Override public Expression isNaN(BoundReference ref) { return NaNUtil.isNaN(ref.eval(struct)) ? alwaysTrue() : alwaysFalse(); } @Override public Expression notNaN(BoundReference ref) { return NaNUtil.isNaN(ref.eval(struct)) ? alwaysFalse() : alwaysTrue(); } @Override public Expression lt(BoundReference ref, Literal lit) { Comparator cmp = lit.comparator(); return (cmp.compare(ref.eval(struct), lit.value()) < 0) ? alwaysTrue() : alwaysFalse(); } @Override public Expression ltEq(BoundReference ref, Literal lit) { Comparator cmp = lit.comparator(); return (cmp.compare(ref.eval(struct), lit.value()) <= 0) ? alwaysTrue() : alwaysFalse(); } @Override public Expression gt(BoundReference ref, Literal lit) { Comparator cmp = lit.comparator(); return (cmp.compare(ref.eval(struct), lit.value()) > 0) ? alwaysTrue() : alwaysFalse(); } @Override public Expression gtEq(BoundReference ref, Literal lit) { Comparator cmp = lit.comparator(); return (cmp.compare(ref.eval(struct), lit.value()) >= 0) ? alwaysTrue() : alwaysFalse(); } @Override public Expression eq(BoundReference ref, Literal lit) { Comparator cmp = lit.comparator(); return (cmp.compare(ref.eval(struct), lit.value()) == 0) ? alwaysTrue() : alwaysFalse(); } @Override public Expression notEq(BoundReference ref, Literal lit) { Comparator cmp = lit.comparator(); return (cmp.compare(ref.eval(struct), lit.value()) != 0) ? alwaysTrue() : alwaysFalse(); } @Override public Expression in(BoundReference ref, Set literalSet) { return literalSet.contains(ref.eval(struct)) ? alwaysTrue() : alwaysFalse(); } @Override public Expression notIn(BoundReference ref, Set literalSet) { return literalSet.contains(ref.eval(struct)) ? alwaysFalse() : alwaysTrue(); } @Override public Expression startsWith(BoundReference ref, Literal lit) { return ((String) ref.eval(struct)).startsWith((String) lit.value()) ? alwaysTrue() : alwaysFalse(); } @Override public Expression notStartsWith(BoundReference ref, Literal lit) { return ((String) ref.eval(struct)).startsWith((String) lit.value()) ? alwaysFalse() : alwaysTrue(); } @Override @SuppressWarnings("unchecked") public Expression predicate(BoundPredicate pred) { // Get the strict projection and inclusive projection of this predicate in partition data, // then use them to determine whether to return the original predicate. The strict projection // returns true iff the original predicate would have returned true, so the predicate can be // eliminated if the strict projection evaluates to true. Similarly the inclusive projection // returns false iff the original predicate would have returned false, so the predicate can // also be eliminated if the inclusive projection evaluates to false. // If there is no strict projection or if it evaluates to false, then return the predicate. List parts = spec.getFieldsBySourceId(pred.ref().fieldId()); if (parts == null) { return pred; // not associated inclusive a partition field, can't be evaluated } for (PartitionField part : parts) { // checking the strict projection UnboundPredicate strictProjection = ((Transform) part.transform()).projectStrict(part.name(), pred); Expression strictResult = null; if (strictProjection != null) { Expression bound = strictProjection.bind(spec.partitionType(), caseSensitive); if (bound instanceof BoundPredicate) { strictResult = super.predicate((BoundPredicate) bound); } else { // if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse strictResult = bound; } } if (strictResult != null && strictResult.op() == Expression.Operation.TRUE) { // If strict is true, returning true return Expressions.alwaysTrue(); } // checking the inclusive projection UnboundPredicate inclusiveProjection = ((Transform) part.transform()).project(part.name(), pred); Expression inclusiveResult = null; if (inclusiveProjection != null) { Expression boundInclusive = inclusiveProjection.bind(spec.partitionType(), caseSensitive); if (boundInclusive instanceof BoundPredicate) { // using predicate method specific to inclusive inclusiveResult = super.predicate((BoundPredicate) boundInclusive); } else { // if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse inclusiveResult = boundInclusive; } } if (inclusiveResult != null && inclusiveResult.op() == Expression.Operation.FALSE) { // If inclusive is false, returning false return Expressions.alwaysFalse(); } } // neither strict not inclusive predicate was conclusive, returning the original pred return pred; } @Override public Expression predicate(UnboundPredicate pred) { Expression bound = pred.bind(spec.schema().asStruct(), caseSensitive); if (bound instanceof BoundPredicate) { Expression boundResidual = predicate((BoundPredicate) bound); if (boundResidual instanceof Predicate) { return pred; // replace inclusive original unbound predicate } return boundResidual; // use the non-predicate residual (e.g. alwaysTrue) } // if binding didn't result in a Predicate, return the expression return bound; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy