All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.expressions.Projections Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.expressions;

import java.util.Collection;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.expressions.ExpressionVisitors.ExpressionVisitor;
import org.apache.iceberg.transforms.Transform;

/**
 * Utils to project expressions on rows to expressions on partitions.
 */
public class Projections {
  private Projections() {
  }

  /**
   * A class that projects expressions for a table's data rows into expressions on the table's
   * partition values, for a table's {@link PartitionSpec partition spec}.
   * 

* There are two types of projections: inclusive and strict. *

* An inclusive projection guarantees that if an expression matches a row, the projected * expression will match the row's partition. *

* A strict projection guarantees that if a partition matches a projected expression, then all * rows in that partition will match the original expression. */ public abstract static class ProjectionEvaluator extends ExpressionVisitor { /** * Project the given row expression to a partition expression. * * @param expr an expression on data rows * @return an expression on partition data (depends on the projection) */ public abstract Expression project(Expression expr); } /** * Creates an inclusive {@code ProjectionEvaluator} for the {@link PartitionSpec spec}, defaulting * to case sensitive mode. *

* An evaluator is used to project expressions for a table's data rows into expressions on the * table's partition values. The evaluator returned by this function is inclusive and will build * expressions with the following guarantee: if the original expression matches a row, then the * projected expression will match that row's partition. *

* Each predicate in the expression is projected using * {@link Transform#project(String, BoundPredicate)}. * * @param spec a partition spec * @return an inclusive projection evaluator for the partition spec * @see Transform#project(String, BoundPredicate) Inclusive transform used for each predicate */ public static ProjectionEvaluator inclusive(PartitionSpec spec) { return new InclusiveProjection(spec, true); } /** * Creates an inclusive {@code ProjectionEvaluator} for the {@link PartitionSpec spec}. *

* An evaluator is used to project expressions for a table's data rows into expressions on the * table's partition values. The evaluator returned by this function is inclusive and will build * expressions with the following guarantee: if the original expression matches a row, then the * projected expression will match that row's partition. *

* Each predicate in the expression is projected using * {@link Transform#project(String, BoundPredicate)}. * * @param spec a partition spec * @param caseSensitive whether the Projection should consider case sensitivity on column names or not. * @return an inclusive projection evaluator for the partition spec * @see Transform#project(String, BoundPredicate) Inclusive transform used for each predicate */ public static ProjectionEvaluator inclusive(PartitionSpec spec, boolean caseSensitive) { return new InclusiveProjection(spec, caseSensitive); } /** * Creates a strict {@code ProjectionEvaluator} for the {@link PartitionSpec spec}, defaulting * to case sensitive mode. *

* An evaluator is used to project expressions for a table's data rows into expressions on the * table's partition values. The evaluator returned by this function is strict and will build * expressions with the following guarantee: if the projected expression matches a partition, * then the original expression will match all rows in that partition. *

* Each predicate in the expression is projected using * {@link Transform#projectStrict(String, BoundPredicate)}. * * @param spec a partition spec * @return a strict projection evaluator for the partition spec * @see Transform#projectStrict(String, BoundPredicate) Strict transform used for each predicate */ public static ProjectionEvaluator strict(PartitionSpec spec) { return new StrictProjection(spec, true); } /** * Creates a strict {@code ProjectionEvaluator} for the {@link PartitionSpec spec}. *

* An evaluator is used to project expressions for a table's data rows into expressions on the * table's partition values. The evaluator returned by this function is strict and will build * expressions with the following guarantee: if the projected expression matches a partition, * then the original expression will match all rows in that partition. *

* Each predicate in the expression is projected using * {@link Transform#projectStrict(String, BoundPredicate)}. * * @param spec a partition spec * @param caseSensitive whether the Projection should consider case sensitivity on column names or not. * @return a strict projection evaluator for the partition spec * @see Transform#projectStrict(String, BoundPredicate) Strict transform used for each predicate */ public static ProjectionEvaluator strict(PartitionSpec spec, boolean caseSensitive) { return new StrictProjection(spec, caseSensitive); } private static class BaseProjectionEvaluator extends ProjectionEvaluator { private final PartitionSpec spec; private final boolean caseSensitive; private BaseProjectionEvaluator(PartitionSpec spec, boolean caseSensitive) { this.spec = spec; this.caseSensitive = caseSensitive; } @Override public Expression project(Expression expr) { // projections assume that there are no NOT nodes in the expression tree. to ensure that this // is the case, the expression is rewritten to push all NOT nodes down to the expression // leaf nodes. // this is necessary to ensure that the default expression returned when a predicate can't be // projected is correct. return ExpressionVisitors.visit(ExpressionVisitors.visit(expr, RewriteNot.get()), this); } @Override public Expression alwaysTrue() { return Expressions.alwaysTrue(); } @Override public Expression alwaysFalse() { return Expressions.alwaysFalse(); } @Override public Expression not(Expression result) { throw new UnsupportedOperationException("[BUG] project called on expression with a not"); } @Override public Expression and(Expression leftResult, Expression rightResult) { return Expressions.and(leftResult, rightResult); } @Override public Expression or(Expression leftResult, Expression rightResult) { return Expressions.or(leftResult, rightResult); } @Override public Expression predicate(UnboundPredicate pred) { Expression bound = pred.bind(spec.schema().asStruct(), caseSensitive); if (bound instanceof BoundPredicate) { return predicate((BoundPredicate) bound); } return bound; } PartitionSpec spec() { return spec; } boolean isCaseSensitive() { return caseSensitive; } } private static class InclusiveProjection extends BaseProjectionEvaluator { private InclusiveProjection(PartitionSpec spec, boolean caseSensitive) { super(spec, caseSensitive); } @Override @SuppressWarnings("unchecked") public Expression predicate(BoundPredicate pred) { Collection parts = spec().getFieldsBySourceId(pred.ref().fieldId()); if (parts == null) { // the predicate has no partition column return Expressions.alwaysTrue(); } Expression result = Expressions.alwaysTrue(); for (PartitionField part : parts) { // consider (d = 2019-01-01) with bucket(7, d) and bucket(5, d) // projections: b1 = bucket(7, '2019-01-01') = 5, b2 = bucket(5, '2019-01-01') = 0 // any value where b1 != 5 or any value where b2 != 0 cannot be the '2019-01-01' // // similarly, if partitioning by day(ts) and hour(ts), the more restrictive // projection should be used. ts = 2019-01-01T01:00:00 produces day=2019-01-01 and // hour=2019-01-01-01. the value will be in 2019-01-01-01 and not in 2019-01-01-02. UnboundPredicate inclusiveProjection = ((Transform) part.transform()).project(part.name(), pred); if (inclusiveProjection != null) { result = Expressions.and(result, inclusiveProjection); } } return result; } } private static class StrictProjection extends BaseProjectionEvaluator { private StrictProjection(PartitionSpec spec, boolean caseSensitive) { super(spec, caseSensitive); } @Override @SuppressWarnings("unchecked") public Expression predicate(BoundPredicate pred) { Collection parts = spec().getFieldsBySourceId(pred.ref().fieldId()); if (parts == null) { // the predicate has no partition column return Expressions.alwaysFalse(); } Expression result = Expressions.alwaysFalse(); for (PartitionField part : parts) { // consider (ts > 2019-01-01T01:00:00) with day(ts) and hour(ts) // projections: d >= 2019-01-02 and h >= 2019-01-01-02 (note the inclusive bounds). // any timestamp where either projection predicate is true must match the original // predicate. For example, ts = 2019-01-01T03:00:00 matches the hour projection but not // the day, but does match the original predicate. UnboundPredicate strictProjection = ((Transform) part.transform()).projectStrict(part.name(), pred); if (strictProjection != null) { result = Expressions.or(result, strictProjection); } } return result; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy