All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.util.ExpressionUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.util;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.table.expressions.CallExpression;
import org.apache.flink.table.expressions.Expression;
import org.apache.flink.table.expressions.FieldReferenceExpression;
import org.apache.flink.table.expressions.ResolvedExpression;
import org.apache.flink.table.expressions.ValueLiteralExpression;
import org.apache.flink.table.functions.BuiltInFunctionDefinitions;
import org.apache.flink.table.functions.FunctionDefinition;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;

import javax.annotation.Nullable;

import java.math.BigDecimal;
import java.sql.Timestamp;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneOffset;
import java.time.temporal.ChronoField;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Utilities for expression resolving.
 */
public class ExpressionUtils {

  /**
   * Collect the referenced columns with given expressions,
   * only simple call expression is supported.
   */
  public static String[] referencedColumns(List exprs) {
    return exprs.stream()
        .map(ExpressionUtils::getReferencedColumns)
        .filter(columns -> columns.length > 0)
        .flatMap(Arrays::stream)
        .distinct() // deduplication
        .toArray(String[]::new);
  }

  /**
   * Returns whether the given expression is simple call expression:
   * a binary call with one operand as field reference and another operand
   * as literal.
   */
  public static boolean isSimpleCallExpression(Expression expr) {
    if (!(expr instanceof CallExpression)) {
      return false;
    }
    CallExpression callExpression = (CallExpression) expr;
    FunctionDefinition funcDef = callExpression.getFunctionDefinition();
    // simple call list:
    // NOT AND OR IN EQUALS NOT_EQUALS IS_NULL IS_NOT_NULL LESS_THAN GREATER_THAN
    // LESS_THAN_OR_EQUAL GREATER_THAN_OR_EQUAL

    if (funcDef == BuiltInFunctionDefinitions.NOT
        || funcDef == BuiltInFunctionDefinitions.AND
        || funcDef == BuiltInFunctionDefinitions.OR) {
      return callExpression.getChildren().stream()
          .allMatch(ExpressionUtils::isSimpleCallExpression);
    }
    if (!(funcDef == BuiltInFunctionDefinitions.IN
        || funcDef == BuiltInFunctionDefinitions.EQUALS
        || funcDef == BuiltInFunctionDefinitions.NOT_EQUALS
        || funcDef == BuiltInFunctionDefinitions.IS_NULL
        || funcDef == BuiltInFunctionDefinitions.IS_NOT_NULL
        || funcDef == BuiltInFunctionDefinitions.LESS_THAN
        || funcDef == BuiltInFunctionDefinitions.GREATER_THAN
        || funcDef == BuiltInFunctionDefinitions.LESS_THAN_OR_EQUAL
        || funcDef == BuiltInFunctionDefinitions.GREATER_THAN_OR_EQUAL)) {
      return false;
    }
    // handle IN
    if (funcDef == BuiltInFunctionDefinitions.IN) {
      // In expression RHS operands are always literals
      return true;
    }
    // handle unary operator
    if (funcDef == BuiltInFunctionDefinitions.IS_NULL
        || funcDef == BuiltInFunctionDefinitions.IS_NOT_NULL) {
      return callExpression.getChildren().stream()
          .allMatch(e -> e instanceof FieldReferenceExpression);
    }
    // handle binary operator
    return isFieldReferenceAndLiteral(callExpression.getChildren());
  }

  private static boolean isFieldReferenceAndLiteral(List exprs) {
    if (exprs.size() != 2) {
      return false;
    }
    final Expression expr0 = exprs.get(0);
    final Expression expr1 = exprs.get(1);
    return expr0 instanceof FieldReferenceExpression && expr1 instanceof ValueLiteralExpression
        || expr0 instanceof ValueLiteralExpression && expr1 instanceof FieldReferenceExpression;
  }

  private static String[] getReferencedColumns(ResolvedExpression expression) {
    CallExpression callExpr = (CallExpression) expression;
    FunctionDefinition funcDef = callExpr.getFunctionDefinition();
    if (funcDef == BuiltInFunctionDefinitions.NOT
        || funcDef == BuiltInFunctionDefinitions.AND
        || funcDef == BuiltInFunctionDefinitions.OR) {
      return callExpr.getChildren().stream()
          .map(e -> getReferencedColumns((ResolvedExpression) e))
          .flatMap(Arrays::stream)
          .toArray(String[]::new);
    }

    return expression.getChildren().stream()
        .filter(expr -> expr instanceof FieldReferenceExpression)
        .map(expr -> ((FieldReferenceExpression) expr).getName())
        .toArray(String[]::new);
  }

  /**
   * Returns the value with given value literal expression.
   *
   * 

Returns null if the value can not parse as the output data type correctly, * should call {@code ValueLiteralExpression.isNull} first to decide whether * the literal is NULL. */ @Nullable public static Object getValueFromLiteral(ValueLiteralExpression expr) { LogicalType logicalType = expr.getOutputDataType().getLogicalType(); switch (logicalType.getTypeRoot()) { case TIMESTAMP_WITHOUT_TIME_ZONE: return expr.getValueAs(LocalDateTime.class) .map(ldt -> ldt.toInstant(ZoneOffset.UTC).toEpochMilli()) .orElse(null); case TIME_WITHOUT_TIME_ZONE: return expr.getValueAs(LocalTime.class) .map(lt -> lt.get(ChronoField.MILLI_OF_DAY)) .orElse(null); case DATE: return expr.getValueAs(LocalDate.class) .map(date -> (int) date.toEpochDay()) .orElse(null); // NOTE: All integral types of size less than Int are encoded as Ints in MT case BOOLEAN: return expr.getValueAs(Boolean.class).orElse(null); case TINYINT: return expr.getValueAs(Byte.class).orElse(null); case SMALLINT: return expr.getValueAs(Short.class).orElse(null); case INTEGER: return expr.getValueAs(Integer.class).orElse(null); case BIGINT: return expr.getValueAs(Long.class).orElse(null); case FLOAT: return expr.getValueAs(Float.class).orElse(null); case DOUBLE: return expr.getValueAs(Double.class).orElse(null); case BINARY: case VARBINARY: return expr.getValueAs(byte[].class).orElse(null); case CHAR: case VARCHAR: return expr.getValueAs(String.class).orElse(null); case DECIMAL: return expr.getValueAs(BigDecimal.class).orElse(null); default: throw new UnsupportedOperationException("Unsupported type: " + logicalType); } } /** * Returns the field as part of a hoodie key with given value literal expression. * *

CAUTION: the data type and value parsing should follow the impl of {@link #getValueFromLiteral(ValueLiteralExpression)}. * *

CAUTION: the data and timestamp conversion should follow the impl if {@code HoodieAvroUtils.convertValueForAvroLogicalTypes}. * *

Returns null if the value can not parse as the output data type correctly, * should call {@code ValueLiteralExpression.isNull} first to decide whether * the literal is NULL. */ @Nullable public static Object getKeyFromLiteral(ValueLiteralExpression expr, boolean logicalTimestamp) { Object val = getValueFromLiteral(expr); if (val == null) { return null; } LogicalType logicalType = expr.getOutputDataType().getLogicalType(); switch (logicalType.getTypeRoot()) { case TIMESTAMP_WITHOUT_TIME_ZONE: return logicalTimestamp ? new Timestamp((long) val) : val; case DATE: return LocalDate.ofEpochDay((int) val); default: return val; } } /** * Returns whether all the fields {@code fields} are involved in the filtering predicates. * * @param exprs The filters * @param fields The field set */ public static boolean isFilteringByAllFields(List exprs, Set fields) { if (exprs.size() != fields.size()) { return false; } Set referencedPks = exprs.stream() .map(ResolvedExpression::getChildren) .flatMap(Collection::stream) .filter(expr -> expr instanceof FieldReferenceExpression) .map(rExpr -> ((FieldReferenceExpression) rExpr).getName()) .collect(Collectors.toSet()); return referencedPks.size() == fields.size(); } /** * Returns whether the given expression {@code resolvedExpr} is a * literal equivalence predicate within the fields {@code fields}. */ public static boolean isEqualsLitExpr(ResolvedExpression resolvedExpr, Set fields) { CallExpression callExpr = (CallExpression) resolvedExpr; FunctionDefinition funcDef = callExpr.getFunctionDefinition(); if (funcDef != BuiltInFunctionDefinitions.EQUALS) { return false; } if (!isFieldReferenceAndLiteral(callExpr.getChildren())) { return false; } return callExpr.getChildren().stream() .filter(expr -> expr instanceof FieldReferenceExpression) .anyMatch(expr -> fields.contains(((FieldReferenceExpression) expr).getName())); } public static List filterSimpleCallExpression(List exprs) { return exprs.stream() .filter(ExpressionUtils::isSimpleCallExpression) .collect(Collectors.toList()); } /** * Extracts partition predicate from filter condition. * *

NOTE: the {@code expressions} should be simple call expressions. * * @return A tuple of partition predicates and non-partition predicates. */ public static Tuple2, List> splitExprByPartitionCall( List expressions, List partitionKeys, RowType tableRowType) { if (partitionKeys.isEmpty()) { return Tuple2.of(expressions, Collections.emptyList()); } else { List partitionFilters = new ArrayList<>(); List nonPartitionFilters = new ArrayList<>(); final List fieldNames = tableRowType.getFieldNames(); Set parFieldPos = partitionKeys.stream().map(fieldNames::indexOf).collect(Collectors.toSet()); for (ResolvedExpression expr : expressions) { for (CallExpression e : splitByAnd(expr)) { if (isPartitionCallExpr(e, parFieldPos)) { partitionFilters.add(expr); } else { nonPartitionFilters.add(e); } } } return Tuple2.of(nonPartitionFilters, partitionFilters); } } private static List splitByAnd(ResolvedExpression expr) { List result = new ArrayList<>(); splitByAnd(expr, result); return result; } private static void splitByAnd( ResolvedExpression expr, List result) { if (!(expr instanceof CallExpression)) { return; } CallExpression callExpr = (CallExpression) expr; FunctionDefinition funcDef = callExpr.getFunctionDefinition(); if (funcDef == BuiltInFunctionDefinitions.AND) { callExpr.getChildren().stream() .filter(child -> child instanceof CallExpression) .forEach(child -> splitByAnd((CallExpression) child, result)); } else { result.add(callExpr); } } /** * Returns whether the {@code expr} is a partition call expression. * * @param expr The expression * @param parFieldPos The partition field positions within the table schema */ private static boolean isPartitionCallExpr(CallExpression expr, Set parFieldPos) { List children = expr.getChildren(); // if any child expr reference a non-partition field, returns false. return children.stream() .allMatch( child -> { if (child instanceof FieldReferenceExpression) { FieldReferenceExpression refExpr = (FieldReferenceExpression) child; return parFieldPos.contains(refExpr.getFieldIndex()); } else if (child instanceof CallExpression) { return isPartitionCallExpr((CallExpression) child, parFieldPos); } else { return true; } }); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy