org.apache.hudi.source.prune.PrimaryKeyPruners

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.source.prune;

import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.configuration.OptionsResolver;
import org.apache.hudi.index.bucket.BucketIdentifier;
import org.apache.hudi.util.ExpressionUtils;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.table.expressions.Expression;
import org.apache.flink.table.expressions.FieldReferenceExpression;
import org.apache.flink.table.expressions.ResolvedExpression;
import org.apache.flink.table.expressions.ValueLiteralExpression;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Utilities for primary key based file pruning: resolves the equality filters
 * pushed down on the record key fields into a single bucket id, so that a read
 * only needs to scan the file groups of that bucket.
 */
public class PrimaryKeyPruners {
  private static final Logger LOG = LoggerFactory.getLogger(PrimaryKeyPruners.class);

  /**
   * Sentinel bucket id indicating that the filters do not pin down a single bucket
   * and no bucket pruning should happen.
   */
  public static final int BUCKET_ID_NO_PRUNING = -1;

  /**
   * Resolves the pushed-down equality filters on the primary key fields into the
   * id of the bucket that the filters pin down.
   *
   * @param hashKeyFilters the equality filters on the record key fields
   * @param conf           the flink configuration
   * @return the bucket id
   */
  public static int getBucketId(List<ResolvedExpression> hashKeyFilters, Configuration conf) {
    List<String> pkFields = Arrays.asList(conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(","));
    // step1: resolve the hash key values
    final boolean logicalTimestamp = OptionsResolver.isConsistentLogicalTimestampEnabled(conf);
    List<String> values = hashKeyFilters.stream()
        .map(filter -> {
          // normalize each filter into (field reference, literal) and pair the literal
          // value with the referenced field's position in the record key fields
          Pair<FieldReferenceExpression, ValueLiteralExpression> children = castChildAs(filter.getChildren());
          return Pair.of(
              pkFields.indexOf(children.getLeft().getName()),
              StringUtils.objToString(ExpressionUtils.getKeyFromLiteral(children.getRight(), logicalTimestamp)));
        })
        // IMPORTANT: following KeyGenUtils#extractRecordKeysByFields,
        // the hash keys must be evaluated in the record key field sequence.
        .sorted(java.util.Map.Entry.comparingByKey())
        .map(Pair::getValue)
        .collect(Collectors.toList());
    // step2: generate the bucket id from the ordered key values
    return BucketIdentifier.getBucketId(values, conf.getInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS));
  }

  /**
   * Normalizes the two children of an equality expression into
   * (field reference, value literal) order, handling both the
   * {@code field = literal} and the {@code literal = field} shapes.
   */
  private static Pair<FieldReferenceExpression, ValueLiteralExpression> castChildAs(List<Expression> children) {
    Expression lExpr = children.get(0);
    Expression rExpr = children.get(1);
    if (lExpr instanceof FieldReferenceExpression) {
      return Pair.of((FieldReferenceExpression) lExpr, (ValueLiteralExpression) rExpr);
    } else {
      return Pair.of((FieldReferenceExpression) rExpr, (ValueLiteralExpression) lExpr);
    }
  }
}
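For illustration, here is a minimal, self-contained sketch (not part of the original file) of the step-2 computation the class delegates to: once the pushed-down equality filters have been resolved to string key values ordered by record key field position, the bucket id is the bucket-index hash of that value list. The table layout (record key fields "id,name", 4 buckets) and the filter values below are hypothetical.

import java.util.Arrays;
import java.util.List;

import org.apache.hudi.index.bucket.BucketIdentifier;

public class PrimaryKeyPrunersSketch {
  public static void main(String[] args) {
    // hypothetical table: record key fields "id,name", bucket index with 4 buckets
    // (FlinkOptions.BUCKET_INDEX_NUM_BUCKETS)
    int numBuckets = 4;

    // values resolved from the pushed-down filters `name = 'alice'` and `id = 42`,
    // re-ordered into the record key field sequence (id first, then name),
    // exactly as the sort step in PrimaryKeyPruners#getBucketId does
    List<String> hashKeyValues = Arrays.asList("42", "alice");

    // the same call PrimaryKeyPruners makes in step 2; a reader can then skip
    // every file group that does not belong to this bucket
    int bucketId = BucketIdentifier.getBucketId(hashKeyValues, numBuckets);
    System.out.println("pruned to bucket: " + bucketId);
  }
}

When the pushed-down filters do not pin down every record key field, callers fall back to BUCKET_ID_NO_PRUNING and no bucket pruning happens.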



