/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.configuration;

import org.apache.hudi.client.clustering.plan.strategy.FlinkConsistentBucketClusteringPlanStrategy;
import org.apache.hudi.client.transaction.BucketIndexConcurrentFileWritesConflictResolutionStrategy;
import org.apache.hudi.client.transaction.ConflictResolutionStrategy;
import org.apache.hudi.client.transaction.SimpleConcurrentFileWritesConflictResolutionStrategy;
import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.model.DefaultHoodieRecordPayload;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.WriteConcurrencyMode;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.cdc.HoodieCDCSupplementalLoggingMode;
import org.apache.hudi.common.table.timeline.TimelineUtils.HollowCommitHandling;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieCleanConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import org.apache.hudi.sink.overwrite.PartitionOverwriteMode;
import org.apache.hudi.table.format.FilePathUtils;

import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.Configuration;

import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import static org.apache.hudi.common.config.HoodieCommonConfig.INCREMENTAL_READ_HANDLE_HOLLOW_COMMIT;

/**
 * Tool that helps to resolve the flink options {@link FlinkOptions}.
 */
public class OptionsResolver {
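  // Illustrative usage sketch (not part of the original source): every resolver here is
  // static and reads the same Flink Configuration that carries the Hudi table options, e.g.:
  //
  //   Configuration conf = new Configuration();
  //   conf.set(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ);
  //   conf.set(FlinkOptions.OPERATION, WriteOperationType.INSERT.value());
  //   OptionsResolver.isAppendMode(conf); // true: MOR table with INSERT operation
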
  /**
   * Returns whether insert clustering is allowed with given configuration {@code conf}.
   */
  public static boolean insertClustering(Configuration conf) {
    return isCowTable(conf) && isInsertOperation(conf) && conf.getBoolean(FlinkOptions.INSERT_CLUSTER);
  }

  /**
   * Returns whether the insert runs in append mode, i.e. with clustering disabled, under the given configuration {@code conf}.
   */
  public static boolean isAppendMode(Configuration conf) {
    // 1. inline clustering is supported for COW table;
    // 2. async clustering is supported for both COW and MOR table
    return isInsertOperation(conf) && ((isCowTable(conf) && !conf.getBoolean(FlinkOptions.INSERT_CLUSTER)) || isMorTable(conf));
  }

  /**
   * Returns whether the table operation is 'insert'.
   */
  public static boolean isInsertOperation(Configuration conf) {
    WriteOperationType operationType = WriteOperationType.fromValue(conf.getString(FlinkOptions.OPERATION));
    return operationType == WriteOperationType.INSERT;
  }

  /**
   * Returns whether the table operation is 'bulk_insert'.
   */
  public static boolean isBulkInsertOperation(Configuration conf) {
    WriteOperationType operationType = WriteOperationType.fromValue(conf.getString(FlinkOptions.OPERATION));
    return operationType == WriteOperationType.BULK_INSERT;
  }

  /**
   * Returns whether it is a MERGE_ON_READ table.
   */
  public static boolean isMorTable(Configuration conf) {
    return conf.getString(FlinkOptions.TABLE_TYPE)
        .toUpperCase(Locale.ROOT)
        .equals(FlinkOptions.TABLE_TYPE_MERGE_ON_READ);
  }

  /**
   * Returns whether it is a MERGE_ON_READ table.
   */
  public static boolean isMorTable(Map<String, String> options) {
    return options.getOrDefault(FlinkOptions.TABLE_TYPE.key(),
        FlinkOptions.TABLE_TYPE.defaultValue()).equalsIgnoreCase(FlinkOptions.TABLE_TYPE_MERGE_ON_READ);
  }

  /**
   * Returns whether it is a COPY_ON_WRITE table.
   */
  public static boolean isCowTable(Configuration conf) {
    return conf.getString(FlinkOptions.TABLE_TYPE)
        .toUpperCase(Locale.ROOT)
        .equals(FlinkOptions.TABLE_TYPE_COPY_ON_WRITE);
  }

  /**
   * Returns whether the payload clazz is {@link DefaultHoodieRecordPayload}.
   */
  public static boolean isDefaultHoodieRecordPayloadClazz(Configuration conf) {
    return conf.getString(FlinkOptions.PAYLOAD_CLASS_NAME).contains(DefaultHoodieRecordPayload.class.getSimpleName());
  }

  /**
   * Returns the preCombine field
   * or null if the value is set as {@link FlinkOptions#NO_PRE_COMBINE}.
   */
  public static String getPreCombineField(Configuration conf) {
    final String preCombineField = conf.getString(FlinkOptions.PRECOMBINE_FIELD);
    return preCombineField.equals(FlinkOptions.NO_PRE_COMBINE) ? null : preCombineField;
  }
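
  // Illustrative (not from the original source): setting
  //   conf.set(FlinkOptions.PRECOMBINE_FIELD, FlinkOptions.NO_PRE_COMBINE)
  // makes this resolve to null, signalling that no precombine field is configured.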

  /**
   * Returns whether the compaction strategy is based on elapsed delta time.
   */
  public static boolean isDeltaTimeCompaction(Configuration conf) {
    final String strategy = conf.getString(FlinkOptions.COMPACTION_TRIGGER_STRATEGY).toLowerCase(Locale.ROOT);
    return FlinkOptions.TIME_ELAPSED.equals(strategy) || FlinkOptions.NUM_OR_TIME.equals(strategy);
  }

  /**
   * Returns whether the table is partitioned.
   */
  public static boolean isPartitionedTable(Configuration conf) {
    return FilePathUtils.extractPartitionKeys(conf).length > 0;
  }

  /**
   * Returns whether the table index type is bucket index.
   */
  public static boolean isBucketIndexType(Configuration conf) {
    return conf.getString(FlinkOptions.INDEX_TYPE).equalsIgnoreCase(HoodieIndex.IndexType.BUCKET.name());
  }

  /**
   * Returns the bucket index engine type.
   */
  public static HoodieIndex.BucketIndexEngineType getBucketEngineType(Configuration conf) {
    String bucketEngineType = conf.get(FlinkOptions.BUCKET_INDEX_ENGINE_TYPE);
    return HoodieIndex.BucketIndexEngineType.valueOf(bucketEngineType);
  }

  /**
   * Returns whether the table index is the consistent hashing bucket index.
   */
  public static boolean isConsistentHashingBucketIndexType(Configuration conf) {
    return isBucketIndexType(conf) && getBucketEngineType(conf).equals(HoodieIndex.BucketIndexEngineType.CONSISTENT_HASHING);
  }
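
  // Illustrative (not from the original source): both conditions hold with, e.g.,
  //   conf.set(FlinkOptions.INDEX_TYPE, HoodieIndex.IndexType.BUCKET.name());
  //   conf.set(FlinkOptions.BUCKET_INDEX_ENGINE_TYPE,
  //       HoodieIndex.BucketIndexEngineType.CONSISTENT_HASHING.name());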

  /**
   * Returns the default plan strategy class.
   */
  public static String getDefaultPlanStrategyClassName(Configuration conf) {
    return OptionsResolver.isConsistentHashingBucketIndexType(conf) ? FlinkConsistentBucketClusteringPlanStrategy.class.getName() :
        FlinkOptions.CLUSTERING_PLAN_STRATEGY_CLASS.defaultValue();
  }

  /**
   * Returns whether the source should emit changelog.
   *
   * @return true if the source is read as streaming with changelog mode or CDC enabled,
   *         or read incrementally with CDC enabled
   */
  public static boolean emitChangelog(Configuration conf) {
    return conf.getBoolean(FlinkOptions.READ_AS_STREAMING) && conf.getBoolean(FlinkOptions.CHANGELOG_ENABLED)
        || conf.getBoolean(FlinkOptions.READ_AS_STREAMING) && conf.getBoolean(FlinkOptions.CDC_ENABLED)
        || isIncrementalQuery(conf) && conf.getBoolean(FlinkOptions.CDC_ENABLED);
  }
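
  // In short (derived from the predicate above): a streaming read with either changelog
  // mode or CDC enabled emits changelog records, as does an incremental query with CDC enabled.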

  /**
   * Returns whether there is a need to schedule the async compaction.
   *
   * @param conf The flink configuration.
   */
  public static boolean needsAsyncCompaction(Configuration conf) {
    return OptionsResolver.isMorTable(conf)
        && conf.getBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED);
  }

  /**
   * Returns whether there is a need to schedule the compaction plan.
   *
   * @param conf The flink configuration.
   */
  public static boolean needsScheduleCompaction(Configuration conf) {
    return OptionsResolver.isMorTable(conf)
        && conf.getBoolean(FlinkOptions.COMPACTION_SCHEDULE_ENABLED);
  }

  /**
   * Returns whether there is a need to schedule the async clustering.
   *
   * @param conf The flink configuration.
   */
  public static boolean needsAsyncClustering(Configuration conf) {
    return isInsertOperation(conf) && conf.getBoolean(FlinkOptions.CLUSTERING_ASYNC_ENABLED);
  }

  /**
   * Returns whether there is a need to schedule the clustering plan.
   *
   * @param conf The flink configuration.
   */
  public static boolean needsScheduleClustering(Configuration conf) {
    if (!conf.getBoolean(FlinkOptions.CLUSTERING_SCHEDULE_ENABLED)) {
      return false;
    }
    WriteOperationType operationType = WriteOperationType.fromValue(conf.getString(FlinkOptions.OPERATION));
    if (OptionsResolver.isConsistentHashingBucketIndexType(conf)) {
      // Write pipelines for table with consistent bucket index would detect whether clustering service occurs,
      // and automatically adjust the partitioner and write function if clustering service happens.
      // So it could handle UPSERT.
      // But it could not handle INSERT case, because insert write would not take index into consideration currently.
      return operationType == WriteOperationType.UPSERT;
    } else {
      return operationType == WriteOperationType.INSERT;
    }
  }
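
  // Illustrative outcomes of the branch above, assuming FlinkOptions.CLUSTERING_SCHEDULE_ENABLED is true:
  //   consistent hashing bucket index + UPSERT -> true (plan gets scheduled)
  //   consistent hashing bucket index + INSERT -> false
  //   any other index type            + INSERT -> true
  //   any other index type            + UPSERT -> false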

  /**
   * Returns whether the clustering sort is enabled.
   */
  public static boolean sortClusteringEnabled(Configuration conf) {
    return !StringUtils.isNullOrEmpty(conf.getString(FlinkOptions.CLUSTERING_SORT_COLUMNS));
  }

  /**
   * Returns whether the operation is INSERT OVERWRITE (table or partition).
   */
  public static boolean isInsertOverwrite(Configuration conf) {
    return conf.getString(FlinkOptions.OPERATION).equalsIgnoreCase(WriteOperationType.INSERT_OVERWRITE_TABLE.value())
        || conf.getString(FlinkOptions.OPERATION).equalsIgnoreCase(WriteOperationType.INSERT_OVERWRITE.value());
  }

  /**
   * Returns whether the write overwrites partitions dynamically: either the operation is
   * INSERT OVERWRITE (partition) or the partition overwrite mode is DYNAMIC.
   */
  public static boolean overwriteDynamicPartition(Configuration conf) {
    return conf.getString(FlinkOptions.OPERATION).equalsIgnoreCase(WriteOperationType.INSERT_OVERWRITE.value())
        || conf.getString(FlinkOptions.WRITE_PARTITION_OVERWRITE_MODE).equalsIgnoreCase(PartitionOverwriteMode.DYNAMIC.name());
  }

  /**
   * Returns whether the read start commit is a specific commit timestamp.
   */
  public static boolean isSpecificStartCommit(Configuration conf) {
    return conf.getOptional(FlinkOptions.READ_START_COMMIT).isPresent()
        && !conf.get(FlinkOptions.READ_START_COMMIT).equalsIgnoreCase(FlinkOptions.START_COMMIT_EARLIEST);
  }
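
  // Illustrative: conf.set(FlinkOptions.READ_START_COMMIT, "20240308093000") (a hypothetical
  // instant time) resolves to true, while FlinkOptions.START_COMMIT_EARLIEST resolves to false.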

  /**
   * Returns true if there are no explicit start and end commits.
   */
  public static boolean hasNoSpecificReadCommits(Configuration conf) {
    return !conf.contains(FlinkOptions.READ_START_COMMIT) && !conf.contains(FlinkOptions.READ_END_COMMIT);
  }

  /**
   * Returns the supplemental logging mode.
   */
  public static HoodieCDCSupplementalLoggingMode getCDCSupplementalLoggingMode(Configuration conf) {
    String mode = conf.getString(FlinkOptions.SUPPLEMENTAL_LOGGING_MODE).toUpperCase();
    return HoodieCDCSupplementalLoggingMode.valueOf(mode);
  }

  /**
   * Returns whether comprehensive schema evolution is enabled.
   */
  public static boolean isSchemaEvolutionEnabled(Configuration conf) {
    return conf.getBoolean(HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.key(), HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.defaultValue());
  }

  /**
   * Returns whether the query is incremental.
   */
  public static boolean isIncrementalQuery(Configuration conf) {
    return conf.getOptional(FlinkOptions.READ_START_COMMIT).isPresent() || conf.getOptional(FlinkOptions.READ_END_COMMIT).isPresent();
  }

  /**
   * Returns whether a consistent value will be generated for a logical timestamp type column.
   */
  public static boolean isConsistentLogicalTimestampEnabled(Configuration conf) {
    return conf.getBoolean(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(),
        Boolean.parseBoolean(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue()));
  }

  /**
   * Returns whether the writer txn should be guarded by a lock.
   */
  public static boolean isLockRequired(Configuration conf) {
    return conf.getBoolean(FlinkOptions.METADATA_ENABLED) || isOptimisticConcurrencyControl(conf);
  }

  /**
   * Returns whether OCC is enabled.
   */
  public static boolean isOptimisticConcurrencyControl(Configuration conf) {
    return conf.getString(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), HoodieWriteConfig.WRITE_CONCURRENCY_MODE.defaultValue())
        .equalsIgnoreCase(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name());
  }
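
  // Illustrative: OCC can be switched on through the Hoodie write config key, e.g.
  //   conf.setString(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(),
  //       WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name());
  // which in turn makes isLockRequired(conf) return true as well.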

  /**
   * Returns whether to read the instants using completion time.
   *
   *
   * <p>A Hudi instant contains both the txn start time and the completion time; for incremental
   * subscription of the source reader, using the completion time to filter the candidate instants
   * can avoid data loss in scenarios like multiple writers.
   */
  public static boolean isReadByTxnCompletionTime(Configuration conf) {
    HollowCommitHandling handlingMode = HollowCommitHandling.valueOf(conf
        .getString(INCREMENTAL_READ_HANDLE_HOLLOW_COMMIT.key(), INCREMENTAL_READ_HANDLE_HOLLOW_COMMIT.defaultValue()));
    return handlingMode == HollowCommitHandling.USE_TRANSITION_TIME;
  }

  /**
   * Returns the index type.
   */
  public static HoodieIndex.IndexType getIndexType(Configuration conf) {
    return HoodieIndex.IndexType.valueOf(conf.getString(FlinkOptions.INDEX_TYPE));
  }

  /**
   * Returns the index key field.
   */
  public static String getIndexKeyField(Configuration conf) {
    return conf.getString(FlinkOptions.INDEX_KEY_FIELD, conf.getString(FlinkOptions.RECORD_KEY_FIELD));
  }

  /**
   * Returns the index key field values.
   */
  public static String[] getIndexKeys(Configuration conf) {
    return getIndexKeyField(conf).split(",");
  }

  /**
   * Returns the conflict resolution strategy.
   */
  public static ConflictResolutionStrategy getConflictResolutionStrategy(Configuration conf) {
    return isBucketIndexType(conf)
        ? new BucketIndexConcurrentFileWritesConflictResolutionStrategy()
        : new SimpleConcurrentFileWritesConflictResolutionStrategy();
  }

  /**
   * Returns whether to commit even when the current batch has no data; for Flink this defaults to false.
   */
  public static boolean allowCommitOnEmptyBatch(Configuration conf) {
    return conf.getBoolean(HoodieWriteConfig.ALLOW_EMPTY_COMMIT.key(), false);
  }

  /**
   * Returns whether the cleaning policy for failed writes is lazy.
   */
  public static boolean isLazyFailedWritesCleanPolicy(Configuration conf) {
    return conf.getString(HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY.key(), HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY.defaultValue())
        .equalsIgnoreCase(HoodieFailedWritesCleaningPolicy.LAZY.name());
  }

  // -------------------------------------------------------------------------
  //  Utilities
  // -------------------------------------------------------------------------

  /**
   * Returns all the config options defined in the given class {@code clazz}.
   */
  public static List<ConfigOption<?>> allOptions(Class<?> clazz) {
    Field[] declaredFields = clazz.getDeclaredFields();
    List<ConfigOption<?>> options = new ArrayList<>();
    for (Field field : declaredFields) {
      if (java.lang.reflect.Modifier.isStatic(field.getModifiers())
          && field.getType().equals(ConfigOption.class)) {
        try {
          options.add((ConfigOption<?>) field.get(ConfigOption.class));
        } catch (IllegalAccessException e) {
          throw new HoodieException("Error while fetching static config option", e);
        }
      }
    }
    return options;
  }
}