All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hudi.io.HoodieMergeHandleFactory Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.io;

import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.table.HoodieTable;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Iterator;
import java.util.Map;

/**
 * Factory class for hoodie merge handle.
 */
public class HoodieMergeHandleFactory {
  private static final Logger LOG = LoggerFactory.getLogger(HoodieMergeHandleFactory.class);
  /**
   * Creates a merge handle for normal write path.
   */
  public static  HoodieMergeHandle create(
      WriteOperationType operationType,
      HoodieWriteConfig writeConfig,
      String instantTime,
      HoodieTable table,
      Iterator> recordItr,
      String partitionPath,
      String fileId,
      TaskContextSupplier taskContextSupplier,
      Option keyGeneratorOpt) {
    LOG.info("Create update handle for fileId {} and partition path {} at commit {}", fileId, partitionPath, instantTime);
    if (table.requireSortedRecords()) {
      if (table.getMetaClient().getTableConfig().isCDCEnabled()) {
        return new HoodieSortedMergeHandleWithChangeLog<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier,
            keyGeneratorOpt);
      } else {
        return new HoodieSortedMergeHandle<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier,
            keyGeneratorOpt);
      }
    } else if (!WriteOperationType.isChangingRecords(operationType) && writeConfig.allowDuplicateInserts()) {
      return new HoodieConcatHandle<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier, keyGeneratorOpt);
    } else {
      if (table.getMetaClient().getTableConfig().isCDCEnabled()) {
        return new HoodieMergeHandleWithChangeLog<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier, keyGeneratorOpt);
      } else {
        return new HoodieMergeHandle<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier, keyGeneratorOpt);
      }
    }
  }

  /**
   * Creates a merge handle for compaction path.
   */
  public static  HoodieMergeHandle create(
      HoodieWriteConfig writeConfig,
      String instantTime,
      HoodieTable table,
      Map> keyToNewRecords,
      String partitionPath,
      String fileId,
      HoodieBaseFile dataFileToBeMerged,
      TaskContextSupplier taskContextSupplier,
      Option keyGeneratorOpt) {
    LOG.info("Get updateHandle for fileId {} and partitionPath {} at commit {}", fileId, partitionPath, instantTime);
    if (table.requireSortedRecords()) {
      return new HoodieSortedMergeHandle<>(writeConfig, instantTime, table, keyToNewRecords, partitionPath, fileId,
          dataFileToBeMerged, taskContextSupplier, keyGeneratorOpt);
    } else if (table.getMetaClient().getTableConfig().isCDCEnabled() && writeConfig.isYieldingPureLogForMor()) {
      // IMPORTANT: only index type that yields pure log files need to enable the cdc log files for compaction,
      // index type such as the BLOOM does not need this because it would do delta merge for inserts and generates log for updates,
      // both of these two cases are already handled in HoodieCDCExtractor.
      return new HoodieMergeHandleWithChangeLog<>(writeConfig, instantTime, table, keyToNewRecords, partitionPath, fileId,
          dataFileToBeMerged, taskContextSupplier, keyGeneratorOpt);
    } else {
      return new HoodieMergeHandle<>(writeConfig, instantTime, table, keyToNewRecords, partitionPath, fileId,
          dataFileToBeMerged, taskContextSupplier, keyGeneratorOpt);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy