// org.apache.hudi.io.HoodieMergeHandleFactory Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.io;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.table.HoodieTable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Iterator;
import java.util.Map;
/**
* Factory class for hoodie merge handle.
*/
public class HoodieMergeHandleFactory {
private static final Logger LOG = LoggerFactory.getLogger(HoodieMergeHandleFactory.class);
/**
* Creates a merge handle for normal write path.
*/
public static HoodieMergeHandle create(
WriteOperationType operationType,
HoodieWriteConfig writeConfig,
String instantTime,
HoodieTable table,
Iterator> recordItr,
String partitionPath,
String fileId,
TaskContextSupplier taskContextSupplier,
Option keyGeneratorOpt) {
LOG.info("Create update handle for fileId {} and partition path {} at commit {}", fileId, partitionPath, instantTime);
if (table.requireSortedRecords()) {
if (table.getMetaClient().getTableConfig().isCDCEnabled()) {
return new HoodieSortedMergeHandleWithChangeLog<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier,
keyGeneratorOpt);
} else {
return new HoodieSortedMergeHandle<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier,
keyGeneratorOpt);
}
} else if (!WriteOperationType.isChangingRecords(operationType) && writeConfig.allowDuplicateInserts()) {
return new HoodieConcatHandle<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier, keyGeneratorOpt);
} else {
if (table.getMetaClient().getTableConfig().isCDCEnabled()) {
return new HoodieMergeHandleWithChangeLog<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier, keyGeneratorOpt);
} else {
return new HoodieMergeHandle<>(writeConfig, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier, keyGeneratorOpt);
}
}
}
/**
* Creates a merge handle for compaction path.
*/
public static HoodieMergeHandle create(
HoodieWriteConfig writeConfig,
String instantTime,
HoodieTable table,
Map> keyToNewRecords,
String partitionPath,
String fileId,
HoodieBaseFile dataFileToBeMerged,
TaskContextSupplier taskContextSupplier,
Option keyGeneratorOpt) {
LOG.info("Get updateHandle for fileId {} and partitionPath {} at commit {}", fileId, partitionPath, instantTime);
if (table.requireSortedRecords()) {
return new HoodieSortedMergeHandle<>(writeConfig, instantTime, table, keyToNewRecords, partitionPath, fileId,
dataFileToBeMerged, taskContextSupplier, keyGeneratorOpt);
} else if (table.getMetaClient().getTableConfig().isCDCEnabled() && writeConfig.isYieldingPureLogForMor()) {
// IMPORTANT: only index type that yields pure log files need to enable the cdc log files for compaction,
// index type such as the BLOOM does not need this because it would do delta merge for inserts and generates log for updates,
// both of these two cases are already handled in HoodieCDCExtractor.
return new HoodieMergeHandleWithChangeLog<>(writeConfig, instantTime, table, keyToNewRecords, partitionPath, fileId,
dataFileToBeMerged, taskContextSupplier, keyGeneratorOpt);
} else {
return new HoodieMergeHandle<>(writeConfig, instantTime, table, keyToNewRecords, partitionPath, fileId,
dataFileToBeMerged, taskContextSupplier, keyGeneratorOpt);
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy