/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table.action.commit;

import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.execution.JavaLazyInsertIterable;
import org.apache.hudi.execution.bulkinsert.JavaBulkInsertInternalPartitionerFactory;
import org.apache.hudi.io.CreateHandleFactory;
import org.apache.hudi.io.WriteHandleFactory;
import org.apache.hudi.metadata.JavaHoodieMetadataBulkInsertPartitioner;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.FileIdPrefixProvider;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;

import java.util.ArrayList;
import java.util.List;

/**
 * A Java implementation of {@link BaseBulkInsertHelper}.
*
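 * <p>A minimal usage sketch (the {@code records}, {@code table}, {@code writeConfig} and
 * {@code executor} variables are hypothetical and assumed to be prepared by the caller, as in
 * any Hudi Java engine write path):
 * <pre>{@code
 * HoodieWriteMetadata<List<WriteStatus>> metadata = JavaBulkInsertHelper.newInstance()
 *     .bulkInsert(records, instantTime, table, writeConfig, executor,
 *         true, Option.empty());
 * }</pre>
 *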
 * @param <T> the record payload type
 * @param <R> the result type produced by the commit action executor
*/
@SuppressWarnings("checkstyle:LineLength")
public class JavaBulkInsertHelper<T, R> extends BaseBulkInsertHelper<T, List<HoodieRecord<T>>,
    List<HoodieKey>, List<WriteStatus>, R> {
private JavaBulkInsertHelper() {
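    // The Java engine operates on plain in-memory lists rather than partitioned datasets, so
    // the input-partition extractor handed to the base helper always reports -1; shuffle
    // parallelism is then deduced from the configured value instead.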
super(ignored -> -1);
}
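  // Initialization-on-demand holder idiom: the nested class is loaded (and the singleton
  // constructed) only on the first call to newInstance(), with thread safety guaranteed by
  // JVM class initialization.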
private static class BulkInsertHelperHolder {
private static final JavaBulkInsertHelper JAVA_BULK_INSERT_HELPER = new JavaBulkInsertHelper();
}
public static JavaBulkInsertHelper newInstance() {
return BulkInsertHelperHolder.JAVA_BULK_INSERT_HELPER;
}
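  /**
   * Top-level bulk-insert entry point: transitions the requested instant to inflight (unless
   * another writer already has), writes the new base files, then delegates to the commit
   * executor to update the index and commit if needed.
   */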
@Override
  public HoodieWriteMetadata<List<WriteStatus>> bulkInsert(final List<HoodieRecord<T>> inputRecords,
                                                           final String instantTime,
                                                           final HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table,
                                                           final HoodieWriteConfig config,
                                                           final BaseCommitActionExecutor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>, R> executor,
                                                           final boolean performDedupe,
                                                           final Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner) {
    HoodieWriteMetadata<List<WriteStatus>> result = new HoodieWriteMetadata<>();
// It's possible the transition to inflight could have already happened.
if (!table.getActiveTimeline().filterInflights().containsInstant(instantTime)) {
table.getActiveTimeline().transitionRequestedToInflight(
table.getInstantGenerator().createNewInstant(HoodieInstant.State.REQUESTED, table.getMetaClient().getCommitActionType(), instantTime),
Option.empty(),
config.shouldAllowMultiWriteOnSameInstant());
}
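    // Fall back to the engine's built-in partitioner for the configured sort mode
    // (hoodie.bulkinsert.sort.mode) when the caller has not supplied one.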
BulkInsertPartitioner partitioner = userDefinedBulkInsertPartitioner.orElseGet(() -> JavaBulkInsertInternalPartitionerFactory.get(config.getBulkInsertSortMode()));
// write new files
    List<WriteStatus> writeStatuses = bulkInsert(inputRecords, instantTime, table, config, performDedupe, partitioner, false,
        config.getBulkInsertShuffleParallelism(), new CreateHandleFactory(false));
    // update index
((BaseJavaCommitActionExecutor) executor).updateIndexAndCommitIfNeeded(writeStatuses, result);
return result;
}
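  /**
   * Lower-level overload that only writes the files: optionally de-duplicates the input,
   * repartitions it, and streams the records into write handles. Unlike the overload above,
   * it neither touches the timeline nor updates the index, returning the raw
   * {@link WriteStatus} list for the caller to act on.
   */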
@Override
  public List<WriteStatus> bulkInsert(List<HoodieRecord<T>> inputRecords,
                                      String instantTime,
                                      HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table,
HoodieWriteConfig config,
boolean performDedupe,
BulkInsertPartitioner partitioner,
boolean useWriterSchema,
int configuredParallelism,
WriteHandleFactory writeHandleFactory) {
// De-dupe/merge if needed
    List<HoodieRecord<T>> dedupedRecords = inputRecords;
int targetParallelism = deduceShuffleParallelism(inputRecords, configuredParallelism);
if (performDedupe) {
      dedupedRecords = (List<HoodieRecord<T>>) JavaWriteHelper.newInstance()
.combineOnCondition(config.shouldCombineBeforeInsert(), inputRecords, targetParallelism, table);
}
    final List<HoodieRecord<T>> repartitionedRecords =
        (List<HoodieRecord<T>>) partitioner.repartitionRecords(dedupedRecords, targetParallelism);
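    // The metadata table partitioner dictates its own file id prefixes; for everything else
    // the prefix comes from the FileIdPrefixProvider named in the write config, instantiated
    // reflectively below.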
String fileIdPrefix;
if (partitioner instanceof JavaHoodieMetadataBulkInsertPartitioner) {
fileIdPrefix = partitioner.getFileIdPfx(0);
} else {
FileIdPrefixProvider fileIdPrefixProvider = (FileIdPrefixProvider) ReflectionUtils.loadClass(
config.getFileIdPrefixProviderClassName(),
new TypedProperties(config.getProps()));
fileIdPrefix = fileIdPrefixProvider.createFilePrefix("");
}
    List<WriteStatus> writeStatuses = new ArrayList<>();
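    // JavaLazyInsertIterable lazily funnels the records into write handles created with the
    // chosen handle factory; each element it yields is a List<WriteStatus> for files it has
    // written, collected into the final result here.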
new JavaLazyInsertIterable<>(repartitionedRecords.iterator(), true,
config, instantTime, table,
fileIdPrefix, table.getTaskContextSupplier(),
// Always get the first WriteHandleFactory, as there is only a single data partition for hudi java engine.
(WriteHandleFactory) partitioner.getWriteHandleFactory(0).orElse(writeHandleFactory)).forEachRemaining(writeStatuses::addAll);
return writeStatuses;
}
}