All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hudi.table.action.commit.JavaBulkInsertHelper Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.table.action.commit;

import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.execution.JavaLazyInsertIterable;
import org.apache.hudi.execution.bulkinsert.JavaBulkInsertInternalPartitionerFactory;
import org.apache.hudi.io.CreateHandleFactory;
import org.apache.hudi.io.WriteHandleFactory;
import org.apache.hudi.metadata.JavaHoodieMetadataBulkInsertPartitioner;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.FileIdPrefixProvider;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;

import java.util.ArrayList;
import java.util.List;

/**
 * A java implementation of {@link BaseBulkInsertHelper}.
 *
 * @param 
 */
@SuppressWarnings("checkstyle:LineLength")
public class JavaBulkInsertHelper extends BaseBulkInsertHelper>,
    List, List, R> {

  private JavaBulkInsertHelper() {
    super(ignored -> -1);
  }

  private static class BulkInsertHelperHolder {
    private static final JavaBulkInsertHelper JAVA_BULK_INSERT_HELPER = new JavaBulkInsertHelper();
  }

  public static JavaBulkInsertHelper newInstance() {
    return BulkInsertHelperHolder.JAVA_BULK_INSERT_HELPER;
  }

  @Override
  public HoodieWriteMetadata> bulkInsert(final List> inputRecords,
                                                           final String instantTime,
                                                           final HoodieTable>, List, List> table,
                                                           final HoodieWriteConfig config,
                                                           final BaseCommitActionExecutor>, List, List, R> executor,
                                                           final boolean performDedupe,
                                                           final Option userDefinedBulkInsertPartitioner) {
    HoodieWriteMetadata result = new HoodieWriteMetadata();

    // It's possible the transition to inflight could have already happened.
    if (!table.getActiveTimeline().filterInflights().containsInstant(instantTime)) {
      table.getActiveTimeline().transitionRequestedToInflight(
          table.getInstantGenerator().createNewInstant(HoodieInstant.State.REQUESTED, table.getMetaClient().getCommitActionType(), instantTime),
          Option.empty(),
          config.shouldAllowMultiWriteOnSameInstant());
    }

    BulkInsertPartitioner partitioner = userDefinedBulkInsertPartitioner.orElseGet(() -> JavaBulkInsertInternalPartitionerFactory.get(config.getBulkInsertSortMode()));

    // write new files
    List writeStatuses = bulkInsert(inputRecords, instantTime, table, config, performDedupe, partitioner, false,
        config.getBulkInsertShuffleParallelism(), new CreateHandleFactory(false));
    //update index
    ((BaseJavaCommitActionExecutor) executor).updateIndexAndCommitIfNeeded(writeStatuses, result);
    return result;
  }

  @Override
  public List bulkInsert(List> inputRecords,
                                      String instantTime,
                                      HoodieTable>, List, List> table,
                                      HoodieWriteConfig config,
                                      boolean performDedupe,
                                      BulkInsertPartitioner partitioner,
                                      boolean useWriterSchema,
                                      int configuredParallelism,
                                      WriteHandleFactory writeHandleFactory) {

    // De-dupe/merge if needed
    List> dedupedRecords = inputRecords;

    int targetParallelism = deduceShuffleParallelism(inputRecords, configuredParallelism);

    if (performDedupe) {
      dedupedRecords = (List>) JavaWriteHelper.newInstance()
          .combineOnCondition(config.shouldCombineBeforeInsert(), inputRecords, targetParallelism, table);
    }

    final List> repartitionedRecords =
        (List>) partitioner.repartitionRecords(dedupedRecords, targetParallelism);

    String fileIdPrefix;
    if (partitioner instanceof JavaHoodieMetadataBulkInsertPartitioner) {
      fileIdPrefix = partitioner.getFileIdPfx(0);
    } else {
      FileIdPrefixProvider fileIdPrefixProvider = (FileIdPrefixProvider) ReflectionUtils.loadClass(
          config.getFileIdPrefixProviderClassName(),
          new TypedProperties(config.getProps()));
      fileIdPrefix = fileIdPrefixProvider.createFilePrefix("");
    }

    List writeStatuses = new ArrayList<>();

    new JavaLazyInsertIterable<>(repartitionedRecords.iterator(), true,
        config, instantTime, table,
        fileIdPrefix, table.getTaskContextSupplier(),
        // Always get the first WriteHandleFactory, as there is only a single data partition for hudi java engine.
        (WriteHandleFactory) partitioner.getWriteHandleFactory(0).orElse(writeHandleFactory)).forEachRemaining(writeStatuses::addAll);

    return writeStatuses;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy