All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hudi.index.bucket.HoodieBucketIndex Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.index.bucket;

import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.utils.LazyIterableIterator;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIndexException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.table.HoodieTable;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.List;

import static org.apache.hudi.index.HoodieIndexUtils.tagAsNewRecordIfNeeded;

/**
 * Hash indexing mechanism.
 */
public abstract class HoodieBucketIndex extends HoodieIndex {

  private static final Logger LOG = LoggerFactory.getLogger(HoodieBucketIndex.class);

  protected final int numBuckets;
  protected final List indexKeyFields;

  public HoodieBucketIndex(HoodieWriteConfig config) {
    super(config);

    this.numBuckets = config.getBucketIndexNumBuckets();
    this.indexKeyFields = Arrays.asList(config.getBucketIndexHashField().split(","));
    LOG.info("Use bucket index, numBuckets = " + numBuckets + ", indexFields: " + indexKeyFields);
  }

  @Override
  public HoodieData updateLocation(HoodieData writeStatuses,
                                                HoodieEngineContext context,
                                                HoodieTable hoodieTable)
      throws HoodieIndexException {
    return writeStatuses;
  }

  @Override
  public  HoodieData> tagLocation(
      HoodieData> records, HoodieEngineContext context,
      HoodieTable hoodieTable)
      throws HoodieIndexException {
    // Get bucket location mapper for the given partitions
    List partitions = records.map(HoodieRecord::getPartitionPath).distinct().collectAsList();
    LOG.info("Get BucketIndexLocationMapper for partitions: " + partitions);
    BucketIndexLocationMapper mapper = getLocationMapper(hoodieTable, partitions);

    return records.mapPartitions(iterator ->
        new LazyIterableIterator, HoodieRecord>(iterator) {
          @Override
          protected HoodieRecord computeNext() {
            // TODO maybe batch the operation to improve performance
            HoodieRecord record = inputItr.next();
            Option loc = mapper.getRecordLocation(record.getKey());
            return tagAsNewRecordIfNeeded(record, loc);
          }
        },
        false
    );
  }

  @Override
  public boolean requiresTagging(WriteOperationType operationType) {
    switch (operationType) {
      case INSERT:
      case INSERT_OVERWRITE:
      case UPSERT:
      case DELETE:
      case DELETE_PREPPED:
      case BULK_INSERT:
        return true;
      default:
        return false;
    }
  }

  @Override
  public boolean rollbackCommit(String instantTime) {
    return true;
  }

  @Override
  public boolean isGlobal() {
    return false;
  }

  @Override
  public boolean canIndexLogFiles() {
    return true;
  }

  @Override
  public boolean isImplicitWithStorage() {
    return true;
  }

  public int getNumBuckets() {
    return numBuckets;
  }

  /**
   * Get a location mapper for the given table & partitionPath
   */
  protected abstract BucketIndexLocationMapper getLocationMapper(HoodieTable table, List partitionPath);
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy