/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.index.bucket;

import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.utils.LazyIterableIterator;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.ConsistentHashingNode;
import org.apache.hudi.common.model.HoodieConsistentHashingMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIndexException;
import org.apache.hudi.table.HoodieTable;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.apache.hudi.index.HoodieIndexUtils.tagAsNewRecordIfNeeded;

/**
 * Consistent hashing bucket index implementation, with auto-adjust bucket number.
 * NOTE: bucket resizing is triggered by clustering.
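 *
 * <p>A minimal sketch of the write configs that typically enable this index
 * (values are illustrative only, not prescriptive):
 * <pre>{@code
 * hoodie.index.type=BUCKET
 * hoodie.index.bucket.engine=CONSISTENT_HASHING
 * hoodie.bucket.index.num.buckets=8
 * }</pre>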
 */
public class HoodieConsistentBucketIndex extends HoodieBucketIndex {

  private static final Logger LOG = LoggerFactory.getLogger(HoodieConsistentBucketIndex.class);

  public HoodieConsistentBucketIndex(HoodieWriteConfig config) {
    super(config);
  }

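  /**
   * Not supported: the consistent hashing index can only update record locations through the
   * variant of this method that carries the instant time of the write.
   */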
  @Override
  public HoodieData<WriteStatus> updateLocation(HoodieData<WriteStatus> writeStatuses,
                                                HoodieEngineContext context,
                                                HoodieTable hoodieTable)
      throws HoodieIndexException {
    throw new HoodieIndexException("Consistent hashing index does not support update location without the instant parameter");
  }

  /**
   * Do nothing.
   * A failed write may create hashing metadata for a partition. Even so, we do nothing when rolling back
   * the failed write, because hashing metadata created by a writer always carries the 00000000000000 timestamp
   * and can be viewed as the initialization of the partition rather than as part of the failed write.
   */
  @Override
  public boolean rollbackCommit(String instantTime) {
    return true;
  }

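  /**
   * Tags each record with its target file group by hashing the record key onto a bucket of the
   * record's partition (see {@link ConsistentBucketIdentifier}).
   */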
  @Override
  public <R> HoodieData<HoodieRecord<R>> tagLocation(
      HoodieData<HoodieRecord<R>> records, HoodieEngineContext context,
      HoodieTable hoodieTable)
      throws HoodieIndexException {
    // Get bucket location mapper for the given partitions
    List<String> partitions = records.map(HoodieRecord::getPartitionPath).distinct().collectAsList();
    LOG.info("Get BucketIndexLocationMapper for partitions: {}", partitions);
    ConsistentBucketIndexLocationMapper mapper = new ConsistentBucketIndexLocationMapper(hoodieTable, partitions);

    return records.mapPartitions(iterator ->
        new LazyIterableIterator<HoodieRecord<R>, HoodieRecord<R>>(iterator) {
          @Override
          protected HoodieRecord<R> computeNext() {
            // TODO maybe batch the operation to improve performance
            HoodieRecord<R> record = inputItr.next();
            Option<HoodieRecordLocation> loc = mapper.getRecordLocation(record.getKey());
            return tagAsNewRecordIfNeeded(record, loc);
          }
        }, false);
  }

  public class ConsistentBucketIndexLocationMapper implements Serializable {

    /**
     * Mapping from partition path to bucket identifier.
     */
    private final Map<String, ConsistentBucketIdentifier> partitionToIdentifier;

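    /**
     * Eagerly loads (or creates) the consistent hashing metadata for each given partition and
     * builds its {@link ConsistentBucketIdentifier}.
     */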
    public ConsistentBucketIndexLocationMapper(HoodieTable table, List<String> partitions) {
      // TODO maybe parallel
      partitionToIdentifier = partitions.stream().collect(Collectors.toMap(p -> p, p -> {
        HoodieConsistentHashingMetadata metadata = ConsistentBucketIndexUtils.loadOrCreateMetadata(table, p, getNumBuckets());
        return new ConsistentBucketIdentifier(metadata);
      }));
    }

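    /**
     * Maps a record key to the location of the file group backing its hash bucket.
     *
     * @throws HoodieIndexException if the resolved hashing node has no file group attached
     */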
    public Option<HoodieRecordLocation> getRecordLocation(HoodieKey key) {
      String partitionPath = key.getPartitionPath();
      ConsistentHashingNode node = partitionToIdentifier.get(partitionPath).getBucket(key, indexKeyFields);
      if (!StringUtils.isNullOrEmpty(node.getFileIdPrefix())) {
        // Dynamic Bucket Index doesn't need the instant time of the latest file group.
        // We add suffix 0 here to the file uuid, following the naming convention, i.e., fileId = [uuid]_[numWrites]
        return Option.of(new HoodieRecordLocation(null, FSUtils.createNewFileId(node.getFileIdPrefix(), 0)));
      }

      LOG.error("Consistent hashing node has no file group, partition: {}, meta: {}, record_key: {}",
          partitionPath, partitionToIdentifier.get(partitionPath).getMetadata().getFilename(), key);
      throw new HoodieIndexException("Failed to getBucket as hashing node has no file group");
    }
  }
}