All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.config.HoodieIndexConfig Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.config;

import org.apache.hudi.common.bloom.BloomFilterTypeCode;
import org.apache.hudi.common.config.DefaultHoodieConfig;
import org.apache.hudi.index.HoodieIndex;

import javax.annotation.concurrent.Immutable;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;

/**
 * Indexing related config.
 */
@Immutable
public class HoodieIndexConfig extends DefaultHoodieConfig {

  public static final String INDEX_TYPE_PROP = "hoodie.index.type";
  public static final String DEFAULT_INDEX_TYPE = HoodieIndex.IndexType.BLOOM.name();

  public static final String INDEX_CLASS_PROP = "hoodie.index.class";
  public static final String DEFAULT_INDEX_CLASS = "";

  // ***** Bloom Index configs *****
  public static final String BLOOM_FILTER_NUM_ENTRIES = "hoodie.index.bloom.num_entries";
  public static final String DEFAULT_BLOOM_FILTER_NUM_ENTRIES = "60000";
  public static final String BLOOM_FILTER_FPP = "hoodie.index.bloom.fpp";
  public static final String DEFAULT_BLOOM_FILTER_FPP = "0.000000001";
  public static final String BLOOM_INDEX_PARALLELISM_PROP = "hoodie.bloom.index.parallelism";
  // Disable explicit bloom index parallelism setting by default - hoodie auto computes
  public static final String DEFAULT_BLOOM_INDEX_PARALLELISM = "0";
  public static final String BLOOM_INDEX_PRUNE_BY_RANGES_PROP = "hoodie.bloom.index.prune.by.ranges";
  public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true";
  public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching";
  public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true";
  public static final String BLOOM_INDEX_TREE_BASED_FILTER_PROP = "hoodie.bloom.index.use.treebased.filter";
  public static final String DEFAULT_BLOOM_INDEX_TREE_BASED_FILTER = "true";
  // TODO: On by default. Once stable, we will remove the other mode.
  public static final String BLOOM_INDEX_BUCKETIZED_CHECKING_PROP = "hoodie.bloom.index.bucketized.checking";
  public static final String DEFAULT_BLOOM_INDEX_BUCKETIZED_CHECKING = "true";
  public static final String BLOOM_INDEX_FILTER_TYPE = "hoodie.bloom.index.filter.type";
  public static final String DEFAULT_BLOOM_INDEX_FILTER_TYPE = BloomFilterTypeCode.SIMPLE.name();
  public static final String HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES = "hoodie.bloom.index.filter.dynamic.max.entries";
  public static final String DEFAULT_HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES = "100000";
  public static final String SIMPLE_INDEX_USE_CACHING_PROP = "hoodie.simple.index.use.caching";
  public static final String DEFAULT_SIMPLE_INDEX_USE_CACHING = "true";
  public static final String SIMPLE_INDEX_PARALLELISM_PROP = "hoodie.simple.index.parallelism";
  public static final String DEFAULT_SIMPLE_INDEX_PARALLELISM = "50";
  public static final String GLOBAL_SIMPLE_INDEX_PARALLELISM_PROP = "hoodie.global.simple.index.parallelism";
  public static final String DEFAULT_GLOBAL_SIMPLE_INDEX_PARALLELISM = "100";

  // 1B bloom filter checks happen in 250 seconds. 500ms to read a bloom filter.
  // 10M checks in 2500ms, thus amortizing the cost of reading bloom filter across partitions.
  public static final String BLOOM_INDEX_KEYS_PER_BUCKET_PROP = "hoodie.bloom.index.keys.per.bucket";
  public static final String DEFAULT_BLOOM_INDEX_KEYS_PER_BUCKET = "10000000";

  // ***** HBase Index Configs *****
  public static final String HBASE_ZKQUORUM_PROP = "hoodie.index.hbase.zkquorum";
  public static final String HBASE_ZKPORT_PROP = "hoodie.index.hbase.zkport";
  public static final String HBASE_ZK_ZNODEPARENT = "hoodie.index.hbase.zknode.path";
  public static final String HBASE_TABLENAME_PROP = "hoodie.index.hbase.table";
  public static final String HBASE_GET_BATCH_SIZE_PROP = "hoodie.index.hbase.get.batch.size";
  public static final String HBASE_PUT_BATCH_SIZE_PROP = "hoodie.index.hbase.put.batch.size";
  public static final String DEFAULT_HBASE_BATCH_SIZE = "100";


  public static final String BLOOM_INDEX_INPUT_STORAGE_LEVEL = "hoodie.bloom.index.input.storage.level";
  public static final String DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
  public static final String SIMPLE_INDEX_INPUT_STORAGE_LEVEL = "hoodie.simple.index.input.storage.level";
  public static final String DEFAULT_SIMPLE_INDEX_INPUT_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";

  /**
   * Only applies if index type is GLOBAL_BLOOM.
   * 

* When set to true, an update to a record with a different partition from its existing one * will insert the record to the new partition and delete it from the old partition. *

* When set to false, a record will be updated to the old partition. */ public static final String BLOOM_INDEX_UPDATE_PARTITION_PATH = "hoodie.bloom.index.update.partition.path"; public static final String DEFAULT_BLOOM_INDEX_UPDATE_PARTITION_PATH = "false"; public static final String SIMPLE_INDEX_UPDATE_PARTITION_PATH = "hoodie.simple.index.update.partition.path"; public static final String DEFAULT_SIMPLE_INDEX_UPDATE_PARTITION_PATH = "false"; private HoodieIndexConfig(Properties props) { super(props); } public static HoodieIndexConfig.Builder newBuilder() { return new Builder(); } public static class Builder { private final Properties props = new Properties(); public Builder fromFile(File propertiesFile) throws IOException { try (FileReader reader = new FileReader(propertiesFile)) { this.props.load(reader); return this; } } public Builder fromProperties(Properties props) { this.props.putAll(props); return this; } public Builder withIndexType(HoodieIndex.IndexType indexType) { props.setProperty(INDEX_TYPE_PROP, indexType.name()); return this; } public Builder withIndexClass(String indexClass) { props.setProperty(INDEX_CLASS_PROP, indexClass); return this; } public Builder withHBaseIndexConfig(HoodieHBaseIndexConfig hBaseIndexConfig) { props.putAll(hBaseIndexConfig.getProps()); return this; } public Builder bloomFilterNumEntries(int numEntries) { props.setProperty(BLOOM_FILTER_NUM_ENTRIES, String.valueOf(numEntries)); return this; } public Builder bloomFilterFPP(double fpp) { props.setProperty(BLOOM_FILTER_FPP, String.valueOf(fpp)); return this; } public Builder hbaseZkQuorum(String zkString) { props.setProperty(HBASE_ZKQUORUM_PROP, zkString); return this; } public Builder hbaseZkPort(int port) { props.setProperty(HBASE_ZKPORT_PROP, String.valueOf(port)); return this; } public Builder hbaseZkZnodeParent(String zkZnodeParent) { props.setProperty(HBASE_ZK_ZNODEPARENT, zkZnodeParent); return this; } public Builder hbaseTableName(String tableName) { props.setProperty(HBASE_TABLENAME_PROP, tableName); return this; } public Builder bloomIndexParallelism(int parallelism) { props.setProperty(BLOOM_INDEX_PARALLELISM_PROP, String.valueOf(parallelism)); return this; } public Builder bloomIndexPruneByRanges(boolean pruneRanges) { props.setProperty(BLOOM_INDEX_PRUNE_BY_RANGES_PROP, String.valueOf(pruneRanges)); return this; } public Builder bloomIndexUseCaching(boolean useCaching) { props.setProperty(BLOOM_INDEX_USE_CACHING_PROP, String.valueOf(useCaching)); return this; } public Builder bloomIndexTreebasedFilter(boolean useTreeFilter) { props.setProperty(BLOOM_INDEX_TREE_BASED_FILTER_PROP, String.valueOf(useTreeFilter)); return this; } public Builder bloomIndexBucketizedChecking(boolean bucketizedChecking) { props.setProperty(BLOOM_INDEX_BUCKETIZED_CHECKING_PROP, String.valueOf(bucketizedChecking)); return this; } public Builder bloomIndexKeysPerBucket(int keysPerBucket) { props.setProperty(BLOOM_INDEX_KEYS_PER_BUCKET_PROP, String.valueOf(keysPerBucket)); return this; } public Builder withBloomIndexInputStorageLevel(String level) { props.setProperty(BLOOM_INDEX_INPUT_STORAGE_LEVEL, level); return this; } public Builder withBloomIndexUpdatePartitionPath(boolean updatePartitionPath) { props.setProperty(BLOOM_INDEX_UPDATE_PARTITION_PATH, String.valueOf(updatePartitionPath)); return this; } public Builder withSimpleIndexParallelism(int parallelism) { props.setProperty(SIMPLE_INDEX_PARALLELISM_PROP, String.valueOf(parallelism)); return this; } public Builder simpleIndexUseCaching(boolean useCaching) { props.setProperty(SIMPLE_INDEX_USE_CACHING_PROP, String.valueOf(useCaching)); return this; } public Builder withSimpleIndexInputStorageLevel(String level) { props.setProperty(SIMPLE_INDEX_INPUT_STORAGE_LEVEL, level); return this; } public Builder withGlobalSimpleIndexParallelism(int parallelism) { props.setProperty(GLOBAL_SIMPLE_INDEX_PARALLELISM_PROP, String.valueOf(parallelism)); return this; } public Builder withGlobalSimpleIndexUpdatePartitionPath(boolean updatePartitionPath) { props.setProperty(SIMPLE_INDEX_UPDATE_PARTITION_PATH, String.valueOf(updatePartitionPath)); return this; } public HoodieIndexConfig build() { HoodieIndexConfig config = new HoodieIndexConfig(props); setDefaultOnCondition(props, !props.containsKey(INDEX_TYPE_PROP), INDEX_TYPE_PROP, DEFAULT_INDEX_TYPE); setDefaultOnCondition(props, !props.containsKey(INDEX_CLASS_PROP), INDEX_CLASS_PROP, DEFAULT_INDEX_CLASS); setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_NUM_ENTRIES), BLOOM_FILTER_NUM_ENTRIES, DEFAULT_BLOOM_FILTER_NUM_ENTRIES); setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_FPP), BLOOM_FILTER_FPP, DEFAULT_BLOOM_FILTER_FPP); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PARALLELISM_PROP), BLOOM_INDEX_PARALLELISM_PROP, DEFAULT_BLOOM_INDEX_PARALLELISM); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PRUNE_BY_RANGES_PROP), BLOOM_INDEX_PRUNE_BY_RANGES_PROP, DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_USE_CACHING_PROP), BLOOM_INDEX_USE_CACHING_PROP, DEFAULT_BLOOM_INDEX_USE_CACHING); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_INPUT_STORAGE_LEVEL), BLOOM_INDEX_INPUT_STORAGE_LEVEL, DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_UPDATE_PARTITION_PATH), BLOOM_INDEX_UPDATE_PARTITION_PATH, DEFAULT_BLOOM_INDEX_UPDATE_PARTITION_PATH); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_TREE_BASED_FILTER_PROP), BLOOM_INDEX_TREE_BASED_FILTER_PROP, DEFAULT_BLOOM_INDEX_TREE_BASED_FILTER); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_BUCKETIZED_CHECKING_PROP), BLOOM_INDEX_BUCKETIZED_CHECKING_PROP, DEFAULT_BLOOM_INDEX_BUCKETIZED_CHECKING); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_KEYS_PER_BUCKET_PROP), BLOOM_INDEX_KEYS_PER_BUCKET_PROP, DEFAULT_BLOOM_INDEX_KEYS_PER_BUCKET); setDefaultOnCondition(props, !props.contains(BLOOM_INDEX_FILTER_TYPE), BLOOM_INDEX_FILTER_TYPE, DEFAULT_BLOOM_INDEX_FILTER_TYPE); setDefaultOnCondition(props, !props.contains(HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES), HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES, DEFAULT_HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES); setDefaultOnCondition(props, !props.containsKey(SIMPLE_INDEX_PARALLELISM_PROP), SIMPLE_INDEX_PARALLELISM_PROP, DEFAULT_SIMPLE_INDEX_PARALLELISM); setDefaultOnCondition(props, !props.containsKey(SIMPLE_INDEX_USE_CACHING_PROP), SIMPLE_INDEX_USE_CACHING_PROP, DEFAULT_SIMPLE_INDEX_USE_CACHING); setDefaultOnCondition(props, !props.containsKey(SIMPLE_INDEX_INPUT_STORAGE_LEVEL), SIMPLE_INDEX_INPUT_STORAGE_LEVEL, DEFAULT_SIMPLE_INDEX_INPUT_STORAGE_LEVEL); setDefaultOnCondition(props, !props.containsKey(GLOBAL_SIMPLE_INDEX_PARALLELISM_PROP), GLOBAL_SIMPLE_INDEX_PARALLELISM_PROP, DEFAULT_GLOBAL_SIMPLE_INDEX_PARALLELISM); setDefaultOnCondition(props, !props.containsKey(SIMPLE_INDEX_UPDATE_PARTITION_PATH), SIMPLE_INDEX_UPDATE_PARTITION_PATH, DEFAULT_SIMPLE_INDEX_UPDATE_PARTITION_PATH); // Throws IllegalArgumentException if the value set is not a known Hoodie Index Type HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP)); return config; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy