// org.apache.hudi.config.HoodieIndexConfig (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.config;
import org.apache.hudi.common.bloom.BloomFilterTypeCode;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.engine.EngineType;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.index.HoodieIndex;
import javax.annotation.concurrent.Immutable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;
import static org.apache.hudi.config.HoodieHBaseIndexConfig.GET_BATCH_SIZE;
import static org.apache.hudi.config.HoodieHBaseIndexConfig.PUT_BATCH_SIZE;
import static org.apache.hudi.config.HoodieHBaseIndexConfig.TABLENAME;
import static org.apache.hudi.config.HoodieHBaseIndexConfig.ZKPORT;
import static org.apache.hudi.config.HoodieHBaseIndexConfig.ZKQUORUM;
/**
 * Indexing related config.
 *
 * <p>This class declares the {@link ConfigProperty} definitions for the index settings, a set of
 * deprecated {@code String} aliases for their keys and default values, and a {@link Builder}
 * used to assemble an instance.
 */
@Immutable
@ConfigClassProperty(name = "Index Configs",
    groupName = ConfigGroups.Names.WRITE_CLIENT,
    description = "Configurations that control indexing behavior, "
        + "which tags incoming records as either inserts or updates to older records.")
public class HoodieIndexConfig extends HoodieConfig {

  // No static default here: Builder#build() registers an engine-specific default for this property.
  public static final ConfigProperty<String> INDEX_TYPE = ConfigProperty
      .key("hoodie.index.type")
      .noDefaultValue()
      .withDocumentation("Type of index to use. Default is Bloom filter. "
          + "Possible options are [BLOOM | GLOBAL_BLOOM |SIMPLE | GLOBAL_SIMPLE | INMEMORY | HBASE]. "
          + "Bloom filters removes the dependency on a external system "
          + "and is stored in the footer of the Parquet Data Files");

  public static final ConfigProperty<String> INDEX_CLASS_NAME = ConfigProperty
      .key("hoodie.index.class")
      .defaultValue("")
      .withDocumentation("Full path of user-defined index class and must be a subclass of HoodieIndex class. "
          + "It will take precedence over the hoodie.index.type configuration if specified");

  // ***** Bloom Index configs *****
  public static final ConfigProperty<String> BLOOM_FILTER_NUM_ENTRIES_VALUE = ConfigProperty
      .key("hoodie.index.bloom.num_entries")
      .defaultValue("60000")
      .withDocumentation("Only applies if index type is BLOOM. "
          + "This is the number of entries to be stored in the bloom filter. "
          + "The rationale for the default: Assume the maxParquetFileSize is 128MB and averageRecordSize is 1kb and "
          + "hence we approx a total of 130K records in a file. The default (60000) is roughly half of this approximation. "
          + "Warning: Setting this very low, will generate a lot of false positives and index lookup "
          + "will have to scan a lot more files than it has to and setting this to a very high number will "
          + "increase the size every base file linearly (roughly 4KB for every 50000 entries). "
          + "This config is also used with DYNAMIC bloom filter which determines the initial size for the bloom.");

  public static final ConfigProperty<String> BLOOM_FILTER_FPP_VALUE = ConfigProperty
      .key("hoodie.index.bloom.fpp")
      .defaultValue("0.000000001")
      .withDocumentation("Only applies if index type is BLOOM. "
          + "Error rate allowed given the number of entries. This is used to calculate how many bits should be "
          + "assigned for the bloom filter and the number of hash functions. This is usually set very low (default: 0.000000001), "
          + "we like to tradeoff disk space for lower false positives. "
          + "If the number of entries added to bloom filter exceeds the configured value (hoodie.index.bloom.num_entries), "
          + "then this fpp may not be honored.");

  public static final ConfigProperty<String> BLOOM_INDEX_PARALLELISM = ConfigProperty
      .key("hoodie.bloom.index.parallelism")
      .defaultValue("0")
      .withDocumentation("Only applies if index type is BLOOM. "
          + "This is the amount of parallelism for index lookup, which involves a shuffle. "
          + "By default, this is auto computed based on input workload characteristics.");

  public static final ConfigProperty<String> BLOOM_INDEX_PRUNE_BY_RANGES = ConfigProperty
      .key("hoodie.bloom.index.prune.by.ranges")
      .defaultValue("true")
      .withDocumentation("Only applies if index type is BLOOM. "
          + "When true, range information from files to leveraged speed up index lookups. Particularly helpful, "
          + "if the key has a monotonously increasing prefix, such as timestamp. "
          + "If the record key is completely random, it is better to turn this off, since range pruning will only "
          + " add extra overhead to the index lookup.");

  // The first fragment ends with a space so the two sentences do not run together once concatenated.
  public static final ConfigProperty<String> BLOOM_INDEX_USE_CACHING = ConfigProperty
      .key("hoodie.bloom.index.use.caching")
      .defaultValue("true")
      .withDocumentation("Only applies if index type is BLOOM. "
          + "When true, the input RDD will cached to speed up index lookup by reducing IO "
          + "for computing parallelism or affected partitions");

  public static final ConfigProperty<String> BLOOM_INDEX_TREE_BASED_FILTER = ConfigProperty
      .key("hoodie.bloom.index.use.treebased.filter")
      .defaultValue("true")
      .withDocumentation("Only applies if index type is BLOOM. "
          + "When true, interval tree based file pruning optimization is enabled. "
          + "This mode speeds-up file-pruning based on key ranges when compared with the brute-force mode");

  // TODO: On by default. Once stable, we will remove the other mode.
  public static final ConfigProperty<String> BLOOM_INDEX_BUCKETIZED_CHECKING = ConfigProperty
      .key("hoodie.bloom.index.bucketized.checking")
      .defaultValue("true")
      .withDocumentation("Only applies if index type is BLOOM. "
          + "When true, bucketized bloom filtering is enabled. "
          + "This reduces skew seen in sort based bloom index lookup");

  public static final ConfigProperty<String> BLOOM_FILTER_TYPE = ConfigProperty
      .key("hoodie.bloom.index.filter.type")
      .defaultValue(BloomFilterTypeCode.DYNAMIC_V0.name())
      .withDocumentation("Filter type used. Default is BloomFilterTypeCode.DYNAMIC_V0. "
          + "Available values are [BloomFilterTypeCode.SIMPLE , BloomFilterTypeCode.DYNAMIC_V0]. "
          + "Dynamic bloom filters auto size themselves based on number of keys.");

  public static final ConfigProperty<String> BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES = ConfigProperty
      .key("hoodie.bloom.index.filter.dynamic.max.entries")
      .defaultValue("100000")
      .withDocumentation("The threshold for the maximum number of keys to record in a dynamic Bloom filter row. "
          + "Only applies if filter type is BloomFilterTypeCode.DYNAMIC_V0.");

  public static final ConfigProperty<String> SIMPLE_INDEX_USE_CACHING = ConfigProperty
      .key("hoodie.simple.index.use.caching")
      .defaultValue("true")
      .withDocumentation("Only applies if index type is SIMPLE. "
          + "When true, the incoming writes will cached to speed up index lookup by reducing IO "
          + "for computing parallelism or affected partitions");

  public static final ConfigProperty<String> SIMPLE_INDEX_PARALLELISM = ConfigProperty
      .key("hoodie.simple.index.parallelism")
      .defaultValue("50")
      .withDocumentation("Only applies if index type is SIMPLE. "
          + "This is the amount of parallelism for index lookup, which involves a Spark Shuffle");

  public static final ConfigProperty<String> GLOBAL_SIMPLE_INDEX_PARALLELISM = ConfigProperty
      .key("hoodie.global.simple.index.parallelism")
      .defaultValue("100")
      .withDocumentation("Only applies if index type is GLOBAL_SIMPLE. "
          + "This is the amount of parallelism for index lookup, which involves a Spark Shuffle");

  // 1B bloom filter checks happen in 250 seconds. 500ms to read a bloom filter.
  // 10M checks in 2500ms, thus amortizing the cost of reading bloom filter across partitions.
  public static final ConfigProperty<String> BLOOM_INDEX_KEYS_PER_BUCKET = ConfigProperty
      .key("hoodie.bloom.index.keys.per.bucket")
      .defaultValue("10000000")
      .withDocumentation("Only applies if bloomIndexBucketizedChecking is enabled and index type is bloom. "
          + "This configuration controls the “bucket” size which tracks the number of record-key checks made against "
          + "a single file and is the unit of work allocated to each partition performing bloom filter lookup. "
          + "A higher value would amortize the fixed cost of reading a bloom filter to memory.");

  public static final ConfigProperty<String> BLOOM_INDEX_INPUT_STORAGE_LEVEL_VALUE = ConfigProperty
      .key("hoodie.bloom.index.input.storage.level")
      .defaultValue("MEMORY_AND_DISK_SER")
      .withDocumentation("Only applies when #bloomIndexUseCaching is set. Determine what level of persistence is used to cache input RDDs. "
          + "Refer to org.apache.spark.storage.StorageLevel for different values");

  public static final ConfigProperty<String> SIMPLE_INDEX_INPUT_STORAGE_LEVEL_VALUE = ConfigProperty
      .key("hoodie.simple.index.input.storage.level")
      .defaultValue("MEMORY_AND_DISK_SER")
      .withDocumentation("Only applies when #simpleIndexUseCaching is set. Determine what level of persistence is used to cache input RDDs. "
          + "Refer to org.apache.spark.storage.StorageLevel for different values");

  /**
   * Only applies if index type is GLOBAL_BLOOM.
   *
   * When set to true, an update to a record with a different partition from its existing one
   * will insert the record to the new partition and delete it from the old partition.
   *
   * When set to false, a record will be updated to the old partition.
   */
  public static final ConfigProperty<String> BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE = ConfigProperty
      .key("hoodie.bloom.index.update.partition.path")
      .defaultValue("true")
      .withDocumentation("Only applies if index type is GLOBAL_BLOOM. "
          + "When set to true, an update including the partition path of a record that already exists will result in "
          + "inserting the incoming record into the new partition and deleting the original record in the old partition. "
          + "When set to false, the original record will only be updated in the old partition");

  // Reference the bloom property by its key rather than relying on the object's implicit toString().
  public static final ConfigProperty<String> SIMPLE_INDEX_UPDATE_PARTITION_PATH_ENABLE = ConfigProperty
      .key("hoodie.simple.index.update.partition.path")
      .defaultValue("true")
      .withDocumentation("Similar to " + BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE.key() + ", but for simple index.");

  // ***** Deprecated HBase aliases. These configs are now part of HoodieHBaseIndexConfig. *****

  /**
   * @deprecated Use {@link HoodieHBaseIndexConfig#ZKQUORUM} and its methods instead
   */
  @Deprecated
  public static final String HBASE_ZKQUORUM_PROP = ZKQUORUM.key();
  /**
   * @deprecated Use {@link HoodieHBaseIndexConfig#ZKPORT} and its methods instead
   */
  @Deprecated
  public static final String HBASE_ZKPORT_PROP = ZKPORT.key();
  /**
   * @deprecated Use {@link HoodieHBaseIndexConfig#ZK_NODE_PATH} and its methods instead
   */
  @Deprecated
  public static final String HBASE_ZK_ZNODEPARENT = HoodieHBaseIndexConfig.ZK_NODE_PATH.key();
  /**
   * @deprecated Use {@link HoodieHBaseIndexConfig#TABLENAME} and its methods instead
   */
  @Deprecated
  public static final String HBASE_TABLENAME_PROP = TABLENAME.key();
  /**
   * @deprecated Use {@link HoodieHBaseIndexConfig#GET_BATCH_SIZE} and its methods instead
   */
  @Deprecated
  public static final String HBASE_GET_BATCH_SIZE_PROP = GET_BATCH_SIZE.key();
  /**
   * @deprecated Use {@link HoodieHBaseIndexConfig#PUT_BATCH_SIZE} and its methods instead
   */
  @Deprecated
  public static final String HBASE_PUT_BATCH_SIZE_PROP = PUT_BATCH_SIZE.key();
  /**
   * @deprecated HBase index configs are now part of {@link HoodieHBaseIndexConfig}
   */
  @Deprecated
  public static final String DEFAULT_HBASE_BATCH_SIZE = "100";

  // ***** Deprecated String aliases for the keys and default values declared above. *****

  /**
   * @deprecated Use {@link #INDEX_TYPE} and its methods instead
   */
  @Deprecated
  public static final String INDEX_TYPE_PROP = INDEX_TYPE.key();
  /**
   * @deprecated Use {@link #INDEX_CLASS_NAME} and its methods instead
   */
  @Deprecated
  public static final String INDEX_CLASS_PROP = INDEX_CLASS_NAME.key();
  /**
   * @deprecated Use {@link #INDEX_CLASS_NAME} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_INDEX_CLASS = INDEX_CLASS_NAME.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_FILTER_NUM_ENTRIES_VALUE} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_FILTER_NUM_ENTRIES = BLOOM_FILTER_NUM_ENTRIES_VALUE.key();
  /**
   * @deprecated Use {@link #BLOOM_FILTER_NUM_ENTRIES_VALUE} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_FILTER_NUM_ENTRIES = BLOOM_FILTER_NUM_ENTRIES_VALUE.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_FILTER_FPP_VALUE} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_FILTER_FPP = BLOOM_FILTER_FPP_VALUE.key();
  /**
   * @deprecated Use {@link #BLOOM_FILTER_FPP_VALUE} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_FILTER_FPP = BLOOM_FILTER_FPP_VALUE.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_PARALLELISM} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_INDEX_PARALLELISM_PROP = BLOOM_INDEX_PARALLELISM.key();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_PARALLELISM} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_INDEX_PARALLELISM = BLOOM_INDEX_PARALLELISM.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_PRUNE_BY_RANGES} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_INDEX_PRUNE_BY_RANGES_PROP = BLOOM_INDEX_PRUNE_BY_RANGES.key();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_PRUNE_BY_RANGES} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = BLOOM_INDEX_PRUNE_BY_RANGES.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_USE_CACHING} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_INDEX_USE_CACHING_PROP = BLOOM_INDEX_USE_CACHING.key();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_USE_CACHING} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = BLOOM_INDEX_USE_CACHING.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_TREE_BASED_FILTER} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_INDEX_TREE_BASED_FILTER_PROP = BLOOM_INDEX_TREE_BASED_FILTER.key();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_TREE_BASED_FILTER} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_INDEX_TREE_BASED_FILTER = BLOOM_INDEX_TREE_BASED_FILTER.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_BUCKETIZED_CHECKING} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_INDEX_BUCKETIZED_CHECKING_PROP = BLOOM_INDEX_BUCKETIZED_CHECKING.key();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_BUCKETIZED_CHECKING} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_INDEX_BUCKETIZED_CHECKING = BLOOM_INDEX_BUCKETIZED_CHECKING.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_FILTER_TYPE} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_INDEX_FILTER_TYPE = BLOOM_FILTER_TYPE.key();
  /**
   * @deprecated Use {@link #BLOOM_FILTER_TYPE} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_INDEX_FILTER_TYPE = BLOOM_FILTER_TYPE.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES} and its methods instead
   */
  @Deprecated
  public static final String HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES = BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.key();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES = BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.defaultValue();
  /**
   * @deprecated Use {@link #SIMPLE_INDEX_USE_CACHING} and its methods instead
   */
  @Deprecated
  public static final String SIMPLE_INDEX_USE_CACHING_PROP = SIMPLE_INDEX_USE_CACHING.key();
  /**
   * @deprecated Use {@link #SIMPLE_INDEX_USE_CACHING} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_SIMPLE_INDEX_USE_CACHING = SIMPLE_INDEX_USE_CACHING.defaultValue();
  /**
   * @deprecated Use {@link #SIMPLE_INDEX_PARALLELISM} and its methods instead
   */
  @Deprecated
  public static final String SIMPLE_INDEX_PARALLELISM_PROP = SIMPLE_INDEX_PARALLELISM.key();
  /**
   * @deprecated Use {@link #SIMPLE_INDEX_PARALLELISM} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_SIMPLE_INDEX_PARALLELISM = SIMPLE_INDEX_PARALLELISM.defaultValue();
  /**
   * @deprecated Use {@link #GLOBAL_SIMPLE_INDEX_PARALLELISM} and its methods instead
   */
  @Deprecated
  public static final String GLOBAL_SIMPLE_INDEX_PARALLELISM_PROP = GLOBAL_SIMPLE_INDEX_PARALLELISM.key();
  /**
   * @deprecated Use {@link #GLOBAL_SIMPLE_INDEX_PARALLELISM} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_GLOBAL_SIMPLE_INDEX_PARALLELISM = GLOBAL_SIMPLE_INDEX_PARALLELISM.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_KEYS_PER_BUCKET} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_INDEX_KEYS_PER_BUCKET_PROP = BLOOM_INDEX_KEYS_PER_BUCKET.key();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_KEYS_PER_BUCKET} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_INDEX_KEYS_PER_BUCKET = BLOOM_INDEX_KEYS_PER_BUCKET.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_INPUT_STORAGE_LEVEL_VALUE} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_INDEX_INPUT_STORAGE_LEVEL = BLOOM_INDEX_INPUT_STORAGE_LEVEL_VALUE.key();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_INPUT_STORAGE_LEVEL_VALUE} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL = BLOOM_INDEX_INPUT_STORAGE_LEVEL_VALUE.defaultValue();
  /**
   * @deprecated Use {@link #SIMPLE_INDEX_INPUT_STORAGE_LEVEL_VALUE} and its methods instead
   */
  @Deprecated
  public static final String SIMPLE_INDEX_INPUT_STORAGE_LEVEL = SIMPLE_INDEX_INPUT_STORAGE_LEVEL_VALUE.key();
  /**
   * @deprecated Use {@link #SIMPLE_INDEX_INPUT_STORAGE_LEVEL_VALUE} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_SIMPLE_INDEX_INPUT_STORAGE_LEVEL = SIMPLE_INDEX_INPUT_STORAGE_LEVEL_VALUE.defaultValue();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE} and its methods instead
   */
  @Deprecated
  public static final String BLOOM_INDEX_UPDATE_PARTITION_PATH = BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE.key();
  /**
   * @deprecated Use {@link #BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_BLOOM_INDEX_UPDATE_PARTITION_PATH = BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE.defaultValue();
  /**
   * @deprecated Use {@link #SIMPLE_INDEX_UPDATE_PARTITION_PATH_ENABLE} and its methods instead
   */
  @Deprecated
  public static final String SIMPLE_INDEX_UPDATE_PARTITION_PATH = SIMPLE_INDEX_UPDATE_PARTITION_PATH_ENABLE.key();
  /**
   * @deprecated Use {@link #SIMPLE_INDEX_UPDATE_PARTITION_PATH_ENABLE} and its methods instead
   */
  @Deprecated
  public static final String DEFAULT_SIMPLE_INDEX_UPDATE_PARTITION_PATH = SIMPLE_INDEX_UPDATE_PARTITION_PATH_ENABLE.defaultValue();

  /** Engine this config was built for; initialised by the constructor and refreshed in {@link Builder#build()}. */
  private EngineType engineType;

  /**
   * Use Spark engine by default.
   */
  private HoodieIndexConfig() {
    this(EngineType.SPARK);
  }

  /**
   * Creates a config that records the given engine type.
   *
   * @param engineType engine to record on this config
   */
  private HoodieIndexConfig(EngineType engineType) {
    super();
    this.engineType = engineType;
  }

  /**
   * Entry point for assembling a {@link HoodieIndexConfig}.
   *
   * @return a fresh {@link Builder}
   */
  public static HoodieIndexConfig.Builder newBuilder() {
    return new Builder();
  }

  /**
   * Fluent builder for {@link HoodieIndexConfig}.
   *
   * <p>Each setter writes into the single config instance owned by this builder and returns
   * {@code this}; {@link #build()} returns that same instance.
   */
  public static class Builder {

    private EngineType engineType = EngineType.SPARK;
    private final HoodieIndexConfig hoodieIndexConfig = new HoodieIndexConfig();

    /**
     * Loads properties from the given file into the config under construction.
     *
     * @param propertiesFile properties file to read
     * @return this builder
     * @throws IOException if the file cannot be opened or read
     */
    public Builder fromFile(File propertiesFile) throws IOException {
      // NOTE(review): FileReader decodes with the platform default charset on JDKs before 18.
      try (FileReader reader = new FileReader(propertiesFile)) {
        this.hoodieIndexConfig.getProps().load(reader);
        return this;
      }
    }

    /**
     * Copies every entry of {@code props} into the config under construction.
     *
     * @param props properties to copy
     * @return this builder
     */
    public Builder fromProperties(Properties props) {
      this.hoodieIndexConfig.getProps().putAll(props);
      return this;
    }

    /** Sets {@link #INDEX_TYPE} to the name of the given index type. */
    public Builder withIndexType(HoodieIndex.IndexType indexType) {
      hoodieIndexConfig.setValue(INDEX_TYPE, indexType.name());
      return this;
    }

    /** Sets {@link #INDEX_CLASS_NAME}. */
    public Builder withIndexClass(String indexClass) {
      hoodieIndexConfig.setValue(INDEX_CLASS_NAME, indexClass);
      return this;
    }

    /** Copies all properties of the given HBase index config into the config under construction. */
    public Builder withHBaseIndexConfig(HoodieHBaseIndexConfig hBaseIndexConfig) {
      hoodieIndexConfig.getProps().putAll(hBaseIndexConfig.getProps());
      return this;
    }

    /** Sets {@link #BLOOM_FILTER_NUM_ENTRIES_VALUE}. */
    public Builder bloomFilterNumEntries(int numEntries) {
      hoodieIndexConfig.setValue(BLOOM_FILTER_NUM_ENTRIES_VALUE, String.valueOf(numEntries));
      return this;
    }

    /** Sets {@link #BLOOM_FILTER_FPP_VALUE}. */
    public Builder bloomFilterFPP(double fpp) {
      hoodieIndexConfig.setValue(BLOOM_FILTER_FPP_VALUE, String.valueOf(fpp));
      return this;
    }

    /** Sets {@link #BLOOM_INDEX_PARALLELISM}. */
    public Builder bloomIndexParallelism(int parallelism) {
      hoodieIndexConfig.setValue(BLOOM_INDEX_PARALLELISM, String.valueOf(parallelism));
      return this;
    }

    /** Sets {@link #BLOOM_INDEX_PRUNE_BY_RANGES}. */
    public Builder bloomIndexPruneByRanges(boolean pruneRanges) {
      hoodieIndexConfig.setValue(BLOOM_INDEX_PRUNE_BY_RANGES, String.valueOf(pruneRanges));
      return this;
    }

    /** Sets {@link #BLOOM_INDEX_USE_CACHING}. */
    public Builder bloomIndexUseCaching(boolean useCaching) {
      hoodieIndexConfig.setValue(BLOOM_INDEX_USE_CACHING, String.valueOf(useCaching));
      return this;
    }

    /** Sets {@link #BLOOM_INDEX_TREE_BASED_FILTER}. */
    public Builder bloomIndexTreebasedFilter(boolean useTreeFilter) {
      hoodieIndexConfig.setValue(BLOOM_INDEX_TREE_BASED_FILTER, String.valueOf(useTreeFilter));
      return this;
    }

    /** Sets {@link #BLOOM_INDEX_BUCKETIZED_CHECKING}. */
    public Builder bloomIndexBucketizedChecking(boolean bucketizedChecking) {
      hoodieIndexConfig.setValue(BLOOM_INDEX_BUCKETIZED_CHECKING, String.valueOf(bucketizedChecking));
      return this;
    }

    /** Sets {@link #BLOOM_INDEX_KEYS_PER_BUCKET}. */
    public Builder bloomIndexKeysPerBucket(int keysPerBucket) {
      hoodieIndexConfig.setValue(BLOOM_INDEX_KEYS_PER_BUCKET, String.valueOf(keysPerBucket));
      return this;
    }

    /** Sets {@link #BLOOM_INDEX_INPUT_STORAGE_LEVEL_VALUE}. */
    public Builder withBloomIndexInputStorageLevel(String level) {
      hoodieIndexConfig.setValue(BLOOM_INDEX_INPUT_STORAGE_LEVEL_VALUE, level);
      return this;
    }

    /** Sets {@link #BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE}. */
    public Builder withBloomIndexUpdatePartitionPath(boolean updatePartitionPath) {
      hoodieIndexConfig.setValue(BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE, String.valueOf(updatePartitionPath));
      return this;
    }

    /** Sets {@link #SIMPLE_INDEX_PARALLELISM}. */
    public Builder withSimpleIndexParallelism(int parallelism) {
      hoodieIndexConfig.setValue(SIMPLE_INDEX_PARALLELISM, String.valueOf(parallelism));
      return this;
    }

    /** Sets {@link #SIMPLE_INDEX_USE_CACHING}. */
    public Builder simpleIndexUseCaching(boolean useCaching) {
      hoodieIndexConfig.setValue(SIMPLE_INDEX_USE_CACHING, String.valueOf(useCaching));
      return this;
    }

    /** Sets {@link #SIMPLE_INDEX_INPUT_STORAGE_LEVEL_VALUE}. */
    public Builder withSimpleIndexInputStorageLevel(String level) {
      hoodieIndexConfig.setValue(SIMPLE_INDEX_INPUT_STORAGE_LEVEL_VALUE, level);
      return this;
    }

    /** Sets {@link #GLOBAL_SIMPLE_INDEX_PARALLELISM}. */
    public Builder withGlobalSimpleIndexParallelism(int parallelism) {
      hoodieIndexConfig.setValue(GLOBAL_SIMPLE_INDEX_PARALLELISM, String.valueOf(parallelism));
      return this;
    }

    /** Sets {@link #SIMPLE_INDEX_UPDATE_PARTITION_PATH_ENABLE}. */
    public Builder withGlobalSimpleIndexUpdatePartitionPath(boolean updatePartitionPath) {
      hoodieIndexConfig.setValue(SIMPLE_INDEX_UPDATE_PARTITION_PATH_ENABLE, String.valueOf(updatePartitionPath));
      return this;
    }

    /**
     * Selects the engine whose default index type {@link #build()} will register.
     *
     * @param engineType engine the config is being built for
     * @return this builder
     */
    public Builder withEngineType(EngineType engineType) {
      this.engineType = engineType;
      return this;
    }

    /**
     * Finalises and returns the config held by this builder.
     *
     * @return the config instance this builder has been populating
     * @throws IllegalArgumentException if the configured index type is not a known Hoodie Index Type
     * @throws HoodieNotSupportedException if the selected engine has no default index type
     */
    public HoodieIndexConfig build() {
      // Keep the config's recorded engine in sync with the builder; previously it always stayed SPARK.
      hoodieIndexConfig.engineType = engineType;
      hoodieIndexConfig.setDefaultValue(INDEX_TYPE, getDefaultIndexType(engineType));
      hoodieIndexConfig.setDefaults(HoodieIndexConfig.class.getName());
      // Throws IllegalArgumentException if the value set is not a known Hoodie Index Type
      HoodieIndex.IndexType.valueOf(hoodieIndexConfig.getString(INDEX_TYPE));
      return hoodieIndexConfig;
    }

    /**
     * Maps an engine to the name of its default index type: BLOOM for SPARK, INMEMORY for FLINK and JAVA.
     *
     * @throws HoodieNotSupportedException for any other engine
     */
    private String getDefaultIndexType(EngineType engineType) {
      switch (engineType) {
        case SPARK:
          return HoodieIndex.IndexType.BLOOM.name();
        case FLINK:
        case JAVA:
          return HoodieIndex.IndexType.INMEMORY.name();
        default:
          throw new HoodieNotSupportedException("Unsupported engine " + engineType);
      }
    }

    /**
     * @return the engine type currently selected on this builder
     */
    public EngineType getEngineType() {
      return engineType;
    }
  }
}