All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.common.config.HoodieIndexingConfig Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.common.config;

import org.apache.hudi.common.model.HoodieIndexDefinition;
import org.apache.hudi.common.util.BinaryUtil;
import org.apache.hudi.common.util.ConfigUtils;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;

import javax.annotation.concurrent.Immutable;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.time.Instant;
import java.util.Properties;
import java.util.Set;
import java.util.function.BiConsumer;

import static org.apache.hudi.common.util.ConfigUtils.fetchConfigs;
import static org.apache.hudi.common.util.ConfigUtils.recoverIfNeeded;
import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;

@Immutable
@ConfigClassProperty(name = "Common Index Configs",
    groupName = ConfigGroups.Names.WRITE_CLIENT,
    subGroupName = ConfigGroups.SubGroupNames.INDEX,
    areCommonConfigs = true,
    description = "")
public class HoodieIndexingConfig extends HoodieConfig {

  public static final String INDEX_DEFINITION_FILE = "index.properties";
  public static final String INDEX_DEFINITION_FILE_BACKUP = "index.properties.backup";
  public static final ConfigProperty INDEX_NAME = ConfigProperty
      .key("hoodie.expression.index.name")
      .noDefaultValue()
      .sinceVersion("1.0.0")
      .withDocumentation("Name of the expression index. This is also used for the partition name in the metadata table.");

  public static final ConfigProperty INDEX_TYPE = ConfigProperty
      .key("hoodie.expression.index.type")
      .defaultValue(MetadataPartitionType.COLUMN_STATS.name())
      .withValidValues(
          MetadataPartitionType.COLUMN_STATS.name(),
          MetadataPartitionType.BLOOM_FILTERS.name(),
          MetadataPartitionType.SECONDARY_INDEX.name()
      )
      .sinceVersion("1.0.0")
      .withDocumentation("Type of the expression index. Default is `column_stats` if there are no functions and expressions in the command. "
          + "Valid options could be BITMAP, COLUMN_STATS, LUCENE, etc. If index_type is not provided, "
          + "and there are functions or expressions in the command then a expression index using column stats will be created.");

  public static final ConfigProperty INDEX_FUNCTION = ConfigProperty
      .key("hoodie.expression.index.function")
      .noDefaultValue()
      .sinceVersion("1.0.0")
      .withDocumentation("Function to be used for building the expression index.");

  public static final ConfigProperty INDEX_DEFINITION_CHECKSUM = ConfigProperty
      .key("hoodie.table.checksum")
      .noDefaultValue()
      .sinceVersion("1.0.0")
      .withDocumentation("Index definition checksum is used to guard against partial writes in HDFS. "
          + "It is added as the last entry in index.properties and then used to validate while reading table config.");

  private static final String INDEX_DEFINITION_CHECKSUM_FORMAT = "%s.%s"; // .

  public HoodieIndexingConfig() {
    super();
  }

  public static void create(HoodieStorage storage, StoragePath metadataFolder, Properties properties)
      throws IOException {
    if (!storage.exists(metadataFolder)) {
      storage.createDirectory(metadataFolder);
    }
    HoodieConfig hoodieConfig = new HoodieConfig(properties);
    StoragePath propertyPath = new StoragePath(metadataFolder, INDEX_DEFINITION_FILE);
    try (OutputStream outputStream = storage.create(propertyPath)) {
      if (!hoodieConfig.contains(INDEX_NAME)) {
        throw new IllegalArgumentException(INDEX_NAME.key() + " property needs to be specified");
      }
      hoodieConfig.setDefaultValue(INDEX_TYPE);
      storeProperties(hoodieConfig.getProps(), outputStream);
    }
  }

  public static void update(HoodieStorage storage, StoragePath metadataFolder,
                            Properties updatedProps) {
    modify(storage, metadataFolder, updatedProps, ConfigUtils::upsertProperties);
  }

  public static void delete(HoodieStorage storage, StoragePath metadataFolder,
                            Set deletedProps) {
    Properties props = new Properties();
    deletedProps.forEach(p -> props.setProperty(p, ""));
    modify(storage, metadataFolder, props, ConfigUtils::deleteProperties);
  }

  public static void recover(HoodieStorage storage, StoragePath metadataFolder)
      throws IOException {
    StoragePath cfgPath = new StoragePath(metadataFolder, INDEX_DEFINITION_FILE);
    StoragePath backupCfgPath = new StoragePath(metadataFolder, INDEX_DEFINITION_FILE_BACKUP);
    recoverIfNeeded(storage, cfgPath, backupCfgPath);
  }

  private static void modify(HoodieStorage storage, StoragePath metadataFolder,
                             Properties modifyProps, BiConsumer modifyFn) {
    StoragePath cfgPath = new StoragePath(metadataFolder, INDEX_DEFINITION_FILE);
    StoragePath backupCfgPath = new StoragePath(metadataFolder, INDEX_DEFINITION_FILE_BACKUP);
    try {
      // 0. do any recovery from prior attempts.
      recoverIfNeeded(storage, cfgPath, backupCfgPath);

      // 1. Read the existing config
      TypedProperties props =
          fetchConfigs(storage, metadataFolder, INDEX_DEFINITION_FILE,
              INDEX_DEFINITION_FILE_BACKUP, MAX_READ_RETRIES, READ_RETRY_DELAY_MSEC);

      // 2. backup the existing properties.
      try (OutputStream out = storage.create(backupCfgPath, false)) {
        storeProperties(props, out);
      }

      // 3. delete the properties file, reads will go to the backup, until we are done.
      storage.deleteFile(cfgPath);

      // 4. Upsert and save back.
      String checksum;
      try (OutputStream out = storage.create(cfgPath, true)) {
        modifyFn.accept(props, modifyProps);
        checksum = storeProperties(props, out);
      }

      // 4. verify and remove backup.
      try (InputStream in = storage.open(cfgPath)) {
        props.clear();
        props.load(in);
        if (!props.containsKey(INDEX_DEFINITION_CHECKSUM.key())
            || !props.getProperty(INDEX_DEFINITION_CHECKSUM.key()).equals(checksum)) {
          // delete the properties file and throw exception indicating update failure
          // subsequent writes will recover and update, reads will go to the backup until then
          storage.deleteFile(cfgPath);
          throw new HoodieIOException("Checksum property missing or does not match.");
        }
      }

      // 5. delete the backup properties file
      storage.deleteFile(backupCfgPath);
    } catch (IOException e) {
      throw new HoodieIOException("Error updating table configs.", e);
    }
  }

  private static Properties getOrderedPropertiesWithTableChecksum(Properties props) {
    Properties orderedProps = new OrderedProperties(props);
    orderedProps.put(INDEX_DEFINITION_CHECKSUM.key(), String.valueOf(generateChecksum(props)));
    return orderedProps;
  }

  /**
   * Write the properties to the given output stream and return the table checksum.
   *
   * @param props        - properties to be written
   * @param outputStream - output stream to which properties will be written
   * @return return the table checksum
   */
  private static String storeProperties(Properties props, OutputStream outputStream)
      throws IOException {
    final String checksum;
    if (isValidChecksum(props)) {
      checksum = props.getProperty(INDEX_DEFINITION_CHECKSUM.key());
      props.store(outputStream, "Updated at " + Instant.now());
    } else {
      Properties propsWithChecksum = getOrderedPropertiesWithTableChecksum(props);
      propsWithChecksum.store(outputStream, "Properties saved on " + Instant.now());
      checksum = propsWithChecksum.getProperty(INDEX_DEFINITION_CHECKSUM.key());
      props.setProperty(INDEX_DEFINITION_CHECKSUM.key(), checksum);
    }
    return checksum;
  }

  private static boolean isValidChecksum(Properties props) {
    return props.containsKey(INDEX_DEFINITION_CHECKSUM.key()) && validateChecksum(props);
  }

  public static long generateChecksum(Properties props) {
    if (!props.containsKey(INDEX_NAME.key())) {
      throw new IllegalArgumentException(INDEX_NAME.key() + " property needs to be specified");
    }
    String table = props.getProperty(INDEX_NAME.key());
    String database = props.getProperty(INDEX_TYPE.key(), "");
    return BinaryUtil.generateChecksum(getUTF8Bytes(String.format(INDEX_DEFINITION_CHECKSUM_FORMAT, database, table)));
  }

  public static boolean validateChecksum(Properties props) {
    return Long.parseLong(props.getProperty(INDEX_DEFINITION_CHECKSUM.key())) == generateChecksum(props);
  }

  public static HoodieIndexingConfig.Builder newBuilder() {
    return new HoodieIndexingConfig.Builder();
  }

  public static class Builder {
    private final HoodieIndexingConfig expressionIndexConfig = new HoodieIndexingConfig();

    public Builder fromFile(File propertiesFile) throws IOException {
      try (FileReader reader = new FileReader(propertiesFile)) {
        this.expressionIndexConfig.getProps().load(reader);
        return this;
      }
    }

    public Builder fromProperties(Properties props) {
      this.expressionIndexConfig.getProps().putAll(props);
      return this;
    }

    public Builder fromIndexConfig(HoodieIndexingConfig expressionIndexConfig) {
      this.expressionIndexConfig.getProps().putAll(expressionIndexConfig.getProps());
      return this;
    }

    public Builder withIndexName(String indexName) {
      expressionIndexConfig.setValue(INDEX_NAME, indexName);
      return this;
    }

    public Builder withIndexType(String indexType) {
      expressionIndexConfig.setValue(INDEX_TYPE, indexType);
      return this;
    }

    public Builder withIndexFunction(String indexFunction) {
      expressionIndexConfig.setValue(INDEX_FUNCTION, indexFunction);
      return this;
    }

    public HoodieIndexingConfig build() {
      expressionIndexConfig.setDefaults(HoodieIndexingConfig.class.getName());
      return expressionIndexConfig;
    }
  }

  public static HoodieIndexingConfig copy(HoodieIndexingConfig expressionIndexConfig) {
    return newBuilder().fromIndexConfig(expressionIndexConfig).build();
  }

  public static HoodieIndexingConfig merge(HoodieIndexingConfig expressionIndexConfig1, HoodieIndexingConfig expressionIndexConfig2) {
    return newBuilder().fromIndexConfig(expressionIndexConfig1).fromIndexConfig(expressionIndexConfig2).build();
  }

  public static HoodieIndexingConfig fromIndexDefinition(HoodieIndexDefinition indexDefinition) {
    return newBuilder().withIndexName(indexDefinition.getIndexName())
        .withIndexType(indexDefinition.getIndexType())
        .withIndexFunction(indexDefinition.getIndexFunction())
        .build();
  }

  public String getIndexName() {
    return getString(INDEX_NAME);
  }

  public String getIndexType() {
    return getString(INDEX_TYPE);
  }

  public String getIndexFunction() {
    return getString(INDEX_FUNCTION);
  }

  public boolean isIndexUsingBloomFilter() {
    return getIndexType().equalsIgnoreCase(MetadataPartitionType.BLOOM_FILTERS.name());
  }

  public boolean isIndexUsingColumnStats() {
    return getIndexType().equalsIgnoreCase(MetadataPartitionType.COLUMN_STATS.name());
  }

  public boolean isIndexUsingRecordIndex() {
    return getIndexType().equalsIgnoreCase(MetadataPartitionType.RECORD_INDEX.name());
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy