All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.config.HoodieArchivalConfig Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.config;

import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;

import javax.annotation.concurrent.Immutable;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;

/**
 * Archival related config.
 *
 * <p>Declares the {@code hoodie.archive.*}, {@code hoodie.keep.*},
 * {@code hoodie.commits.archival.batch} and {@code hoodie.timeline.compaction.batch.size}
 * properties together with their defaults. Instances are created only through
 * {@link #newBuilder()}; the constructor is private.
 */
@Immutable
@ConfigClassProperty(name = "Archival Configs",
    groupName = ConfigGroups.Names.WRITE_CLIENT,
    description = "Configurations that control archival.")
public class HoodieArchivalConfig extends HoodieConfig {

  // NOTE: every ConfigProperty below carries an explicit type argument matching its
  // defaultValue(...). With raw types, defaultValue() would erase to Object and the
  // String constants further down (DEFAULT_*) could not be assigned from it.

  /** Whether archival runs automatically after each commit. */
  public static final ConfigProperty<String> AUTO_ARCHIVE = ConfigProperty
      .key("hoodie.archive.automatic")
      .defaultValue("true")
      .markAdvanced()
      .withDocumentation("When enabled, the archival table service is invoked immediately after each commit,"
          + " to archive commits if we cross a maximum value of commits."
          + " It's recommended to enable this, to ensure number of active commits is bounded.");

  /** Whether the archiver runs asynchronously with writing; only relevant when {@link #AUTO_ARCHIVE} is on. */
  public static final ConfigProperty<String> ASYNC_ARCHIVE = ConfigProperty
      .key("hoodie.archive.async")
      .defaultValue("false")
      .markAdvanced()
      .sinceVersion("0.11.0")
      .withDocumentation("Only applies when " + AUTO_ARCHIVE.key() + " is turned on. "
          + "When turned on runs archiver async with writing, which can speed up overall write performance.");

  /** Maximum number of instants to retain in the active timeline. */
  public static final ConfigProperty<String> MAX_COMMITS_TO_KEEP = ConfigProperty
      .key("hoodie.keep.max.commits")
      .defaultValue("30")
      .withDocumentation("Archiving service moves older entries from timeline into an archived log after each write, to"
          + " keep the metadata overhead constant, even as the table size grows."
          + " This config controls the maximum number of instants to retain in the active timeline. ");

  /** Upper bound on the parallelism used when deleting files of archived instants. */
  public static final ConfigProperty<Integer> DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE = ConfigProperty
      .key("hoodie.archive.delete.parallelism")
      .defaultValue(100)
      .markAdvanced()
      .withDocumentation("When performing archival operation, Hudi needs to delete the files of "
          + "the archived instants in the active timeline in .hoodie folder. The file deletion "
          + "also happens after merging small archived files into larger ones if enabled. "
          + "This config limits the Spark parallelism for deleting files in both cases, i.e., "
          + "parallelism of deleting files does not go above the configured value and the "
          + "parallelism is the number of files to delete if smaller than the "
          + "configured value.  If you see that the file deletion in archival operation is slow "
          + "because of the limited parallelism, you can increase this to tune the performance.");

  /** Minimum number of instants to retain in the active timeline. */
  public static final ConfigProperty<String> MIN_COMMITS_TO_KEEP = ConfigProperty
      .key("hoodie.keep.min.commits")
      .defaultValue("20")
      .withDocumentation("Similar to " + MAX_COMMITS_TO_KEEP.key() + ", but controls the minimum number of"
          + " instants to retain in the active timeline.");

  /** Batch size used when archiving instants. */
  public static final ConfigProperty<String> COMMITS_ARCHIVAL_BATCH_SIZE = ConfigProperty
      .key("hoodie.commits.archival.batch")
      .defaultValue("10")
      .markAdvanced()
      .withDocumentation("Archiving of instants is batched in best-effort manner, to pack more instants into a single"
          + " archive log. This config controls such archival batch size.");

  /** Number of small files compacted at once during timeline compaction. */
  public static final ConfigProperty<Integer> TIMELINE_COMPACTION_BATCH_SIZE = ConfigProperty
      .key("hoodie.timeline.compaction.batch.size")
      .defaultValue(10)
      .markAdvanced()
      .withDocumentation("The number of small files to compact at once.");

  /** Whether archival may proceed past savepoint commits. */
  public static final ConfigProperty<Boolean> ARCHIVE_BEYOND_SAVEPOINT = ConfigProperty
      .key("hoodie.archive.beyond.savepoint")
      .defaultValue(false)
      .markAdvanced()
      .sinceVersion("0.12.0")
      .withDocumentation("If enabled, archival will proceed beyond savepoint, skipping savepoint commits."
          + " If disabled, archival will stop at the earliest savepoint commit.");

  /**
   * @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
   */
  @Deprecated
  public static final String MAX_COMMITS_TO_KEEP_PROP = MAX_COMMITS_TO_KEEP.key();
  /**
   * @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
   */
  @Deprecated
  public static final String MIN_COMMITS_TO_KEEP_PROP = MIN_COMMITS_TO_KEEP.key();
  /**
   * @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
   */
  @Deprecated
  public static final String COMMITS_ARCHIVAL_BATCH_SIZE_PROP = COMMITS_ARCHIVAL_BATCH_SIZE.key();
  /**
   * @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
   */
  @Deprecated
  private static final String DEFAULT_MAX_COMMITS_TO_KEEP = MAX_COMMITS_TO_KEEP.defaultValue();
  /**
   * @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
   */
  @Deprecated
  private static final String DEFAULT_MIN_COMMITS_TO_KEEP = MIN_COMMITS_TO_KEEP.defaultValue();
  /**
   * @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
   */
  @Deprecated
  private static final String DEFAULT_COMMITS_ARCHIVAL_BATCH_SIZE = COMMITS_ARCHIVAL_BATCH_SIZE.defaultValue();

  /** Private: instances are obtained through {@link #newBuilder()}. */
  private HoodieArchivalConfig() {
    super();
  }

  /**
   * Creates a fresh builder backed by a new, empty {@link HoodieArchivalConfig}.
   *
   * @return a new builder
   */
  public static HoodieArchivalConfig.Builder newBuilder() {
    return new HoodieArchivalConfig.Builder();
  }

  /**
   * Fluent builder for {@link HoodieArchivalConfig}.
   *
   * <p>Each {@code with*} method writes the value, converted with {@link String#valueOf},
   * into the single config instance owned by this builder and returns the builder itself.
   * {@link #build()} returns that same instance on every call.
   */
  public static class Builder {

    private final HoodieArchivalConfig archivalConfig = new HoodieArchivalConfig();

    /**
     * Loads properties from a file into the config being built.
     *
     * <p>The file is read through {@link FileReader}, i.e. using that class's default
     * charset; the reader is closed before this method returns.
     *
     * @param propertiesFile properties file to read
     * @return this builder
     * @throws IOException if the file cannot be opened or read
     */
    public Builder fromFile(File propertiesFile) throws IOException {
      try (FileReader reader = new FileReader(propertiesFile)) {
        this.archivalConfig.getProps().load(reader);
        return this;
      }
    }

    /**
     * Copies every entry of {@code props} into the config being built.
     *
     * @param props properties to copy
     * @return this builder
     */
    public Builder fromProperties(Properties props) {
      this.archivalConfig.getProps().putAll(props);
      return this;
    }

    /**
     * Sets {@link HoodieArchivalConfig#AUTO_ARCHIVE}.
     *
     * @param autoArchive value to store (stored as its string form)
     * @return this builder
     */
    public Builder withAutoArchive(Boolean autoArchive) {
      archivalConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive));
      return this;
    }

    /**
     * Sets {@link HoodieArchivalConfig#ASYNC_ARCHIVE}.
     *
     * @param asyncArchive value to store (stored as its string form)
     * @return this builder
     */
    public Builder withAsyncArchive(Boolean asyncArchive) {
      archivalConfig.setValue(ASYNC_ARCHIVE, String.valueOf(asyncArchive));
      return this;
    }

    /**
     * Sets both {@link HoodieArchivalConfig#MIN_COMMITS_TO_KEEP} and
     * {@link HoodieArchivalConfig#MAX_COMMITS_TO_KEEP}. No ordering check between the two
     * values is performed here.
     *
     * @param minToKeep minimum number of instants to retain
     * @param maxToKeep maximum number of instants to retain
     * @return this builder
     */
    public Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
      archivalConfig.setValue(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
      archivalConfig.setValue(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
      return this;
    }

    /**
     * Sets {@link HoodieArchivalConfig#TIMELINE_COMPACTION_BATCH_SIZE}.
     *
     * @param number number of small files to compact at once
     * @return this builder
     */
    public Builder withTimelineCompactionBatchSize(int number) {
      archivalConfig.setValue(TIMELINE_COMPACTION_BATCH_SIZE, String.valueOf(number));
      return this;
    }

    /**
     * Sets {@link HoodieArchivalConfig#DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE}.
     *
     * @param archiveDeleteParallelism parallelism limit for deleting archived instant files
     * @return this builder
     */
    public Builder withArchiveDeleteParallelism(int archiveDeleteParallelism) {
      archivalConfig.setValue(DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE, String.valueOf(archiveDeleteParallelism));
      return this;
    }

    /**
     * Sets {@link HoodieArchivalConfig#COMMITS_ARCHIVAL_BATCH_SIZE}.
     *
     * @param batchSize archival batch size
     * @return this builder
     */
    public Builder withCommitsArchivalBatchSize(int batchSize) {
      archivalConfig.setValue(COMMITS_ARCHIVAL_BATCH_SIZE, String.valueOf(batchSize));
      return this;
    }

    /**
     * Sets {@link HoodieArchivalConfig#ARCHIVE_BEYOND_SAVEPOINT}.
     *
     * @param archiveBeyondSavepoint whether archival may proceed beyond savepoints
     * @return this builder
     */
    public Builder withArchiveBeyondSavepoint(boolean archiveBeyondSavepoint) {
      archivalConfig.setValue(ARCHIVE_BEYOND_SAVEPOINT, String.valueOf(archiveBeyondSavepoint));
      return this;
    }

    /**
     * Finalizes the config: invokes {@code setDefaults} with this config class's name
     * (presumably filling in defaults for properties that were not set explicitly —
     * see {@code HoodieConfig}) and returns the instance owned by this builder.
     *
     * @return the built config; the same object on repeated calls
     */
    public HoodieArchivalConfig build() {
      archivalConfig.setDefaults(HoodieArchivalConfig.class.getName());
      return archivalConfig;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy