/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.client;

import org.apache.hudi.async.AsyncArchiveService;
import org.apache.hudi.async.AsyncCleanerService;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
import org.apache.hudi.client.heartbeat.HeartbeatUtils;
import org.apache.hudi.client.timeline.HoodieTimelineArchiver;
import org.apache.hudi.common.HoodiePendingRollbackInfo;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.ActionType;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.TableServiceType;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.ClusteringUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieClusteringConfig;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieClusteringException;
import org.apache.hudi.exception.HoodieCompactionException;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieLogCompactException;
import org.apache.hudi.exception.HoodieRollbackException;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.compact.CompactHelpers;
import org.apache.hudi.table.action.rollback.RollbackUtils;
import org.apache.hudi.table.marker.WriteMarkersFactory;

import com.codahale.metrics.Timer;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeCommitMetadata;
import static org.apache.hudi.metadata.HoodieTableMetadata.isMetadataTable;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.isIndexingCommit;

/**
 * Base class for all shared logic between table service clients regardless of engine.
 *
 * @param <I> The {@link HoodieTable} implementation's input type
 * @param <T> The {@link HoodieTable} implementation's output type
 * @param <O> The {@link BaseHoodieWriteClient} implementation's output type (differs in case of spark)
 */
public abstract class BaseHoodieTableServiceClient<I, T, O> extends BaseHoodieClient implements RunsTableService {

  private static final Logger LOG = LoggerFactory.getLogger(BaseHoodieTableServiceClient.class);

  protected transient Timer.Context compactionTimer;
  protected transient Timer.Context clusteringTimer;
  protected transient Timer.Context logCompactionTimer;

  protected transient AsyncCleanerService asyncCleanerService;
  protected transient AsyncArchiveService asyncArchiveService;

  protected Set<String> pendingInflightAndRequestedInstants;

  protected BaseHoodieTableServiceClient(HoodieEngineContext context,
                                         HoodieWriteConfig clientConfig,
                                         Option<EmbeddedTimelineService> timelineService) {
    super(context, clientConfig, timelineService);
  }

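  /**
   * Starts the async cleaner service, creating it on first use if async cleaning is enabled,
   * or restarting the previously created instance.
   */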
  protected void startAsyncCleanerService(BaseHoodieWriteClient writeClient) {
    if (this.asyncCleanerService == null) {
      this.asyncCleanerService = AsyncCleanerService.startAsyncCleaningIfEnabled(writeClient);
    } else {
      this.asyncCleanerService.start(null);
    }
  }

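  /**
   * Starts the async archive service, creating it on first use if async archiving is enabled,
   * or restarting the previously created instance.
   */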
  protected void startAsyncArchiveService(BaseHoodieWriteClient writeClient) {
    if (this.asyncArchiveService == null) {
      this.asyncArchiveService = AsyncArchiveService.startAsyncArchiveIfEnabled(writeClient);
    } else {
      this.asyncArchiveService.start(null);
    }
  }

  protected void asyncClean() {
    AsyncCleanerService.waitForCompletion(asyncCleanerService);
  }

  protected void asyncArchive() {
    AsyncArchiveService.waitForCompletion(asyncArchiveService);
  }

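  /**
   * Starts the metrics timer that corresponds to the given table service operation type.
   */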
  protected void setTableServiceTimer(WriteOperationType operationType) {
    switch (operationType) {
      case CLUSTER:
        clusteringTimer = metrics.getClusteringCtx();
        break;
      case COMPACT:
        compactionTimer = metrics.getCompactionCtx();
        break;
      case LOG_COMPACT:
        logCompactionTimer = metrics.getLogCompactionCtx();
        break;
      default:
    }
  }

  protected void setPendingInflightAndRequestedInstants(Set<String> pendingInflightAndRequestedInstants) {
    this.pendingInflightAndRequestedInstants = pendingInflightAndRequestedInstants;
  }

  /**
   * Any pre-commit actions, like conflict resolution, go here.
   *
   * @param metadata commit metadata for which pre commit is being invoked.
   */
  protected void preCommit(HoodieCommitMetadata metadata) {
    // Create a Hoodie table after startTxn which encapsulates the commits and files visible.
    // Important to create this after the lock to ensure the latest commits show up in the timeline without needing a reload
    HoodieTable table = createTable(config, hadoopConf);
    resolveWriteConflict(table, metadata, this.pendingInflightAndRequestedInstants);
  }

  /**
   * Performs a compaction operation on a table, serially before or after an insert/upsert action.
   * Scheduling and execution are done inline.
   */
  protected Option<String> inlineCompaction(Option<Map<String, String>> extraMetadata) {
    Option compactionInstantTimeOpt = inlineScheduleCompaction(extraMetadata);
    compactionInstantTimeOpt.ifPresent(compactInstantTime -> {
      // inline compaction should auto commit as the user is never given control
      compact(compactInstantTime, true);
    });
    return compactionInstantTimeOpt;
  }

  private void inlineCompaction(HoodieTable table, Option<Map<String, String>> extraMetadata) {
    if (shouldDelegateToTableServiceManager(config, ActionType.compaction)) {
      scheduleCompaction(extraMetadata);
    } else {
      runAnyPendingCompactions(table);
      inlineCompaction(extraMetadata);
    }
  }

  /**
   * Ensures compaction instant is in expected state and performs log compaction for the workload stored in instant-time.
   *
   * @param logCompactionInstantTime Compaction Instant Time
   * @return Collection of Write Status
   */
  protected HoodieWriteMetadata<O> logCompact(String logCompactionInstantTime, boolean shouldComplete) {
    HoodieTable table = createTable(config, context.getStorageConf().unwrapAs(Configuration.class));

    // Check if a commit or compaction instant with a greater timestamp is on the timeline.
    // If an instant is found then abort log compaction, since it is no longer needed.
    Set<String> actions = CollectionUtils.createSet(COMMIT_ACTION, COMPACTION_ACTION);
    Option<HoodieInstant> compactionInstantWithGreaterTimestamp =
        Option.fromJavaOptional(table.getActiveTimeline().getInstantsAsStream()
            .filter(hoodieInstant -> actions.contains(hoodieInstant.getAction()))
            .filter(hoodieInstant -> HoodieTimeline.compareTimestamps(hoodieInstant.getTimestamp(),
                GREATER_THAN, logCompactionInstantTime))
            .findFirst());
    if (compactionInstantWithGreaterTimestamp.isPresent()) {
      throw new HoodieLogCompactException(String.format("Cannot log compact since a compaction instant with greater "
          + "timestamp exists. Instant details %s", compactionInstantWithGreaterTimestamp.get()));
    }

    HoodieTimeline pendingLogCompactionTimeline = table.getActiveTimeline().filterPendingLogCompactionTimeline();
    HoodieInstant inflightInstant = HoodieTimeline.getLogCompactionInflightInstant(logCompactionInstantTime);
    if (pendingLogCompactionTimeline.containsInstant(inflightInstant)) {
      LOG.info("Found Log compaction inflight file. Rolling back the commit and exiting.");
      table.rollbackInflightLogCompaction(inflightInstant, commitToRollback -> getPendingRollbackInfo(table.getMetaClient(), commitToRollback, false));
      table.getMetaClient().reloadActiveTimeline();
      throw new HoodieException("Execution is aborted since it found an inflight log compaction, "
          + "log compaction plans are mutable plans, so reschedule another log compaction.");
    }
    logCompactionTimer = metrics.getLogCompactionCtx();
    WriteMarkersFactory.get(config.getMarkersType(), table, logCompactionInstantTime);
    HoodieWriteMetadata<T> writeMetadata = table.logCompact(context, logCompactionInstantTime);
    HoodieWriteMetadata<O> logCompactionMetadata = convertToOutputMetadata(writeMetadata);
    if (shouldComplete && logCompactionMetadata.getCommitMetadata().isPresent()) {
      completeLogCompaction(logCompactionMetadata.getCommitMetadata().get(), table, logCompactionInstantTime);
    }
    return logCompactionMetadata;
  }

  /**
   * Performs a log compaction operation on a table, serially before or after an insert/upsert action.
   */
  protected Option<String> inlineLogCompact(Option<Map<String, String>> extraMetadata) {
    Option<String> logCompactionInstantTimeOpt = scheduleLogCompaction(extraMetadata);
    logCompactionInstantTimeOpt.ifPresent(logCompactInstantTime -> {
      // inline log compaction should auto commit as the user is never given control
      logCompact(logCompactInstantTime, true);
    });
    return logCompactionInstantTimeOpt;
  }

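  /**
   * Executes any compactions that were scheduled earlier but are still pending on the active timeline.
   */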
  protected void runAnyPendingCompactions(HoodieTable table) {
    table.getActiveTimeline().getWriteTimeline().filterPendingCompactionTimeline().getInstants()
        .forEach(instant -> {
          LOG.info("Running previously failed inflight compaction at instant {}", instant);
          compact(instant.getTimestamp(), true);
        });
  }

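  /**
   * Executes any log compactions that were scheduled earlier but are still pending on the active timeline.
   */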
  protected void runAnyPendingLogCompactions(HoodieTable table) {
    table.getActiveTimeline().getWriteTimeline().filterPendingLogCompactionTimeline().getInstantsAsStream()
        .forEach(instant -> {
          LOG.info("Running previously failed inflight log compaction at instant {}", instant);
          logCompact(instant.getTimestamp(), true);
        });
  }

  /**
   * Schedules compaction inline.
   *
   * @param extraMetadata extra metadata to be used.
   * @return compaction instant if scheduled.
   */
  protected Option<String> inlineScheduleCompaction(Option<Map<String, String>> extraMetadata) {
    return scheduleCompaction(extraMetadata);
  }

  /**
   * Schedules a new compaction instant.
   *
   * @param extraMetadata Extra Metadata to be stored
   */
  public Option<String> scheduleCompaction(Option<Map<String, String>> extraMetadata) throws HoodieIOException {
    String instantTime = createNewInstantTime();
    return scheduleCompactionAtInstant(instantTime, extraMetadata) ? Option.of(instantTime) : Option.empty();
  }

  /**
   * Ensures compaction instant is in expected state and performs Compaction for the workload stored in instant-time.
   *
   * @param compactionInstantTime Compaction Instant Time
   * @return Collection of Write Status
   */
  protected HoodieWriteMetadata<O> compact(String compactionInstantTime, boolean shouldComplete) {
    HoodieTable table = createTable(config, context.getStorageConf().unwrapAs(Configuration.class));
    HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline();
    HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
    if (pendingCompactionTimeline.containsInstant(inflightInstant)) {
      table.rollbackInflightCompaction(inflightInstant, commitToRollback -> getPendingRollbackInfo(table.getMetaClient(), commitToRollback, false));
      table.getMetaClient().reloadActiveTimeline();
    }
    compactionTimer = metrics.getCompactionCtx();
    HoodieWriteMetadata<T> writeMetadata = table.compact(context, compactionInstantTime);
    HoodieWriteMetadata<O> compactionMetadata = convertToOutputMetadata(writeMetadata);
    if (shouldComplete && compactionMetadata.getCommitMetadata().isPresent()) {
      completeCompaction(compactionMetadata.getCommitMetadata().get(), table, compactionInstantTime);
    }
    return compactionMetadata;
  }

  /**
   * Commit a compaction operation. Allow passing additional meta-data to be stored in commit instant file.
   *
   * @param compactionInstantTime Compaction Instant Time
   * @param metadata              All the metadata that gets stored along with a commit
   * @param extraMetadata         Extra Metadata to be stored
   */
  public void commitCompaction(String compactionInstantTime, HoodieCommitMetadata metadata, Option<Map<String, String>> extraMetadata) {
    extraMetadata.ifPresent(m -> m.forEach(metadata::addMetadata));
    completeCompaction(metadata, createTable(config, context.getStorageConf().unwrapAs(Configuration.class)), compactionInstantTime);
  }

  /**
   * Commit Compaction and track metrics.
   */
  protected void completeCompaction(HoodieCommitMetadata metadata, HoodieTable table, String compactionCommitTime) {
    this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status and commit compaction: " + config.getTableName());
    List<HoodieWriteStat> writeStats = metadata.getWriteStats();
    handleWriteErrors(writeStats, TableServiceType.COMPACT);
    final HoodieInstant compactionInstant = HoodieTimeline.getCompactionInflightInstant(compactionCommitTime);
    try {
      this.txnManager.beginTransaction(Option.of(compactionInstant), Option.empty());
      finalizeWrite(table, compactionCommitTime, writeStats);
      // commit to data table after committing to metadata table.
      writeTableMetadata(table, compactionCommitTime, metadata, context.emptyHoodieData());
      LOG.info("Committing Compaction {}", compactionCommitTime);
      LOG.debug("Compaction {} finished with result: {}", compactionCommitTime, metadata);
      CompactHelpers.getInstance().completeInflightCompaction(table, compactionCommitTime, metadata);
    } finally {
      this.txnManager.endTransaction(Option.of(compactionInstant));
      releaseResources(compactionCommitTime);
    }
    WriteMarkersFactory.get(config.getMarkersType(), table, compactionCommitTime)
        .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
    if (compactionTimer != null) {
      long durationInMs = metrics.getDurationInMs(compactionTimer.stop());
      HoodieActiveTimeline.parseDateFromInstantTimeSafely(compactionCommitTime).ifPresent(parsedInstant ->
          metrics.updateCommitMetrics(parsedInstant.getTime(), durationInMs, metadata, COMPACTION_ACTION)
      );
    }
    LOG.info("Compacted successfully on commit {}", compactionCommitTime);
  }

  /**
   * Schedules a new log compaction instant.
   *
   * @param extraMetadata Extra Metadata to be stored
   */
  public Option<String> scheduleLogCompaction(Option<Map<String, String>> extraMetadata) throws HoodieIOException {
    String instantTime = createNewInstantTime();
    return scheduleLogCompactionAtInstant(instantTime, extraMetadata) ? Option.of(instantTime) : Option.empty();
  }

  /**
   * Schedules a new log compaction instant with passed-in instant time.
   *
   * @param instantTime   Log Compaction Instant Time
   * @param extraMetadata Extra Metadata to be stored
   */
  public boolean scheduleLogCompactionAtInstant(String instantTime, Option<Map<String, String>> extraMetadata) throws HoodieIOException {
    return scheduleTableService(instantTime, extraMetadata, TableServiceType.LOG_COMPACT).isPresent();
  }

  /**
   * Performs Log Compaction for the workload stored in instant-time.
   *
   * @param logCompactionInstantTime Log Compaction Instant Time
   * @return Collection of WriteStatus to inspect errors and counts
   */
  public HoodieWriteMetadata<O> logCompact(String logCompactionInstantTime) {
    return logCompact(logCompactionInstantTime, config.shouldAutoCommit());
  }

  /**
   * Commit Log Compaction and track metrics.
   */
  protected void completeLogCompaction(HoodieCommitMetadata metadata, HoodieTable table, String logCompactionCommitTime) {
    this.context.setJobStatus(this.getClass().getSimpleName(), "Collect log compaction write status and commit compaction");
    List<HoodieWriteStat> writeStats = metadata.getWriteStats();
    handleWriteErrors(writeStats, TableServiceType.LOG_COMPACT);
    final HoodieInstant logCompactionInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.LOG_COMPACTION_ACTION, logCompactionCommitTime);
    try {
      this.txnManager.beginTransaction(Option.of(logCompactionInstant), Option.empty());
      preCommit(metadata);
      finalizeWrite(table, logCompactionCommitTime, writeStats);
      // commit to data table after committing to metadata table.
      writeTableMetadata(table, logCompactionCommitTime, metadata, context.emptyHoodieData());
      LOG.info("Committing Log Compaction {}", logCompactionCommitTime);
      LOG.debug("Log Compaction {} finished with result {}", logCompactionCommitTime, metadata);
      CompactHelpers.getInstance().completeInflightLogCompaction(table, logCompactionCommitTime, metadata);
    } finally {
      this.txnManager.endTransaction(Option.of(logCompactionInstant));
      releaseResources(logCompactionCommitTime);
    }
    WriteMarkersFactory.get(config.getMarkersType(), table, logCompactionCommitTime)
        .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
    if (logCompactionTimer != null) {
      long durationInMs = metrics.getDurationInMs(logCompactionTimer.stop());
      HoodieActiveTimeline.parseDateFromInstantTimeSafely(logCompactionCommitTime).ifPresent(parsedInstant ->
          metrics.updateCommitMetrics(parsedInstant.getTime(), durationInMs, metadata, HoodieActiveTimeline.LOG_COMPACTION_ACTION)
      );
    }
    LOG.info("Log Compacted successfully on commit {}", logCompactionCommitTime);
  }

  /**
   * Schedules a new compaction instant with passed-in instant time.
   *
   * @param instantTime   Compaction Instant Time
   * @param extraMetadata Extra Metadata to be stored
   */
  public boolean scheduleCompactionAtInstant(String instantTime, Option<Map<String, String>> extraMetadata) throws HoodieIOException {
    return scheduleTableService(instantTime, extraMetadata, TableServiceType.COMPACT).isPresent();
  }

  /**
   * Schedules a new clustering instant.
   *
   * @param extraMetadata Extra Metadata to be stored
   */
  public Option<String> scheduleClustering(Option<Map<String, String>> extraMetadata) throws HoodieIOException {
    String instantTime = createNewInstantTime();
    return scheduleClusteringAtInstant(instantTime, extraMetadata) ? Option.of(instantTime) : Option.empty();
  }

  /**
   * Schedules a new clustering instant with passed-in instant time.
   *
   * @param instantTime   clustering Instant Time
   * @param extraMetadata Extra Metadata to be stored
   */
  public boolean scheduleClusteringAtInstant(String instantTime, Option<Map<String, String>> extraMetadata) throws HoodieIOException {
    return scheduleTableService(instantTime, extraMetadata, TableServiceType.CLUSTER).isPresent();
  }

  /**
   * Ensures clustering instant is in expected state and performs clustering for the plan stored in metadata.
   *
   * @param clusteringInstant Clustering Instant Time
   * @return Collection of Write Status
   */
  public HoodieWriteMetadata<O> cluster(String clusteringInstant, boolean shouldComplete) {
    HoodieTable table = createTable(config, context.getStorageConf().unwrapAs(Configuration.class));
    HoodieTimeline pendingClusteringTimeline = table.getActiveTimeline().filterPendingReplaceTimeline();
    HoodieInstant inflightInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringInstant);
    if (pendingClusteringTimeline.containsInstant(inflightInstant)) {
      if (pendingClusteringTimeline.isPendingClusterInstant(inflightInstant.getTimestamp())) {
        table.rollbackInflightClustering(inflightInstant, commitToRollback -> getPendingRollbackInfo(table.getMetaClient(), commitToRollback, false));
        table.getMetaClient().reloadActiveTimeline();
      } else {
        throw new HoodieClusteringException("Non clustering replace-commit inflight at timestamp " + clusteringInstant);
      }
    }
    clusteringTimer = metrics.getClusteringCtx();
    LOG.info("Starting clustering at {}", clusteringInstant);
    HoodieWriteMetadata<T> writeMetadata = table.cluster(context, clusteringInstant);
    HoodieWriteMetadata<O> clusteringMetadata = convertToOutputMetadata(writeMetadata);
    // Validation has to be done after cloning. If not, it could result in referencing the write status twice which means clustering could get executed twice.
    validateClusteringCommit(clusteringMetadata, clusteringInstant, table);

    // Publish file creation metrics for clustering.
    if (config.isMetricsOn()) {
      clusteringMetadata.getWriteStats()
          .ifPresent(hoodieWriteStats -> hoodieWriteStats.stream()
              .filter(hoodieWriteStat -> hoodieWriteStat.getRuntimeStats() != null)
              .map(hoodieWriteStat -> hoodieWriteStat.getRuntimeStats().getTotalCreateTime())
              .forEach(metrics::updateClusteringFileCreationMetrics));
    }

    // TODO : Where is shouldComplete used ?
    if (shouldComplete && clusteringMetadata.getCommitMetadata().isPresent()) {
      completeClustering((HoodieReplaceCommitMetadata) clusteringMetadata.getCommitMetadata().get(), table, clusteringInstant, Option.ofNullable(convertToWriteStatus(writeMetadata)));
    }
    return clusteringMetadata;
  }

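  /**
   * Purges a pending clustering instant by rolling it back and removing it from the timeline.
   *
   * @param clusteringInstant clustering instant time to purge.
   * @return true if a pending clustering instant was found and purged, false otherwise.
   */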
  public boolean purgePendingClustering(String clusteringInstant) {
    HoodieTable table = createTable(config, context.getStorageConf().unwrapAs(Configuration.class));
    HoodieTimeline pendingClusteringTimeline = table.getActiveTimeline().filterPendingReplaceTimeline();
    HoodieInstant inflightInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringInstant);
    if (pendingClusteringTimeline.containsInstant(inflightInstant)) {
      table.rollbackInflightClustering(inflightInstant, commitToRollback -> getPendingRollbackInfo(table.getMetaClient(), commitToRollback, false), true);
      table.getMetaClient().reloadActiveTimeline();
      return true;
    }
    return false;
  }

  /**
   * Delete expired partition by config.
   *
   * @param instantTime Instant Time for the action
   * @return HoodieWriteMetadata
   */
  public HoodieWriteMetadata managePartitionTTL(String instantTime) {
    HoodieTable table = createTable(config, context.getStorageConf().unwrapAs(Configuration.class));
    return table.managePartitionTTL(context, instantTime);
  }

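  /**
   * Validates the clustering result before the commit is completed; engine-specific implementations
   * define the actual checks.
   */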
  protected abstract void validateClusteringCommit(HoodieWriteMetadata<O> clusteringMetadata, String clusteringCommitTime, HoodieTable table);

  protected abstract HoodieWriteMetadata<O> convertToOutputMetadata(HoodieWriteMetadata<T> writeMetadata);

  protected abstract HoodieData<WriteStatus> convertToWriteStatus(HoodieWriteMetadata<T> writeMetadata);

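  /**
   * Commits the clustering (replace commit) and tracks metrics.
   */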
  private void completeClustering(HoodieReplaceCommitMetadata metadata,
                                  HoodieTable table,
                                  String clusteringCommitTime,
                                  Option<HoodieData<WriteStatus>> writeStatuses) {
    List<HoodieWriteStat> writeStats = metadata.getWriteStats();
    handleWriteErrors(writeStats, TableServiceType.CLUSTER);
    final HoodieInstant clusteringInstant = HoodieTimeline.getReplaceCommitInflightInstant(clusteringCommitTime);
    try {
      this.txnManager.beginTransaction(Option.of(clusteringInstant), Option.empty());

      finalizeWrite(table, clusteringCommitTime, writeStats);
      // Only in some cases conflict resolution needs to be performed.
      // So, check if preCommit method that does conflict resolution needs to be triggered.
      if (isPreCommitRequired()) {
        preCommit(metadata);
      }
      // Update table's metadata (table)
      writeTableMetadata(table, clusteringInstant.getTimestamp(), metadata, writeStatuses.orElseGet(context::emptyHoodieData));

      LOG.info("Committing Clustering {}", clusteringCommitTime);
      LOG.debug("Clustering {} finished with result {}", clusteringCommitTime, metadata);

      table.getActiveTimeline().transitionReplaceInflightToComplete(
          false,
          clusteringInstant,
          serializeCommitMetadata(metadata));
    } catch (Exception e) {
      throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, e);
    } finally {
      this.txnManager.endTransaction(Option.of(clusteringInstant));
      releaseResources(clusteringCommitTime);
    }
    WriteMarkersFactory.get(config.getMarkersType(), table, clusteringCommitTime)
        .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
    if (clusteringTimer != null) {
      long durationInMs = metrics.getDurationInMs(clusteringTimer.stop());
      HoodieActiveTimeline.parseDateFromInstantTimeSafely(clusteringCommitTime).ifPresent(parsedInstant ->
          metrics.updateCommitMetrics(parsedInstant.getTime(), durationInMs, metadata, HoodieActiveTimeline.REPLACE_COMMIT_ACTION)
      );
    }
    LOG.info("Clustered successfully on commit {}", clusteringCommitTime);
  }

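  /**
   * Runs the enabled inline table services (compaction, log compaction, clustering and
   * partition TTL management) after a commit, based on the write configuration.
   */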
  protected void runTableServicesInline(HoodieTable table, HoodieCommitMetadata metadata, Option<Map<String, String>> extraMetadata) {
    if (!tableServicesEnabled(config)) {
      return;
    }

    if (!config.areAnyTableServicesExecutedInline() && !config.areAnyTableServicesScheduledInline()) {
      return;
    }

    // Do an inline compaction if enabled
    if (config.inlineCompactionEnabled()) {
      metadata.addMetadata(HoodieCompactionConfig.INLINE_COMPACT.key(), "true");
      inlineCompaction(table, extraMetadata);
    } else {
      metadata.addMetadata(HoodieCompactionConfig.INLINE_COMPACT.key(), "false");
    }

    // if just inline schedule is enabled
    if (!config.inlineCompactionEnabled() && config.scheduleInlineCompaction()
        && table.getActiveTimeline().getWriteTimeline().filterPendingCompactionTimeline().empty()) {
      // proceed only if there are no pending compactions
      metadata.addMetadata(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT.key(), "true");
      inlineScheduleCompaction(extraMetadata);
    }

    // Do an inline log compaction if enabled
    if (config.inlineLogCompactionEnabled()) {
      runAnyPendingLogCompactions(table);
      metadata.addMetadata(HoodieCompactionConfig.INLINE_LOG_COMPACT.key(), "true");
      inlineLogCompact(extraMetadata);
    } else {
      metadata.addMetadata(HoodieCompactionConfig.INLINE_LOG_COMPACT.key(), "false");
    }

    // Do an inline clustering if enabled
    if (config.inlineClusteringEnabled()) {
      metadata.addMetadata(HoodieClusteringConfig.INLINE_CLUSTERING.key(), "true");
      inlineClustering(table, extraMetadata);
    } else {
      metadata.addMetadata(HoodieClusteringConfig.INLINE_CLUSTERING.key(), "false");
    }

    // if just inline schedule is enabled
    if (!config.inlineClusteringEnabled() && config.scheduleInlineClustering()
        && !table.getActiveTimeline().getLastPendingClusterInstant().isPresent()) {
      // proceed only if there are no pending clustering
      metadata.addMetadata(HoodieClusteringConfig.SCHEDULE_INLINE_CLUSTERING.key(), "true");
      inlineScheduleClustering(extraMetadata);
    }

    //  Do an inline partition ttl management if enabled
    if (config.isInlinePartitionTTLEnable()) {
      String instantTime = createNewInstantTime();
      table.managePartitionTTL(table.getContext(), instantTime);
    }
  }

  /**
   * Schedule table services such as clustering, compaction & cleaning.
   *
   * @param instantTime      Instant time to use for the requested table service
   * @param extraMetadata    Metadata to pass onto the scheduled service instant
   * @param tableServiceType Type of table service to schedule
   * @return the instant time if the table service got scheduled, {@link Option#empty()} otherwise
   */
  public Option<String> scheduleTableService(String instantTime, Option<Map<String, String>> extraMetadata,
                                             TableServiceType tableServiceType) {
    // A lock is required to guard against race conditions between an ongoing writer and scheduling a table service.
    final Option<HoodieInstant> inflightInstant = Option.of(new HoodieInstant(HoodieInstant.State.REQUESTED,
        tableServiceType.getAction(), instantTime));
    try {
      this.txnManager.beginTransaction(inflightInstant, Option.empty());
      LOG.info("Scheduling table service {}", tableServiceType);
      return scheduleTableServiceInternal(instantTime, extraMetadata, tableServiceType);
    } finally {
      this.txnManager.endTransaction(inflightInstant);
    }
  }

  protected Option<String> scheduleTableServiceInternal(String instantTime, Option<Map<String, String>> extraMetadata,
                                                        TableServiceType tableServiceType) {
    if (!tableServicesEnabled(config)) {
      return Option.empty();
    }

    Option<String> option = Option.empty();
    HoodieTable table = createTable(config, hadoopConf);

    switch (tableServiceType) {
      case ARCHIVE:
        LOG.info("Scheduling archiving is not supported. Skipping.");
        break;
      case CLUSTER:
        LOG.info("Scheduling clustering at instant time: {}", instantTime);
        Option<HoodieClusteringPlan> clusteringPlan = table
            .scheduleClustering(context, instantTime, extraMetadata);
        option = clusteringPlan.isPresent() ? Option.of(instantTime) : Option.empty();
        break;
      case COMPACT:
        LOG.info("Scheduling compaction at instant time: {}", instantTime);
        Option<HoodieCompactionPlan> compactionPlan = table
            .scheduleCompaction(context, instantTime, extraMetadata);
        option = compactionPlan.isPresent() ? Option.of(instantTime) : Option.empty();
        break;
      case LOG_COMPACT:
        LOG.info("Scheduling log compaction at instant time: {}", instantTime);
        Option<HoodieCompactionPlan> logCompactionPlan = table
            .scheduleLogCompaction(context, instantTime, extraMetadata);
        option = logCompactionPlan.isPresent() ? Option.of(instantTime) : Option.empty();
        break;
      case CLEAN:
        LOG.info("Scheduling cleaning at instant time: {}", instantTime);
        Option<HoodieCleanerPlan> cleanerPlan = table
            .scheduleCleaning(context, instantTime, extraMetadata);
        option = cleanerPlan.isPresent() ? Option.of(instantTime) : Option.empty();
        break;
      default:
        throw new IllegalArgumentException("Invalid TableService " + tableServiceType);
    }

    Option<String> instantRange = delegateToTableServiceManager(tableServiceType, table);
    if (instantRange.isPresent()) {
      LOG.info("Delegate instant [{}] to table service manager", instantRange.get());
    }

    return option;
  }

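  /**
   * Creates the engine-specific {@link HoodieTable} instance for the given write config and Hadoop configuration.
   */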
  protected abstract HoodieTable createTable(HoodieWriteConfig config, Configuration hadoopConf);

  /**
   * Executes a clustering plan on a table, serially before or after an insert/upsert action.
   * Schedules and executes clustering inline.
   */
  protected Option<String> inlineClustering(Option<Map<String, String>> extraMetadata) {
    Option<String> clusteringInstantOpt = inlineScheduleClustering(extraMetadata);
    clusteringInstantOpt.ifPresent(clusteringInstant -> {
      // inline cluster should auto commit as the user is never given control
      cluster(clusteringInstant, true);
    });
    return clusteringInstantOpt;
  }

  private void inlineClustering(HoodieTable table, Option<Map<String, String>> extraMetadata) {
    if (shouldDelegateToTableServiceManager(config, ActionType.replacecommit)) {
      scheduleClustering(extraMetadata);
    } else {
      runAnyPendingClustering(table);
      inlineClustering(extraMetadata);
    }
  }

  /**
   * Schedules clustering inline.
   *
   * @param extraMetadata extra metadata to use.
   * @return clustering instant if scheduled.
   */
  protected Option<String> inlineScheduleClustering(Option<Map<String, String>> extraMetadata) {
    return scheduleClustering(extraMetadata);
  }

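  /**
   * Executes any clustering plans that are still pending on the active timeline.
   */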
  protected void runAnyPendingClustering(HoodieTable table) {
    table.getActiveTimeline().filterPendingReplaceTimeline().getInstants().forEach(instant -> {
      Option<Pair<HoodieInstant, HoodieClusteringPlan>> instantPlan = ClusteringUtils.getClusteringPlan(table.getMetaClient(), instant);
      if (instantPlan.isPresent()) {
        LOG.info("Running pending clustering at instant {}", instantPlan.get().getLeft());
        cluster(instant.getTimestamp(), true);
      }
    });
  }

  /**
   * Clean up any stale/old files/data lying around (either on file storage or index storage) based on the
   * configurations and CleaningPolicy used. (typically files that no longer can be used by a running query can be
   * cleaned). This API provides the flexibility to schedule clean instant asynchronously via
   * {@link BaseHoodieTableServiceClient#scheduleTableService(String, Option, TableServiceType)} and disable inline scheduling
   * of clean.
   *
   * @param cleanInstantTime instant time for clean.
   * @param scheduleInline   true if needs to be scheduled inline. false otherwise.
   */
  @Nullable
  @Deprecated
  public HoodieCleanMetadata clean(String cleanInstantTime, boolean scheduleInline, boolean skipLocking) throws HoodieIOException {
    return clean(cleanInstantTime, scheduleInline);
  }

  /**
   * Clean up any stale/old files/data lying around (either on file storage or index storage) based on the
   * configurations and CleaningPolicy used. (typically files that no longer can be used by a running query can be
   * cleaned). This API provides the flexibility to schedule clean instant asynchronously via
   * {@link BaseHoodieTableServiceClient#scheduleTableService(String, Option, TableServiceType)} and disable inline scheduling
   * of clean.
   *
   * @param cleanInstantTime instant time for clean.
   * @param scheduleInline   true if needs to be scheduled inline. false otherwise.
   */
  @Nullable
  public HoodieCleanMetadata clean(String cleanInstantTime, boolean scheduleInline) throws HoodieIOException {
    if (!tableServicesEnabled(config)) {
      return null;
    }
    final Timer.Context timerContext = metrics.getCleanCtx();
    CleanerUtils.rollbackFailedWrites(config.getFailedWritesCleanPolicy(),
        HoodieTimeline.CLEAN_ACTION, () -> rollbackFailedWrites());

    HoodieTable table = createTable(config, hadoopConf);
    if (config.allowMultipleCleans() || !table.getActiveTimeline().getCleanerTimeline().filterInflightsAndRequested().firstInstant().isPresent()) {
      LOG.info("Cleaner started");
      // proceed only if multiple clean schedules are enabled or if there are no pending cleans.
      if (scheduleInline) {
        scheduleTableServiceInternal(cleanInstantTime, Option.empty(), TableServiceType.CLEAN);
        table.getMetaClient().reloadActiveTimeline();
      }

      if (shouldDelegateToTableServiceManager(config, ActionType.clean)) {
        LOG.warn("Cleaning is not yet supported with Table Service Manager.");
        return null;
      }
    }

    // Proceeds to execute any requested or inflight clean instances in the timeline
    HoodieCleanMetadata metadata = table.clean(context, cleanInstantTime);
    if (timerContext != null && metadata != null) {
      long durationMs = metrics.getDurationInMs(timerContext.stop());
      metrics.updateCleanMetrics(durationMs, metadata.getTotalFilesDeleted());
      LOG.info("Cleaned " + metadata.getTotalFilesDeleted() + " files"
          + " Earliest Retained Instant :" + metadata.getEarliestCommitToRetain()
          + " cleanerElapsedMs" + durationMs);
    }
    releaseResources(cleanInstantTime);
    return metadata;
  }

  /**
   * Trigger archival for the table. This ensures that the number of commits does not explode
   * and keep increasing unbounded over time.
   *
   * @param table table to run archival on.
   */
  protected void archive(HoodieTable table) {
    if (!tableServicesEnabled(config)) {
      return;
    }
    try {
      final Timer.Context timerContext = metrics.getArchiveCtx();
      // We cannot have unbounded commit files. Archive commits if we have to archive.

      // Reload table timeline to reflect the latest commits,
      // since some table services (e.g., cleaning) may have executed right before the archiving.
      table.getMetaClient().reloadActiveTimeline();
      HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(config, table);
      int instantsToArchive = archiver.archiveIfRequired(context, true);
      if (timerContext != null) {
        long durationMs = metrics.getDurationInMs(timerContext.stop());
        this.metrics.updateArchiveMetrics(durationMs, instantsToArchive);
      }
    } catch (IOException ioe) {
      throw new HoodieIOException("Failed to archive", ioe);
    }
  }

  /**
   * Get inflight timeline excluding compaction and clustering.
   *
   * @param metaClient meta client to read the timeline from.
   * @return inflight timeline excluding compaction and clustering instants.
   */
  private HoodieTimeline getInflightTimelineExcludeCompactionAndClustering(HoodieTableMetaClient metaClient) {
    HoodieTimeline inflightTimelineWithReplaceCommit = metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
    HoodieTimeline inflightTimelineExcludeClusteringCommit = inflightTimelineWithReplaceCommit.filter(instant -> {
      if (instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) {
        Option<Pair<HoodieInstant, HoodieClusteringPlan>> instantPlan = ClusteringUtils.getClusteringPlan(metaClient, instant);
        return !instantPlan.isPresent();
      } else {
        return true;
      }
    });
    return inflightTimelineExcludeClusteringCommit;
  }

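  /**
   * Fetches the pending rollback info, if any, for the given commit, ignoring compaction and clustering instants.
   */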
  protected Option<HoodiePendingRollbackInfo> getPendingRollbackInfo(HoodieTableMetaClient metaClient, String commitToRollback) {
    return getPendingRollbackInfo(metaClient, commitToRollback, true);
  }

  public Option<HoodiePendingRollbackInfo> getPendingRollbackInfo(HoodieTableMetaClient metaClient, String commitToRollback, boolean ignoreCompactionAndClusteringInstants) {
    return getPendingRollbackInfos(metaClient, ignoreCompactionAndClusteringInstants).getOrDefault(commitToRollback, Option.empty());
  }

  protected Map<String, Option<HoodiePendingRollbackInfo>> getPendingRollbackInfos(HoodieTableMetaClient metaClient) {
    return getPendingRollbackInfos(metaClient, true);
  }

  /**
   * Fetch map of pending commits to be rolled-back to {@link HoodiePendingRollbackInfo}.
   *
   * @param metaClient instance of {@link HoodieTableMetaClient} to use.
   * @param ignoreCompactionAndClusteringInstants if true, pending compaction and clustering instants are excluded.
   * @return map of instants to be rolled back to their rollback instant and rollback plan pair.
   */
  protected Map<String, Option<HoodiePendingRollbackInfo>> getPendingRollbackInfos(HoodieTableMetaClient metaClient, boolean ignoreCompactionAndClusteringInstants) {
    List<HoodieInstant> instants = metaClient.getActiveTimeline().filterPendingRollbackTimeline().getInstants();
    Map<String, Option<HoodiePendingRollbackInfo>> infoMap = new HashMap<>();
    for (HoodieInstant rollbackInstant : instants) {
      HoodieRollbackPlan rollbackPlan;
      try {
        rollbackPlan = RollbackUtils.getRollbackPlan(metaClient, rollbackInstant);
      } catch (Exception e) {
        if (rollbackInstant.isRequested()) {
          LOG.warn("Fetching rollback plan failed for " + rollbackInstant + ", deleting the plan since it's in REQUESTED state", e);
          try {
            metaClient.getActiveTimeline().deletePending(rollbackInstant);
          } catch (HoodieIOException he) {
            LOG.warn("Cannot delete " + rollbackInstant, he);
            continue;
          }
        } else {
          // Here we assume that if the rollback is inflight, the rollback plan is intact
          // in instant.rollback.requested.  The exception here can be due to other reasons.
          LOG.warn("Fetching rollback plan failed for " + rollbackInstant + ", skip the plan", e);
        }
        continue;
      }

      try {
        String action = rollbackPlan.getInstantToRollback().getAction();
        String instantToRollback = rollbackPlan.getInstantToRollback().getCommitTime();
        if (ignoreCompactionAndClusteringInstants) {
          if (!HoodieTimeline.COMPACTION_ACTION.equals(action)) {
            boolean isClustering = HoodieTimeline.REPLACE_COMMIT_ACTION.equals(action)
                && ClusteringUtils.getClusteringPlan(metaClient, new HoodieInstant(true, action, instantToRollback)).isPresent();
            if (!isClustering) {
              infoMap.putIfAbsent(instantToRollback, Option.of(new HoodiePendingRollbackInfo(rollbackInstant, rollbackPlan)));
            }
          }
        } else {
          infoMap.putIfAbsent(instantToRollback, Option.of(new HoodiePendingRollbackInfo(rollbackInstant, rollbackPlan)));
        }
      } catch (Exception e) {
        LOG.warn("Processing rollback plan failed for " + rollbackInstant + ", skip the plan", e);
      }
    }
    return infoMap;
  }

  /**
   * Rolls back the failed delta commits corresponding to the indexing action.
   * <p>
   * TODO(HUDI-5733): This should be cleaned up once the proper fix of rollbacks
   * in the metadata table is landed.
   *
   * @return {@code true} if rollback happens; {@code false} otherwise.
   */
  protected boolean rollbackFailedIndexingCommits() {
    HoodieTable table = createTable(config, hadoopConf);
    List<String> instantsToRollback = getFailedIndexingCommitsToRollbackForMetadataTable(table.getMetaClient());
    Map<String, Option<HoodiePendingRollbackInfo>> pendingRollbacks = getPendingRollbackInfos(table.getMetaClient());
    instantsToRollback.forEach(entry -> pendingRollbacks.putIfAbsent(entry, Option.empty()));
    rollbackFailedWrites(pendingRollbacks);
    return !pendingRollbacks.isEmpty();
  }

  private List<String> getFailedIndexingCommitsToRollbackForMetadataTable(HoodieTableMetaClient metaClient) {
    if (!isMetadataTable(metaClient.getBasePath())) {
      return Collections.emptyList();
    }
    HoodieTableMetaClient dataMetaClient = HoodieTableMetaClient.builder()
        .setBasePath(HoodieTableMetadata.getDatasetBasePath(config.getBasePath()))
        .setConf(metaClient.getStorageConf().newInstance())
        .build();
    HoodieTimeline dataIndexTimeline = dataMetaClient.getActiveTimeline().filter(instant -> instant.getAction().equals(HoodieTimeline.INDEXING_ACTION));

    Stream<HoodieInstant> inflightInstantsStream = metaClient.getCommitsTimeline()
        .filter(instant -> !instant.isCompleted() && isIndexingCommit(dataIndexTimeline, instant.getTimestamp()))
        .getInstantsAsStream();
    return inflightInstantsStream.filter(instant -> {
      try {
        return heartbeatClient.isHeartbeatExpired(instant.getTimestamp());
      } catch (IOException io) {
        throw new HoodieException("Failed to check heartbeat for instant " + instant, io);
      }
    }).map(HoodieInstant::getTimestamp).collect(Collectors.toList());
  }

  /**
   * Rollback all failed writes.
   *
   * @return true if rollback was triggered. false otherwise.
   */
  protected Boolean rollbackFailedWrites() {
    HoodieTable table = createTable(config, hadoopConf);
    List<String> instantsToRollback = getInstantsToRollback(table.getMetaClient(), config.getFailedWritesCleanPolicy(), Option.empty());
    Map<String, Option<HoodiePendingRollbackInfo>> pendingRollbacks = getPendingRollbackInfos(table.getMetaClient());
    instantsToRollback.forEach(entry -> pendingRollbacks.putIfAbsent(entry, Option.empty()));
    rollbackFailedWrites(pendingRollbacks);
    return !pendingRollbacks.isEmpty();
  }

  protected void rollbackFailedWrites(Map<String, Option<HoodiePendingRollbackInfo>> instantsToRollback) {
    rollbackFailedWrites(instantsToRollback, false);
  }

  protected void rollbackFailedWrites(Map<String, Option<HoodiePendingRollbackInfo>> instantsToRollback, boolean skipLocking) {
    // sort in reverse order of commit times
    LinkedHashMap<String, Option<HoodiePendingRollbackInfo>> reverseSortedRollbackInstants = instantsToRollback.entrySet()
        .stream().sorted((i1, i2) -> i2.getKey().compareTo(i1.getKey()))
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new));
    boolean isMetadataTable = isMetadataTable(basePath);
    for (Map.Entry<String, Option<HoodiePendingRollbackInfo>> entry : reverseSortedRollbackInstants.entrySet()) {
      if (!isMetadataTable && HoodieTimeline.compareTimestamps(entry.getKey(), HoodieTimeline.LESSER_THAN_OR_EQUALS,
          HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS)) {
        // do we need to handle failed rollback of a bootstrap
        rollbackFailedBootstrap();
        HeartbeatUtils.deleteHeartbeatFile(storage, basePath, entry.getKey(), config);
        break;
      } else {
        rollback(entry.getKey(), entry.getValue(), skipLocking);
        HeartbeatUtils.deleteHeartbeatFile(storage, basePath, entry.getKey(), config);
      }
    }
  }

  protected List<String> getInstantsToRollback(HoodieTableMetaClient metaClient, HoodieFailedWritesCleaningPolicy cleaningPolicy, Option<String> curInstantTime) {
    Stream<HoodieInstant> inflightInstantsStream = getInflightTimelineExcludeCompactionAndClustering(metaClient)
        .getReverseOrderedInstants();
    if (cleaningPolicy.isEager()) {
      // Metadata table uses eager cleaning policy, but we need to exclude inflight delta commits
      // from the async indexer (`HoodieIndexer`).
      // TODO(HUDI-5733): This should be cleaned up once the proper fix of rollbacks in the
      //  metadata table is landed.
      if (metaClient.isMetadataTable()) {
        HoodieTableMetaClient dataMetaClient = HoodieTableMetaClient.builder()
            .setBasePath(HoodieTableMetadata.getDatasetBasePath(config.getBasePath()))
            .setConf(metaClient.getStorageConf().newInstance())
            .build();
        HoodieTimeline dataIndexTimeline = dataMetaClient.getActiveTimeline().filter(instant -> instant.getAction().equals(HoodieTimeline.INDEXING_ACTION));
        return inflightInstantsStream.map(HoodieInstant::getTimestamp).filter(entry -> {
          if (curInstantTime.isPresent()) {
            return !entry.equals(curInstantTime.get());
          } else {
            return !isIndexingCommit(dataIndexTimeline, entry);
          }
        }).collect(Collectors.toList());
      }
      return inflightInstantsStream.map(HoodieInstant::getTimestamp).filter(entry -> {
        if (curInstantTime.isPresent()) {
          return !entry.equals(curInstantTime.get());
        } else {
          return true;
        }
      }).collect(Collectors.toList());
    } else if (cleaningPolicy.isLazy()) {
      return getInstantsToRollbackForLazyCleanPolicy(metaClient, inflightInstantsStream);
    } else if (cleaningPolicy.isNever()) {
      return Collections.emptyList();
    } else {
      throw new IllegalArgumentException("Invalid Failed Writes Cleaning Policy " + config.getFailedWritesCleanPolicy());
    }
  }

  private List<String> getInstantsToRollbackForLazyCleanPolicy(HoodieTableMetaClient metaClient,
                                                               Stream<HoodieInstant> inflightInstantsStream) {
    // Get expired instants, must store them into list before double-checking
    List<String> expiredInstants = inflightInstantsStream.filter(instant -> {
      try {
        // An instant transformed from inflight to completed has no heartbeat file and will be detected as an expired instant here
        return heartbeatClient.isHeartbeatExpired(instant.getTimestamp());
      } catch (IOException io) {
        throw new HoodieException("Failed to check heartbeat for instant " + instant, io);
      }
    }).map(HoodieInstant::getTimestamp).collect(Collectors.toList());

    if (!expiredInstants.isEmpty()) {
      // Only return instants that haven't been completed by other writers
      metaClient.reloadActiveTimeline();
      HoodieTimeline refreshedInflightTimeline = getInflightTimelineExcludeCompactionAndClustering(metaClient);
      return expiredInstants.stream().filter(refreshedInflightTimeline::containsInstant).collect(Collectors.toList());
    } else {
      return Collections.emptyList();
    }
  }

  /**
   * @param commitInstantTime   Instant time of the commit
   * @param pendingRollbackInfo pending rollback instant and plan if rollback failed from previous attempt.
   * @param skipLocking         if this is triggered by another parent transaction, locking can be skipped.
   * @throws HoodieRollbackException if rollback cannot be performed successfully
   * @Deprecated Rollback the inflight record changes with the given commit time. This
   * will be removed in future in favor of {@link BaseHoodieWriteClient#restoreToInstant(String, boolean)}
   */
  @Deprecated
  public boolean rollback(final String commitInstantTime, Option<HoodiePendingRollbackInfo> pendingRollbackInfo, boolean skipLocking) throws HoodieRollbackException {
    final String rollbackInstantTime = pendingRollbackInfo.map(entry -> entry.getRollbackInstant().getTimestamp())
        .orElseGet(() -> createNewInstantTime(!skipLocking));
    return rollback(commitInstantTime, pendingRollbackInfo, rollbackInstantTime, skipLocking);
  }

  /**
   * @param commitInstantTime   Instant time of the commit
   * @param pendingRollbackInfo pending rollback instant and plan if rollback failed from previous attempt.
   * @param skipLocking         if this is triggered by another parent transaction, locking can be skipped.
   * @throws HoodieRollbackException if rollback cannot be performed successfully
   * @Deprecated Rollback the inflight record changes with the given commit time. This
   * will be removed in future in favor of {@link BaseHoodieWriteClient#restoreToInstant(String, boolean)}
   */
  @Deprecated
  public boolean rollback(final String commitInstantTime, Option<HoodiePendingRollbackInfo> pendingRollbackInfo,
                          String rollbackInstantTime, boolean skipLocking) throws HoodieRollbackException {
    LOG.info("Begin rollback of instant " + commitInstantTime);
    final Timer.Context timerContext = this.metrics.getRollbackCtx();
    try {
      HoodieTable table = createTable(config, hadoopConf);
      Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstantsAsStream()
          .filter(instant -> HoodieActiveTimeline.EQUALS.test(instant.getTimestamp(), commitInstantTime))
          .findFirst());
      if (commitInstantOpt.isPresent() || pendingRollbackInfo.isPresent()) {
        LOG.info(String.format("Scheduling Rollback at instant time : %s "
                + "(exists in active timeline: %s), with rollback plan: %s",
            rollbackInstantTime, commitInstantOpt.isPresent(), pendingRollbackInfo.isPresent()));
        Option<HoodieRollbackPlan> rollbackPlanOption = pendingRollbackInfo.map(entry -> Option.of(entry.getRollbackPlan()))
            .orElseGet(() -> table.scheduleRollback(context, rollbackInstantTime, commitInstantOpt.get(), false,
                config.shouldRollbackUsingMarkers(), false));
        if (rollbackPlanOption.isPresent()) {
          // There can be a case where the inflight rollback failed after the instant files
          // are deleted for commitInstantTime, so that commitInstantOpt is empty as it is
          // not present in the timeline.  In such a case, the hoodie instant instance
          // is reconstructed to allow the rollback to be reattempted, and the deleteInstants
          // is set to false since they are already deleted.
          // Execute rollback
          HoodieRollbackMetadata rollbackMetadata = commitInstantOpt.isPresent()
              ? table.rollback(context, rollbackInstantTime, commitInstantOpt.get(), true, skipLocking)
              : table.rollback(context, rollbackInstantTime, new HoodieInstant(
                  true, rollbackPlanOption.get().getInstantToRollback().getAction(), commitInstantTime),
                  false, skipLocking);
          if (timerContext != null) {
            long durationInMs = metrics.getDurationInMs(timerContext.stop());
            metrics.updateRollbackMetrics(durationInMs, rollbackMetadata.getTotalFilesDeleted());
          }
          return true;
        } else {
          throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitInstantTime);
        }
      } else {
        LOG.warn("Cannot find instant " + commitInstantTime + " in the timeline, for rollback");
        return false;
      }
    } catch (Exception e) {
      throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitInstantTime, e);
    }
  }

  /**
   * Main API to rollback failed bootstrap.
   */
  public void rollbackFailedBootstrap() {
    LOG.info("Rolling back pending bootstrap if present");
    HoodieTable table = createTable(config, hadoopConf);
    HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingMajorAndMinorCompaction();
    Option<String> instant = Option.fromJavaOptional(
        inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp).findFirst());

    if (instant.isPresent() && HoodieTimeline.compareTimestamps(instant.get(), HoodieTimeline.LESSER_THAN_OR_EQUALS,
        HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS)) {
      LOG.info("Found pending bootstrap instants. Rolling them back");
      table.rollbackBootstrap(context, createNewInstantTime());
      LOG.info("Finished rolling back pending bootstrap");
    }

    // if bootstrap failed, lets delete metadata and restart from scratch
    HoodieTableMetadataUtil.deleteMetadataTable(config.getBasePath(), context);
  }

  /**
   * Some writers use SparkAllowUpdateStrategy and treat replacecommit plan as revocable plan.
   * In those cases, their ConflictResolutionStrategy implementation should run conflict resolution
   * even for clustering operations.
   *
   * @return boolean
   */
  protected boolean isPreCommitRequired() {
    return this.config.getWriteConflictResolutionStrategy().isPreCommitRequired();
  }

  private Option<String> delegateToTableServiceManager(TableServiceType tableServiceType, HoodieTable table) {
    if (!config.getTableServiceManagerConfig().isEnabledAndActionSupported(ActionType.compaction)) {
      return Option.empty();
    }
    HoodieTableServiceManagerClient tableServiceManagerClient = new HoodieTableServiceManagerClient(table.getMetaClient(), config.getTableServiceManagerConfig());
    switch (tableServiceType) {
      case COMPACT:
        return tableServiceManagerClient.executeCompaction();
      case CLUSTER:
        return tableServiceManagerClient.executeClustering();
      case CLEAN:
        return tableServiceManagerClient.executeClean();
      default:
        LOG.info("Not supported delegate to table service manager, tableServiceType : " + tableServiceType.getAction());
        return Option.empty();
    }
  }

  @Override
  public void close() {
    AsyncArchiveService.forceShutdown(asyncArchiveService);
    asyncArchiveService = null;
    AsyncCleanerService.forceShutdown(asyncCleanerService);
    asyncCleanerService = null;
    // Stop timeline-server if running
    super.close();
  }

  protected void handleWriteErrors(List<HoodieWriteStat> writeStats, TableServiceType tableServiceType) {
    if (writeStats.stream().mapToLong(HoodieWriteStat::getTotalWriteErrors).sum() > 0) {
      String message = tableServiceType + " failed to write to files:"
          + writeStats.stream().filter(s -> s.getTotalWriteErrors() > 0L).map(HoodieWriteStat::getFileId).collect(Collectors.joining(","));
      switch (tableServiceType) {
        case CLUSTER:
          throw new HoodieClusteringException(message);
        case LOG_COMPACT:
          throw new HoodieLogCompactException(message);
        case COMPACT:
          throw new HoodieCompactionException(message);
        default:
          throw new HoodieException(message);
      }
    }
  }

  /**
   * Called after each commit of a compaction or clustering table service,
   * to release any resources used.
   */
  protected void releaseResources(String instantTime) {
    // do nothing here
  }
}