All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.client.BaseHoodieWriteClient Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.client;

import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieIndexCommitMetadata;
import org.apache.hudi.avro.model.HoodieIndexPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.callback.HoodieWriteCommitCallback;
import org.apache.hudi.callback.common.HoodieWriteCommitCallbackMessage;
import org.apache.hudi.callback.util.HoodieCommitCallbackFactory;
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
import org.apache.hudi.client.heartbeat.HeartbeatUtils;
import org.apache.hudi.client.utils.TransactionUtils;
import org.apache.hudi.common.HoodiePendingRollbackInfo;
import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.ActionType;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.TableServiceType;
import org.apache.hudi.common.model.WriteConcurrencyMode;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTableVersion;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineUtils;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.ClusteringUtils;
import org.apache.hudi.common.util.CommitUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieArchivalConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieCommitException;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieRestoreException;
import org.apache.hudi.exception.HoodieRollbackException;
import org.apache.hudi.exception.HoodieSavepointException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.internal.schema.InternalSchema;
import org.apache.hudi.internal.schema.Type;
import org.apache.hudi.internal.schema.action.InternalSchemaChangeApplier;
import org.apache.hudi.internal.schema.action.TableChange;
import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter;
import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager;
import org.apache.hudi.internal.schema.utils.AvroSchemaEvolutionUtils;
import org.apache.hudi.internal.schema.utils.InternalSchemaUtils;
import org.apache.hudi.internal.schema.utils.SerDeHelper;
import org.apache.hudi.keygen.constant.KeyGeneratorType;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metadata.MetadataPartitionType;
import org.apache.hudi.metrics.HoodieMetrics;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.restore.RestoreUtils;
import org.apache.hudi.table.action.savepoint.SavepointHelpers;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import org.apache.hudi.table.upgrade.SupportsUpgradeDowngrade;
import org.apache.hudi.table.upgrade.UpgradeDowngrade;
import org.apache.hudi.util.CommonClientUtils;

import com.codahale.metrics.Timer;
import org.apache.avro.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.stream.Collectors;

import static org.apache.hudi.avro.AvroSchemaUtils.getAvroRecordQualifiedName;
import static org.apache.hudi.common.model.HoodieCommitMetadata.SCHEMA_KEY;
import static org.apache.hudi.common.table.timeline.InstantComparison.LESSER_THAN_OR_EQUALS;
import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeCommitMetadata;
import static org.apache.hudi.metadata.HoodieTableMetadata.getMetadataTableBasePath;

/**
 * Abstract Write Client providing functionality for performing commit, index updates and rollback
 * Reused for regular write operations like upsert/insert/bulk-insert as well as bootstrap
 *
 * @param  Type of data
 * @param  Type of inputs
 * @param  Type of keys
 * @param  Type of outputs
 */
public abstract class BaseHoodieWriteClient extends BaseHoodieClient implements RunsTableService {

  protected static final String LOOKUP_STR = "lookup";
  private static final long serialVersionUID = 1L;
  private static final Logger LOG = LoggerFactory.getLogger(BaseHoodieWriteClient.class);

  private final transient HoodieIndex index;
  private final SupportsUpgradeDowngrade upgradeDowngradeHelper;
  private transient WriteOperationType operationType;
  private transient HoodieWriteCommitCallback commitCallback;

  protected transient Timer.Context writeTimer = null;

  protected Option>> lastCompletedTxnAndMetadata = Option.empty();
  protected Set pendingInflightAndRequestedInstants = Collections.emptySet();

  protected BaseHoodieTableServiceClient tableServiceClient;

  /**
   * Create a write client, with new hudi index.
   * @param context HoodieEngineContext
   * @param writeConfig instance of HoodieWriteConfig
   * @param upgradeDowngradeHelper engine-specific instance of {@link SupportsUpgradeDowngrade}
   */
  @Deprecated
  public BaseHoodieWriteClient(HoodieEngineContext context,
                               HoodieWriteConfig writeConfig,
                               SupportsUpgradeDowngrade upgradeDowngradeHelper) {
    this(context, writeConfig, Option.empty(), upgradeDowngradeHelper);
  }

  /**
   * Create a write client, allows to specify all parameters.
   *
   * @param context         HoodieEngineContext
   * @param writeConfig     instance of HoodieWriteConfig
   * @param timelineService Timeline Service that runs as part of write client.
   */
  @Deprecated
  public BaseHoodieWriteClient(HoodieEngineContext context,
                               HoodieWriteConfig writeConfig,
                               Option timelineService,
                               SupportsUpgradeDowngrade upgradeDowngradeHelper) {
    super(context, writeConfig, timelineService);
    this.index = createIndex(writeConfig);
    this.upgradeDowngradeHelper = upgradeDowngradeHelper;
    this.metrics.emitIndexTypeMetrics(config.getIndexType().ordinal());
  }

  protected abstract HoodieIndex createIndex(HoodieWriteConfig writeConfig);

  public void setOperationType(WriteOperationType operationType) {
    this.operationType = operationType;
  }

  public WriteOperationType getOperationType() {
    return this.operationType;
  }

  public BaseHoodieTableServiceClient getTableServiceClient() {
    return tableServiceClient;
  }

  /**
   * Commit changes performed at the given instantTime marker.
   */
  public boolean commit(String instantTime, O writeStatuses) {
    return commit(instantTime, writeStatuses, Option.empty());
  }

  /**
   *
   * Commit changes performed at the given instantTime marker.
   */
  public boolean commit(String instantTime, O writeStatuses, Option> extraMetadata) {
    HoodieTableMetaClient metaClient = createMetaClient(false);
    String actionType = metaClient.getCommitActionType();
    return commit(instantTime, writeStatuses, extraMetadata, actionType, Collections.emptyMap());
  }

  public boolean commit(String instantTime, O writeStatuses, Option> extraMetadata,
                        String commitActionType, Map> partitionToReplacedFileIds) {
    return commit(instantTime, writeStatuses, extraMetadata, commitActionType, partitionToReplacedFileIds,
        Option.empty());
  }

  public abstract boolean commit(String instantTime, O writeStatuses, Option> extraMetadata,
                                 String commitActionType, Map> partitionToReplacedFileIds,
                                 Option> extraPreCommitFunc);

  public boolean commitStats(String instantTime, List stats, Option> extraMetadata,
                             String commitActionType) {
    return commitStats(instantTime, stats, extraMetadata, commitActionType, Collections.emptyMap(), Option.empty());
  }

  public boolean commitStats(String instantTime, List stats,
                             Option> extraMetadata,
                             String commitActionType, Map> partitionToReplaceFileIds,
                             Option> extraPreCommitFunc) {
    // Skip the empty commit if not allowed
    if (!config.allowEmptyCommit() && stats.isEmpty()) {
      return true;
    }
    LOG.info("Committing " + instantTime + " action " + commitActionType);
    // Create a Hoodie table which encapsulated the commits and files visible
    HoodieTable table = createTable(config);
    HoodieCommitMetadata metadata = CommitUtils.buildMetadata(stats, partitionToReplaceFileIds,
        extraMetadata, operationType, config.getWriteSchema(), commitActionType);
    HoodieInstant inflightInstant = table.getMetaClient().createNewInstant(State.INFLIGHT, commitActionType, instantTime);
    HeartbeatUtils.abortIfHeartbeatExpired(instantTime, table, heartbeatClient, config);
    this.txnManager.beginTransaction(Option.of(inflightInstant),
        lastCompletedTxnAndMetadata.isPresent() ? Option.of(lastCompletedTxnAndMetadata.get().getLeft()) : Option.empty());
    try {
      preCommit(metadata);
      if (extraPreCommitFunc.isPresent()) {
        extraPreCommitFunc.get().accept(table.getMetaClient(), metadata);
      }
      commit(table, commitActionType, instantTime, metadata, stats);
      postCommit(table, metadata, instantTime, extraMetadata);
      LOG.info("Committed " + instantTime);
    } catch (IOException e) {
      throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime, e);
    } finally {
      this.txnManager.endTransaction(Option.of(inflightInstant));
      releaseResources(instantTime);
    }

    // trigger clean and archival.
    // Each internal call should ensure to lock if required.
    mayBeCleanAndArchive(table);
    // We don't want to fail the commit if hoodie.fail.writes.on.inline.table.service.exception is false. We catch warn if false
    try {
      // do this outside of lock since compaction, clustering can be time taking and we don't need a lock for the entire execution period
      runTableServicesInline(table, metadata, extraMetadata);
    } catch (Exception e) {
      if (config.isFailOnInlineTableServiceExceptionEnabled()) {
        throw e;
      }
      LOG.warn("Inline compaction or clustering failed with exception: " + e.getMessage()
          + ". Moving further since \"hoodie.fail.writes.on.inline.table.service.exception\" is set to false.");
    }

    emitCommitMetrics(instantTime, metadata, commitActionType);

    // callback if needed.
    if (config.writeCommitCallbackOn()) {
      if (null == commitCallback) {
        commitCallback = HoodieCommitCallbackFactory.create(config);
      }
      commitCallback.call(new HoodieWriteCommitCallbackMessage(
          instantTime, config.getTableName(), config.getBasePath(), stats, Option.of(commitActionType), extraMetadata));
    }
    return true;
  }

  protected void commit(HoodieTable table, String commitActionType, String instantTime, HoodieCommitMetadata metadata,
                        List stats) throws IOException {
    LOG.info("Committing " + instantTime + " action " + commitActionType);
    HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
    // Finalize write
    finalizeWrite(table, instantTime, stats);
    // do save internal schema to support Implicitly add columns in write process
    if (!metadata.getExtraMetadata().containsKey(SerDeHelper.LATEST_SCHEMA)
        && metadata.getExtraMetadata().containsKey(SCHEMA_KEY) && table.getConfig().getSchemaEvolutionEnable()) {
      saveInternalSchema(table, instantTime, metadata);
    }
    // update Metadata table
    writeTableMetadata(table, instantTime, metadata);
    activeTimeline.saveAsComplete(false, table.getMetaClient().createNewInstant(HoodieInstant.State.INFLIGHT, commitActionType, instantTime),
        serializeCommitMetadata(table.getMetaClient().getCommitMetadataSerDe(), metadata));
  }

  // Save internal schema
  private void saveInternalSchema(HoodieTable table, String instantTime, HoodieCommitMetadata metadata) {
    TableSchemaResolver schemaUtil = new TableSchemaResolver(table.getMetaClient());
    String historySchemaStr = schemaUtil.getTableHistorySchemaStrFromCommitMetadata().orElse("");
    FileBasedInternalSchemaStorageManager schemasManager = new FileBasedInternalSchemaStorageManager(table.getMetaClient());
    if (!historySchemaStr.isEmpty() || Boolean.parseBoolean(config.getString(HoodieCommonConfig.RECONCILE_SCHEMA.key()))) {
      InternalSchema internalSchema;
      Schema avroSchema = HoodieAvroUtils.createHoodieWriteSchema(config.getSchema(), config.allowOperationMetadataField());
      if (historySchemaStr.isEmpty()) {
        internalSchema = SerDeHelper.fromJson(config.getInternalSchema()).orElseGet(() -> AvroInternalSchemaConverter.convert(avroSchema));
        internalSchema.setSchemaId(Long.parseLong(instantTime));
      } else {
        internalSchema = InternalSchemaUtils.searchSchema(Long.parseLong(instantTime),
            SerDeHelper.parseSchemas(historySchemaStr));
      }
      InternalSchema evolvedSchema = AvroSchemaEvolutionUtils.reconcileSchema(avroSchema, internalSchema, config.getBooleanOrDefault(HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS));
      if (evolvedSchema.equals(internalSchema)) {
        metadata.addMetadata(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(evolvedSchema));
        //TODO save history schema by metaTable
        schemasManager.persistHistorySchemaStr(instantTime, historySchemaStr.isEmpty() ? SerDeHelper.inheritSchemas(evolvedSchema, "") : historySchemaStr);
      } else {
        evolvedSchema.setSchemaId(Long.parseLong(instantTime));
        String newSchemaStr = SerDeHelper.toJson(evolvedSchema);
        metadata.addMetadata(SerDeHelper.LATEST_SCHEMA, newSchemaStr);
        schemasManager.persistHistorySchemaStr(instantTime, SerDeHelper.inheritSchemas(evolvedSchema, historySchemaStr));
      }
      // update SCHEMA_KEY
      metadata.addMetadata(SCHEMA_KEY, AvroInternalSchemaConverter.convert(evolvedSchema, avroSchema.getFullName()).toString());
    }
  }

  protected HoodieTable createTableAndValidate(HoodieWriteConfig writeConfig,
                                               BiFunction createTableFn) {
    HoodieTable table = createTableFn.apply(writeConfig, context);
    CommonClientUtils.validateTableVersion(table.getMetaClient().getTableConfig(), writeConfig);
    return table;
  }

  @FunctionalInterface
  protected interface TriFunction {
    R apply(T t, U u, V v);
  }

  protected HoodieTable createTableAndValidate(HoodieWriteConfig writeConfig,
                                               HoodieTableMetaClient metaClient,
                                               TriFunction createTableFn) {
    HoodieTable table = createTableFn.apply(writeConfig, context, metaClient);
    CommonClientUtils.validateTableVersion(table.getMetaClient().getTableConfig(), writeConfig);
    return table;
  }

  protected abstract HoodieTable createTable(HoodieWriteConfig config);

  protected abstract HoodieTable createTable(HoodieWriteConfig config, HoodieTableMetaClient metaClient);

  void emitCommitMetrics(String instantTime, HoodieCommitMetadata metadata, String actionType) {
    if (writeTimer != null) {
      long durationInMs = metrics.getDurationInMs(writeTimer.stop());
      // instantTime could be a non-standard value, so use `parseDateFromInstantTimeSafely`
      // e.g. INIT_INSTANT_TS, METADATA_BOOTSTRAP_INSTANT_TS and FULL_BOOTSTRAP_INSTANT_TS in HoodieTimeline
      TimelineUtils.parseDateFromInstantTimeSafely(instantTime).ifPresent(parsedInstant ->
          metrics.updateCommitMetrics(parsedInstant.getTime(), durationInMs, metadata, actionType)
      );
      writeTimer = null;
    }
  }

  /**
   * Any pre-commit actions like conflict resolution goes here.
   * @param metadata commit metadata for which pre commit is being invoked.
   */
  protected void preCommit(HoodieCommitMetadata metadata) {
    // Create a Hoodie table after startTxn which encapsulated the commits and files visible.
    // Important to create this after the lock to ensure the latest commits show up in the timeline without need for reload
    HoodieTable table = createTable(config);
    resolveWriteConflict(table, metadata, this.pendingInflightAndRequestedInstants);
  }

  /**
   * Filter out HoodieRecords that already exists in the output folder. This is useful in deduplication.
   *
   * @param hoodieRecords Input Hoodie records.
   * @return A subset of hoodieRecords, with existing records filtered out.
   */
  public abstract I filterExists(I hoodieRecords);

  /**
   * Main API to run bootstrap to hudi.
   */
  public void bootstrap(Option> extraMetadata) {
    if (config.getWriteConcurrencyMode().supportsMultiWriter()) {
      throw new HoodieException("Cannot bootstrap the table in multi-writer mode");
    }
    HoodieTable table = initTable(WriteOperationType.UPSERT, Option.ofNullable(HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS));
    tableServiceClient.rollbackFailedBootstrap();
    table.bootstrap(context, extraMetadata);
  }

  /**
   * Upsert a batch of new records into Hoodie table at the supplied instantTime.
   *
   * @param records hoodieRecords to upsert
   * @param instantTime Instant time of the commit
   * @return WriteStatus to inspect errors and counts
   */
  public abstract O upsert(I records, final String instantTime);

  /**
   * Upserts the given prepared records into the Hoodie table, at the supplied instantTime.
   * 

* This implementation requires that the input records are already tagged, and de-duped if needed. * * @param preppedRecords Prepared HoodieRecords to upsert * @param instantTime Instant time of the commit * @return Collection of WriteStatus to inspect errors and counts */ public abstract O upsertPreppedRecords(I preppedRecords, final String instantTime); /** * Inserts the given HoodieRecords, into the table. This API is intended to be used for normal writes. *

* This implementation skips the index check and is able to leverage benefits such as small file handling/blocking * alignment, as with upsert(), by profiling the workload * * @param records HoodieRecords to insert * @param instantTime Instant time of the commit * @return Collection of WriteStatus to inspect errors and counts */ public abstract O insert(I records, final String instantTime); /** * Inserts the given prepared records into the Hoodie table, at the supplied instantTime. *

* This implementation skips the index check, skips de-duping and is able to leverage benefits such as small file * handling/blocking alignment, as with insert(), by profiling the workload. The prepared HoodieRecords should be * de-duped if needed. * * @param preppedRecords HoodieRecords to insert * @param instantTime Instant time of the commit * @return Collection of WriteStatus to inspect errors and counts */ public abstract O insertPreppedRecords(I preppedRecords, final String instantTime); /** * Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie * table for the very first time (e.g: converting an existing table to Hoodie). *

* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control * the numbers of files with less memory compared to the {@link BaseHoodieWriteClient#insert(I, String)} * * @param records HoodieRecords to insert * @param instantTime Instant time of the commit * @return Collection of WriteStatus to inspect errors and counts */ public abstract O bulkInsert(I records, final String instantTime); /** * Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie * table for the very first time (e.g: converting an existing table to Hoodie). *

* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control * the numbers of files with less memory compared to the {@link BaseHoodieWriteClient#insert(I, String)}. Optionally * it allows users to specify their own partitioner. If specified then it will be used for repartitioning records. See * {@link BulkInsertPartitioner}. * * @param records HoodieRecords to insert * @param instantTime Instant time of the commit * @param userDefinedBulkInsertPartitioner If specified then it will be used to partition input records before they are inserted * into hoodie. * @return Collection of WriteStatus to inspect errors and counts */ public abstract O bulkInsert(I records, final String instantTime, Option userDefinedBulkInsertPartitioner); /** * Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie * table for the very first time (e.g: converting an existing table to Hoodie). The input records should contain no * duplicates if needed. *

* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control * the numbers of files with less memory compared to the {@link BaseHoodieWriteClient#insert(I, String)}. Optionally * it allows users to specify their own partitioner. If specified then it will be used for repartitioning records. See * {@link BulkInsertPartitioner}. * * @param preppedRecords HoodieRecords to insert * @param instantTime Instant time of the commit * @param bulkInsertPartitioner If specified then it will be used to partition input records before they are inserted * into hoodie. * @return Collection of WriteStatus to inspect errors and counts */ public abstract O bulkInsertPreppedRecords(I preppedRecords, final String instantTime, Option bulkInsertPartitioner); /** * Deletes a list of {@link HoodieKey}s from the Hoodie table, at the supplied instantTime {@link HoodieKey}s will be * de-duped and non-existent keys will be removed before deleting. * * @param keys {@link List} of {@link HoodieKey}s to be deleted * @param instantTime Commit time handle * @return Collection of WriteStatus to inspect errors and counts */ public abstract O delete(K keys, final String instantTime); /** * Delete records from Hoodie table based on {@link HoodieKey} and {@link org.apache.hudi.common.model.HoodieRecordLocation} specified in * preppedRecords. * * @param preppedRecords Empty records with key and locator set. * @param instantTime Commit time handle. * @return Collection of WriteStatus to inspect errors and counts. */ public abstract O deletePrepped(I preppedRecords, final String instantTime); /** * Common method containing steps to be performed before write (upsert/insert/... * @param instantTime * @param writeOperationType * @param metaClient */ public void preWrite(String instantTime, WriteOperationType writeOperationType, HoodieTableMetaClient metaClient) { setOperationType(writeOperationType); this.lastCompletedTxnAndMetadata = txnManager.isLockRequired() ? TransactionUtils.getLastCompletedTxnInstantAndMetadata(metaClient) : Option.empty(); this.pendingInflightAndRequestedInstants = TransactionUtils.getInflightAndRequestedInstants(metaClient); this.pendingInflightAndRequestedInstants.remove(instantTime); tableServiceClient.setPendingInflightAndRequestedInstants(this.pendingInflightAndRequestedInstants); tableServiceClient.startAsyncCleanerService(this); tableServiceClient.startAsyncArchiveService(this); } /** * Common method containing steps to be performed after write (upsert/insert/..) operations including auto-commit. * @param result Commit Action Result * @param instantTime Instant Time * @param hoodieTable Hoodie Table * @return Write Status */ public O postWrite(HoodieWriteMetadata result, String instantTime, HoodieTable hoodieTable) { if (result.isCommitted()) { // Perform post commit operations. if (result.getFinalizeDuration().isPresent()) { metrics.updateFinalizeWriteMetrics(result.getFinalizeDuration().get().toMillis(), result.getWriteStats().get().size()); } postCommit(hoodieTable, result.getCommitMetadata().get(), instantTime, Option.empty()); mayBeCleanAndArchive(hoodieTable); emitCommitMetrics(instantTime, result.getCommitMetadata().get(), hoodieTable.getMetaClient().getCommitActionType()); } return result.getWriteStatuses(); } /** * Post Commit Hook. Derived classes use this method to perform post-commit processing * * @param table table to commit on * @param metadata Commit Metadata corresponding to committed instant * @param instantTime Instant Time * @param extraMetadata Additional Metadata passed by user */ protected void postCommit(HoodieTable table, HoodieCommitMetadata metadata, String instantTime, Option> extraMetadata) { try { context.setJobStatus(this.getClass().getSimpleName(),"Cleaning up marker directories for commit " + instantTime + " in table " + config.getTableName()); // Delete the marker directory for the instant. WriteMarkersFactory.get(config.getMarkersType(), table, instantTime) .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism()); } finally { this.heartbeatClient.stop(instantTime); } } /** * Triggers cleaning and archival for the table of interest. This method is called outside of locks. So, internal callers should ensure they acquire lock whereever applicable. * @param table instance of {@link HoodieTable} of interest. */ protected void mayBeCleanAndArchive(HoodieTable table) { autoCleanOnCommit(); autoArchiveOnCommit(table); } protected void runTableServicesInline(HoodieTable table, HoodieCommitMetadata metadata, Option> extraMetadata) { tableServiceClient.runTableServicesInline(table, metadata, extraMetadata); } protected void autoCleanOnCommit() { if (!config.isAutoClean()) { return; } if (config.isAsyncClean()) { LOG.info("Async cleaner has been spawned. Waiting for it to finish"); tableServiceClient.asyncClean(); LOG.info("Async cleaner has finished"); } else { LOG.info("Start to clean synchronously."); // Do not reuse instantTime for clean as metadata table requires all changes to have unique instant timestamps. clean(); } } protected void autoArchiveOnCommit(HoodieTable table) { if (!config.isAutoArchive()) { return; } if (config.isAsyncArchive()) { LOG.info("Async archiver has been spawned. Waiting for it to finish"); tableServiceClient.asyncArchive(); LOG.info("Async archiver has finished"); } else { LOG.info("Start to archive synchronously."); archive(table); } } /** * Run any pending compactions. */ public void runAnyPendingCompactions() { tableServiceClient.runAnyPendingCompactions(createTable(config)); } /** * Run any pending log compactions. */ public void runAnyPendingLogCompactions() { tableServiceClient.runAnyPendingLogCompactions(createTable(config)); } /** * Create a savepoint based on the latest commit action on the timeline. * * @param user User creating the savepoint * @param comment Comment for the savepoint */ public void savepoint(String user, String comment) { HoodieTable table = createTable(config); if (table.getCompletedCommitsTimeline().empty()) { throw new HoodieSavepointException("Could not savepoint. Commit timeline is empty"); } String latestCommit = table.getCompletedCommitsTimeline().lastInstant().get().requestedTime(); LOG.info("Savepointing latest commit " + latestCommit); savepoint(latestCommit, user, comment); } /** * Savepoint a specific commit instant time. Latest version of data files as of the passed in instantTime * will be referenced in the savepoint and will never be cleaned. The savepointed commit will never be rolledback or archived. *

* This gives an option to rollback the state to the savepoint anytime. Savepoint needs to be manually created and * deleted. *

* Savepoint should be on a commit that could not have been cleaned. * * @param instantTime Commit that should be savepointed * @param user User creating the savepoint * @param comment Comment for the savepoint */ public void savepoint(String instantTime, String user, String comment) { HoodieTable table = createTable(config); table.savepoint(context, instantTime, user, comment); } /** * Delete a savepoint based on the latest commit action on the savepoint timeline. */ public void deleteSavepoint() { HoodieTable table = createTable(config); HoodieTimeline savePointTimeline = table.getActiveTimeline().getSavePointTimeline(); if (savePointTimeline.empty()) { throw new HoodieSavepointException("Could not delete savepoint. Savepoint timeline is empty"); } String savepointTime = savePointTimeline.lastInstant().get().requestedTime(); LOG.info("Deleting latest savepoint time " + savepointTime); deleteSavepoint(savepointTime); } /** * Delete a savepoint that was created. Once the savepoint is deleted, the commit can be rolledback and cleaner may * clean up data files. * * @param savepointTime Savepoint time to delete */ public void deleteSavepoint(String savepointTime) { HoodieTable table = createTable(config); SavepointHelpers.deleteSavepoint(table, savepointTime); } /** * Restore the data to a savepoint based on the latest commit action on the savepoint timeline. */ public void restoreToSavepoint() { HoodieTable table = createTable(config); HoodieTimeline savePointTimeline = table.getActiveTimeline().getSavePointTimeline(); if (savePointTimeline.empty()) { throw new HoodieSavepointException("Could not restore to savepoint. Savepoint timeline is empty"); } String savepointTime = savePointTimeline.lastInstant().get().requestedTime(); LOG.info("Restoring to latest savepoint time " + savepointTime); restoreToSavepoint(savepointTime); } /** * Restore the data to the savepoint. * * WARNING: This rolls back recent commits and deleted data files and also pending compactions after savepoint time. * Queries accessing the files will mostly fail. This is expected to be a manual operation and no concurrent write or * compaction is expected to be running * * @param savepointTime Savepoint time to rollback to */ public void restoreToSavepoint(String savepointTime) { boolean initializeMetadataTableIfNecessary = config.isMetadataTableEnabled(); if (initializeMetadataTableIfNecessary) { try { // Delete metadata table directly when users trigger savepoint rollback if mdt existed and if the savePointTime is beforeTimelineStarts // or before the oldest compaction on MDT. // We cannot restore to before the oldest compaction on MDT as we don't have the basefiles before that time. HoodieTableMetaClient mdtMetaClient = HoodieTableMetaClient.builder() .setConf(storageConf.newInstance()) .setBasePath(getMetadataTableBasePath(config.getBasePath())).build(); Option oldestMdtCompaction = mdtMetaClient.getCommitTimeline().filterCompletedInstants().firstInstant(); boolean deleteMDT = false; if (oldestMdtCompaction.isPresent()) { if (LESSER_THAN_OR_EQUALS.test(savepointTime, oldestMdtCompaction.get().requestedTime())) { LOG.warn(String.format("Deleting MDT during restore to %s as the savepoint is older than oldest compaction %s on MDT", savepointTime, oldestMdtCompaction.get().requestedTime())); deleteMDT = true; } } // The instant required to sync rollback to MDT has been archived and the mdt syncing will be failed // So that we need to delete the whole MDT here. if (!deleteMDT) { HoodieInstant syncedInstant = mdtMetaClient.createNewInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, savepointTime); if (mdtMetaClient.getCommitsTimeline().isBeforeTimelineStarts(syncedInstant.requestedTime())) { LOG.warn(String.format("Deleting MDT during restore to %s as the savepoint is older than the MDT timeline %s", savepointTime, mdtMetaClient.getCommitsTimeline().firstInstant().get().requestedTime())); deleteMDT = true; } } if (deleteMDT) { HoodieTableMetadataUtil.deleteMetadataTable(config.getBasePath(), context); // rollbackToSavepoint action will try to bootstrap MDT at first but sync to MDT will fail at the current scenario. // so that we need to disable metadata initialized here. initializeMetadataTableIfNecessary = false; } } catch (Exception e) { // Metadata directory does not exist } } HoodieTable table = initTable(WriteOperationType.UNKNOWN, Option.empty(), initializeMetadataTableIfNecessary); SavepointHelpers.validateSavepointPresence(table, savepointTime); ValidationUtils.checkArgument(!config.shouldArchiveBeyondSavepoint(), "Restore is not supported when " + HoodieArchivalConfig.ARCHIVE_BEYOND_SAVEPOINT.key() + " is enabled"); restoreToInstant(savepointTime, initializeMetadataTableIfNecessary); SavepointHelpers.validateSavepointRestore(table, savepointTime); } @Deprecated public boolean rollback(final String commitInstantTime) throws HoodieRollbackException { HoodieTable table = initTable(WriteOperationType.UNKNOWN, Option.empty()); Option pendingRollbackInfo = tableServiceClient.getPendingRollbackInfo(table.getMetaClient(), commitInstantTime); return tableServiceClient.rollback(commitInstantTime, pendingRollbackInfo, false, false); } @Deprecated public boolean rollback(final String commitInstantTime, String rollbackInstantTimestamp) throws HoodieRollbackException { HoodieTable table = initTable(WriteOperationType.UNKNOWN, Option.empty()); Option pendingRollbackInfo = tableServiceClient.getPendingRollbackInfo(table.getMetaClient(), commitInstantTime); return tableServiceClient.rollback(commitInstantTime, pendingRollbackInfo, rollbackInstantTimestamp, false, false); } /** * NOTE : This action requires all writers (ingest and compact) to a table to be stopped before proceeding. Revert * the (inflight/committed) record changes for all commits after the provided instant time. * * @param savepointToRestoreTimestamp savepoint instant time to which restoration is requested */ public HoodieRestoreMetadata restoreToInstant(final String savepointToRestoreTimestamp, boolean initialMetadataTableIfNecessary) throws HoodieRestoreException { LOG.info("Begin restore to instant " + savepointToRestoreTimestamp); Timer.Context timerContext = metrics.getRollbackCtx(); try { HoodieTable table = initTable(WriteOperationType.UNKNOWN, Option.empty(), initialMetadataTableIfNecessary); Pair> timestampAndRestorePlan = scheduleAndGetRestorePlan(savepointToRestoreTimestamp, table); final String restoreInstantTimestamp = timestampAndRestorePlan.getLeft(); Option restorePlanOption = timestampAndRestorePlan.getRight(); if (restorePlanOption.isPresent()) { HoodieRestoreMetadata restoreMetadata = table.restore(context, restoreInstantTimestamp, savepointToRestoreTimestamp); if (timerContext != null) { final long durationInMs = metrics.getDurationInMs(timerContext.stop()); final long totalFilesDeleted = restoreMetadata.getHoodieRestoreMetadata().values().stream() .flatMap(Collection::stream) .mapToLong(HoodieRollbackMetadata::getTotalFilesDeleted) .sum(); metrics.updateRollbackMetrics(durationInMs, totalFilesDeleted); } return restoreMetadata; } else { throw new HoodieRestoreException("Failed to restore " + config.getBasePath() + " to commit " + savepointToRestoreTimestamp); } } catch (Exception e) { throw new HoodieRestoreException("Failed to restore to " + savepointToRestoreTimestamp, e); } } /** * Check if there is a failed restore with the same savepointToRestoreTimestamp. Reusing the commit instead of * creating a new one will prevent causing some issues with the metadata table. * */ private Pair> scheduleAndGetRestorePlan(final String savepointToRestoreTimestamp, HoodieTable table) throws IOException { Option failedRestore = table.getRestoreTimeline().filterInflightsAndRequested().lastInstant(); if (failedRestore.isPresent() && savepointToRestoreTimestamp.equals(RestoreUtils.getSavepointToRestoreTimestamp(table, failedRestore.get()))) { return Pair.of(failedRestore.get().requestedTime(), Option.of(RestoreUtils.getRestorePlan(table.getMetaClient(), failedRestore.get()))); } final String restoreInstantTimestamp = createNewInstantTime(); return Pair.of(restoreInstantTimestamp, table.scheduleRestore(context, restoreInstantTimestamp, savepointToRestoreTimestamp)); } /** * Clean up any stale/old files/data lying around (either on file storage or index storage) based on the * configurations and CleaningPolicy used. (typically files that no longer can be used by a running query can be * cleaned) */ public HoodieCleanMetadata clean(String cleanInstantTime) throws HoodieIOException { return clean(cleanInstantTime, true, false); } /** * Clean up any stale/old files/data lying around (either on file storage or index storage) based on the * configurations and CleaningPolicy used. (typically files that no longer can be used by a running query can be * cleaned) * @param cleanInstantTime instant time for clean. * @param skipLocking if this is triggered by another parent transaction, locking can be skipped. * @return instance of {@link HoodieCleanMetadata}. */ @Deprecated public HoodieCleanMetadata clean(String cleanInstantTime, boolean skipLocking) throws HoodieIOException { return clean(cleanInstantTime, true, false); } /** * Clean up any stale/old files/data lying around (either on file storage or index storage) based on the * configurations and CleaningPolicy used. (typically files that no longer can be used by a running query can be * cleaned). This API provides the flexibility to schedule clean instant asynchronously via * {@link BaseHoodieWriteClient#scheduleTableService(String, Option, TableServiceType)} and disable inline scheduling * of clean. * @param cleanInstantTime instant time for clean. * @param scheduleInline true if needs to be scheduled inline. false otherwise. * @param skipLocking if this is triggered by another parent transaction, locking can be skipped. */ public HoodieCleanMetadata clean(String cleanInstantTime, boolean scheduleInline, boolean skipLocking) throws HoodieIOException { return tableServiceClient.clean(cleanInstantTime, scheduleInline); } public HoodieCleanMetadata clean() { return clean(createNewInstantTime()); } /** * Triggers clean for the table. This refers to Clean up any stale/old files/data lying around (either on file storage or index storage) based on the * * configurations and CleaningPolicy used. * @param skipLocking if this is triggered by another parent transaction, locking can be skipped. * @return instance of {@link HoodieCleanMetadata}. */ @Deprecated public HoodieCleanMetadata clean(boolean skipLocking) { return clean(createNewInstantTime()); } /** * Trigger archival for the table. This ensures that the number of commits do not explode * and keep increasing unbounded over time. * @param table table to commit on. */ protected void archive(HoodieTable table) { tableServiceClient.archive(table); } /** * Trigger archival for the table. This ensures that the number of commits do not explode * and keep increasing unbounded over time. */ public void archive() { // Create a Hoodie table which encapsulated the commits and files visible HoodieTable table = createTable(config); archive(table); } /** * Provides a new commit time for a write operation (insert/update/delete). */ public String startCommit() { HoodieTableMetaClient metaClient = createMetaClient(true); return startCommit(metaClient.getCommitActionType(), metaClient); } /** * Provides a new commit time for a write operation (insert/update/delete/insert_overwrite/insert_overwrite_table) with specified action. */ public String startCommit(String actionType, HoodieTableMetaClient metaClient) { if (needsUpgradeOrDowngrade(metaClient)) { executeUsingTxnManager(Option.empty(), () -> tryUpgrade(metaClient, Option.empty())); } CleanerUtils.rollbackFailedWrites(config.getFailedWritesCleanPolicy(), HoodieTimeline.COMMIT_ACTION, () -> tableServiceClient.rollbackFailedWrites()); String instantTime = createNewInstantTime(); startCommit(instantTime, actionType, metaClient); return instantTime; } /** * Provides a new commit time for a write operation (insert/update/delete/insert_overwrite/insert_overwrite_table) without specified action. * @param instantTime Instant time to be generated */ public void startCommitWithTime(String instantTime) { HoodieTableMetaClient metaClient = createMetaClient(true); startCommitWithTime(instantTime, metaClient.getCommitActionType(), metaClient); } /** * Completes a new commit time for a write operation (insert/update/delete/insert_overwrite/insert_overwrite_table) with specified action. */ public void startCommitWithTime(String instantTime, String actionType) { HoodieTableMetaClient metaClient = createMetaClient(true); startCommitWithTime(instantTime, actionType, metaClient); } /** * Starts a new commit time for a write operation (insert/update/delete) with specified action. */ private void startCommitWithTime(String instantTime, String actionType, HoodieTableMetaClient metaClient) { if (needsUpgradeOrDowngrade(metaClient)) { // unclear what instant to use, since upgrade does have a given instant. executeUsingTxnManager(Option.empty(), () -> tryUpgrade(metaClient, Option.empty())); } CleanerUtils.rollbackFailedWrites(config.getFailedWritesCleanPolicy(), HoodieTimeline.COMMIT_ACTION, () -> tableServiceClient.rollbackFailedWrites()); startCommit(instantTime, actionType, metaClient); } private void startCommit(String instantTime, String actionType, HoodieTableMetaClient metaClient) { LOG.info("Generate a new instant time: {} action: {}", instantTime, actionType); // check there are no inflight restore before starting a new commit. HoodieTimeline inflightRestoreTimeline = metaClient.getActiveTimeline().getRestoreTimeline().filterInflightsAndRequested(); ValidationUtils.checkArgument(inflightRestoreTimeline.countInstants() == 0, "Found pending restore in active timeline. Please complete the restore fully before proceeding. As of now, " + "table could be in an inconsistent state. Pending restores: " + Arrays.toString(inflightRestoreTimeline.getInstantsAsStream() .map(HoodieInstant::requestedTime).collect(Collectors.toList()).toArray())); if (config.getFailedWritesCleanPolicy().isLazy()) { this.heartbeatClient.start(instantTime); } if (ClusteringUtils.isClusteringOrReplaceCommitAction(actionType)) { metaClient.getActiveTimeline().createRequestedCommitWithReplaceMetadata(instantTime, actionType); } else { metaClient.getActiveTimeline().createNewInstant(metaClient.createNewInstant(HoodieInstant.State.REQUESTED, actionType, instantTime)); } } /** * Schedules a new compaction instant. * @param extraMetadata Extra Metadata to be stored */ public Option scheduleCompaction(Option> extraMetadata) throws HoodieIOException { String instantTime = createNewInstantTime(); return scheduleCompactionAtInstant(instantTime, extraMetadata) ? Option.of(instantTime) : Option.empty(); } /** * Schedules a new compaction instant with passed-in instant time. * @param instantTime Compaction Instant Time * @param extraMetadata Extra Metadata to be stored */ public boolean scheduleCompactionAtInstant(String instantTime, Option> extraMetadata) throws HoodieIOException { return scheduleTableService(instantTime, extraMetadata, TableServiceType.COMPACT).isPresent(); } /** * Schedules INDEX action. * * @param partitionTypes - list of {@link MetadataPartitionType} which needs to be indexed * @return instant time for the requested INDEX action */ public Option scheduleIndexing(List partitionTypes, List partitionPaths) { String instantTime = createNewInstantTime(); Option indexPlan = createTable(config) .scheduleIndexing(context, instantTime, partitionTypes, partitionPaths); return indexPlan.isPresent() ? Option.of(instantTime) : Option.empty(); } /** * Runs INDEX action to build out the metadata partitions as planned for the given instant time. * * @param indexInstantTime - instant time for the requested INDEX action * @return {@link Option} after successful indexing. */ public Option index(String indexInstantTime) { return createTable(config).index(context, indexInstantTime); } /** * Drops the index and removes the metadata partitions. * * @param metadataPartitions - list of metadata partitions which need to be dropped */ public void dropIndex(List metadataPartitions) { HoodieTable table = createTable(config); String dropInstant = createNewInstantTime(); HoodieInstant ownerInstant = table.getMetaClient().createNewInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.INDEXING_ACTION, dropInstant); this.txnManager.beginTransaction(Option.of(ownerInstant), Option.empty()); try { context.setJobStatus(this.getClass().getSimpleName(), "Dropping partitions from metadata table: " + config.getTableName()); HoodieTableMetaClient metaClient = table.getMetaClient(); // For secondary index and expression index with wrong parameters, index definition for the MDT partition is // removed so that such indices are not recreated while initializing the writer. metadataPartitions.forEach(partition -> { if (MetadataPartitionType.isExpressionOrSecondaryIndex(partition)) { metaClient.deleteIndexDefinition(partition); } }); Option metadataWriterOpt = table.getMetadataWriter(dropInstant); // first update table config. Metadata writer initializes the inflight metadata // partitions so we need to first remove the metadata before creating the writer // Also the partitions need to be removed after creating the metadata writer since the writer // recreates enabled partitions metadataPartitions.forEach(partition -> { metaClient.getTableConfig().setMetadataPartitionState(metaClient, partition, false); }); if (metadataWriterOpt.isPresent()) { try (HoodieTableMetadataWriter metadataWriter = metadataWriterOpt.get()) { metadataWriter.dropMetadataPartitions(metadataPartitions); } catch (Exception e) { if (e instanceof HoodieException) { throw (HoodieException) e; } else { throw new HoodieException("Failed to drop partitions from metadata", e); } } } } finally { this.txnManager.endTransaction(Option.of(ownerInstant)); } } /** * Performs Clustering for the workload stored in instant-time. * * @param clusteringInstantTime Clustering Instant Time * @return Collection of WriteStatus to inspect errors and counts */ public HoodieWriteMetadata cluster(String clusteringInstantTime) { if (shouldDelegateToTableServiceManager(config, ActionType.clustering)) { throw new UnsupportedOperationException("Clustering should be delegated to table service manager instead of direct run."); } return cluster(clusteringInstantTime, true); } /** * Performs Compaction for the workload stored in instant-time. * * @param compactionInstantTime Compaction Instant Time * @return Collection of WriteStatus to inspect errors and counts */ public HoodieWriteMetadata compact(String compactionInstantTime) { if (shouldDelegateToTableServiceManager(config, ActionType.compaction)) { throw new UnsupportedOperationException("Compaction should be delegated to table service manager instead of direct run."); } return compact(compactionInstantTime, config.shouldAutoCommit()); } /** * Commit a compaction operation. Allow passing additional meta-data to be stored in commit instant file. * * @param compactionInstantTime Compaction Instant Time * @param metadata All the metadata that gets stored along with a commit * @param extraMetadata Extra Metadata to be stored */ public void commitCompaction(String compactionInstantTime, HoodieCommitMetadata metadata, Option> extraMetadata) { tableServiceClient.commitCompaction(compactionInstantTime, metadata, extraMetadata); } /** * Commit Compaction and track metrics. */ protected void completeCompaction(HoodieCommitMetadata metadata, HoodieTable table, String compactionCommitTime) { tableServiceClient.completeCompaction(metadata, table, compactionCommitTime); } /** * Schedules a new log compaction instant. * @param extraMetadata Extra Metadata to be stored */ public Option scheduleLogCompaction(Option> extraMetadata) throws HoodieIOException { String instantTime = createNewInstantTime(); return scheduleLogCompactionAtInstant(instantTime, extraMetadata) ? Option.of(instantTime) : Option.empty(); } /** * Schedules a new log compaction instant with passed-in instant time. * @param instantTime Log Compaction Instant Time * @param extraMetadata Extra Metadata to be stored */ public boolean scheduleLogCompactionAtInstant(String instantTime, Option> extraMetadata) throws HoodieIOException { return scheduleTableService(instantTime, extraMetadata, TableServiceType.LOG_COMPACT).isPresent(); } /** * Performs Log Compaction for the workload stored in instant-time. * * @param logCompactionInstantTime Log Compaction Instant Time * @return Collection of WriteStatus to inspect errors and counts */ public HoodieWriteMetadata logCompact(String logCompactionInstantTime) { return logCompact(logCompactionInstantTime, config.shouldAutoCommit()); } /** * Commit a log compaction operation. Allow passing additional meta-data to be stored in commit instant file. * * @param logCompactionInstantTime Log Compaction Instant Time * @param metadata All the metadata that gets stored along with a commit * @param extraMetadata Extra Metadata to be stored */ public void commitLogCompaction(String logCompactionInstantTime, HoodieCommitMetadata metadata, Option> extraMetadata) { HoodieTable table = createTable(config); extraMetadata.ifPresent(m -> m.forEach(metadata::addMetadata)); completeLogCompaction(metadata, table, logCompactionInstantTime); } /** * Commit Log Compaction and track metrics. */ protected void completeLogCompaction(HoodieCommitMetadata metadata, HoodieTable table, String logCompactionCommitTime) { tableServiceClient.completeLogCompaction(metadata, table, logCompactionCommitTime); } /** * Ensures compaction instant is in expected state and performs Compaction for the workload stored in instant-time. * * @param compactionInstantTime Compaction Instant Time * @return Collection of Write Status */ protected HoodieWriteMetadata compact(String compactionInstantTime, boolean shouldComplete) { HoodieTable table = createTable(config); preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient()); return tableServiceClient.compact(compactionInstantTime, shouldComplete); } /** * Schedules compaction inline. * @param extraMetadata extra metadata to be used. * @return compaction instant if scheduled. */ protected Option inlineScheduleCompaction(Option> extraMetadata) { return scheduleCompaction(extraMetadata); } /** * Ensures compaction instant is in expected state and performs Log Compaction for the workload stored in instant-time.s * * @param logCompactionInstantTime Compaction Instant Time * @return Collection of Write Status */ protected HoodieWriteMetadata logCompact(String logCompactionInstantTime, boolean shouldComplete) { HoodieTable table = createTable(config); preWrite(logCompactionInstantTime, WriteOperationType.LOG_COMPACT, table.getMetaClient()); return tableServiceClient.logCompact(logCompactionInstantTime, shouldComplete); } /** * Schedules a new clustering instant. * @param extraMetadata Extra Metadata to be stored */ public Option scheduleClustering(Option> extraMetadata) throws HoodieIOException { String instantTime = createNewInstantTime(); return scheduleClusteringAtInstant(instantTime, extraMetadata) ? Option.of(instantTime) : Option.empty(); } /** * Schedules a new clustering instant with passed-in instant time. * @param instantTime clustering Instant Time * @param extraMetadata Extra Metadata to be stored */ public boolean scheduleClusteringAtInstant(String instantTime, Option> extraMetadata) throws HoodieIOException { return scheduleTableService(instantTime, extraMetadata, TableServiceType.CLUSTER).isPresent(); } /** * Schedules a new cleaning instant with passed-in instant time. * @param instantTime cleaning Instant Time * @param extraMetadata Extra Metadata to be stored */ protected boolean scheduleCleaningAtInstant(String instantTime, Option> extraMetadata) throws HoodieIOException { return scheduleTableService(instantTime, extraMetadata, TableServiceType.CLEAN).isPresent(); } /** * Ensures clustering instant is in expected state and performs clustering for the plan stored in metadata. * @param clusteringInstant Clustering Instant Time * @return Collection of Write Status */ public HoodieWriteMetadata cluster(String clusteringInstant, boolean shouldComplete) { HoodieTable table = createTable(config); preWrite(clusteringInstant, WriteOperationType.CLUSTER, table.getMetaClient()); return tableServiceClient.cluster(clusteringInstant, shouldComplete); } public boolean purgePendingClustering(String clusteringInstant) { HoodieTable table = createTable(config); preWrite(clusteringInstant, WriteOperationType.CLUSTER, table.getMetaClient()); return tableServiceClient.purgePendingClustering(clusteringInstant); } /** * Schedule table services such as clustering, compaction & cleaning. * * @param extraMetadata Metadata to pass onto the scheduled service instant * @param tableServiceType Type of table service to schedule * * @return The given instant time option or empty if no table service plan is scheduled */ public Option scheduleTableService(Option> extraMetadata, TableServiceType tableServiceType) { String instantTime = createNewInstantTime(); return scheduleTableService(instantTime, extraMetadata, tableServiceType); } /** * Schedule table services such as clustering, compaction & cleaning. * * @param extraMetadata Metadata to pass onto the scheduled service instant * @param tableServiceType Type of table service to schedule * * @return The given instant time option or empty if no table service plan is scheduled */ public Option scheduleTableService(String instantTime, Option> extraMetadata, TableServiceType tableServiceType) { return tableServiceClient.scheduleTableService(instantTime, extraMetadata, tableServiceType); } public HoodieMetrics getMetrics() { return metrics; } public HoodieIndex getIndex() { return index; } /** * Performs necessary bootstrapping operations (for ex, validating whether Metadata Table has to be bootstrapped). * *

NOTE: THIS OPERATION IS EXECUTED UNDER LOCK, THEREFORE SHOULD AVOID ANY OPERATIONS * NOT REQUIRING EXTERNAL SYNCHRONIZATION * * @param metaClient instance of {@link HoodieTableMetaClient} * @param instantTime current inflight instant time */ protected void doInitTable(WriteOperationType operationType, HoodieTableMetaClient metaClient, Option instantTime) { Option ownerInstant = Option.empty(); if (instantTime.isPresent()) { ownerInstant = Option.of(metaClient.createNewInstant(HoodieInstant.State.INFLIGHT, CommitUtils.getCommitActionType(operationType, metaClient.getTableType()), instantTime.get())); } executeUsingTxnManager(ownerInstant, () -> { tryUpgrade(metaClient, instantTime); // TODO: this also does MT table management.. initMetadataTable(instantTime, metaClient); }); } private void executeUsingTxnManager(Option ownerInstant, Runnable r) { this.txnManager.beginTransaction(ownerInstant, Option.empty()); try { r.run(); } finally { this.txnManager.endTransaction(ownerInstant); } } /** * Bootstrap the metadata table. * * @param instantTime current inflight instant time */ protected void initMetadataTable(Option instantTime, HoodieTableMetaClient metaClient) { // by default do nothing. } // TODO: this method will be removed with restore/rollback changes in MDT protected final HoodieTable initTable(WriteOperationType operationType, Option instantTime, boolean initMetadataTable) { return initTable(operationType, instantTime); } /** * Instantiates and initializes instance of {@link HoodieTable}, performing crucial bootstrapping * operations such as: * * NOTE: This method is engine-agnostic and SHOULD NOT be overloaded, please check on * {@link #doInitTable(WriteOperationType, HoodieTableMetaClient, Option)} instead * *

    *
  • Checking whether upgrade/downgrade is required
  • *
  • Bootstrapping Metadata Table (if required)
  • *
  • Initializing metrics contexts
  • *
*/ public final HoodieTable initTable(WriteOperationType operationType, Option instantTime) { HoodieTableMetaClient metaClient = createMetaClient(true); // Setup write schemas for deletes if (WriteOperationType.isDelete(operationType)) { setWriteSchemaForDeletes(metaClient); } doInitTable(operationType, metaClient, instantTime); HoodieTable table = createTable(config, metaClient); // Validate table properties validateAgainstTableProperties(table.getMetaClient().getTableConfig(), config); switch (operationType) { case INSERT: case INSERT_PREPPED: case UPSERT: case UPSERT_PREPPED: case BULK_INSERT: case BULK_INSERT_PREPPED: case INSERT_OVERWRITE: case INSERT_OVERWRITE_TABLE: setWriteTimer(table.getMetaClient().getCommitActionType()); break; case CLUSTER: case COMPACT: case LOG_COMPACT: tableServiceClient.setTableServiceTimer(operationType); break; default: } return table; } public void validateAgainstTableProperties(HoodieTableConfig tableConfig, HoodieWriteConfig writeConfig) { // mismatch of table versions. CommonClientUtils.validateTableVersion(tableConfig, writeConfig); Properties properties = writeConfig.getProps(); // Once meta fields are disabled, it cant be re-enabled for a given table. if (!tableConfig.populateMetaFields() && writeConfig.populateMetaFields()) { throw new HoodieException(HoodieTableConfig.POPULATE_META_FIELDS.key() + " already disabled for the table. Can't be re-enabled back"); } // Meta fields can be disabled only when either {@code SimpleKeyGenerator}, {@code ComplexKeyGenerator}, // {@code NonpartitionedKeyGenerator} is used if (!tableConfig.populateMetaFields()) { String keyGenClass = KeyGeneratorType.getKeyGeneratorClassName(new HoodieConfig(properties)); if (StringUtils.isNullOrEmpty(keyGenClass)) { keyGenClass = "org.apache.hudi.keygen.SimpleKeyGenerator"; } if (!keyGenClass.equals("org.apache.hudi.keygen.SimpleKeyGenerator") && !keyGenClass.equals("org.apache.hudi.keygen.NonpartitionedKeyGenerator") && !keyGenClass.equals("org.apache.hudi.keygen.ComplexKeyGenerator")) { throw new HoodieException("Only simple, non-partitioned or complex key generator are supported when meta-fields are disabled. Used: " + keyGenClass); } } //Check to make sure it's not a COW table with consistent hashing bucket index if (tableConfig.getTableType() == HoodieTableType.COPY_ON_WRITE) { HoodieIndex.IndexType indexType = writeConfig.getIndexType(); if (indexType != null && indexType.equals(HoodieIndex.IndexType.BUCKET)) { String bucketEngine = properties.getProperty("hoodie.index.bucket.engine"); if (bucketEngine != null && bucketEngine.equals("CONSISTENT_HASHING")) { throw new HoodieException("Consistent hashing bucket index does not work with COW table. Use simple bucket index or an MOR table."); } } } } /** * Sets write schema from last instant since deletes may not have schema set in the config. */ protected void setWriteSchemaForDeletes(HoodieTableMetaClient metaClient) { try { HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline(); Option lastInstant = activeTimeline.filterCompletedInstants().filter(s -> s.getAction().equals(metaClient.getCommitActionType()) || s.getAction().equals(HoodieActiveTimeline.REPLACE_COMMIT_ACTION)) .lastInstant(); if (lastInstant.isPresent()) { HoodieCommitMetadata commitMetadata = metaClient.getCommitMetadataSerDe().deserialize(lastInstant.get(), activeTimeline.getInstantDetails(lastInstant.get()).get(), HoodieCommitMetadata.class); String extraSchema = commitMetadata.getExtraMetadata().get(SCHEMA_KEY); if (!StringUtils.isNullOrEmpty(extraSchema)) { config.setSchema(commitMetadata.getExtraMetadata().get(SCHEMA_KEY)); } else { throw new HoodieIOException("Latest commit does not have any schema in commit metadata"); } } else { LOG.warn("None rows are deleted because the table is empty"); } } catch (IOException e) { throw new HoodieIOException("IOException thrown while reading last commit metadata", e); } } /** * Called after each write, to release any resources used. */ protected void releaseResources(String instantTime) { // do nothing here } @Override public void close() { // Stop timeline-server if running super.close(); // Calling this here releases any resources used by your index, so make sure to finish any related operations // before this point this.index.close(); this.tableServiceClient.close(); } public void setWriteTimer(String commitType) { if (commitType.equals(HoodieTimeline.COMMIT_ACTION)) { writeTimer = metrics.getCommitCtx(); } else if (commitType.equals(HoodieTimeline.DELTA_COMMIT_ACTION)) { writeTimer = metrics.getDeltaCommitCtx(); } } /** * Upgrades the hoodie table if need be when moving to a new Hudi version. * This method is called within a lock. Try to avoid double locking from within this method. * @param metaClient instance of {@link HoodieTableMetaClient} to use. * @param instantTime instant time of interest if we have one. */ protected void tryUpgrade(HoodieTableMetaClient metaClient, Option instantTime) { UpgradeDowngrade upgradeDowngrade = new UpgradeDowngrade(metaClient, config, context, upgradeDowngradeHelper); if (upgradeDowngrade.needsUpgradeOrDowngrade(config.getWriteVersion())) { // Ensure no inflight commits by setting EAGER policy and explicitly cleaning all failed commits List instantsToRollback = tableServiceClient.getInstantsToRollback(metaClient, HoodieFailedWritesCleaningPolicy.EAGER, instantTime); if (!instantsToRollback.isEmpty()) { Map> pendingRollbacks = tableServiceClient.getPendingRollbackInfos(metaClient); instantsToRollback.forEach(entry -> pendingRollbacks.putIfAbsent(entry, Option.empty())); // already called within a lock. tableServiceClient.rollbackFailedWrites(pendingRollbacks, true, true); } new UpgradeDowngrade(metaClient, config, context, upgradeDowngradeHelper) .run(HoodieTableVersion.current(), instantTime.orElse(null)); metaClient.reloadTableConfig(); metaClient.reloadActiveTimeline(); } } private boolean needsUpgradeOrDowngrade(HoodieTableMetaClient metaClient) { UpgradeDowngrade upgradeDowngrade = new UpgradeDowngrade(metaClient, config, context, upgradeDowngradeHelper); return upgradeDowngrade.needsUpgradeOrDowngrade(config.getWriteVersion()); } /** * Rolls back the failed delta commits corresponding to the indexing action. *

* TODO(HUDI-5733): This should be cleaned up once the proper fix of rollbacks * in the metadata table is landed. * * @return {@code true} if rollback happens; {@code false} otherwise. */ public boolean lazyRollbackFailedIndexing() { return tableServiceClient.rollbackFailedIndexingCommits(); } /** * Rollback failed writes if any. * * @return true if rollback happened. false otherwise. */ public boolean rollbackFailedWrites() { return tableServiceClient.rollbackFailedWrites(); } /** * add columns to table. * * @param colName col name to be added. if we want to add col to a nested filed, the fullName should be specified * @param schema col type to be added. * @param doc col doc to be added. * @param position col position to be added * @param positionType col position change type. now support three change types: first/after/before */ public void addColumn(String colName, Schema schema, String doc, String position, TableChange.ColumnPositionChange.ColumnPositionType positionType) { Pair pair = getInternalSchemaAndMetaClient(); InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()) .applyAddChange(colName, AvroInternalSchemaConverter.convertToField(schema), doc, position, positionType); commitTableChange(newSchema, pair.getRight()); } public void addColumn(String colName, Schema schema) { addColumn(colName, schema, null, "", TableChange.ColumnPositionChange.ColumnPositionType.NO_OPERATION); } /** * delete columns to table. * * @param colNames col name to be deleted. if we want to delete col from a nested filed, the fullName should be specified */ public void deleteColumns(String... colNames) { Pair pair = getInternalSchemaAndMetaClient(); InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyDeleteChange(colNames); commitTableChange(newSchema, pair.getRight()); } /** * rename col name for hudi table. * * @param colName col name to be renamed. if we want to rename col from a nested filed, the fullName should be specified * @param newName new name for current col. no need to specify fullName. */ public void renameColumn(String colName, String newName) { Pair pair = getInternalSchemaAndMetaClient(); InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyRenameChange(colName, newName); commitTableChange(newSchema, pair.getRight()); } /** * update col nullable attribute for hudi table. * * @param colName col name to be changed. if we want to change col from a nested filed, the fullName should be specified * @param nullable . */ public void updateColumnNullability(String colName, boolean nullable) { Pair pair = getInternalSchemaAndMetaClient(); InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyColumnNullabilityChange(colName, nullable); commitTableChange(newSchema, pair.getRight()); } /** * update col Type for hudi table. * only support update primitive type to primitive type. * cannot update nest type to nest type or primitive type eg: RecordType -> MapType, MapType -> LongType. * * @param colName col name to be changed. if we want to change col from a nested filed, the fullName should be specified * @param newType . */ public void updateColumnType(String colName, Type newType) { Pair pair = getInternalSchemaAndMetaClient(); InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyColumnTypeChange(colName, newType); commitTableChange(newSchema, pair.getRight()); } /** * update col comment for hudi table. * * @param colName col name to be changed. if we want to change col from a nested filed, the fullName should be specified * @param doc . */ public void updateColumnComment(String colName, String doc) { Pair pair = getInternalSchemaAndMetaClient(); InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()).applyColumnCommentChange(colName, doc); commitTableChange(newSchema, pair.getRight()); } /** * reorder the position of col. * * @param colName column which need to be reordered. if we want to change col from a nested filed, the fullName should be specified. * @param referColName reference position. * @param orderType col position change type. now support three change types: first/after/before */ public void reOrderColPosition(String colName, String referColName, TableChange.ColumnPositionChange.ColumnPositionType orderType) { if (colName == null || orderType == null || referColName == null) { return; } //get internalSchema Pair pair = getInternalSchemaAndMetaClient(); InternalSchema newSchema = new InternalSchemaChangeApplier(pair.getLeft()) .applyReOrderColPositionChange(colName, referColName, orderType); commitTableChange(newSchema, pair.getRight()); } public Pair getInternalSchemaAndMetaClient() { HoodieTableMetaClient metaClient = createMetaClient(true); TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient); return Pair.of(getInternalSchema(schemaUtil), metaClient); } public void commitTableChange(InternalSchema newSchema, HoodieTableMetaClient metaClient) { TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient); String historySchemaStr = schemaUtil.getTableHistorySchemaStrFromCommitMetadata().orElseGet( () -> SerDeHelper.inheritSchemas(getInternalSchema(schemaUtil), "")); Schema schema = AvroInternalSchemaConverter.convert(newSchema, getAvroRecordQualifiedName(config.getTableName())); String commitActionType = CommitUtils.getCommitActionType(WriteOperationType.ALTER_SCHEMA, metaClient.getTableType()); String instantTime = createNewInstantTime(); startCommitWithTime(instantTime, commitActionType, metaClient); config.setSchema(schema.toString()); HoodieActiveTimeline timeLine = metaClient.getActiveTimeline(); HoodieInstant requested = metaClient.createNewInstant(State.REQUESTED, commitActionType, instantTime); HoodieCommitMetadata metadata = new HoodieCommitMetadata(); metadata.setOperationType(WriteOperationType.ALTER_SCHEMA); try { timeLine.transitionRequestedToInflight(requested, serializeCommitMetadata(metaClient.getCommitMetadataSerDe(), metadata)); } catch (IOException io) { throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", io); } Map extraMeta = new HashMap<>(); extraMeta.put(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(newSchema.setSchemaId(Long.parseLong(instantTime)))); // try to save history schemas FileBasedInternalSchemaStorageManager schemasManager = new FileBasedInternalSchemaStorageManager(metaClient); schemasManager.persistHistorySchemaStr(instantTime, SerDeHelper.inheritSchemas(newSchema, historySchemaStr)); commitStats(instantTime, Collections.emptyList(), Option.of(extraMeta), commitActionType); } private InternalSchema getInternalSchema(TableSchemaResolver schemaUtil) { return schemaUtil.getTableInternalSchemaFromCommitMetadata().orElseGet(() -> { try { return AvroInternalSchemaConverter.convert(schemaUtil.getTableAvroSchema()); } catch (Exception e) { throw new HoodieException(String.format("cannot find schema for current table: %s", config.getBasePath())); } }); } protected final void maybeDisableWriteRecordPositions(HoodieTableMetaClient metaClient) { // Disabling {@link WRITE_RECORD_POSITIONS} in the following two cases for correctness // even if the record positions are enabled in MOR: // (1) When there is a pending compaction, the new base files to be generated by compaction // is not available during this transaction. Given the log files in MOR from a new transaction // after a compaction is scheduled can be attached to the base file generated by the compaction // in the latest file slice, the accurate record positions may not be derived. // (2) When NBCC is enabled, the compaction can be scheduled while there are inflight // deltacommits, and unlike OCC, such an inflight deltacommit updating the same file group // under compaction can still be successfully committed. This can also introduce the // correctness problem as (1) that the positions in the log file can be inaccurate. if (config.shouldWriteRecordPositions() && config.getTableType() == HoodieTableType.MERGE_ON_READ && (config.getWriteConcurrencyMode() == WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL || !metaClient.getActiveTimeline().filterPendingCompactionTimeline().empty())) { config.setValue(HoodieWriteConfig.WRITE_RECORD_POSITIONS, String.valueOf(false)); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy