All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.table.upgrade.SevenToEightUpgradeHandler Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.table.upgrade;

import org.apache.hudi.client.timeline.versioning.v2.LSMTimelineWriter;
import org.apache.hudi.client.utils.LegacyArchivedMetaEntryReader;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.RecordMergeMode;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.BootstrapIndexType;
import org.apache.hudi.common.model.DefaultHoodieRecordPayload;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTableVersion;
import org.apache.hudi.common.table.timeline.ActiveAction;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.InstantComparison;
import org.apache.hudi.common.table.timeline.InstantFileNameGenerator;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.table.timeline.versioning.v1.ActiveTimelineV1;
import org.apache.hudi.common.table.timeline.versioning.v1.CommitMetadataSerDeV1;
import org.apache.hudi.common.table.timeline.versioning.v2.ActiveTimelineV2;
import org.apache.hudi.common.table.timeline.versioning.v2.CommitMetadataSerDeV2;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import org.apache.hudi.keygen.constant.KeyGeneratorType;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.table.HoodieTable;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.apache.hudi.common.table.timeline.HoodieInstant.UNDERSCORE;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.DELTA_COMMIT_ACTION;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.REPLACE_COMMIT_ACTION;
import static org.apache.hudi.common.table.timeline.TimelineLayout.TIMELINE_LAYOUT_V1;
import static org.apache.hudi.common.table.timeline.TimelineLayout.TIMELINE_LAYOUT_V2;
import static org.apache.hudi.table.upgrade.UpgradeDowngradeUtils.SIX_TO_EIGHT_TIMELINE_ACTION_MAP;
import static org.apache.hudi.table.upgrade.UpgradeDowngradeUtils.rollbackFailedWritesAndCompact;

/**
 * Upgrade handler that moves a table from version SEVEN to version EIGHT.
 *
 * <p>Version 7 is going to be placeholder version for bridge release 0.16.0.
 * Version 8 is the placeholder version to track 1.x.
 *
 * <p>The upgrade performed by {@link #upgrade} consists of three parts:
 * <ul>
 *   <li>computing the table properties that must be added for version 8,</li>
 *   <li>rewriting every instant file of the active timeline into the timeline layout V2 location and naming scheme,</li>
 *   <li>converting the legacy archived timeline into the LSM timeline.</li>
 * </ul>
 */
public class SevenToEightUpgradeHandler implements UpgradeHandler {

  private static final Logger LOG = LoggerFactory.getLogger(SevenToEightUpgradeHandler.class);

  /**
   * Runs the version 7 to version 8 upgrade.
   *
   * <p>When auto upgrade is disabled in {@code config}, nothing is upgraded: the write table version of
   * {@code config} is set to version SIX and an empty map is returned.
   *
   * @param config                 write config of the table being upgraded; its write table version is mutated when auto upgrade is disabled
   * @param context                engine context used to create the table layout and to rewrite the timeline instants
   * @param instantTime            instant time of the upgrade; not referenced by this handler
   * @param upgradeDowngradeHelper helper used to obtain the {@link HoodieTable} and to run rollback and compaction
   * @return the table properties that have to be added to the table config as part of the upgrade
   * @throws HoodieIOException if the table layout cannot be created or the timeline instants cannot be listed
   */
  @Override
  public Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context,
                                             String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) {
    Map<ConfigProperty, String> tablePropsToAdd = new HashMap<>();
    HoodieTable table = upgradeDowngradeHelper.getTable(config, context);
    HoodieTableMetaClient metaClient = table.getMetaClient();
    HoodieTableConfig tableConfig = metaClient.getTableConfig();
    // If auto upgrade is disabled, set writer version to 6 and return
    if (!config.autoUpgrade()) {
      // At this point, metadata should already be disabled (see UpgradeDowngrade#needsUpgradeOrDowngrade(HoodieTableVersion)).
      // So, check either this is a metadata table itself, or metadata table is disabled.
      ValidationUtils.checkState(table.isMetadataTable() || !config.isMetadataTableEnabled(),
          "Metadata table should be disabled to write in table version SIX using 1.0.0+. Table base path: " + metaClient.getBasePath());
      config.setValue(HoodieWriteConfig.WRITE_TABLE_VERSION, String.valueOf(HoodieTableVersion.SIX.versionCode()));
      return tablePropsToAdd;
    }

    // If metadata is enabled for the data table, and existing metadata table is behind the data table, then delete it
    if (!table.isMetadataTable() && config.isMetadataTableEnabled() && isMetadataTableBehindDataTable(config, metaClient)) {
      HoodieTableMetadataUtil.deleteMetadataTable(config.getBasePath(), context);
    }

    // Rollback and run compaction in one step
    boolean isMergeOnRead = HoodieTableType.MERGE_ON_READ.equals(metaClient.getTableType());
    rollbackFailedWritesAndCompact(table, context, config, upgradeDowngradeHelper, isMergeOnRead, HoodieTableVersion.SIX);
    try {
      HoodieTableMetaClient.createTableLayoutOnStorage(context.getStorageConf(), new StoragePath(config.getBasePath()), config.getProps(), TimelineLayoutVersion.VERSION_2, false);
    } catch (IOException e) {
      LOG.error("Failed to create table layout on storage for timeline layout version {}", TimelineLayoutVersion.VERSION_2, e);
      throw new HoodieIOException("Failed to create table layout on storage", e);
    }

    // handle table properties upgrade
    tablePropsToAdd.put(HoodieTableConfig.TIMELINE_PATH, HoodieTableConfig.TIMELINE_PATH.defaultValue());
    upgradePartitionFields(config, tableConfig, tablePropsToAdd);
    upgradeMergeMode(tableConfig, tablePropsToAdd);
    setInitialVersion(tableConfig, tablePropsToAdd);
    upgradeKeyGeneratorType(tableConfig, tablePropsToAdd);
    upgradeBootstrapIndexType(tableConfig, tablePropsToAdd);

    // Handle timeline upgrade:
    //  - Rewrite instants in active timeline to new format
    //  - Convert archived timeline to new LSM timeline format
    List<HoodieInstant> instants;
    try {
      // We need to move all the instants - not just completed ones.
      instants = metaClient.scanHoodieInstantsFromFileSystem(metaClient.getTimelinePath(),
          ActiveTimelineV1.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
    } catch (IOException ioe) {
      LOG.error("Failed to get instants from filesystem", ioe);
      throw new HoodieIOException("Failed to get instants from filesystem", ioe);
    }

    if (!instants.isEmpty()) {
      InstantFileNameGenerator instantFileNameGenerator = metaClient.getInstantFileNameGenerator();
      CommitMetadataSerDeV2 commitMetadataSerDeV2 = new CommitMetadataSerDeV2();
      CommitMetadataSerDeV1 commitMetadataSerDeV1 = new CommitMetadataSerDeV1();
      ActiveTimelineV2 activeTimelineV2 = new ActiveTimelineV2(metaClient);
      List<Boolean> rewriteResults = context.map(instants, instant -> {
        String originalFileName = instantFileNameGenerator.getFileName(instant);
        return upgradeActiveTimelineInstant(instant, originalFileName, metaClient, commitMetadataSerDeV1, commitMetadataSerDeV2, activeTimelineV2);
      }, instants.size());
      // Individual failures are only logged by upgradeActiveTimelineInstant; surface a summary here so
      // that a partially rewritten timeline does not go unnoticed. The upgrade itself stays best-effort.
      long failedCount = rewriteResults.stream().filter(rewritten -> !rewritten).count();
      if (failedCount > 0) {
        LOG.warn("Failed to rewrite {} out of {} active timeline instants to the version eight format", failedCount, instants.size());
      }
    }

    upgradeToLSMTimeline(table, context, config);

    return tablePropsToAdd;
  }

  /**
   * Checks whether the metadata table lags behind the data table.
   *
   * @param config     write config providing the base path of the data table
   * @param metaClient meta client of the data table
   * @return {@code true} only if the metadata table is available, both tables have a completed commit, and the
   *         requested time of the last completed commit in the metadata table is lesser than the one in the data table
   */
  private static boolean isMetadataTableBehindDataTable(HoodieWriteConfig config, HoodieTableMetaClient metaClient) {
    // if metadata table does not exist, then it is not behind
    if (!metaClient.getTableConfig().isMetadataTableAvailable()) {
      return false;
    }
    // get last commit instant in data table and metadata table
    HoodieInstant lastCommitInstantInDataTable = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant().orElse(null);
    HoodieTableMetaClient metadataTableMetaClient = HoodieTableMetaClient.builder()
        .setConf(metaClient.getStorageConf().newInstance())
        .setBasePath(HoodieTableMetadata.getMetadataTableBasePath(config.getBasePath()))
        .build();
    HoodieInstant lastCommitInstantInMetadataTable = metadataTableMetaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant().orElse(null);
    // if last commit instant in data table is greater than the last commit instant in metadata table, then metadata table is behind
    return lastCommitInstantInDataTable != null && lastCommitInstantInMetadataTable != null
        && InstantComparison.compareTimestamps(lastCommitInstantInMetadataTable.requestedTime(), InstantComparison.LESSER_THAN, lastCommitInstantInDataTable.requestedTime());
  }

  /**
   * Adds {@link HoodieTableConfig#PARTITION_FIELDS} from the write config's partition path field, but only when
   * the table uses the CUSTOM or CUSTOM_AVRO key generator and the write config defines a partition path field.
   *
   * @param config          write config from which the partition path field is read
   * @param tableConfig     current table config
   * @param tablePropsToAdd output map collecting the properties to add
   */
  static void upgradePartitionFields(HoodieWriteConfig config, HoodieTableConfig tableConfig, Map<ConfigProperty, String> tablePropsToAdd) {
    String keyGenerator = tableConfig.getKeyGeneratorClassName();
    String partitionPathField = config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key());
    if (keyGenerator != null && partitionPathField != null
        && (keyGenerator.equals(KeyGeneratorType.CUSTOM.getClassName()) || keyGenerator.equals(KeyGeneratorType.CUSTOM_AVRO.getClassName()))) {
      tablePropsToAdd.put(HoodieTableConfig.PARTITION_FIELDS, partitionPathField);
    }
  }

  /**
   * Derives the record merge mode for tables whose payload class is {@link OverwriteWithLatestAvroPayload}.
   *
   * <p>Copy-on-write tables get {@link RecordMergeMode#COMMIT_TIME_ORDERING}. Any other table type gets
   * {@link RecordMergeMode#EVENT_TIME_ORDERING} together with {@link DefaultHoodieRecordPayload} as the payload class.
   * Tables with any other payload class are left untouched.
   *
   * @param tableConfig     current table config
   * @param tablePropsToAdd output map collecting the properties to add
   */
  static void upgradeMergeMode(HoodieTableConfig tableConfig, Map<ConfigProperty, String> tablePropsToAdd) {
    String payloadClass = tableConfig.getPayloadClass();
    if (payloadClass == null || !payloadClass.equals(OverwriteWithLatestAvroPayload.class.getName())) {
      return;
    }
    if (HoodieTableType.COPY_ON_WRITE == tableConfig.getTableType()) {
      tablePropsToAdd.put(HoodieTableConfig.RECORD_MERGE_MODE, RecordMergeMode.COMMIT_TIME_ORDERING.name());
    } else {
      tablePropsToAdd.put(HoodieTableConfig.PAYLOAD_CLASS_NAME, DefaultHoodieRecordPayload.class.getName());
      tablePropsToAdd.put(HoodieTableConfig.RECORD_MERGE_MODE, RecordMergeMode.EVENT_TIME_ORDERING.name());
    }
  }

  /**
   * Records the initial table version: the current table version when the table config contains
   * {@link HoodieTableConfig#VERSION}, otherwise version SIX.
   *
   * @param tableConfig     current table config
   * @param tablePropsToAdd output map collecting the properties to add
   */
  static void setInitialVersion(HoodieTableConfig tableConfig, Map<ConfigProperty, String> tablePropsToAdd) {
    HoodieTableVersion initialVersion = tableConfig.contains(HoodieTableConfig.VERSION)
        ? tableConfig.getTableVersion()
        : HoodieTableVersion.SIX;
    tablePropsToAdd.put(HoodieTableConfig.INITIAL_VERSION, String.valueOf(initialVersion.versionCode()));
  }

  /**
   * Writes both the bootstrap index class name and the matching bootstrap index type, provided the table config
   * already carries one of the two and a non-empty class name can be resolved from it.
   *
   * @param tableConfig     current table config
   * @param tablePropsToAdd output map collecting the properties to add
   */
  static void upgradeBootstrapIndexType(HoodieTableConfig tableConfig, Map<ConfigProperty, String> tablePropsToAdd) {
    if (tableConfig.contains(HoodieTableConfig.BOOTSTRAP_INDEX_CLASS_NAME) || tableConfig.contains(HoodieTableConfig.BOOTSTRAP_INDEX_TYPE)) {
      String bootstrapIndexClass = BootstrapIndexType.getBootstrapIndexClassName(tableConfig);
      if (StringUtils.nonEmpty(bootstrapIndexClass)) {
        tablePropsToAdd.put(HoodieTableConfig.BOOTSTRAP_INDEX_CLASS_NAME, bootstrapIndexClass);
        tablePropsToAdd.put(HoodieTableConfig.BOOTSTRAP_INDEX_TYPE, BootstrapIndexType.fromClassName(bootstrapIndexClass).name());
      }
    }
  }

  /**
   * Writes both the key generator class name and the matching key generator type when the table config has a
   * non-empty key generator class name.
   *
   * @param tableConfig     current table config
   * @param tablePropsToAdd output map collecting the properties to add
   */
  static void upgradeKeyGeneratorType(HoodieTableConfig tableConfig, Map<ConfigProperty, String> tablePropsToAdd) {
    String keyGenerator = tableConfig.getKeyGeneratorClassName();
    if (StringUtils.nonEmpty(keyGenerator)) {
      tablePropsToAdd.put(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME, keyGenerator);
      tablePropsToAdd.put(HoodieTableConfig.KEY_GENERATOR_TYPE, KeyGeneratorType.fromClassName(keyGenerator).name());
    }
  }

  /**
   * Copies the actions read by {@link LegacyArchivedMetaEntryReader} into the LSM timeline under
   * {@code <meta path>/timeline/history}, in batches of {@link HoodieWriteConfig#getCommitArchivalBatchSize()}.
   * Compaction and cleaning of the LSM timeline is triggered after every written batch.
   *
   * <p>Any failure is rethrown as a {@link HoodieException} when {@link HoodieWriteConfig#isFailOnTimelineArchivingEnabled()}
   * is set; otherwise it is logged as a warning (including the cause) and the method returns normally.
   *
   * @param table         table whose archived timeline is converted
   * @param engineContext engine context used for compaction and cleaning of the LSM timeline
   * @param config        write config providing the batch size and the failure policy
   * @throws IllegalStateException presumably, via {@link ValidationUtils#checkState}, if the table declares a timeline
   *                               layout version other than 1
   */
  static void upgradeToLSMTimeline(HoodieTable table, HoodieEngineContext engineContext, HoodieWriteConfig config) {
    table.getMetaClient().getTableConfig().getTimelineLayoutVersion().ifPresent(
        timelineLayoutVersion -> ValidationUtils.checkState(TimelineLayoutVersion.LAYOUT_VERSION_1.equals(timelineLayoutVersion),
            "Upgrade to LSM timeline is only supported for layout version 1. Given version: " + timelineLayoutVersion));
    try {
      LegacyArchivedMetaEntryReader reader = new LegacyArchivedMetaEntryReader(table.getMetaClient());
      StoragePath archivePath = new StoragePath(table.getMetaClient().getMetaPath(), "timeline/history");
      LSMTimelineWriter lsmTimelineWriter = LSMTimelineWriter.getInstance(config, table, Option.of(archivePath));
      int batchSize = config.getCommitArchivalBatchSize();
      List<ActiveAction> activeActionsBatch = new ArrayList<>(batchSize);
      try (ClosableIterator<ActiveAction> iterator = reader.getActiveActionsIterator()) {
        while (iterator.hasNext()) {
          activeActionsBatch.add(iterator.next());
          // If the batch is full, write it to the LSM timeline
          if (activeActionsBatch.size() == batchSize) {
            // Hand over a copy because the batch list is reused for the next round.
            lsmTimelineWriter.write(new ArrayList<>(activeActionsBatch), Option.empty(), Option.empty());
            lsmTimelineWriter.compactAndClean(engineContext);
            activeActionsBatch.clear();
          }
        }

        // Write any remaining actions in the final batch
        if (!activeActionsBatch.isEmpty()) {
          lsmTimelineWriter.write(new ArrayList<>(activeActionsBatch), Option.empty(), Option.empty());
          lsmTimelineWriter.compactAndClean(engineContext);
        }
      }
    } catch (Exception e) {
      if (config.isFailOnTimelineArchivingEnabled()) {
        throw new HoodieException("Failed to upgrade to LSM timeline", e);
      } else {
        // Best-effort mode: keep going, but do not lose the cause of the failure.
        LOG.warn("Failed to upgrade to LSM timeline", e);
      }
    }
  }

  /**
   * Computes the version 8 file name of an active timeline instant and rewrites the instant file accordingly.
   *
   * <p>For completed instants the requested time in the file name is expanded to
   * {@code <requested time>_<completion time>}. For pending instants whose action is a key of
   * {@code SIX_TO_EIGHT_TIMELINE_ACTION_MAP}, the action in the file name is replaced by the mapped action.
   *
   * @param instant               instant to upgrade
   * @param originalFileName      current file name of the instant
   * @param metaClient            meta client of the table
   * @param commitMetadataSerDeV1 deserializer for the existing commit metadata
   * @param commitMetadataSerDeV2 serializer for the rewritten commit metadata
   * @param activeTimelineV2      timeline used to create the rewritten file
   * @return {@code true} if the instant file was rewritten; {@code false} if an {@link IOException} occurred,
   *         in which case the failure is only logged
   */
  static boolean upgradeActiveTimelineInstant(HoodieInstant instant, String originalFileName, HoodieTableMetaClient metaClient, CommitMetadataSerDeV1 commitMetadataSerDeV1,
                                              CommitMetadataSerDeV2 commitMetadataSerDeV2, ActiveTimelineV2 activeTimelineV2) {
    String replacedFileName = originalFileName;
    boolean isCompleted = instant.isCompleted();
    // Rename the metadata file name from the ${instant_time}.action[.state] format in version 0.x
    // to the ${instant_time}_${completion_time}.action[.state] format in version 1.x.
    if (isCompleted) {
      String completionTime = instant.getCompletionTime(); // this is the file modification time
      String startTime = instant.requestedTime();
      replacedFileName = replacedFileName.replace(startTime, startTime + UNDERSCORE + completionTime);
    }
    // Rename the action if necessary (e.g., REPLACE_COMMIT_ACTION to CLUSTERING_ACTION).
    // NOTE: New action names were only applied for pending instants. Completed instants do not have any change in action names.
    if (SIX_TO_EIGHT_TIMELINE_ACTION_MAP.containsKey(instant.getAction()) && !isCompleted) {
      replacedFileName = replacedFileName.replace(instant.getAction(), SIX_TO_EIGHT_TIMELINE_ACTION_MAP.get(instant.getAction()));
    }
    try {
      return rewriteTimelineV1InstantFileToV2Format(instant, metaClient, originalFileName, replacedFileName, commitMetadataSerDeV1, commitMetadataSerDeV2, activeTimelineV2);
    } catch (IOException e) {
      // Passing the exception as the trailing argument keeps the stack trace in the log.
      LOG.warn("Can not to complete the upgrade from version seven to version eight. The reason for failure is {}", e.getMessage(), e);
      LOG.warn("Instant file {} of instant {} was not upgraded", originalFileName, instant);
    }
    return false;
  }

  /**
   * Moves one instant file from the timeline layout V1 location to the timeline layout V2 location.
   *
   * <p>Commit and delta commit instants, as well as completed replace commit instants, have their metadata
   * deserialized with {@code commitMetadataSerDeV1} and serialized again with {@code commitMetadataSerDeV2} into
   * the new file, after which the old file is deleted. Every other instant file is renamed as is.
   *
   * @param instant               instant whose file is rewritten
   * @param metaClient            meta client of the table
   * @param originalFileName      file name in the V1 timeline path
   * @param replacedFileName      file name to use in the V2 timeline path
   * @param commitMetadataSerDeV1 deserializer for the existing commit metadata
   * @param commitMetadataSerDeV2 serializer for the rewritten commit metadata
   * @param activeTimelineV2      timeline used to create the rewritten file
   * @return always {@code true}; failures are reported through exceptions
   * @throws IOException       if a storage operation fails
   * @throws HoodieIOException if the rename reports failure
   */
  static boolean rewriteTimelineV1InstantFileToV2Format(HoodieInstant instant, HoodieTableMetaClient metaClient, String originalFileName, String replacedFileName,
                                                        CommitMetadataSerDeV1 commitMetadataSerDeV1, CommitMetadataSerDeV2 commitMetadataSerDeV2, ActiveTimelineV2 activeTimelineV2)
      throws IOException {
    StoragePath fromPath = new StoragePath(TIMELINE_LAYOUT_V1.getTimelinePathProvider().getTimelinePath(metaClient.getTableConfig(), metaClient.getBasePath()), originalFileName);
    StoragePath toPath = new StoragePath(TIMELINE_LAYOUT_V2.getTimelinePathProvider().getTimelinePath(metaClient.getTableConfig(), metaClient.getBasePath()), replacedFileName);
    String action = instant.getAction();
    boolean isReplaceCommit = action.equals(REPLACE_COMMIT_ACTION);
    boolean needsMetadataRewrite = action.equals(COMMIT_ACTION) || action.equals(DELTA_COMMIT_ACTION) || (isReplaceCommit && instant.isCompleted());
    if (needsMetadataRewrite) {
      Class<? extends HoodieCommitMetadata> clazz = isReplaceCommit ? HoodieReplaceCommitMetadata.class : HoodieCommitMetadata.class;
      HoodieCommitMetadata commitMetadata = commitMetadataSerDeV1.deserialize(instant, metaClient.getActiveTimeline().getInstantDetails(instant).get(), clazz);
      Option<byte[]> data = commitMetadataSerDeV2.serialize(commitMetadata);
      String toPathStr = toPath.toUri().toString();
      activeTimelineV2.createFileInMetaPath(toPathStr, data, true);
      // The new file is in place, so the old one can be removed.
      metaClient.getStorage().deleteFile(fromPath);
    } else if (!metaClient.getStorage().rename(fromPath, toPath)) {
      throw new HoodieIOException("an error that occurred while renaming " + fromPath + " to: " + toPath);
    }
    return true;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy