All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.cli.commands.CommitsCommand Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.cli.commands;

import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.TableHeader;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.InstantComparator;
import org.apache.hudi.common.table.timeline.TimelineUtils;
import org.apache.hudi.common.util.NumericUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;

import org.springframework.shell.standard.ShellComponent;
import org.springframework.shell.standard.ShellMethod;
import org.springframework.shell.standard.ShellOption;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

import static org.apache.hudi.cli.utils.CommitUtil.getTimeDaysAgo;
import static org.apache.hudi.common.table.timeline.InstantComparison.GREATER_THAN;
import static org.apache.hudi.common.table.timeline.InstantComparison.compareTimestamps;
import static org.apache.hudi.common.table.timeline.TimelineUtils.getTimeline;

/**
 * CLI command to display commits options.
 */
@ShellComponent
public class CommitsCommand {

  private String printCommits(HoodieTimeline timeline,
                              final Integer limit,
                              final String sortByField,
                              final boolean descending,
                              final boolean headerOnly,
                              final String tempTableName) throws IOException {
    final List rows = new ArrayList<>();
    InstantComparator instantComparator = HoodieCLI.getTableMetaClient().getTimelineLayout().getInstantComparator();

    final List commits = timeline.getCommitsTimeline().filterCompletedInstants()
        .getInstantsAsStream().sorted(instantComparator.requestedTimeOrderedComparator().reversed()).collect(Collectors.toList());

    for (final HoodieInstant commit : commits) {
      if (timeline.getInstantDetails(commit).isPresent()) {
        final HoodieCommitMetadata commitMetadata = HoodieCLI.getTableMetaClient().getCommitMetadataSerDe().deserialize(
            commit,
            timeline.getInstantDetails(commit).get(),
            HoodieCommitMetadata.class);
        rows.add(new Comparable[] {commit.requestedTime(),
            commitMetadata.fetchTotalBytesWritten(),
            commitMetadata.fetchTotalFilesInsert(),
            commitMetadata.fetchTotalFilesUpdated(),
            commitMetadata.fetchTotalPartitionsWritten(),
            commitMetadata.fetchTotalRecordsWritten(),
            commitMetadata.fetchTotalUpdateRecordsWritten(),
            commitMetadata.fetchTotalWriteErrors()});
      }
    }

    final Map> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put(
        HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN,
        entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString()))));

    final TableHeader header = HoodieTableHeaderFields.getTableHeader();

    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending,
        limit, headerOnly, rows, tempTableName);
  }

  private String printCommitsWithMetadata(HoodieTimeline timeline,
                                          final Integer limit, final String sortByField,
                                          final boolean descending,
                                          final boolean headerOnly,
                                          final String tempTableName,
                                          final String partition) throws IOException {
    final List rows = new ArrayList<>();
    InstantComparator instantComparator = HoodieCLI.getTableMetaClient().getTimelineLayout().getInstantComparator();

    final List commits = timeline.getCommitsTimeline().filterCompletedInstants()
        .getInstantsAsStream().sorted(instantComparator.requestedTimeOrderedComparator().reversed()).collect(Collectors.toList());

    for (final HoodieInstant commit : commits) {
      if (timeline.getInstantDetails(commit).isPresent()) {
        final HoodieCommitMetadata commitMetadata = HoodieCLI.getTableMetaClient().getCommitMetadataSerDe().deserialize(
            commit,
            timeline.getInstantDetails(commit).get(),
            HoodieCommitMetadata.class);

        for (Map.Entry> partitionWriteStat :
            commitMetadata.getPartitionToWriteStats().entrySet()) {
          for (HoodieWriteStat hoodieWriteStat : partitionWriteStat.getValue()) {
            if (StringUtils.isNullOrEmpty(partition) || partition.equals(hoodieWriteStat.getPartitionPath())) {
              rows.add(new Comparable[] {commit.getAction(), commit.requestedTime(), hoodieWriteStat.getPartitionPath(),
                  hoodieWriteStat.getFileId(), hoodieWriteStat.getPrevCommit(), hoodieWriteStat.getNumWrites(),
                  hoodieWriteStat.getNumInserts(), hoodieWriteStat.getNumDeletes(),
                  hoodieWriteStat.getNumUpdateWrites(), hoodieWriteStat.getTotalWriteErrors(),
                  hoodieWriteStat.getTotalLogBlocks(), hoodieWriteStat.getTotalCorruptLogBlock(),
                  hoodieWriteStat.getTotalRollbackBlocks(), hoodieWriteStat.getTotalLogRecords(),
                  hoodieWriteStat.getTotalUpdatedRecordsCompacted(), hoodieWriteStat.getTotalWriteBytes()
              });
            }
          }
        }
      }
    }

    final Map> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put(
        HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN,
        entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString()))));

    return HoodiePrintHelper.print(HoodieTableHeaderFields.getTableHeaderWithExtraMetadata(),
        fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows, tempTableName);
  }

  @ShellMethod(key = "commits show", value = "Show the commits")
  public String showCommits(
      @ShellOption(value = {"--includeExtraMetadata"}, help = "Include extra metadata",
          defaultValue = "false") final boolean includeExtraMetadata,
      @ShellOption(value = {"--createView"}, help = "view name to store output table",
              defaultValue = "") final String exportTableName,
      @ShellOption(value = {"--limit"}, help = "Limit commits",
              defaultValue = "-1") final Integer limit,
      @ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
      @ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
      @ShellOption(value = {"--headeronly"}, help = "Print Header Only",
              defaultValue = "false") final boolean headerOnly,
      @ShellOption(value = {"--partition"}, help = "Partition value", defaultValue = ShellOption.NULL) final String partition,
      @ShellOption(value = {"--includeArchivedTimeline"}, help = "Include archived commits as well",
              defaultValue = "false") final boolean includeArchivedTimeline)
      throws IOException {

    HoodieTimeline timeline = getTimeline(HoodieCLI.getTableMetaClient(), includeArchivedTimeline);
    if (includeExtraMetadata) {
      return printCommitsWithMetadata(timeline, limit, sortByField, descending, headerOnly, exportTableName, partition);
    } else {
      return printCommits(timeline, limit, sortByField, descending, headerOnly, exportTableName);
    }
  }

  @ShellMethod(key = "commits showarchived", value = "Show the archived commits")
  public String showArchivedCommits(
      @ShellOption(value = {"--includeExtraMetadata"}, help = "Include extra metadata",
          defaultValue = "false") final boolean includeExtraMetadata,
      @ShellOption(value = {"--createView"}, help = "view name to store output table",
          defaultValue = "") final String exportTableName,
      @ShellOption(value = {"--startTs"}, defaultValue = ShellOption.NULL, help = "start time for commits, default: now - 10 days")
          String startTs,
      @ShellOption(value = {"--endTs"}, defaultValue = ShellOption.NULL, help = "end time for commits, default: now - 1 day")
          String endTs,
      @ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") final Integer limit,
      @ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
      @ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
      @ShellOption(value = {"--headeronly"}, help = "Print Header Only", defaultValue = "false") final boolean headerOnly,
      @ShellOption(value = {"--partition"}, help = "Partition value", defaultValue = ShellOption.NULL) final String partition)
      throws IOException {
    if (StringUtils.isNullOrEmpty(startTs)) {
      startTs = getTimeDaysAgo(10);
    }
    if (StringUtils.isNullOrEmpty(endTs)) {
      endTs = getTimeDaysAgo(1);
    }
    HoodieArchivedTimeline archivedTimeline = HoodieCLI.getTableMetaClient().getArchivedTimeline();
    try {
      archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
      HoodieTimeline timelineRange = (HoodieTimeline)archivedTimeline.findInstantsInRange(startTs, endTs);
      if (includeExtraMetadata) {
        return printCommitsWithMetadata(timelineRange, limit, sortByField, descending, headerOnly, exportTableName, partition);
      } else {
        return printCommits(timelineRange, limit, sortByField, descending, headerOnly, exportTableName);
      }
    } finally {
      // clear the instant details from memory after printing to reduce usage
      archivedTimeline.clearInstantDetailsFromMemory(startTs, endTs);
    }
  }

  @ShellMethod(key = "commit showpartitions", value = "Show partition level details of a commit")
  public String showCommitPartitions(
      @ShellOption(value = {"--createView"}, help = "view name to store output table",
          defaultValue = "") final String exportTableName,
      @ShellOption(value = {"--commit"}, help = "Commit to show") final String instantTime,
      @ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") final Integer limit,
      @ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
      @ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
      @ShellOption(value = {"--headeronly"}, help = "Print Header Only",
          defaultValue = "false") final boolean headerOnly,
      @ShellOption(value = {"--includeArchivedTimeline"}, help = "Include archived commits as well",
              defaultValue = "false") final boolean includeArchivedTimeline)
      throws Exception {

    HoodieTimeline defaultTimeline = getTimeline(HoodieCLI.getTableMetaClient(), includeArchivedTimeline);
    HoodieTimeline timeline = defaultTimeline.getCommitsTimeline().filterCompletedInstants();

    Option hoodieInstantOption = getCommitForInstant(timeline, instantTime);
    Option commitMetadataOptional = getHoodieCommitMetadata(timeline, hoodieInstantOption);

    if (!commitMetadataOptional.isPresent()) {
      return "Commit " + instantTime + " not found in Commits " + timeline;
    }

    HoodieCommitMetadata meta = commitMetadataOptional.get();
    List rows = new ArrayList<>();
    for (Map.Entry> entry : meta.getPartitionToWriteStats().entrySet()) {
      String action = hoodieInstantOption.get().getAction();
      String path = entry.getKey();
      List stats = entry.getValue();
      long totalFilesAdded = 0;
      long totalFilesUpdated = 0;
      long totalRecordsUpdated = 0;
      long totalRecordsInserted = 0;
      long totalBytesWritten = 0;
      long totalWriteErrors = 0;
      for (HoodieWriteStat stat : stats) {
        if (stat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) {
          totalFilesAdded += 1;
        } else {
          totalFilesUpdated += 1;
          totalRecordsUpdated += stat.getNumUpdateWrites();
        }
        totalRecordsInserted += stat.getNumInserts();
        totalBytesWritten += stat.getTotalWriteBytes();
        totalWriteErrors += stat.getTotalWriteErrors();
      }
      rows.add(new Comparable[] {action, path, totalFilesAdded, totalFilesUpdated, totalRecordsInserted, totalRecordsUpdated,
          totalBytesWritten, totalWriteErrors});
    }

    Map> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN, entry ->
        NumericUtils.humanReadableByteCount((Long.parseLong(entry.toString()))));

    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_ADDED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_UPDATED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_INSERTED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS);

    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending,
        limit, headerOnly, rows, exportTableName);
  }

  @ShellMethod(key = "commit show_write_stats", value = "Show write stats of a commit")
  public String showWriteStats(
      @ShellOption(value = {"--createView"}, help = "view name to store output table",
          defaultValue = "") final String exportTableName,
      @ShellOption(value = {"--commit"}, help = "Commit to show") final String instantTime,
      @ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") final Integer limit,
      @ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
      @ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
      @ShellOption(value = {"--headeronly"}, help = "Print Header Only",
          defaultValue = "false") final boolean headerOnly,
      @ShellOption(value = {"--includeArchivedTimeline"}, help = "Include archived commits as well",
              defaultValue = "false") final boolean includeArchivedTimeline)
      throws Exception {

    HoodieTimeline defaultTimeline = getTimeline(HoodieCLI.getTableMetaClient(), includeArchivedTimeline);
    HoodieTimeline timeline = defaultTimeline.getCommitsTimeline().filterCompletedInstants();

    Option hoodieInstantOption = getCommitForInstant(timeline, instantTime);
    Option commitMetadataOptional = getHoodieCommitMetadata(timeline, hoodieInstantOption);

    if (!commitMetadataOptional.isPresent()) {
      return "Commit " + instantTime + " not found in Commits " + timeline;
    }

    HoodieCommitMetadata meta = commitMetadataOptional.get();

    String action = hoodieInstantOption.get().getAction();
    long recordsWritten = meta.fetchTotalRecordsWritten();
    long bytesWritten = meta.fetchTotalBytesWritten();
    long avgRecSize = (long) Math.ceil((1.0 * bytesWritten) / recordsWritten);
    List rows = new ArrayList<>();
    rows.add(new Comparable[] {action, bytesWritten, recordsWritten, avgRecSize});

    Map> fieldNameToConverterMap = new HashMap<>();
    fieldNameToConverterMap.put(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN, entry ->
        NumericUtils.humanReadableByteCount((Long.parseLong(entry.toString()))));

    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN_COMMIT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_WRITTEN_COMMIT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_AVG_REC_SIZE_COMMIT);

    return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending,
        limit, headerOnly, rows, exportTableName);
  }

  @ShellMethod(key = "commit showfiles", value = "Show file level details of a commit")
  public String showCommitFiles(
      @ShellOption(value = {"--createView"}, help = "view name to store output table",
          defaultValue = "") final String exportTableName,
      @ShellOption(value = {"--commit"}, help = "Commit to show") final String instantTime,
      @ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") final Integer limit,
      @ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
      @ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
      @ShellOption(value = {"--headeronly"}, help = "Print Header Only",
          defaultValue = "false") final boolean headerOnly,
      @ShellOption(value = {"--includeArchivedTimeline"}, help = "Include archived commits as well",
              defaultValue = "false") final boolean includeArchivedTimeline)
      throws Exception {

    HoodieTimeline defaultTimeline = getTimeline(HoodieCLI.getTableMetaClient(), includeArchivedTimeline);
    HoodieTimeline timeline = defaultTimeline.getCommitsTimeline().filterCompletedInstants();

    Option hoodieInstantOption = getCommitForInstant(timeline, instantTime);
    Option commitMetadataOptional = getHoodieCommitMetadata(timeline, hoodieInstantOption);

    if (!commitMetadataOptional.isPresent()) {
      return "Commit " + instantTime + " not found in Commits " + timeline;
    }

    HoodieCommitMetadata meta = commitMetadataOptional.get();
    List rows = new ArrayList<>();
    for (Map.Entry> entry : meta.getPartitionToWriteStats().entrySet()) {
      String action = hoodieInstantOption.get().getAction();
      String path = entry.getKey();
      List stats = entry.getValue();
      for (HoodieWriteStat stat : stats) {
        rows.add(new Comparable[] {action, path, stat.getFileId(), stat.getPrevCommit(), stat.getNumUpdateWrites(),
            stat.getNumWrites(), stat.getTotalWriteBytes(), stat.getTotalWriteErrors(), stat.getFileSizeInBytes()});
      }
    }

    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_ACTION)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PREVIOUS_COMMIT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_UPDATED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_RECORDS_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_ERRORS)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_SIZE);

    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending,
        limit, headerOnly, rows, exportTableName);
  }

  @ShellMethod(key = "commits compare", value = "Compare commits with another Hoodie table")
  public String compareCommits(@ShellOption(value = {"--path"}, help = "Path of the table to compare to") final String path) {

    HoodieTableMetaClient source = HoodieCLI.getTableMetaClient();
    HoodieTableMetaClient target = HoodieTableMetaClient.builder()
        .setConf(HoodieCLI.conf.newInstance()).setBasePath(path).build();
    HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
    HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
    String targetLatestCommit =
        targetTimeline.getInstantsAsStream().iterator().hasNext() ? targetTimeline.lastInstant().get().requestedTime() : "0";
    String sourceLatestCommit =
        sourceTimeline.getInstantsAsStream().iterator().hasNext() ? sourceTimeline.lastInstant().get().requestedTime() : "0";

    if (sourceLatestCommit != null
        && compareTimestamps(targetLatestCommit, GREATER_THAN, sourceLatestCommit)) {
      // source is behind the target
      List commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
          .getInstantsAsStream().map(HoodieInstant::requestedTime).collect(Collectors.toList());
      return "Source " + source.getTableConfig().getTableName() + " is behind by " + commitsToCatchup.size()
          + " commits. Commits to catch up - " + commitsToCatchup;
    } else {
      List commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
          .getInstantsAsStream().map(HoodieInstant::requestedTime).collect(Collectors.toList());
      return "Source " + source.getTableConfig().getTableName() + " is ahead by " + commitsToCatchup.size()
          + " commits. Commits to catch up - " + commitsToCatchup;
    }
  }

  @ShellMethod(key = "commits sync", value = "Sync commits with another Hoodie table")
  public String syncCommits(@ShellOption(value = {"--path"}, help = "Path of the table to sync to") final String path) {
    HoodieCLI.syncTableMetadata = HoodieTableMetaClient.builder()
        .setConf(HoodieCLI.conf.newInstance()).setBasePath(path).build();
    HoodieCLI.state = HoodieCLI.CLIState.SYNC;
    return "Load sync state between " + HoodieCLI.getTableMetaClient().getTableConfig().getTableName() + " and "
        + HoodieCLI.syncTableMetadata.getTableConfig().getTableName();
  }

  /*
  Checks whether a commit or replacecommit action exists in the timeline.
  * */
  private Option getCommitForInstant(HoodieTimeline timeline, String instantTime) {
    List instants = Arrays.asList(
        HoodieCLI.getTableMetaClient().createNewInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, instantTime),
        HoodieCLI.getTableMetaClient().createNewInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.REPLACE_COMMIT_ACTION, instantTime),
        HoodieCLI.getTableMetaClient().createNewInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, instantTime));

    return Option.fromJavaOptional(instants.stream().filter(timeline::containsInstant).findAny());
  }

  private Option getHoodieCommitMetadata(HoodieTimeline timeline, Option hoodieInstant) throws IOException {
    if (hoodieInstant.isPresent()) {
      return Option.of(TimelineUtils.getCommitMetadata(hoodieInstant.get(), timeline));
    }
    return Option.empty();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy