// org.apache.hudi.cli.commands.DiffCommand (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.cli.commands;
import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.utils.CLIUtils;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.InstantComparator;
import org.apache.hudi.common.table.timeline.TimelineLayout;
import org.apache.hudi.common.util.NumericUtils;
import org.apache.hudi.common.util.Option;
import org.springframework.shell.standard.ShellComponent;
import org.springframework.shell.standard.ShellMethod;
import org.springframework.shell.standard.ShellOption;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
* Given a file id or partition value, this command line utility tracks the changes to the file group or partition across range of commits.
* Usage: diff file --fileId
*/
@ShellComponent
public class DiffCommand {
private static final BiFunction FILE_ID_CHECKER = (writeStat, fileId) -> fileId.equals(writeStat.getFileId());
private static final BiFunction PARTITION_CHECKER = (writeStat, partitionPath) -> partitionPath.equals(writeStat.getPartitionPath());
@ShellMethod(key = "diff file", value = "Check how file differs across range of commits")
public String diffFile(
@ShellOption(value = {"--fileId"}, help = "File ID to diff across range of commits") String fileId,
@ShellOption(value = {"--startTs"}, help = "start time for compactions, default: now - 10 days",
defaultValue = ShellOption.NULL) String startTs,
@ShellOption(value = {"--endTs"}, help = "end time for compactions, default: now - 1 day",
defaultValue = ShellOption.NULL) String endTs,
@ShellOption(value = {"--limit"}, help = "Limit compactions", defaultValue = "-1") final Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only", defaultValue = "false") final boolean headerOnly,
@ShellOption(value = {"--includeArchivedTimeline"}, help = "Include archived commits as well",
defaultValue = "false") final boolean includeArchivedTimeline) throws IOException {
HoodieTimeline timeline = CLIUtils.getTimelineInRange(startTs, endTs, includeArchivedTimeline);
return printCommitsWithMetadataForFileId(timeline, limit, sortByField, descending, headerOnly, "", fileId);
}
@ShellMethod(key = "diff partition", value = "Check how file differs across range of commits. It is meant to be used only for partitioned tables.")
public String diffPartition(
@ShellOption(value = {"--partitionPath"}, help = "Relative partition path to diff across range of commits") String partitionPath,
@ShellOption(value = {"--startTs"}, help = "start time for compactions, default: now - 10 days",
defaultValue = ShellOption.NULL) String startTs,
@ShellOption(value = {"--endTs"}, help = "end time for compactions, default: now - 1 day",
defaultValue = ShellOption.NULL) String endTs,
@ShellOption(value = {"--limit"}, help = "Limit compactions", defaultValue = "-1") final Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only", defaultValue = "false") final boolean headerOnly,
@ShellOption(value = {"--includeArchivedTimeline"}, help = "Include archived commits as well",
defaultValue = "false") final boolean includeArchivedTimeline) throws IOException {
HoodieTimeline timeline = CLIUtils.getTimelineInRange(startTs, endTs, includeArchivedTimeline);
return printCommitsWithMetadataForPartition(timeline, limit, sortByField, descending, headerOnly, "", partitionPath);
}
private String printCommitsWithMetadataForFileId(HoodieTimeline timeline,
final Integer limit,
final String sortByField,
final boolean descending,
final boolean headerOnly,
final String tempTableName,
final String fileId) throws IOException {
return printDiffWithMetadata(timeline, limit, sortByField, descending, headerOnly, tempTableName, fileId, FILE_ID_CHECKER);
}
private String printCommitsWithMetadataForPartition(HoodieTimeline timeline,
final Integer limit,
final String sortByField,
final boolean descending,
final boolean headerOnly,
final String tempTableName,
final String partition) throws IOException {
return printDiffWithMetadata(timeline, limit, sortByField, descending, headerOnly, tempTableName, partition, PARTITION_CHECKER);
}
private String printDiffWithMetadata(HoodieTimeline timeline, Integer limit, String sortByField, boolean descending, boolean headerOnly, String tempTableName, String diffEntity,
BiFunction diffEntityChecker) throws IOException {
TimelineLayout layout = TimelineLayout.fromVersion(timeline.getTimelineLayoutVersion());
List rows = new ArrayList<>();
InstantComparator instantComparator = HoodieCLI.getTableMetaClient().getTimelineLayout().getInstantComparator();
List commits = timeline.getCommitsTimeline().filterCompletedInstants()
.getInstantsAsStream().sorted(instantComparator.requestedTimeOrderedComparator().reversed()).collect(Collectors.toList());
for (final HoodieInstant commit : commits) {
Option instantDetails = timeline.getInstantDetails(commit);
if (instantDetails.isPresent()) {
HoodieCommitMetadata commitMetadata = layout.getCommitMetadataSerDe().deserialize(commit, instantDetails.get(), HoodieCommitMetadata.class);
for (Map.Entry> partitionWriteStat :
commitMetadata.getPartitionToWriteStats().entrySet()) {
for (HoodieWriteStat hoodieWriteStat : partitionWriteStat.getValue()) {
populateRows(rows, commit, hoodieWriteStat, diffEntity, diffEntityChecker);
}
}
}
}
Map> fieldNameToConverterMap = new HashMap<>();
fieldNameToConverterMap.put(
HoodieTableHeaderFields.HEADER_TOTAL_BYTES_WRITTEN,
entry -> NumericUtils.humanReadableByteCount((Double.parseDouble(entry.toString()))));
return HoodiePrintHelper.print(HoodieTableHeaderFields.getTableHeaderWithExtraMetadata(),
fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows, tempTableName);
}
private void populateRows(List rows, HoodieInstant commit, HoodieWriteStat hoodieWriteStat,
String value, BiFunction checker) {
if (checker.apply(hoodieWriteStat, value)) {
rows.add(new Comparable[] {
commit.getAction(),
commit.requestedTime(),
hoodieWriteStat.getPartitionPath(),
hoodieWriteStat.getFileId(),
hoodieWriteStat.getPrevCommit(),
hoodieWriteStat.getNumWrites(),
hoodieWriteStat.getNumInserts(),
hoodieWriteStat.getNumDeletes(),
hoodieWriteStat.getNumUpdateWrites(),
hoodieWriteStat.getTotalWriteErrors(),
hoodieWriteStat.getTotalLogBlocks(),
hoodieWriteStat.getTotalCorruptLogBlock(),
hoodieWriteStat.getTotalRollbackBlocks(),
hoodieWriteStat.getTotalLogRecords(),
hoodieWriteStat.getTotalUpdatedRecordsCompacted(),
hoodieWriteStat.getTotalWriteBytes()
});
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy