/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.cli.commands;
import org.apache.hudi.avro.model.HoodieCompactionOperation;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.TableHeader;
import org.apache.hudi.cli.commands.SparkMain.SparkCommand;
import org.apache.hudi.cli.utils.InputStreamConsumer;
import org.apache.hudi.cli.utils.SparkUtil;
import org.apache.hudi.client.CompactionAdminClient.RenameOpResult;
import org.apache.hudi.client.CompactionAdminClient.ValidationOpResult;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.InstantGenerator;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.table.action.compact.OperationResult;
import org.apache.hudi.utilities.UtilHelpers;
import org.apache.spark.launcher.SparkLauncher;
import org.apache.spark.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.shell.standard.ShellComponent;
import org.springframework.shell.standard.ShellMethod;
import org.springframework.shell.standard.ShellOption;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.cli.utils.CommitUtil.getTimeDaysAgo;
import static org.apache.hudi.util.JavaScalaConverters.convertJavaPropertiesToScalaMap;
/**
* CLI commands to schedule, run, validate, unschedule, repair and inspect compactions.
*/
@ShellComponent
public class CompactionCommand {
private static final Logger LOG = LoggerFactory.getLogger(CompactionCommand.class);
private static final String TMP_DIR = "/tmp/";
public static final String COMPACTION_SCH_SUCCESSFUL = "Attempted to schedule compaction for ";
public static final String COMPACTION_EXE_SUCCESSFUL = "Compaction successfully completed for ";
public static final String COMPACTION_SCH_EXE_SUCCESSFUL = "Schedule and execute compaction successfully completed";
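/**
 * Fetches the table meta client from {@link HoodieCLI} and verifies that the table is MERGE_ON_READ,
 * since compaction only applies to merge-on-read tables.
 */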
private HoodieTableMetaClient checkAndGetMetaClient() {
HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
if (client.getTableType() != HoodieTableType.MERGE_ON_READ) {
throw new HoodieException("Compactions can only be run for table type : MERGE_ON_READ");
}
return client;
}
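/**
 * Lists all compactions (requested, inflight or completed) found in the active timeline.
 * Example (illustrative values): compactions show all --includeExtraMetadata true --limit 10
 */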
@ShellMethod(key = "compactions show all", value = "Shows all compactions that are in active timeline")
public String compactionsAll(
@ShellOption(value = {"--includeExtraMetadata"}, help = "Include extra metadata",
defaultValue = "false") final boolean includeExtraMetadata,
@ShellOption(value = {"--limit"}, help = "Limit commits",
defaultValue = "-1") final Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") final boolean headerOnly) {
HoodieTableMetaClient client = checkAndGetMetaClient();
HoodieActiveTimeline activeTimeline = client.getActiveTimeline();
return printAllCompactions(activeTimeline,
compactionPlanReader(this::readCompactionPlanForActiveTimeline, activeTimeline),
includeExtraMetadata, sortByField, descending, limit, headerOnly);
}
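/**
 * Prints the operations of the compaction plan for a single instant on the active timeline,
 * optionally filtered to one partition.
 * Example (illustrative values): compaction show --instant [requested instant time] --partition [partition path]
 */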
@ShellMethod(key = "compaction show", value = "Shows compaction details for a specific compaction instant")
public String compactionShow(
@ShellOption(value = "--instant",
help = "Base path for the target hoodie table") final String compactionInstantTime,
@ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") final Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") final boolean headerOnly,
@ShellOption(value = {"--partition"}, help = "Partition value", defaultValue = ShellOption.NULL) final String partition)
throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
HoodieActiveTimeline activeTimeline = client.getActiveTimeline();
InstantGenerator instantGenerator = client.getInstantGenerator();
HoodieCompactionPlan compactionPlan =
activeTimeline.readCompactionPlan(instantGenerator.getCompactionRequestedInstant(compactionInstantTime));
return printCompaction(compactionPlan, sortByField, descending, limit, headerOnly, partition);
}
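/**
 * Lists compactions from the archived timeline within a time window; the window defaults to
 * 10 days ago through 1 day ago. Compaction details are loaded into memory for the window
 * and cleared again once the result has been rendered.
 */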
@ShellMethod(key = "compactions showarchived", value = "Shows compaction details for specified time window")
public String compactionsShowArchived(
@ShellOption(value = {"--includeExtraMetadata"}, help = "Include extra metadata",
defaultValue = "false") final boolean includeExtraMetadata,
@ShellOption(value = {"--startTs"}, defaultValue = ShellOption.NULL,
help = "start time for compactions, default: now - 10 days") String startTs,
@ShellOption(value = {"--endTs"}, defaultValue = ShellOption.NULL,
help = "end time for compactions, default: now - 1 day") String endTs,
@ShellOption(value = {"--limit"}, help = "Limit compactions", defaultValue = "-1") final Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") final boolean headerOnly) {
if (StringUtils.isNullOrEmpty(startTs)) {
startTs = getTimeDaysAgo(10);
}
if (StringUtils.isNullOrEmpty(endTs)) {
endTs = getTimeDaysAgo(1);
}
HoodieTableMetaClient client = checkAndGetMetaClient();
HoodieArchivedTimeline archivedTimeline = client.getArchivedTimeline();
archivedTimeline.loadCompactionDetailsInMemory(startTs, endTs);
try {
return printAllCompactions(archivedTimeline,
compactionPlanReader(this::readCompactionPlanForArchivedTimeline, archivedTimeline),
includeExtraMetadata, sortByField, descending, limit, headerOnly);
} finally {
archivedTimeline.clearInstantDetailsFromMemory(startTs, endTs);
}
}
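/**
 * Prints the compaction plan of a single compaction instant that has already been archived.
 * Example (illustrative values): compaction showarchived --instant [instant time] --limit 10
 */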
@ShellMethod(key = "compaction showarchived", value = "Shows compaction details for a specific compaction instant")
public String compactionShowArchived(
@ShellOption(value = "--instant", help = "instant time") final String compactionInstantTime,
@ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") final Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") final String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") final boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") final boolean headerOnly,
@ShellOption(value = {"--partition"}, help = "Partition value", defaultValue = ShellOption.NULL) final String partition)
throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
HoodieArchivedTimeline archivedTimeline = client.getArchivedTimeline();
HoodieInstant instant = client.createNewInstant(HoodieInstant.State.COMPLETED,
HoodieTimeline.COMPACTION_ACTION, compactionInstantTime);
try {
archivedTimeline.loadCompactionDetailsInMemory(compactionInstantTime);
HoodieCompactionPlan compactionPlan = archivedTimeline.readCompactionPlan(instant);
return printCompaction(compactionPlan, sortByField, descending, limit, headerOnly, partition);
} finally {
archivedTimeline.clearInstantDetailsFromMemory(compactionInstantTime);
}
}
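/**
 * Generates a new compaction instant time and launches a separate Spark application
 * (SparkCommand.COMPACT_SCHEDULE) that schedules a compaction plan for it.
 * Example (illustrative values): compaction schedule --sparkMemory 2G
 *   --hoodieConfigs hoodie.compact.inline.max.delta.commits=3
 */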
@ShellMethod(key = "compaction schedule", value = "Schedule Compaction")
public String scheduleCompact(
@ShellOption(value = "--sparkMemory", defaultValue = "1G",
help = "Spark executor memory") final String sparkMemory,
@ShellOption(value = "--propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting",
defaultValue = "") final String propsFilePath,
@ShellOption(value = "--hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array",
defaultValue = "") final String[] configs,
@ShellOption(value = "--sparkMaster", defaultValue = "local", help = "Spark Master") String master)
throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
// First get a compaction instant time and pass it to spark launcher for scheduling compaction
String compactionInstantTime = client.createNewInstantTime();
String sparkPropertiesPath =
Utils.getDefaultPropertiesFile(convertJavaPropertiesToScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
SparkMain.addAppArgs(sparkLauncher, SparkCommand.COMPACT_SCHEDULE, master, sparkMemory, HoodieCLI.basePath,
client.getTableConfig().getTableName(), compactionInstantTime, propsFilePath);
UtilHelpers.validateAndAddProperties(configs, sparkLauncher);
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to run compaction for " + compactionInstantTime;
}
return COMPACTION_SCH_SUCCESSFUL + compactionInstantTime;
}
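/**
 * Executes an already scheduled compaction through a separate Spark application
 * (SparkCommand.COMPACT_RUN). When --compactionInstant is not supplied, the earliest pending
 * compaction instant on the timeline is picked automatically.
 * Example (illustrative values): compaction run --compactionInstant [requested instant time] --parallelism 3
 */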
@ShellMethod(key = "compaction run", value = "Run Compaction for given instant time")
public String compact(
@ShellOption(value = {"--parallelism"}, defaultValue = "3",
help = "Parallelism for hoodie compaction") final String parallelism,
@ShellOption(value = "--schemaFilePath",
help = "Path for Avro schema file", defaultValue = "") final String schemaFilePath,
@ShellOption(value = "--sparkMaster", defaultValue = "local",
help = "Spark Master") String master,
@ShellOption(value = "--sparkMemory", defaultValue = "4G",
help = "Spark executor memory") final String sparkMemory,
@ShellOption(value = "--retry", defaultValue = "1", help = "Number of retries") final String retry,
@ShellOption(value = "--compactionInstant", help = "Instant of compaction.request",
defaultValue = ShellOption.NULL) String compactionInstantTime,
@ShellOption(value = "--propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting",
defaultValue = "") final String propsFilePath,
@ShellOption(value = "--hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array",
defaultValue = "") final String[] configs)
throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
if (null == compactionInstantTime) {
// pick outstanding one with lowest timestamp
Option<String> firstPendingInstant =
client.reloadActiveTimeline().filterCompletedAndCompactionInstants()
.filter(instant -> instant.getAction().equals(HoodieTimeline.COMPACTION_ACTION)).firstInstant()
.map(HoodieInstant::requestedTime);
if (!firstPendingInstant.isPresent()) {
return "NO PENDING COMPACTION TO RUN";
}
compactionInstantTime = firstPendingInstant.get();
}
String sparkPropertiesPath =
Utils.getDefaultPropertiesFile(convertJavaPropertiesToScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
SparkMain.addAppArgs(sparkLauncher, SparkCommand.COMPACT_RUN, master, sparkMemory, HoodieCLI.basePath,
client.getTableConfig().getTableName(), compactionInstantTime, parallelism, schemaFilePath,
retry, propsFilePath);
UtilHelpers.validateAndAddProperties(configs, sparkLauncher);
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to run compaction for " + compactionInstantTime;
}
return COMPACTION_EXE_SUCCESSFUL + compactionInstantTime;
}
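/**
 * Schedules a new compaction plan and executes it immediately in a single Spark application
 * (SparkCommand.COMPACT_SCHEDULE_AND_EXECUTE).
 */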
@ShellMethod(key = "compaction scheduleAndExecute", value = "Schedule compaction plan and execute this plan")
public String compact(
@ShellOption(value = {"--parallelism"}, defaultValue = "3",
help = "Parallelism for hoodie compaction") final String parallelism,
@ShellOption(value = "--schemaFilePath",
help = "Path for Avro schema file", defaultValue = ShellOption.NULL) final String schemaFilePath,
@ShellOption(value = "--sparkMaster", defaultValue = "local",
help = "Spark Master") String master,
@ShellOption(value = "--sparkMemory", defaultValue = "4G",
help = "Spark executor memory") final String sparkMemory,
@ShellOption(value = "--retry", defaultValue = "1", help = "Number of retries") final String retry,
@ShellOption(value = "--propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting",
defaultValue = "") final String propsFilePath,
@ShellOption(value = "--hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array",
defaultValue = "") final String[] configs)
throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
String sparkPropertiesPath =
Utils.getDefaultPropertiesFile(convertJavaPropertiesToScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
SparkMain.addAppArgs(sparkLauncher, SparkCommand.COMPACT_SCHEDULE_AND_EXECUTE, master, sparkMemory, HoodieCLI.basePath,
client.getTableConfig().getTableName(), parallelism, schemaFilePath,
retry, propsFilePath);
UtilHelpers.validateAndAddProperties(configs, sparkLauncher);
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to schedule and execute compaction ";
}
return COMPACTION_SCH_EXE_SUCCESSFUL;
}
/**
* Prints all compaction details.
*/
private static String printAllCompactions(HoodieTimeline timeline,
Function<HoodieInstant, HoodieCompactionPlan> compactionPlanReader,
boolean includeExtraMetadata,
String sortByField,
boolean descending,
int limit,
boolean headerOnly) {
Stream<HoodieInstant> instantsStream = timeline.getWriteTimeline().getReverseOrderedInstants();
List<Pair<HoodieInstant, HoodieCompactionPlan>> compactionPlans = instantsStream
.map(instant -> Pair.of(instant, compactionPlanReader.apply(instant)))
.filter(pair -> pair.getRight() != null)
.collect(Collectors.toList());
Set<String> committedInstants = timeline.getCommitAndReplaceTimeline().filterCompletedInstants()
.getInstantsAsStream().map(HoodieInstant::requestedTime).collect(Collectors.toSet());
List<Comparable[]> rows = new ArrayList<>();
for (Pair<HoodieInstant, HoodieCompactionPlan> compactionPlan : compactionPlans) {
HoodieCompactionPlan plan = compactionPlan.getRight();
HoodieInstant instant = compactionPlan.getLeft();
final HoodieInstant.State state;
if (committedInstants.contains(instant.requestedTime())) {
state = HoodieInstant.State.COMPLETED;
} else {
state = instant.getState();
}
if (includeExtraMetadata) {
rows.add(new Comparable[] {instant.requestedTime(), state.toString(),
plan.getOperations() == null ? 0 : plan.getOperations().size(),
plan.getExtraMetadata().toString()});
} else {
rows.add(new Comparable[] {instant.requestedTime(), state.toString(),
plan.getOperations() == null ? 0 : plan.getOperations().size()});
}
}
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
TableHeader header = new TableHeader()
.addTableHeaderField(HoodieTableHeaderFields.HEADER_COMPACTION_INSTANT_TIME)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_STATE)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_TO_BE_COMPACTED);
if (includeExtraMetadata) {
header = header.addTableHeaderField(HoodieTableHeaderFields.HEADER_EXTRA_METADATA);
}
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
/**
* Reading a compaction plan differs between the active and archived timelines, so this builds a partial
* function that captures the timeline-specific read logic. These read methods could be made part of
* HoodieDefaultTimeline and overridden where necessary, but the BiFunctions below contain ad-hoc ("hacky")
* exception handling, so they are kept local to the CLI.
*/
private <T extends HoodieTimeline> Function<HoodieInstant, HoodieCompactionPlan> compactionPlanReader(
    BiFunction<T, HoodieInstant, HoodieCompactionPlan> f, T timeline) {
return (y) -> f.apply(timeline, y);
}
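/**
 * Reads a compaction plan from the archived timeline, wrapping any failure in a HoodieException.
 */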
private HoodieCompactionPlan readCompactionPlanForArchivedTimeline(HoodieArchivedTimeline archivedTimeline,
HoodieInstant instant) {
try {
return archivedTimeline.readCompactionPlan(instant);
} catch (Exception e) {
throw new HoodieException(e.getMessage(), e);
}
}
/**
* TODO: consider making this part of HoodieActiveTimeline or a utility class.
*/
private HoodieCompactionPlan readCompactionPlanForActiveTimeline(HoodieActiveTimeline activeTimeline,
HoodieInstant instant) {
InstantGenerator instantGenerator = HoodieCLI.getTableMetaClient().getInstantGenerator();
try {
if (!HoodieTimeline.COMPACTION_ACTION.equals(instant.getAction())) {
try {
// This could be a completed compaction. Assume the compaction request file is present, but skip this instant if reading it fails.
return activeTimeline.readCompactionPlan(
instantGenerator.getCompactionRequestedInstant(instant.requestedTime()));
} catch (HoodieIOException ioe) {
// SKIP
return null;
}
} else {
return activeTimeline.readCompactionPlan(
instantGenerator.getCompactionRequestedInstant(instant.requestedTime()));
}
} catch (IOException e) {
throw new HoodieIOException(e.getMessage(), e);
}
}
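/**
 * Renders a single compaction plan as a table of its operations (partition path, file id, base instant,
 * base data file path, delta file count and metrics), optionally filtered to one partition.
 */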
protected static String printCompaction(HoodieCompactionPlan compactionPlan,
String sortByField,
boolean descending,
int limit,
boolean headerOnly,
final String partition) {
List<Comparable[]> rows = new ArrayList<>();
if ((null != compactionPlan) && (null != compactionPlan.getOperations())) {
for (HoodieCompactionOperation op : compactionPlan.getOperations()) {
if (StringUtils.isNullOrEmpty(partition) || partition.equals(op.getPartitionPath())) {
rows.add(new Comparable[] {op.getPartitionPath(), op.getFileId(), op.getBaseInstantTime(), op.getDataFilePath(),
op.getDeltaFilePaths().size(), op.getMetrics() == null ? "" : op.getMetrics().toString()});
}
}
}
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
TableHeader header = new TableHeader()
.addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_PATH)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_FILES)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_METRICS);
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
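/**
 * Returns a unique path under /tmp used to pass the serialized result of a launched Spark
 * application back to the CLI process.
 */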
private static String getTmpSerializerFile() {
return TMP_DIR + UUID.randomUUID() + ".ser";
}
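/**
 * Reads back the Java-serialized result object that the launched Spark application wrote
 * to the given storage path.
 */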
private <T> T deSerializeOperationResult(StoragePath inputPath,
HoodieStorage storage) throws Exception {
InputStream inputStream = storage.open(inputPath);
ObjectInputStream in = new ObjectInputStream(inputStream);
try {
T result = (T) in.readObject();
LOG.info("Result : " + result);
return result;
} finally {
in.close();
inputStream.close();
}
}
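/**
 * Validates a pending compaction plan by launching SparkCommand.COMPACT_VALIDATE and printing the
 * per-operation results; the plan is reported VALID only if every operation validates successfully.
 * Example (illustrative values): compaction validate --instant [requested instant time] --parallelism 3
 */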
@ShellMethod(key = "compaction validate", value = "Validate Compaction")
public String validateCompaction(
@ShellOption(value = "--instant", help = "Compaction Instant") String compactionInstant,
@ShellOption(value = {"--parallelism"}, defaultValue = "3", help = "Parallelism") String parallelism,
@ShellOption(value = "--sparkMaster", defaultValue = "local", help = "Spark Master") String master,
@ShellOption(value = "--sparkMemory", defaultValue = "2G", help = "executor memory") String sparkMemory,
@ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") boolean headerOnly)
throws Exception {
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
String outputPathStr = getTmpSerializerFile();
StoragePath outputPath = new StoragePath(outputPathStr);
String output;
try {
String sparkPropertiesPath = Utils
.getDefaultPropertiesFile(convertJavaPropertiesToScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
SparkMain.addAppArgs(sparkLauncher, SparkCommand.COMPACT_VALIDATE, master, sparkMemory, HoodieCLI.basePath,
compactionInstant, outputPathStr, parallelism);
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to validate compaction for " + compactionInstant;
}
List<ValidationOpResult> res = deSerializeOperationResult(outputPath, HoodieCLI.storage);
boolean valid = res.stream().map(OperationResult::isSuccess).reduce(Boolean::logicalAnd).orElse(true);
String message = "\n\n\t COMPACTION PLAN " + (valid ? "VALID" : "INVALID") + "\n\n";
List<Comparable[]> rows = new ArrayList<>();
res.forEach(r -> {
Comparable[] row = new Comparable[] {r.getOperation().getFileId(), r.getOperation().getBaseInstantTime(),
r.getOperation().getDataFileName().isPresent() ? r.getOperation().getDataFileName().get() : "",
r.getOperation().getDeltaFileNames().size(), r.isSuccess(),
r.getException().isPresent() ? r.getException().get().getMessage() : ""};
rows.add(row);
});
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
TableHeader header = new TableHeader()
.addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT_TIME)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_DATA_FILE)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_DELTA_FILES)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_VALID)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_ERROR);
output = message + HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit,
headerOnly, rows);
} finally {
// Delete tmp file used to serialize result
if (HoodieCLI.storage.exists(outputPath)) {
HoodieCLI.storage.deleteFile(outputPath);
}
}
return output;
}
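/**
 * Removes a pending compaction plan from the timeline by launching SparkCommand.COMPACT_UNSCHEDULE_PLAN
 * and prints any file renames that were performed as part of unscheduling.
 * Example (illustrative values): compaction unschedule --instant [requested instant time] --dryRun true
 */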
@ShellMethod(key = "compaction unschedule", value = "Unschedule Compaction")
public String unscheduleCompaction(
@ShellOption(value = "--instant", help = "Compaction Instant") String compactionInstant,
@ShellOption(value = {"--parallelism"}, defaultValue = "3", help = "Parallelism") String parallelism,
@ShellOption(value = "--sparkMaster", defaultValue = "local", help = "Spark Master") String master,
@ShellOption(value = "--sparkMemory", defaultValue = "2G", help = "executor memory") String sparkMemory,
@ShellOption(value = {"--skipValidation"}, help = "skip validation", defaultValue = "false") boolean skipV,
@ShellOption(value = {"--dryRun"}, help = "Dry Run Mode", defaultValue = "false") boolean dryRun,
@ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") boolean headerOnly)
throws Exception {
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
String outputPathStr = getTmpSerializerFile();
StoragePath outputPath = new StoragePath(outputPathStr);
String output;
try {
String sparkPropertiesPath = Utils
.getDefaultPropertiesFile(convertJavaPropertiesToScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
SparkMain.addAppArgs(sparkLauncher, SparkCommand.COMPACT_UNSCHEDULE_PLAN, master, sparkMemory, HoodieCLI.basePath,
compactionInstant, outputPathStr, parallelism, Boolean.valueOf(skipV).toString(),
Boolean.valueOf(dryRun).toString());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to unschedule compaction for " + compactionInstant;
}
List<RenameOpResult> res = deSerializeOperationResult(outputPath, HoodieCLI.storage);
output =
getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly, "unschedule pending compaction");
} finally {
// Delete tmp file used to serialize result
if (HoodieCLI.storage.exists(outputPath)) {
HoodieCLI.storage.deleteFile(outputPath);
}
}
return output;
}
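/**
 * Removes a single file group, identified by file id and partition path, from a pending compaction plan
 * by launching SparkCommand.COMPACT_UNSCHEDULE_FILE, and prints any resulting file renames.
 * Example (illustrative values): compaction unscheduleFileId --fileId [file id] --partitionPath [partition path]
 */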
@ShellMethod(key = "compaction unscheduleFileId", value = "UnSchedule Compaction for a fileId")
public String unscheduleCompactFile(
@ShellOption(value = "--fileId", help = "File Id") final String fileId,
@ShellOption(value = "--partitionPath", defaultValue = "", help = "partition path") final String partitionPath,
@ShellOption(value = "--sparkMaster", defaultValue = "local", help = "Spark Master") String master,
@ShellOption(value = "--sparkMemory", defaultValue = "2G", help = "executor memory") String sparkMemory,
@ShellOption(value = {"--skipValidation"}, help = "skip validation", defaultValue = "false") boolean skipV,
@ShellOption(value = {"--dryRun"}, help = "Dry Run Mode", defaultValue = "false") boolean dryRun,
@ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Header Only", defaultValue = "false") boolean headerOnly)
throws Exception {
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
String outputPathStr = getTmpSerializerFile();
StoragePath outputPath = new StoragePath(outputPathStr);
String output;
try {
String sparkPropertiesPath = Utils
.getDefaultPropertiesFile(convertJavaPropertiesToScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
SparkMain.addAppArgs(sparkLauncher, SparkCommand.COMPACT_UNSCHEDULE_FILE, master, sparkMemory, HoodieCLI.basePath,
fileId, partitionPath, outputPathStr, "1", Boolean.valueOf(skipV).toString(),
Boolean.valueOf(dryRun).toString());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to unschedule compaction for file " + fileId;
}
List<RenameOpResult> res = deSerializeOperationResult(outputPath, HoodieCLI.storage);
output = getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly,
"unschedule file from pending compaction");
} finally {
// Delete tmp file used to serialize result
if (HoodieCLI.storage.exists(outputPath)) {
HoodieCLI.storage.deleteFile(outputPath);
}
}
return output;
}
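/**
 * Launches SparkCommand.COMPACT_REPAIR to rename files back into agreement with the timeline after a
 * partially failed unschedule, and prints the renames that were (or, with --dryRun, would be) performed.
 * Example (illustrative values): compaction repair --instant [requested instant time] --dryRun true
 */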
@ShellMethod(key = "compaction repair", value = "Renames the files to make them consistent with the timeline as "
+ "dictated by Hoodie metadata. Use when compaction unschedule fails partially.")
public String repairCompaction(
@ShellOption(value = "--instant", help = "Compaction Instant") String compactionInstant,
@ShellOption(value = {"--parallelism"}, defaultValue = "3", help = "Parallelism") String parallelism,
@ShellOption(value = "--sparkMaster", defaultValue = "local", help = "Spark Master") String master,
@ShellOption(value = "--sparkMemory", defaultValue = "2G", help = "executor memory") String sparkMemory,
@ShellOption(value = {"--dryRun"}, help = "Dry Run Mode", defaultValue = "false") boolean dryRun,
@ShellOption(value = {"--limit"}, help = "Limit commits", defaultValue = "-1") Integer limit,
@ShellOption(value = {"--sortBy"}, help = "Sorting Field", defaultValue = "") String sortByField,
@ShellOption(value = {"--desc"}, help = "Ordering", defaultValue = "false") boolean descending,
@ShellOption(value = {"--headeronly"}, help = "Print Header Only",
defaultValue = "false") boolean headerOnly)
throws Exception {
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
String outputPathStr = getTmpSerializerFile();
StoragePath outputPath = new StoragePath(outputPathStr);
String output;
try {
String sparkPropertiesPath = Utils
.getDefaultPropertiesFile(convertJavaPropertiesToScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
SparkMain.addAppArgs(sparkLauncher, SparkCommand.COMPACT_REPAIR, master, sparkMemory, HoodieCLI.basePath,
compactionInstant, outputPathStr, parallelism, Boolean.valueOf(dryRun).toString());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to unschedule compaction for " + compactionInstant;
}
List<RenameOpResult> res = deSerializeOperationResult(outputPath, HoodieCLI.storage);
output = getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly, "repair compaction");
} finally {
// Delete tmp file used to serialize result
if (HoodieCLI.storage.exists(outputPath)) {
HoodieCLI.storage.deleteFile(outputPath);
}
}
return output;
}
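/**
 * Formats the rename results produced by the unschedule/repair Spark jobs. If no renames were required,
 * a plain success message is returned; otherwise each rename is listed with its execution and success status.
 */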
private String getRenamesToBePrinted(List<RenameOpResult> res, Integer limit, String sortByField, boolean descending,
boolean headerOnly, String operation) {
Option<Boolean> result =
Option.fromJavaOptional(res.stream().map(r -> r.isExecuted() && r.isSuccess()).reduce(Boolean::logicalAnd));
if (result.isPresent()) {
System.out.println("There were some file renames that needed to be done to " + operation);
if (result.get()) {
System.out.println("All renames successfully completed to " + operation + " done !!");
} else {
System.out.println("Some renames failed. table could be in inconsistent-state. Try running compaction repair");
}
List<Comparable[]> rows = new ArrayList<>();
res.forEach(r -> {
Comparable[] row =
new Comparable[] {r.getOperation().fileId, r.getOperation().srcPath, r.getOperation().destPath,
r.isExecuted(), r.isSuccess(), r.getException().isPresent() ? r.getException().get().getMessage() : ""};
rows.add(row);
});
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
TableHeader header = new TableHeader()
.addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_SOURCE_FILE_PATH)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_DESTINATION_FILE_PATH)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_RENAME_EXECUTED)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_RENAME_SUCCEEDED)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_ERROR);
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
} else {
return "No File renames needed to " + operation + ". Operation successful.";
}
}
}