// org.apache.hudi.util.ClusteringUtil Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.util;
import org.apache.hudi.client.HoodieFlinkWriteClient;
import org.apache.hudi.client.clustering.plan.strategy.FlinkConsistentBucketClusteringPlanStrategy;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.ClusteringUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.configuration.OptionsResolver;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.table.HoodieFlinkTable;
import org.apache.flink.configuration.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.stream.Collectors;
/**
* Utilities for flink hudi clustering.
*/
public class ClusteringUtil {
private static final Logger LOG = LoggerFactory.getLogger(ClusteringUtil.class);
public static void validateClusteringScheduling(Configuration conf) {
if (!OptionsResolver.isAppendMode(conf) && OptionsResolver.isBucketIndexType(conf)) {
HoodieIndex.BucketIndexEngineType bucketIndexEngineType = OptionsResolver.getBucketEngineType(conf);
switch (bucketIndexEngineType) {
case SIMPLE:
throw new HoodieNotSupportedException("Clustering is not supported for simple bucket index.");
case CONSISTENT_HASHING:
String clusteringPlanStrategyClass = conf.getString(FlinkOptions.CLUSTERING_PLAN_STRATEGY_CLASS, OptionsResolver.getDefaultPlanStrategyClassName(conf));
if (!clusteringPlanStrategyClass.equalsIgnoreCase(FlinkConsistentBucketClusteringPlanStrategy.class.getName())) {
throw new HoodieNotSupportedException(
"CLUSTERING_PLAN_STRATEGY_CLASS should be set to " + FlinkConsistentBucketClusteringPlanStrategy.class.getName() + " in order to work with Consistent Hashing Bucket Index.");
}
break;
default:
throw new HoodieNotSupportedException("Unknown bucket index engine type: " + bucketIndexEngineType);
}
}
}
/**
* Schedules clustering plan by condition.
*
* @param conf The configuration
* @param writeClient The write client
* @param committed Whether the instant was committed
*/
public static void scheduleClustering(Configuration conf, HoodieFlinkWriteClient writeClient, boolean committed) {
validateClusteringScheduling(conf);
if (committed) {
writeClient.scheduleClustering(Option.empty());
}
}
/**
* Force rolls back all the inflight clustering instants, especially for job failover restart.
*
* @param table The hoodie table
* @param writeClient The write client
*/
public static void rollbackClustering(HoodieFlinkTable> table, HoodieFlinkWriteClient writeClient) {
List inflightInstants = ClusteringUtils.getPendingClusteringInstantTimes(table.getMetaClient())
.stream()
.filter(instant -> instant.getState() == HoodieInstant.State.INFLIGHT)
.collect(Collectors.toList());
inflightInstants.forEach(inflightInstant -> {
LOG.info("Rollback the inflight clustering instant: " + inflightInstant + " for failover");
table.rollbackInflightClustering(inflightInstant,
commitToRollback -> writeClient.getTableServiceClient().getPendingRollbackInfo(table.getMetaClient(), commitToRollback, false));
table.getMetaClient().reloadActiveTimeline();
});
}
/**
* Force rolls back the inflight clustering instant, for handling failure case.
*
* @param table The hoodie table
* @param writeClient The write client
* @param instantTime The instant time
*/
public static void rollbackClustering(HoodieFlinkTable> table, HoodieFlinkWriteClient> writeClient, String instantTime) {
HoodieActiveTimeline activeTimeline = table.getMetaClient().reloadActiveTimeline();
Option inflightInstantOpt = ClusteringUtils.getInflightClusteringInstant(instantTime, activeTimeline, table.getInstantGenerator());
if (inflightInstantOpt.isPresent() && ClusteringUtils.isClusteringInstant(activeTimeline, inflightInstantOpt.get(), table.getInstantGenerator())) {
LOG.warn("Rollback failed clustering instant: [" + instantTime + "]");
table.rollbackInflightClustering(inflightInstantOpt.get(),
commitToRollback -> writeClient.getTableServiceClient().getPendingRollbackInfo(table.getMetaClient(), commitToRollback, false));
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy