/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.util;

import org.apache.hudi.client.HoodieFlinkWriteClient;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.sink.compact.FlinkCompactionConfig;
import org.apache.hudi.table.HoodieFlinkTable;

import org.apache.avro.Schema;
import org.apache.flink.configuration.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Locale;

/**
 * Utilities for Flink Hudi compaction.
 */
public class CompactionUtil {

  private static final Logger LOG = LoggerFactory.getLogger(CompactionUtil.class);

  /**
   * Schedules a new compaction instant.
   *
   * @param writeClient         The write client
   * @param deltaTimeCompaction Whether the compaction is triggered by elapsed delta time
   * @param committed           Whether the last instant was committed successfully
   */
  public static void scheduleCompaction(
      HoodieFlinkWriteClient<?> writeClient,
      boolean deltaTimeCompaction,
      boolean committed) {
    if (committed) {
      writeClient.scheduleCompaction(Option.empty());
    } else if (deltaTimeCompaction) {
      // If there are no new commits and the compaction trigger strategy is based on elapsed delta time,
      // schedule the compaction anyway.
      writeClient.scheduleCompaction(Option.empty());
    }
  }
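
  // Illustrative sketch (not part of the original class): how a compaction plan
  // operator might derive the two flags before calling scheduleCompaction(). The
  // option FlinkOptions.COMPACTION_TRIGGER_STRATEGY, the constant FlinkOptions.TIME_ELAPSED
  // and the local variable names below are assumptions for illustration only.
  //
  //   boolean deltaTimeTrigger = FlinkOptions.TIME_ELAPSED.equals(
  //       conf.getString(FlinkOptions.COMPACTION_TRIGGER_STRATEGY));
  //   boolean committed = ...; // whether the last instant completed successfully
  //   CompactionUtil.scheduleCompaction(writeClient, deltaTimeTrigger, committed);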

  /**
   * Sets up the avro schema string into the given configuration {@code conf}
   * through reading from the hoodie table metadata.
   *
   * @param conf The configuration
   */
  public static void setAvroSchema(Configuration conf, HoodieTableMetaClient metaClient) throws Exception {
    TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient);
    Schema tableAvroSchema = tableSchemaResolver.getTableAvroSchema(false);
    conf.setString(FlinkOptions.SOURCE_AVRO_SCHEMA, tableAvroSchema.toString());
  }
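
  // Illustrative sketch (not part of the original class): resolving the meta client
  // from the configured table path before calling setAvroSchema(). The helper
  // StreamerUtil.createMetaClient(conf) is an assumption here; any code that builds a
  // HoodieTableMetaClient for FlinkOptions.PATH serves the same purpose.
  //
  //   HoodieTableMetaClient metaClient = StreamerUtil.createMetaClient(conf);
  //   CompactionUtil.setAvroSchema(conf, metaClient);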

  /**
   * Sets up the avro schema string into the given write config {@code writeConfig}
   * through reading from the hoodie table metadata.
   *
   * @param writeConfig The HoodieWriteConfig
   */
  public static void setAvroSchema(HoodieWriteConfig writeConfig, HoodieTableMetaClient metaClient) throws Exception {
    TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient);
    Schema tableAvroSchema = tableSchemaResolver.getTableAvroSchema(false);
    writeConfig.setSchema(tableAvroSchema.toString());
  }
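
  // Illustrative sketch (not part of the original class): this overload keeps an existing
  // write client's HoodieWriteConfig in sync with the latest table schema, e.g. before
  // scheduling a compaction; writeClient.getConfig() is assumed to expose that config.
  //
  //   CompactionUtil.setAvroSchema(writeClient.getConfig(), metaClient);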

  /**
   * Sets up the preCombine field into the given configuration {@code conf}
   * through reading from the hoodie table metadata.
   *
   * <p>This value is non-null as compaction can only be performed on MOR tables,
   * and MOR tables always have a non-null precombine field.
   *
   * @param conf The configuration
   */
  public static void setPreCombineField(Configuration conf, HoodieTableMetaClient metaClient) {
    String preCombineField = metaClient.getTableConfig().getPreCombineField();
    if (preCombineField != null) {
      conf.setString(FlinkOptions.PRECOMBINE_FIELD, preCombineField);
    }
  }

  /**
   * Infers the changelog mode based on the data file schema (including metadata fields).
   *
   * <p>We can improve the code if the changelog mode is set up as table config.
   *
   * @param conf       The configuration
   * @param metaClient The meta client
   */
  public static void inferChangelogMode(Configuration conf, HoodieTableMetaClient metaClient) throws Exception {
    TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient);
    Schema tableAvroSchema = tableSchemaResolver.getTableAvroSchemaFromDataFile();
    if (tableAvroSchema.getField(HoodieRecord.OPERATION_METADATA_FIELD) != null) {
      conf.setBoolean(FlinkOptions.CHANGELOG_ENABLED, true);
    }
  }

  /**
   * Infers the metadata config based on the existence of the metadata folder.
   *
   * <p>We can improve the code if the metadata config is set up as table config.
   *
   * @param conf       The configuration
   * @param metaClient The meta client
   */
  public static void inferMetadataConf(Configuration conf, HoodieTableMetaClient metaClient) {
    String path = HoodieTableMetadata.getMetadataTableBasePath(conf.getString(FlinkOptions.PATH));
    if (!StreamerUtil.tableExists(path, (org.apache.hadoop.conf.Configuration) metaClient.getStorageConf().unwrap())) {
      conf.setBoolean(FlinkOptions.METADATA_ENABLED, false);
    }
  }

  /**
   * Rolls back the compaction instant with the given {@code instantTime} if it is still inflight on the timeline.
   *
   * @param table       The hoodie table
   * @param instantTime The compaction instant time
   */
  public static void rollbackCompaction(HoodieFlinkTable<?> table, String instantTime) {
    HoodieInstant inflightInstant = table.getInstantGenerator().getCompactionInflightInstant(instantTime);
    if (table.getMetaClient().reloadActiveTimeline().filterPendingCompactionTimeline().containsInstant(inflightInstant)) {
      LOG.warn("Rollback failed compaction instant: [" + instantTime + "]");
      table.rollbackInflightCompaction(inflightInstant);
    }
  }

  /**
   * Force rolls back all the inflight compaction instants, especially for job failover restart.
   *
   * @param table The hoodie table
   */
  public static void rollbackCompaction(HoodieFlinkTable<?> table) {
    HoodieTimeline inflightCompactionTimeline = table.getActiveTimeline()
        .filterPendingCompactionTimeline()
        .filter(instant -> instant.getState() == HoodieInstant.State.INFLIGHT);
    inflightCompactionTimeline.getInstants().forEach(inflightInstant -> {
      LOG.info("Rollback the inflight compaction instant: " + inflightInstant + " for failover");
      table.rollbackInflightCompaction(inflightInstant);
      table.getMetaClient().reloadActiveTimeline();
    });
  }

  /**
   * Rolls back the earliest compaction instant if there exists one.
   *
   * <p>Makes the strategy not that radical: first check whether there exist inflight compaction instants,
   * then roll back the earliest inflight instant only if it has timed out. That means, if there are
   * multiple timed out instants on the timeline, we only roll back the first one at a time.
   */
  public static void rollbackEarliestCompaction(HoodieFlinkTable<?> table, Configuration conf) {
    Option<HoodieInstant> earliestInflight = table.getActiveTimeline()
        .filterPendingCompactionTimeline()
        .filter(instant -> instant.getState() == HoodieInstant.State.INFLIGHT)
        .firstInstant();
    if (earliestInflight.isPresent()) {
      HoodieInstant instant = earliestInflight.get();
      String currentTime = table.getMetaClient().createNewInstantTime();
      int timeout = conf.getInteger(FlinkOptions.COMPACTION_TIMEOUT_SECONDS);
      if (StreamerUtil.instantTimeDiffSeconds(currentTime, instant.requestedTime()) >= timeout) {
        LOG.info("Rollback the inflight compaction instant: " + instant + " for timeout(" + timeout + "s)");
        table.rollbackInflightCompaction(instant);
        table.getMetaClient().reloadActiveTimeline();
      }
    }
  }

  /**
   * Returns whether the execution sequence is LIFO.
   */
  public static boolean isLIFO(String seq) {
    return seq.toUpperCase(Locale.ROOT).equals(FlinkCompactionConfig.SEQ_LIFO);
  }
}
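
// Illustrative usage sketch (not part of the original source): a Flink compaction job
// typically wires these utilities together during (re)initialization roughly as below.
// The surrounding variables (conf, metaClient, table) are assumptions.
//
//   CompactionUtil.setAvroSchema(conf, metaClient);      // publish the table schema to the job conf
//   CompactionUtil.setPreCombineField(conf, metaClient); // MOR tables carry a precombine field
//   CompactionUtil.inferChangelogMode(conf, metaClient); // enable changelog mode if data files carry the operation field
//   CompactionUtil.inferMetadataConf(conf, metaClient);  // disable the metadata table if its folder is absent
//   CompactionUtil.rollbackCompaction(table);            // force-roll back inflight compactions after a failover restart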