com.uber.hoodie.common.util.CompactionUtils

/*
 *  Copyright (c) 2018 Uber Technologies, Inc. ([email protected])
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *           http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.uber.hoodie.common.util;

import com.uber.hoodie.avro.model.HoodieCompactionOperation;
import com.uber.hoodie.avro.model.HoodieCompactionPlan;
import com.uber.hoodie.common.model.CompactionOperation;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.model.HoodieFileGroupId;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.util.collection.Pair;
import com.uber.hoodie.exception.HoodieException;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/**
 * Helper class to generate compaction plans from the FileGroup/FileSlice abstraction
 */
public class CompactionUtils {

  private static final Logger LOG = LogManager.getLogger(CompactionUtils.class);

  /**
   * Generate compaction operation from file-slice
   *
   * @param partitionPath          Partition path
   * @param fileSlice              File Slice
   * @param metricsCaptureFunction Metrics Capture function
   * @return Compaction Operation
   */
  public static HoodieCompactionOperation buildFromFileSlice(String partitionPath, FileSlice fileSlice,
      Optional<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFunction) {
    HoodieCompactionOperation.Builder builder = HoodieCompactionOperation.newBuilder();
    builder.setPartitionPath(partitionPath);
    builder.setFileId(fileSlice.getFileId());
    builder.setBaseInstantTime(fileSlice.getBaseInstantTime());
    builder.setDeltaFilePaths(fileSlice.getLogFiles().map(lf -> lf.getPath().toString()).collect(Collectors.toList()));
    if (fileSlice.getDataFile().isPresent()) {
      builder.setDataFilePath(fileSlice.getDataFile().get().getPath());
    }

    if (metricsCaptureFunction.isPresent()) {
      builder.setMetrics(metricsCaptureFunction.get().apply(Pair.of(partitionPath, fileSlice)));
    }
    return builder.build();
  }
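
  // Illustrative usage sketch (not part of the original source): build a single
  // compaction operation for a file-slice without capturing metrics. Here
  // "slice" is a hypothetical FileSlice obtained elsewhere.
  //
  //   HoodieCompactionOperation op =
  //       CompactionUtils.buildFromFileSlice("2018/05/01", slice, Optional.empty());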

  /**
   * Generate compaction plan from file-slices
   *
   * @param partitionFileSlicePairs list of partition file-slice pairs
   * @param extraMetadata           Extra Metadata
   * @param metricsCaptureFunction  Metrics Capture function
   */
  public static HoodieCompactionPlan buildFromFileSlices(
      List<Pair<String, FileSlice>> partitionFileSlicePairs,
      Optional<Map<String, String>> extraMetadata,
      Optional<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFunction) {
    HoodieCompactionPlan.Builder builder = HoodieCompactionPlan.newBuilder();
    extraMetadata.ifPresent(builder::setExtraMetadata);
    builder.setOperations(partitionFileSlicePairs.stream().map(pfPair ->
        buildFromFileSlice(pfPair.getKey(), pfPair.getValue(), metricsCaptureFunction)).collect(Collectors.toList()));
    return builder.build();
  }
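
  // Illustrative usage sketch (not part of the original source): assemble a plan
  // from a hypothetical list of (partition path, file-slice) pairs with no extra
  // metadata and no metrics capture function.
  //
  //   List<Pair<String, FileSlice>> pairs = ...; // e.g. collected from a file-system view
  //   HoodieCompactionPlan plan =
  //       CompactionUtils.buildFromFileSlices(pairs, Optional.empty(), Optional.empty());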

  /**
   * Build the Avro-generated compaction operation payload from the compaction operation POJO, for serialization
   */
  public static HoodieCompactionOperation buildHoodieCompactionOperation(CompactionOperation op) {
    return HoodieCompactionOperation.newBuilder().setFileId(op.getFileId())
        .setBaseInstantTime(op.getBaseInstantTime())
        .setPartitionPath(op.getPartitionPath())
        .setDataFilePath(op.getDataFilePath().isPresent() ? op.getDataFilePath().get() : null)
        .setDeltaFilePaths(op.getDeltaFilePaths())
        .setMetrics(op.getMetrics()).build();
  }

  /**
   * Build the compaction operation payload from its Avro version, for use in Spark executors
   *
   * @param hc HoodieCompactionOperation
   */
  public static CompactionOperation buildCompactionOperation(HoodieCompactionOperation hc) {
    return CompactionOperation.convertFromAvroRecordInstance(hc);
  }
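
  // Illustrative round trip (not part of the original source): an Avro payload
  // produced by buildHoodieCompactionOperation can be converted back to the POJO
  // form with buildCompactionOperation; "pojoOp" is a hypothetical CompactionOperation.
  //
  //   HoodieCompactionOperation avroOp = CompactionUtils.buildHoodieCompactionOperation(pojoOp);
  //   CompactionOperation restored = CompactionUtils.buildCompactionOperation(avroOp);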

  /**
   * Get all pending compaction plans along with their instants
   *
   * @param metaClient Hoodie Meta Client
   */
  public static List<Pair<HoodieInstant, HoodieCompactionPlan>> getAllPendingCompactionPlans(
      HoodieTableMetaClient metaClient) {
    List<HoodieInstant> pendingCompactionInstants =
        metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList());
    return pendingCompactionInstants.stream().map(instant -> {
      try {
        return Pair.of(instant, getCompactionPlan(metaClient, instant.getTimestamp()));
      } catch (IOException e) {
        throw new HoodieException(e);
      }
    }).collect(Collectors.toList());
  }
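
  // Illustrative usage sketch (not part of the original source): log the instant
  // time of every pending compaction; "metaClient" is a hypothetical,
  // already-constructed HoodieTableMetaClient.
  //
  //   CompactionUtils.getAllPendingCompactionPlans(metaClient)
  //       .forEach(p -> LOG.info("Pending compaction at " + p.getKey().getTimestamp()));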

  /**
   * Read the compaction plan serialized as the auxiliary details of the requested compaction instant
   *
   * @param metaClient        Hoodie Table Meta Client
   * @param compactionInstant Compaction instant time
   */
  public static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient,
      String compactionInstant) throws IOException {
    return AvroUtils.deserializeCompactionPlan(
        metaClient.getActiveTimeline().getInstantAuxiliaryDetails(
            HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
  }
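
  // Illustrative usage sketch (not part of the original source): fetch the plan
  // for a hypothetical requested compaction instant time "20180924181010".
  //
  //   HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, "20180924181010");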

  /**
   * Get all partition paths and file IDs with pending compaction operations, along with their target compaction
   * instant time
   *
   * @param metaClient Hoodie Table Meta Client
   */
  public static Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> getAllPendingCompactionOperations(
      HoodieTableMetaClient metaClient) {
    List<Pair<HoodieInstant, HoodieCompactionPlan>> pendingCompactionPlanWithInstants =
        getAllPendingCompactionPlans(metaClient);

    Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> fgIdToPendingCompactionWithInstantMap =
        new HashMap<>();
    pendingCompactionPlanWithInstants.stream().flatMap(instantPlanPair -> {
      HoodieInstant instant = instantPlanPair.getKey();
      HoodieCompactionPlan compactionPlan = instantPlanPair.getValue();
      List<HoodieCompactionOperation> ops = compactionPlan.getOperations();
      if (null != ops) {
        return ops.stream().map(op -> {
          return Pair.of(new HoodieFileGroupId(op.getPartitionPath(), op.getFileId()),
              Pair.of(instant.getTimestamp(), op));
        });
      } else {
        return Stream.empty();
      }
    }).forEach(pair -> {
      // Defensive check to ensure a single-fileId does not have more than one pending compaction
      if (fgIdToPendingCompactionWithInstantMap.containsKey(pair.getKey())) {
        String msg = "Hoodie File Id (" + pair.getKey() + ") has more than one pending compaction. Instants: "
            + pair.getValue() + ", " + fgIdToPendingCompactionWithInstantMap.get(pair.getKey());
        throw new IllegalStateException(msg);
      }
      fgIdToPendingCompactionWithInstantMap.put(pair.getKey(), pair.getValue());
    });
    return fgIdToPendingCompactionWithInstantMap;
  }
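
  // Illustrative usage sketch (not part of the original source): check whether a
  // file group already has a compaction pending, e.g. before scheduling a new one;
  // "fgId" is a hypothetical HoodieFileGroupId.
  //
  //   Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> pending =
  //       CompactionUtils.getAllPendingCompactionOperations(metaClient);
  //   boolean hasPending = pending.containsKey(fgId);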
}



