All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.common.model.CompactionOperation Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.common.model;

import org.apache.hudi.avro.model.HoodieCompactionOperation;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;

import org.apache.hadoop.fs.Path;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

/**
 * Encapsulates all the needed information about a compaction and make a decision whether this compaction is effective
 * or not.
 */
public class CompactionOperation implements Serializable {

  private String baseInstantTime;
  private Option dataFileCommitTime;
  private List deltaFileNames;
  private Option dataFileName;
  private HoodieFileGroupId id;
  private Map metrics;

  // Only for serialization/de-serialization
  @Deprecated
  public CompactionOperation() {}

  public CompactionOperation(String fileId, String partitionPath, String baseInstantTime,
      Option dataFileCommitTime, List deltaFileNames, Option dataFileName,
      Map metrics) {
    this.baseInstantTime = baseInstantTime;
    this.dataFileCommitTime = dataFileCommitTime;
    this.deltaFileNames = deltaFileNames;
    this.dataFileName = dataFileName;
    this.id = new HoodieFileGroupId(partitionPath, fileId);
    this.metrics = metrics;
  }

  public CompactionOperation(Option dataFile, String partitionPath, List logFiles,
      Map metrics) {
    if (dataFile.isPresent()) {
      this.baseInstantTime = dataFile.get().getCommitTime();
      this.dataFileName = Option.of(dataFile.get().getFileName());
      this.id = new HoodieFileGroupId(partitionPath, dataFile.get().getFileId());
      this.dataFileCommitTime = Option.of(dataFile.get().getCommitTime());
    } else {
      assert logFiles.size() > 0;
      this.dataFileName = Option.empty();
      this.baseInstantTime = FSUtils.getBaseCommitTimeFromLogPath(logFiles.get(0).getPath());
      this.id = new HoodieFileGroupId(partitionPath, FSUtils.getFileIdFromLogPath(logFiles.get(0).getPath()));
      this.dataFileCommitTime = Option.empty();
    }

    this.deltaFileNames = logFiles.stream().map(s -> s.getPath().getName()).collect(Collectors.toList());
    this.metrics = metrics;
  }

  public String getBaseInstantTime() {
    return baseInstantTime;
  }

  public Option getDataFileCommitTime() {
    return dataFileCommitTime;
  }

  public List getDeltaFileNames() {
    return deltaFileNames;
  }

  public Option getDataFileName() {
    return dataFileName;
  }

  public String getFileId() {
    return id.getFileId();
  }

  public String getPartitionPath() {
    return id.getPartitionPath();
  }

  public Map getMetrics() {
    return metrics;
  }

  public HoodieFileGroupId getFileGroupId() {
    return id;
  }

  public Option getBaseFile(String basePath, String partitionPath) {
    Path dirPath = FSUtils.getPartitionPath(basePath, partitionPath);
    return dataFileName.map(df -> new HoodieBaseFile(new Path(dirPath, df).toString()));
  }

  /**
   * Convert Avro generated Compaction operation to POJO for Spark RDD operation.
   * 
   * @param operation Hoodie Compaction Operation
   * @return
   */
  public static CompactionOperation convertFromAvroRecordInstance(HoodieCompactionOperation operation) {
    CompactionOperation op = new CompactionOperation();
    op.baseInstantTime = operation.getBaseInstantTime();
    op.dataFileName = Option.ofNullable(operation.getDataFilePath());
    op.dataFileCommitTime = op.dataFileName.map(p -> FSUtils.getCommitTime(new Path(p).getName()));
    op.deltaFileNames = new ArrayList<>(operation.getDeltaFilePaths());
    op.id = new HoodieFileGroupId(operation.getPartitionPath(), operation.getFileId());
    op.metrics = operation.getMetrics() == null ? new HashMap<>() : new HashMap<>(operation.getMetrics());
    return op;
  }

  @Override
  public String toString() {
    return "CompactionOperation{baseInstantTime='" + baseInstantTime + '\'' + ", dataFileCommitTime="
        + dataFileCommitTime + ", deltaFileNames=" + deltaFileNames + ", dataFileName=" + dataFileName + ", id='" + id
        + '\'' + ", metrics=" + metrics + '}';
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    CompactionOperation operation = (CompactionOperation) o;
    return Objects.equals(baseInstantTime, operation.baseInstantTime)
        && Objects.equals(dataFileCommitTime, operation.dataFileCommitTime)
        && Objects.equals(deltaFileNames, operation.deltaFileNames)
        && Objects.equals(dataFileName, operation.dataFileName) && Objects.equals(id, operation.id);
  }

  @Override
  public int hashCode() {
    return Objects.hash(baseInstantTime, id);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy