All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.uber.hoodie.common.model.CompactionOperation Maven / Gradle / Ivy

There is a newer version: 0.4.7
Show newest version
/*
 *  Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *           http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.uber.hoodie.common.model;

import com.google.common.base.Optional;
import com.uber.hoodie.avro.model.HoodieCompactionOperation;
import com.uber.hoodie.common.util.FSUtils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Serializable description of one compaction operation on a single file group: the base instant
 * time, the optional data file (path and commit time), the paths of the delta/log files and a
 * map of metrics attached to the operation.
 *
 * <p>NOTE(review): the {@code Optional}/{@code List}/{@code Map} members appear as raw types in
 * this copy of the file; their type arguments may have been lost — confirm against the upstream
 * source before tightening the signatures.
 */
public class CompactionOperation implements Serializable {

  private String baseInstantTime;
  // Using Guava Optional as it is serializable
  private Optional dataFileCommitTime;
  private List deltaFilePaths;
  private Optional dataFilePath;
  private HoodieFileGroupId id;
  private Map metrics;

  //Only for serialization/de-serialization
  @Deprecated
  public CompactionOperation() {
  }

  /**
   * Builds a compaction operation from an optional data file and the log files of a file group.
   *
   * <p>When the data file is present, the base instant time, data file path, file id and data
   * file commit time are all taken from it. Otherwise the base instant time and file id are
   * parsed from the path of the first log file, and the data file path and commit time are
   * absent.
   *
   * @param dataFile optional data file of the file group
   * @param partitionPath partition path used to build the file group id
   * @param logFiles log files whose paths become the delta file paths; must contain at least
   *     one entry when {@code dataFile} is absent
   * @param metrics metrics to attach to this operation (stored as-is, not copied)
   * @throws IllegalArgumentException if {@code dataFile} is absent and {@code logFiles} is
   *     null or empty
   */
  public CompactionOperation(java.util.Optional dataFile, String partitionPath,
      List logFiles, Map metrics) {
    if (dataFile.isPresent()) {
      this.baseInstantTime = dataFile.get().getCommitTime();
      this.dataFilePath = Optional.of(dataFile.get().getPath());
      this.id = new HoodieFileGroupId(partitionPath, dataFile.get().getFileId());
      this.dataFileCommitTime = Optional.of(dataFile.get().getCommitTime());
    } else {
      // An `assert` is a no-op unless the JVM runs with -ea, so validate explicitly: without a
      // data file the first log file is the only source for the base instant time and file id.
      if (logFiles == null || logFiles.isEmpty()) {
        throw new IllegalArgumentException(
            "Cannot build a compaction operation without a data file or at least one log file"
                + " (partition path: " + partitionPath + ")");
      }
      this.dataFilePath = Optional.absent();
      this.baseInstantTime = FSUtils.getBaseCommitTimeFromLogPath(logFiles.get(0).getPath());
      this.id = new HoodieFileGroupId(partitionPath,
          FSUtils.getFileIdFromLogPath(logFiles.get(0).getPath()));
      this.dataFileCommitTime = Optional.absent();
    }

    this.deltaFilePaths = logFiles.stream().map(s -> s.getPath().toString())
        .collect(Collectors.toList());
    this.metrics = metrics;
  }

  /**
   * @return the base instant time of this operation
   */
  public String getBaseInstantTime() {
    return baseInstantTime;
  }

  /**
   * @return the commit time of the data file, or absent when there is no data file
   */
  public Optional getDataFileCommitTime() {
    return dataFileCommitTime;
  }

  /**
   * @return the paths of the delta (log) files, as held internally (not a copy)
   */
  public List getDeltaFilePaths() {
    return deltaFilePaths;
  }

  /**
   * @return the data file path, or absent when there is no data file
   */
  public Optional getDataFilePath() {
    return dataFilePath;
  }

  /**
   * @return the file id component of the file group id
   */
  public String getFileId() {
    return id.getFileId();
  }

  /**
   * @return the partition path component of the file group id
   */
  public String getPartitionPath() {
    return id.getPartitionPath();
  }

  /**
   * @return the metrics map attached to this operation, as held internally (not a copy)
   */
  public Map getMetrics() {
    return metrics;
  }

  /**
   * @return the file group id (partition path plus file id) this operation applies to
   */
  public HoodieFileGroupId getFileGroupId() {
    return id;
  }

  /**
   * Convert Avro generated Compaction operation to POJO for Spark RDD operation
   *
   * @param operation Hoodie Compaction Operation
   * @return a new {@code CompactionOperation} populated from {@code operation}; the delta file
   *     paths and metrics are copied, and a missing metrics map becomes an empty map
   */
  public static CompactionOperation convertFromAvroRecordInstance(HoodieCompactionOperation operation) {
    CompactionOperation op = new CompactionOperation();
    op.baseInstantTime = operation.getBaseInstantTime();
    op.dataFilePath = Optional.fromNullable(operation.getDataFilePath());
    // The constructor of this class sets the data file commit time to the same value as the
    // base instant time whenever a data file exists, and to absent otherwise. Rebuild it the
    // same way here so getDataFileCommitTime() never returns null on a converted instance.
    // NOTE(review): this relies on the Avro record having been produced from an operation that
    // satisfied that invariant — confirm against the code that writes the record.
    op.dataFileCommitTime = op.dataFilePath.isPresent()
        ? Optional.fromNullable(operation.getBaseInstantTime())
        : Optional.absent();
    op.deltaFilePaths = new ArrayList<>(operation.getDeltaFilePaths());
    op.id = new HoodieFileGroupId(operation.getPartitionPath(), operation.getFileId());
    op.metrics = operation.getMetrics() == null
        ? new HashMap<>()
        : new HashMap<>(operation.getMetrics());
    return op;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy