All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.uber.hoodie.io.compact.strategy.LogFileSizeBasedCompactionStrategy Maven / Gradle / Ivy

There is a newer version: 0.4.7
Show newest version
/*
 *  Copyright (c) 2016 Uber Technologies, Inc. ([email protected])
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *           http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.uber.hoodie.io.compact.strategy;

import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.io.compact.CompactionOperation;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

/**
 * LogFileSizeBasedCompactionStrategy orders the compactions based on the total log files size and
 * limits the compactions within a configured IO bound
 *
 * @see BoundedIOCompactionStrategy
 * @see CompactionStrategy
 */
public class LogFileSizeBasedCompactionStrategy extends BoundedIOCompactionStrategy implements
    Comparator {

  private static final String TOTAL_LOG_FILE_SIZE = "TOTAL_LOG_FILE_SIZE";

  @Override
  public Map captureMetrics(HoodieDataFile dataFile, String partitionPath,
      List logFiles) {

    Map metrics = super.captureMetrics(dataFile, partitionPath, logFiles);
    // Total size of all the log files
    Long totalLogFileSize = logFiles.stream().map(HoodieLogFile::getFileSize)
        .filter(Optional::isPresent).map(Optional::get).reduce((size1, size2) -> size1 + size2)
        .orElse(0L);
    // save the metrics needed during the order
    metrics.put(TOTAL_LOG_FILE_SIZE, totalLogFileSize);
    return metrics;
  }

  @Override
  public List orderAndFilter(HoodieWriteConfig writeConfig,
      List operations) {
    // Order the operations based on the reverse size of the logs and limit them by the IO
    return super
        .orderAndFilter(writeConfig, operations.stream().sorted(this).collect(Collectors.toList()));
  }

  @Override
  public int compare(CompactionOperation op1, CompactionOperation op2) {
    Long totalLogSize1 = (Long) op1.getMetrics().get(TOTAL_LOG_FILE_SIZE);
    Long totalLogSize2 = (Long) op2.getMetrics().get(TOTAL_LOG_FILE_SIZE);
    // Reverse the comparison order - so that larger log file size is compacted first
    return totalLogSize2.compareTo(totalLogSize1);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy