All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.hudi.util.WriteStatMerger Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.util;

import org.apache.hudi.common.model.HoodieDeltaWriteStat;
import org.apache.hudi.common.model.HoodieWriteStat;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Helper clazz to merge hoodie write stats that belong to one file path.
 *
 * 

CAUTION: The merge can be buggy, we need to maintain the new variables for the write stat. */ public class WriteStatMerger { public static HoodieWriteStat merge(HoodieWriteStat stat1, HoodieWriteStat stat2) { if (stat1 instanceof HoodieDeltaWriteStat) { return mergeDeltaWriteStat((HoodieDeltaWriteStat) stat1, (HoodieDeltaWriteStat) stat2); } return mergeWriteStat(new HoodieWriteStat(), stat1, stat2); } private static HoodieDeltaWriteStat mergeDeltaWriteStat( HoodieDeltaWriteStat stat1, HoodieDeltaWriteStat stat2) { HoodieDeltaWriteStat merged = new HoodieDeltaWriteStat(); mergeWriteStat(merged, stat1, stat2); merged.setLogVersion(stat2.getLogVersion()); merged.setLogOffset(maxLong(stat1.getLogOffset(), stat2.getLogOffset())); merged.setBaseFile(stat2.getBaseFile()); // log files List mergedLogFiles = new ArrayList<>(stat1.getLogFiles()); for (String logFile : stat2.getLogFiles()) { if (!mergedLogFiles.contains(logFile)) { mergedLogFiles.add(logFile); } } merged.setLogFiles(mergedLogFiles); // column stats if (stat1.getColumnStats().isPresent()) { merged.putRecordsStats(stat1.getColumnStats().get()); } if (stat2.getColumnStats().isPresent()) { merged.putRecordsStats(stat2.getColumnStats().get()); } return merged; } private static HoodieWriteStat mergeWriteStat(HoodieWriteStat merged, HoodieWriteStat stat1, HoodieWriteStat stat2) { merged.setFileId(stat2.getFileId()); merged.setPath(stat2.getPath()); // merge cdc stats merged.setCdcStats(getMergedCdcStats(stat1.getCdcStats(), stat2.getCdcStats())); // prev commit merged.setPrevCommit(stat2.getPrevCommit()); merged.setNumWrites(stat2.getNumWrites() + stat1.getNumWrites()); merged.setNumDeletes(stat2.getNumDeletes() + stat1.getNumDeletes()); merged.setNumUpdateWrites(stat2.getNumUpdateWrites() + stat1.getNumUpdateWrites()); merged.setNumInserts(stat2.getNumInserts() + stat1.getNumInserts()); merged.setTotalWriteBytes(stat2.getTotalWriteBytes() + stat1.getTotalWriteBytes()); 
merged.setTotalWriteErrors(stat2.getTotalWriteErrors() + stat1.getTotalWriteErrors()); // ------------------------------------------------------------------------- // Nullable // ------------------------------------------------------------------------- // tmp path merged.setTempPath(stat2.getTempPath()); // partition path merged.setPartitionPath(stat2.getPartitionPath()); // runtime stats merged.setRuntimeStats(getMergedRuntimeStats(stat1.getRuntimeStats(), stat2.getRuntimeStats())); // log statistics merged.setTotalLogRecords(stat2.getTotalLogRecords() + stat1.getTotalLogRecords()); merged.setTotalLogFilesCompacted(stat2.getTotalLogFilesCompacted() + stat1.getTotalLogFilesCompacted()); merged.setTotalLogSizeCompacted(stat2.getTotalLogSizeCompacted() + stat1.getTotalLogSizeCompacted()); merged.setTotalUpdatedRecordsCompacted(stat2.getTotalUpdatedRecordsCompacted() + stat1.getTotalUpdatedRecordsCompacted()); merged.setTotalLogBlocks(stat2.getTotalLogBlocks() + stat1.getTotalLogBlocks()); merged.setTotalCorruptLogBlock(stat2.getTotalCorruptLogBlock() + stat1.getTotalCorruptLogBlock()); merged.setTotalRollbackBlocks(stat2.getTotalRollbackBlocks() + stat1.getTotalRollbackBlocks()); merged.setFileSizeInBytes(stat2.getFileSizeInBytes() + stat1.getFileSizeInBytes()); // event time merged.setMinEventTime(minLong(stat1.getMinEventTime(), stat2.getMinEventTime())); merged.setMaxEventTime(maxLong(stat1.getMaxEventTime(), stat2.getMaxEventTime())); return stat2; } private static HoodieWriteStat.RuntimeStats getMergedRuntimeStats( HoodieWriteStat.RuntimeStats runtimeStats1, HoodieWriteStat.RuntimeStats runtimeStats2) { final HoodieWriteStat.RuntimeStats runtimeStats; if (runtimeStats1 != null && runtimeStats2 != null) { runtimeStats = new HoodieWriteStat.RuntimeStats(); runtimeStats.setTotalScanTime(runtimeStats1.getTotalScanTime() + runtimeStats2.getTotalScanTime()); runtimeStats.setTotalUpsertTime(runtimeStats1.getTotalUpsertTime() + runtimeStats2.getTotalUpsertTime()); 
runtimeStats.setTotalCreateTime(runtimeStats1.getTotalCreateTime() + runtimeStats2.getTotalCreateTime()); } else if (runtimeStats1 == null) { runtimeStats = runtimeStats2; } else { runtimeStats = runtimeStats1; } return runtimeStats; } private static Map getMergedCdcStats(Map cdcStats1, Map cdcStats2) { final Map cdcStats; if (cdcStats1 != null && cdcStats2 != null) { cdcStats = new HashMap<>(); cdcStats.putAll(cdcStats1); cdcStats.putAll(cdcStats2); } else if (cdcStats1 == null) { cdcStats = cdcStats2; } else { cdcStats = cdcStats1; } return cdcStats; } private static Long minLong(Long v1, Long v2) { if (v1 == null) { return v2; } if (v2 == null) { return v1; } return v1.compareTo(v2) < 0 ? v1 : v2; } private static Long maxLong(Long v1, Long v2) { if (v1 == null) { return v2; } if (v2 == null) { return v1; } return v1.compareTo(v2) > 0 ? v1 : v2; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy