// org.apache.hudi.common.model.HoodieCommitMetadata (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.model;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.util.JsonUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.storage.StoragePathInfo;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeCommitMetadata;
/**
* All the metadata that gets stored along with a commit.
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class HoodieCommitMetadata implements Serializable {
public static final String SCHEMA_KEY = "schema";
private static final Logger LOG = LoggerFactory.getLogger(HoodieCommitMetadata.class);
protected Map> partitionToWriteStats;
protected Boolean compacted;
protected Map extraMetadata;
protected WriteOperationType operationType = WriteOperationType.UNKNOWN;
/**
 * No-arg constructor; delegates to {@link #HoodieCommitMetadata(boolean)} with
 * {@code compacted = false}.
 */
// for ser/deser
public HoodieCommitMetadata() {
this(false);
}
/**
 * Creates commit metadata with empty write-stat and extra-metadata maps.
 *
 * @param compacted initial value of the compacted flag
 */
public HoodieCommitMetadata(boolean compacted) {
  this.compacted = compacted;
  this.partitionToWriteStats = new HashMap<>();
  this.extraMetadata = new HashMap<>();
}
/**
 * Appends a write stat to the list kept for the given partition, creating the list on
 * first use.
 *
 * @param partitionPath partition the stat belongs to
 * @param stat          write stat to record
 */
public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
  // Single lookup instead of containsKey + put + get.
  partitionToWriteStats.computeIfAbsent(partitionPath, key -> new ArrayList<>()).add(stat);
}
/**
 * Stores an extra metadata entry, replacing any previous value for the same key.
 *
 * @param metaKey metadata key
 * @param value   metadata value
 */
public void addMetadata(String metaKey, String value) {
  this.extraMetadata.put(metaKey, value);
}
/**
 * Looks up the write stats recorded for one partition.
 *
 * @param partitionPath partition to look up
 * @return the stats list for that partition, or {@code null} if none was recorded
 */
public List<HoodieWriteStat> getWriteStats(String partitionPath) {
  return partitionToWriteStats.get(partitionPath);
}
/**
 * @return the extra metadata map itself (not a copy)
 */
public Map<String, String> getExtraMetadata() {
  return extraMetadata;
}
public Map> getPartitionToWriteStats() {
return partitionToWriteStats;
}
/**
 * Flattens the per-partition stats into one list.
 *
 * @return a new list holding every write stat across all partitions
 */
public List<HoodieWriteStat> getWriteStats() {
  return partitionToWriteStats.values().stream()
      .flatMap(Collection::stream)
      .collect(Collectors.toList());
}
/**
 * Reads one extra metadata value.
 *
 * @param metaKey metadata key
 * @return the stored value, or {@code null} when the key is absent
 */
public String getMetadata(String metaKey) {
  final String value = this.extraMetadata.get(metaKey);
  return value;
}
/**
 * @return the compacted flag (boxed; may be {@code null} if it was set to {@code null})
 */
public Boolean getCompacted() {
  return this.compacted;
}
/**
 * Overwrites the compacted flag.
 *
 * @param compacted new flag value
 */
public void setCompacted(Boolean compacted) {
this.compacted = compacted;
}
/**
 * Collects the path reported by every write stat, keyed by file ID.
 *
 * <p>When several stats share a file ID, the one visited last wins.
 *
 * @return a new map from file ID to the path stored in the write stat
 */
public HashMap<String, String> getFileIdAndRelativePaths() {
  HashMap<String, String> filePaths = new HashMap<>();
  // Walk the stats of every partition.
  for (List<HoodieWriteStat> stats : getPartitionToWriteStats().values()) {
    for (HoodieWriteStat stat : stats) {
      filePaths.put(stat.getFileId(), stat.getPath());
    }
  }
  return filePaths;
}
/**
 * Overwrites the write operation type.
 *
 * @param type new operation type
 */
public void setOperationType(WriteOperationType type) {
  operationType = type;
}
/**
 * @return the write operation type; {@code WriteOperationType.UNKNOWN} unless it was set
 */
public WriteOperationType getOperationType() {
  return operationType;
}
/**
 * Resolves each path from {@link #getFileIdAndRelativePaths()} against {@code basePath}.
 *
 * @param basePath base path the stored paths are resolved against
 * @return a new map from file ID to the absolute path string; the value is {@code null}
 *         when the write stat carries no path
 */
public HashMap<String, String> getFileIdAndFullPaths(StoragePath basePath) {
  HashMap<String, String> fullPaths = new HashMap<>();
  for (Map.Entry<String, String> entry : getFileIdAndRelativePaths().entrySet()) {
    String relativePath = entry.getValue();
    String fullPath = relativePath != null
        ? FSUtils.constructAbsolutePath(basePath, relativePath).toString()
        : null;
    fullPaths.put(entry.getKey(), fullPath);
  }
  return fullPaths;
}
/**
 * Lists the distinct absolute paths written under one partition.
 *
 * @param basePath      base path the stored paths are resolved against
 * @param partitionPath partition to look up
 * @return a new list of distinct absolute path strings; empty when the partition has no
 *         recorded stats
 */
public List<String> getFullPathsByPartitionPath(String basePath, String partitionPath) {
  HashSet<String> fullPaths = new HashSet<>();
  List<HoodieWriteStat> stats = getPartitionToWriteStats().get(partitionPath);
  if (stats != null) {
    for (HoodieWriteStat stat : stats) {
      // NOTE(review): the guard is on the file ID while the value used is the path;
      // sibling methods guard on getPath() instead — confirm this is intended.
      if (stat.getFileId() != null) {
        fullPaths.add(FSUtils.constructAbsolutePath(basePath, stat.getPath()).toString());
      }
    }
  }
  return new ArrayList<>(fullPaths);
}
public Map getFileGroupIdAndFullPaths(String basePath) {
Map fileGroupIdToFullPaths = new HashMap<>();
for (Map.Entry> entry : getPartitionToWriteStats().entrySet()) {
for (HoodieWriteStat stat : entry.getValue()) {
HoodieFileGroupId fileGroupId = new HoodieFileGroupId(stat.getPartitionPath(), stat.getFileId());
StoragePath fullPath = new StoragePath(basePath, stat.getPath());
fileGroupIdToFullPaths.put(fileGroupId, fullPath.toString());
}
}
return fileGroupIdToFullPaths;
}
/**
 * Builds a path-info entry for every written file recorded in this commit metadata.
 * Stats without a path are skipped. When several stats produce the same key, the one
 * visited last wins.
 *
 * <p>Note: entries are keyed by {@code fullPath.getName()}, not by the full path string.
 *
 * @param storage  {@link HoodieStorage} instance, queried for the default block size of
 *                 each path.
 * @param basePath The base path
 * @return a new map from {@code fullPath.getName()} to the path info of that file
 */
public Map<String, StoragePathInfo> getFullPathToInfo(HoodieStorage storage,
                                                      String basePath) {
  Map<String, StoragePathInfo> fullPathToInfoMap = new HashMap<>();
  for (List<HoodieWriteStat> stats : getPartitionToWriteStats().values()) {
    // Iterate through all the written files.
    for (HoodieWriteStat stat : stats) {
      String relativeFilePath = stat.getPath();
      if (relativeFilePath == null) {
        continue;
      }
      StoragePath fullPath = FSUtils.constructAbsolutePath(basePath, relativeFilePath);
      if (fullPath != null) {
        long blockSize = storage.getDefaultBlockSize(fullPath);
        StoragePathInfo pathInfo = new StoragePathInfo(
            fullPath, stat.getFileSizeInBytes(), false, (short) 0, blockSize, 0);
        fullPathToInfoMap.put(fullPath.getName(), pathInfo);
      }
    }
  }
  return fullPathToInfoMap;
}
/**
 * Builds a path-info entry for every written file recorded in this commit metadata,
 * keyed by file ID. Stats without a path are skipped. When several stats share a file
 * ID, the one visited last wins.
 *
 * <p>Note: unlike {@link #getFullPathToInfo(HoodieStorage, String)}, only one file per
 * file ID is returned; this is an optimization for COPY_ON_WRITE tables to eliminate
 * legacy files for the filesystem view.
 *
 * @param basePath The base path
 * @return a new map from file ID to the path info of that file
 */
public Map<String, StoragePathInfo> getFileIdToInfo(String basePath) {
  Map<String, StoragePathInfo> fileIdToInfoMap = new HashMap<>();
  for (List<HoodieWriteStat> stats : getPartitionToWriteStats().values()) {
    // Iterate through all the written files.
    for (HoodieWriteStat stat : stats) {
      String relativeFilePath = stat.getPath();
      if (relativeFilePath == null) {
        continue;
      }
      StoragePath fullPath = FSUtils.constructAbsolutePath(basePath, relativeFilePath);
      if (fullPath != null) {
        StoragePathInfo pathInfo =
            new StoragePathInfo(fullPath, stat.getFileSizeInBytes(), false, (short) 0, 0, 0);
        fileIdToInfoMap.put(stat.getFileId(), pathInfo);
      }
    }
  }
  return fileIdToInfoMap;
}
/**
 * Serializes this object as pretty-printed JSON.
 *
 * <p>Side effect: an entry stored under a {@code null} partition path is logged and
 * removed from the write-stats map before serialization.
 *
 * @return the JSON text
 * @throws IOException if serialization fails
 */
public String toJsonString() throws IOException {
  if (partitionToWriteStats.containsKey(null)) {
    // remove() returns the dropped value, so one call both logs and deletes it.
    LOG.info("partition path is null for {}", partitionToWriteStats.remove(null));
  }
  return JsonUtils.getObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
}
/**
 * Deserializes an object of the given class from JSON text.
 *
 * @param jsonStr JSON text; {@code null} or empty yields a default-constructed instance
 * @param clazz   target class; needs an accessible no-arg constructor for the empty case
 * @param <T>     target type
 * @return the deserialized (or default-constructed) instance
 * @throws Exception if instantiation or JSON parsing fails
 */
public static <T> T fromJsonString(String jsonStr, Class<T> clazz) throws Exception {
  if (jsonStr == null || jsonStr.isEmpty()) {
    // For empty commit file. Class#newInstance() is deprecated because it rethrows
    // checked constructor exceptions unchecked; go through the constructor instead.
    return clazz.getDeclaredConstructor().newInstance();
  }
  return JsonUtils.getObjectMapper().readValue(jsonStr, clazz);
}
/**
* parse the bytes of deltacommit, and get the base file and the log files belonging to this
* provided file group.
*/
public static Option>> getFileSliceForFileGroupFromDeltaCommit(byte[] bytes, HoodieFileGroupId fileGroupId) {
try {
org.apache.hudi.avro.model.HoodieCommitMetadata commitMetadata = deserializeCommitMetadata(bytes);
Map> partitionToWriteStatsMap =
commitMetadata.getPartitionToWriteStats();
for (Map.Entry> partitionToWriteStat: partitionToWriteStatsMap.entrySet()) {
for (org.apache.hudi.avro.model.HoodieWriteStat writeStat: partitionToWriteStat.getValue()) {
HoodieFileGroupId fgId = new HoodieFileGroupId(partitionToWriteStat.getKey(), writeStat.getFileId());
if (fgId.equals(fileGroupId)) {
return Option.of(Pair.of(writeStat.getBaseFile() == null ? "" : writeStat.getBaseFile(), writeStat.getLogFiles()));
}
}
}
return Option.empty();
} catch (Exception e) {
throw new HoodieException("Fail to parse the base file and log files from DeltaCommit", e);
}
}
// The functions below are named "fetch" instead of "get" to avoid the JSON conversion.
/**
 * @return the number of partition keys in the write-stats map
 */
public long fetchTotalPartitionsWritten() {
  final int partitionCount = partitionToWriteStats.size();
  return partitionCount;
}
/**
 * Counts write stats whose previous commit is the literal string {@code "null"}
 * (case-insensitive). Stats with a {@code null} previous commit are not counted.
 *
 * @return the number of such write stats
 */
public long fetchTotalFilesInsert() {
  long totalFilesInsert = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      // equalsIgnoreCase(null) is false, so a null previous commit is skipped.
      if ("null".equalsIgnoreCase(stat.getPrevCommit())) {
        totalFilesInsert++;
      }
    }
  }
  return totalFilesInsert;
}
/**
 * Counts write stats whose previous commit is set and is not the literal string
 * {@code "null"} (case-insensitive).
 *
 * @return the number of such write stats
 */
public long fetchTotalFilesUpdated() {
  long totalFilesUpdated = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      String prevCommit = stat.getPrevCommit();
      if (prevCommit != null && !"null".equalsIgnoreCase(prevCommit)) {
        totalFilesUpdated++;
      }
    }
  }
  return totalFilesUpdated;
}
/**
 * @return the sum of {@code getNumUpdateWrites()} over all write stats
 */
public long fetchTotalUpdateRecordsWritten() {
  long totalUpdateRecordsWritten = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      totalUpdateRecordsWritten += stat.getNumUpdateWrites();
    }
  }
  return totalUpdateRecordsWritten;
}
/**
 * Sums {@code getNumInserts()} over the write stats that have a non-null previous
 * commit; stats whose previous commit is {@code null} are ignored.
 *
 * @return the total insert count
 */
public long fetchTotalInsertRecordsWritten() {
  long totalInsertRecordsWritten = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      // determine insert rows in every file
      if (stat.getPrevCommit() != null) {
        totalInsertRecordsWritten += stat.getNumInserts();
      }
    }
  }
  return totalInsertRecordsWritten;
}
/**
 * @return the sum of {@code getNumWrites()} over all write stats
 */
public long fetchTotalRecordsWritten() {
  long totalRecordsWritten = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      totalRecordsWritten += stat.getNumWrites();
    }
  }
  return totalRecordsWritten;
}
/**
 * @return the sum of {@code getTotalWriteBytes()} over all write stats
 */
public long fetchTotalBytesWritten() {
  long totalBytesWritten = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      totalBytesWritten += stat.getTotalWriteBytes();
    }
  }
  return totalBytesWritten;
}
/**
 * @return the sum of {@code getTotalWriteErrors()} over all write stats
 */
public long fetchTotalWriteErrors() {
  long totalWriteErrors = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      totalWriteErrors += stat.getTotalWriteErrors();
    }
  }
  return totalWriteErrors;
}
/**
 * @return the sum of {@code getNumDeletes()} over all write stats
 */
public long getTotalRecordsDeleted() {
  long totalDeletes = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      totalDeletes += stat.getNumDeletes();
    }
  }
  return totalDeletes;
}
public Long getTotalLogRecordsCompacted() {
Long totalLogRecords = 0L;
for (Map.Entry> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalLogRecords += writeStat.getTotalLogRecords();
}
}
return totalLogRecords;
}
public Long getTotalLogFilesCompacted() {
Long totalLogFiles = 0L;
for (Map.Entry> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalLogFiles += writeStat.getTotalLogFilesCompacted();
}
}
return totalLogFiles;
}
public Long getTotalCompactedRecordsUpdated() {
Long totalUpdateRecords = 0L;
for (Map.Entry> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalUpdateRecords += writeStat.getTotalUpdatedRecordsCompacted();
}
}
return totalUpdateRecords;
}
public Long getTotalCorruptLogBlocks() {
Long totalCorruptedLogBlocks = 0L;
for (Map.Entry> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalCorruptedLogBlocks += writeStat.getTotalCorruptLogBlock();
}
}
return totalCorruptedLogBlocks;
}
public Long getTotalRollbackLogBlocks() {
Long totalRollbackLogBlocks = 0L;
for (Map.Entry> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalRollbackLogBlocks += writeStat.getTotalRollbackBlocks();
}
}
return totalRollbackLogBlocks;
}
public Long getTotalLogFilesSize() {
Long totalLogFilesSize = 0L;
for (Map.Entry> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalLogFilesSize += writeStat.getTotalLogSizeCompacted();
}
}
return totalLogFilesSize;
}
public Long getTotalScanTime() {
Long totalScanTime = 0L;
for (Map.Entry> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
if (writeStat.getRuntimeStats() != null) {
totalScanTime += writeStat.getRuntimeStats().getTotalScanTime();
}
}
}
return totalScanTime;
}
public Long getTotalCreateTime() {
Long totalCreateTime = 0L;
for (Map.Entry> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
if (writeStat.getRuntimeStats() != null) {
totalCreateTime += writeStat.getRuntimeStats().getTotalCreateTime();
}
}
}
return totalCreateTime;
}
public Long getTotalUpsertTime() {
Long totalUpsertTime = 0L;
for (Map.Entry> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
if (writeStat.getRuntimeStats() != null) {
totalUpsertTime += writeStat.getRuntimeStats().getTotalUpsertTime();
}
}
}
return totalUpsertTime;
}
public Pair