// org.apache.hudi.metadata.HoodieTableMetadataUtil Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.metadata;
import org.apache.hudi.avro.ConvertingGenericData;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.BooleanWrapper;
import org.apache.hudi.avro.model.DateWrapper;
import org.apache.hudi.avro.model.DoubleWrapper;
import org.apache.hudi.avro.model.FloatWrapper;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieMetadataColumnStats;
import org.apache.hudi.avro.model.HoodieMetadataFileInfo;
import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.avro.model.HoodieRecordIndexInfo;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.avro.model.IntWrapper;
import org.apache.hudi.avro.model.LongWrapper;
import org.apache.hudi.avro.model.StringWrapper;
import org.apache.hudi.avro.model.TimeMicrosWrapper;
import org.apache.hudi.avro.model.TimestampMicrosWrapper;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.data.HoodieAccumulator;
import org.apache.hudi.common.data.HoodieAtomicLongAccumulator;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.EngineType;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.function.SerializableBiFunction;
import org.apache.hudi.common.function.SerializablePairFunction;
import org.apache.hudi.common.model.EmptyHoodieRecordPayload;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieColumnRangeMetadata;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieDeltaWriteStat;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieIndexDefinition;
import org.apache.hudi.common.model.HoodieIndexMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType;
import org.apache.hudi.common.model.HoodieRecordGlobalLocation;
import org.apache.hudi.common.model.HoodieRecordMerger;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.log.HoodieFileSliceReader;
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
import org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.InstantGenerator;
import org.apache.hudi.common.table.timeline.TimelineFactory;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Either;
import org.apache.hudi.common.util.FileFormatUtils;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.HoodieRecordUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.VisibleForTesting;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.common.util.collection.Tuple3;
import org.apache.hudi.common.util.hash.ColumnIndexID;
import org.apache.hudi.common.util.hash.PartitionIndexID;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.io.storage.HoodieFileReader;
import org.apache.hudi.io.storage.HoodieIOFactory;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.HoodieStorageUtils;
import org.apache.hudi.storage.StorageConfiguration;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.storage.StoragePathInfo;
import org.apache.hudi.util.Lazy;
import org.apache.avro.AvroTypeException;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.UUID;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static java.util.stream.Collectors.toList;
import static org.apache.hudi.avro.AvroSchemaUtils.resolveNullableSchema;
import static org.apache.hudi.avro.HoodieAvroUtils.addMetadataFields;
import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldSchemaFromWriteSchema;
import static org.apache.hudi.avro.HoodieAvroUtils.getSchemaForFields;
import static org.apache.hudi.avro.HoodieAvroUtils.unwrapAvroValueWrapper;
import static org.apache.hudi.avro.HoodieAvroUtils.wrapValueIntoAvro;
import static org.apache.hudi.common.config.HoodieCommonConfig.DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES;
import static org.apache.hudi.common.config.HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED;
import static org.apache.hudi.common.config.HoodieCommonConfig.MAX_MEMORY_FOR_COMPACTION;
import static org.apache.hudi.common.config.HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE;
import static org.apache.hudi.common.config.HoodieReaderConfig.ENABLE_OPTIMIZED_LOG_BLOCKS_SCAN;
import static org.apache.hudi.common.fs.FSUtils.getFileNameFromPath;
import static org.apache.hudi.common.model.HoodieRecord.COMMIT_TIME_METADATA_FIELD;
import static org.apache.hudi.common.model.HoodieRecord.HOODIE_META_COLUMNS_WITH_OPERATION;
import static org.apache.hudi.common.model.HoodieRecord.PARTITION_PATH_METADATA_FIELD;
import static org.apache.hudi.common.model.HoodieRecord.RECORD_KEY_METADATA_FIELD;
import static org.apache.hudi.common.table.timeline.InstantComparison.GREATER_THAN;
import static org.apache.hudi.common.table.timeline.InstantComparison.LESSER_THAN_OR_EQUALS;
import static org.apache.hudi.common.table.timeline.InstantComparison.compareTimestamps;
import static org.apache.hudi.common.util.ConfigUtils.getReaderConfigs;
import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
import static org.apache.hudi.metadata.HoodieMetadataPayload.RECORD_INDEX_MISSING_FILEINDEX_FALLBACK;
import static org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME;
import static org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME;
import static org.apache.hudi.metadata.HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP;
/**
* A utility to convert timeline information to metadata table records.
*/
public class HoodieTableMetadataUtil {
// Class-wide logger used by the static helpers below.
private static final Logger LOG = LoggerFactory.getLogger(HoodieTableMetadataUtil.class);

// Names of the metadata table partitions. The *_PREFIX variants share the stem of the corresponding
// partition name followed by an underscore (presumably completed with an index name -- confirm at use sites).
public static final String PARTITION_NAME_FILES = "files";
public static final String PARTITION_NAME_PARTITION_STATS = "partition_stats";
public static final String PARTITION_NAME_COLUMN_STATS = "column_stats";
public static final String PARTITION_NAME_BLOOM_FILTERS = "bloom_filters";
public static final String PARTITION_NAME_RECORD_INDEX = "record_index";
public static final String PARTITION_NAME_EXPRESSION_INDEX = "expr_index";
public static final String PARTITION_NAME_EXPRESSION_INDEX_PREFIX = "expr_index_";
public static final String PARTITION_NAME_SECONDARY_INDEX = "secondary_index";
public static final String PARTITION_NAME_SECONDARY_INDEX_PREFIX = "secondary_index_";

// Avro schema types accepted for partition stats (going by the constant's name; usage is outside this section).
private static final Set<Schema.Type> SUPPORTED_TYPES_PARTITION_STATS = new HashSet<>(Arrays.asList(
    Schema.Type.INT, Schema.Type.LONG, Schema.Type.FLOAT, Schema.Type.DOUBLE, Schema.Type.STRING, Schema.Type.BOOLEAN, Schema.Type.NULL, Schema.Type.BYTES));

// Hudi meta fields (record key, partition path, commit time) accepted for partition stats
// (going by the constant's name; usage is outside this section).
private static final Set<String> SUPPORTED_META_FIELDS_PARTITION_STATS = new HashSet<>(Arrays.asList(
    HoodieRecord.HoodieMetadataField.RECORD_KEY_METADATA_FIELD.getFieldName(),
    HoodieRecord.HoodieMetadataField.PARTITION_PATH_METADATA_FIELD.getFieldName(),
    HoodieRecord.HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.getFieldName()));

// Static utility holder: never instantiated.
private HoodieTableMetadataUtil() {
}

// Avro wrapper classes whose wrapped value getColumnStatsValueAsString can render as a string.
public static final Set<Class<?>> COLUMN_STATS_RECORD_SUPPORTED_TYPES = new HashSet<>(Arrays.asList(
    IntWrapper.class, BooleanWrapper.class, DateWrapper.class,
    DoubleWrapper.class, FloatWrapper.class, LongWrapper.class,
    StringWrapper.class, TimeMicrosWrapper.class, TimestampMicrosWrapper.class));
/**
 * Tells whether the {@code files} partition of the metadata table can be read.
 *
 * @param metaClient {@link HoodieTableMetaClient} whose table config is consulted.
 * @return {@code true} when the table config lists the files partition among its metadata partitions;
 *         {@code false} otherwise.
 */
public static boolean isFilesPartitionAvailable(HoodieTableMetaClient metaClient) {
  final Collection<String> availablePartitions = metaClient.getTableConfig().getMetadataPartitions();
  return availablePartitions.contains(HoodieTableMetadataUtil.PARTITION_NAME_FILES);
}
/**
 * Collects {@link HoodieColumnRangeMetadata} for the provided collection of records, pretending
 * as if provided records have been persisted w/in given {@code filePath}.
 *
 * <p>For every target field the method tracks min value, max value, null count and value count
 * across all records. Size statistics are always reported as 0.
 *
 * <p>NOTE(review): the generic type arguments of the declarations below (return type, {@code records},
 * {@code targetFields}, local collections) appear to be missing in this listing -- confirm against the
 * canonical source before compiling.
 *
 * @param records      target records to compute column range metadata for
 * @param targetFields columns (fields) to be collected
 * @param filePath     file path value required for {@link HoodieColumnRangeMetadata}
 * @param recordSchema schema used to read field values from each record and to resolve each field's schema
 *
 * @return map of {@link HoodieColumnRangeMetadata} for each of the provided target fields for
 *         the collection of provided records, keyed by column name
 * @throws HoodieException if a record's type is neither AVRO nor SPARK
 */
public static Map> collectColumnRangeMetadata(
List records, List targetFields, String filePath, Schema recordSchema) {
// Helper class to calculate column stats (mutable accumulator, one instance per column)
class ColumnStats {
Object minValue;
Object maxValue;
long nullCount;
long valueCount;
}
// Accumulators keyed by field name
HashMap allColumnStats = new HashMap<>();
// Collect stats for all columns by iterating through records while accounting
// corresponding stats
records.forEach((record) -> {
// For each column (field) we have to index update corresponding column stats
// with the values from this record
targetFields.forEach(field -> {
ColumnStats colStats = allColumnStats.computeIfAbsent(field.name(), ignored -> new ColumnStats());
// Field schema is resolved against the record schema; it drives the value comparison below
Schema fieldSchema = getNestedFieldSchemaFromWriteSchema(recordSchema, field.name());
Object fieldValue;
// Value extraction depends on the record representation
if (record.getRecordType() == HoodieRecordType.AVRO) {
fieldValue = HoodieAvroUtils.getRecordColumnValues(record, new String[]{field.name()}, recordSchema, false)[0];
} else if (record.getRecordType() == HoodieRecordType.SPARK) {
fieldValue = record.getColumnValues(recordSchema, new String[]{field.name()}, false)[0];
} else {
throw new HoodieException(String.format("Unknown record type: %s", record.getRecordType()));
}
// Every record counts towards the value count, whether or not the value is null
colStats.valueCount++;
if (fieldValue != null && isColumnTypeSupported(fieldSchema, Option.of(record.getRecordType()))) {
// Set the min value of the field
if (colStats.minValue == null
|| ConvertingGenericData.INSTANCE.compare(fieldValue, colStats.minValue, fieldSchema) < 0) {
colStats.minValue = fieldValue;
}
// Set the max value of the field
if (colStats.maxValue == null || ConvertingGenericData.INSTANCE.compare(fieldValue, colStats.maxValue, fieldSchema) > 0) {
colStats.maxValue = fieldValue;
}
} else {
// NOTE: this branch is also taken for non-null values of an unsupported column type,
//       so such values are counted as nulls
colStats.nullCount++;
}
});
});
// Convert the accumulators into range metadata, one entry per target field
Stream> hoodieColumnRangeMetadataStream =
targetFields.stream().map(field -> {
// colStats is null when no record was processed for this field (e.g. empty records list)
ColumnStats colStats = allColumnStats.get(field.name());
HoodieColumnRangeMetadata hcrm = HoodieColumnRangeMetadata.create(
filePath,
field.name(),
colStats == null ? null : coerceToComparable(field.schema(), colStats.minValue),
colStats == null ? null : coerceToComparable(field.schema(), colStats.maxValue),
colStats == null ? 0L : colStats.nullCount,
colStats == null ? 0L : colStats.valueCount,
// NOTE: Size and compressed size statistics are set to 0 to make sure we're not
// mixing up those provided by Parquet with the ones from other encodings,
// since those are not directly comparable
0L,
0L
);
return hcrm;
});
// Key the result by column name
return hoodieColumnRangeMetadataStream.collect(
Collectors.toMap(HoodieColumnRangeMetadata::getColumnName, Function.identity()));
}
/**
 * Renders a wrapped column stats value as a string.
 *
 * @param statsValue wrapper record holding the stats value; may be {@code null}
 * @return empty option when {@code statsValue} is {@code null}; otherwise the string form of the
 *         wrapper's field at position 0
 * @throws HoodieNotSupportedException if the class of {@code statsValue} is not listed in
 *         {@link #COLUMN_STATS_RECORD_SUPPORTED_TYPES}
 */
public static Option<String> getColumnStatsValueAsString(Object statsValue) {
  if (statsValue == null) {
    LOG.info("Invalid column stats value: {}", statsValue);
    return Option.empty();
  }
  Class<?> statsValueClass = statsValue.getClass();
  if (!COLUMN_STATS_RECORD_SUPPORTED_TYPES.contains(statsValueClass)) {
    throw new HoodieNotSupportedException("Unsupported type: " + statsValueClass.getSimpleName());
  }
  // All supported wrappers are read through IndexedRecord: the wrapped value sits at position 0.
  return Option.of(String.valueOf(((IndexedRecord) statsValue).get(0)));
}
/**
 * Deletes the metadata table of the dataset located at {@code basePath}. Meant to be invoked during
 * upgrade/downgrade operations, during which no other process should be running.
 *
 * <p>Builds a meta client for the data table from a fresh copy of the engine's storage configuration
 * and delegates to the meta-client based overload with its last argument set to {@code false}.
 *
 * @param basePath base path of the dataset
 * @param context  instance of {@link HoodieEngineContext}.
 */
public static void deleteMetadataTable(String basePath, HoodieEngineContext context) {
  final StorageConfiguration<?> freshStorageConf = context.getStorageConf().newInstance();
  final HoodieTableMetaClient metaClientForDataTable = HoodieTableMetaClient.builder()
      .setBasePath(basePath)
      .setConf(freshStorageConf)
      .build();
  deleteMetadataTable(metaClientForDataTable, context, false);
}
/**
 * Deletes a single metadata table partition from the file system.
 *
 * <p>Builds a meta client for the data table from a fresh copy of the engine's storage configuration
 * and delegates to {@code deleteMetadataTablePartition} with its last argument set to {@code false}.
 *
 * @param basePath      - base path of the dataset
 * @param context       - instance of {@link HoodieEngineContext}
 * @param partitionPath - Partition path of the partition to delete
 */
public static void deleteMetadataPartition(StoragePath basePath, HoodieEngineContext context, String partitionPath) {
  final StorageConfiguration<?> freshStorageConf = context.getStorageConf().newInstance();
  final HoodieTableMetaClient metaClientForDataTable = HoodieTableMetaClient.builder()
      .setBasePath(basePath)
      .setConf(freshStorageConf)
      .build();
  deleteMetadataTablePartition(metaClientForDataTable, context, partitionPath, false);
}
/**
 * Check if the given metadata partition exists.
 *
 * @param basePath      base path of the dataset
 * @param context       instance of {@link HoodieEngineContext}.
 * @param partitionPath metadata partition path, resolved against the metadata table base path
 * @return {@code true} if the partition path exists under the metadata table base path
 * @throws HoodieIOException if the existence check fails; the original failure is attached as the cause
 */
public static boolean metadataPartitionExists(String basePath, HoodieEngineContext context, String partitionPath) {
  final String metadataTablePath = HoodieTableMetadata.getMetadataTableBasePath(basePath);
  HoodieStorage storage = HoodieStorageUtils.getStorage(metadataTablePath, context.getStorageConf());
  try {
    return storage.exists(new StoragePath(metadataTablePath, partitionPath));
  } catch (Exception e) {
    HoodieIOException failure = new HoodieIOException(
        String.format("Failed to check metadata partition %s exists.", partitionPath));
    // Keep the root cause: without it the reason for the failed check is lost to callers and logs.
    failure.initCause(e);
    throw failure;
  }
}
/**
 * {@link StoragePath} flavour of the metadata partition existence check; converts the base path to
 * its string form and delegates to the string based overload.
 *
 * @param basePath      base path of the dataset
 * @param context       instance of {@link HoodieEngineContext}.
 * @param partitionPath metadata partition path to look for
 * @return result of the string based overload
 */
public static boolean metadataPartitionExists(StoragePath basePath, HoodieEngineContext context, String partitionPath) {
  final String basePathAsString = basePath.toString();
  return metadataPartitionExists(basePathAsString, context, partitionPath);
}
/**
* Convert commit action to metadata records for the enabled partition types.
*
* @param context - Engine context to use
* @param hoodieConfig - Hudi configs
* @param commitMetadata - Commit action metadata
* @param instantTime - Action instant time
* @param dataMetaClient - HoodieTableMetaClient for data
* @param metadataConfig - HoodieMetadataConfig
* @param enabledPartitionTypes - List of enabled MDT partitions
* @param bloomFilterType - Type of generated bloom filter records
* @param bloomIndexParallelism - Parallelism for bloom filter record generation
* @return Map of partition to metadata records for the commit action
*/
public static Map> convertMetadataToRecords(HoodieEngineContext context,
HoodieConfig hoodieConfig,
HoodieCommitMetadata commitMetadata,
String instantTime,
HoodieTableMetaClient dataMetaClient,
HoodieMetadataConfig metadataConfig,
List enabledPartitionTypes,
String bloomFilterType,
int bloomIndexParallelism, Integer writesFileIdEncoding) {
final Map> partitionToRecordsMap = new HashMap<>();
final HoodieData filesPartitionRecordsRDD = context.parallelize(
convertMetadataToFilesPartitionRecords(commitMetadata, instantTime), 1);
partitionToRecordsMap.put(MetadataPartitionType.FILES.getPartitionPath(), filesPartitionRecordsRDD);
if (enabledPartitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) {
final HoodieData metadataBloomFilterRecords = convertMetadataToBloomFilterRecords(
context, hoodieConfig, commitMetadata, instantTime, dataMetaClient, bloomFilterType, bloomIndexParallelism);
partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS.getPartitionPath(), metadataBloomFilterRecords);
}
if (enabledPartitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) {
final HoodieData metadataColumnStatsRDD = convertMetadataToColumnStatsRecords(commitMetadata, context,
dataMetaClient, metadataConfig);
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS.getPartitionPath(), metadataColumnStatsRDD);
}
if (enabledPartitionTypes.contains(MetadataPartitionType.PARTITION_STATS)) {
checkState(MetadataPartitionType.COLUMN_STATS.isMetadataPartitionAvailable(dataMetaClient),
"Column stats partition must be enabled to generate partition stats. Please enable: " + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key());
final HoodieData partitionStatsRDD = convertMetadataToPartitionStatsRecords(commitMetadata, context, dataMetaClient, metadataConfig);
partitionToRecordsMap.put(MetadataPartitionType.PARTITION_STATS.getPartitionPath(), partitionStatsRDD);
}
if (enabledPartitionTypes.contains(MetadataPartitionType.RECORD_INDEX)) {
partitionToRecordsMap.put(MetadataPartitionType.RECORD_INDEX.getPartitionPath(), convertMetadataToRecordIndexRecords(context, commitMetadata, metadataConfig,
dataMetaClient, writesFileIdEncoding, instantTime));
}
return partitionToRecordsMap;
}
/**
 * Finds all new files/partitions created as part of commit and creates metadata table records for them.
 *
 * <p>The returned list starts with one record bearing the list of partitions touched by the commit,
 * followed by one files record per partition mapping each written file name to its size.
 *
 * @param commitMetadata - Commit action metadata
 * @param instantTime    - Commit action instant time (used for logging only)
 * @return List of metadata table records
 */
public static List<HoodieRecord> convertMetadataToFilesPartitionRecords(HoodieCommitMetadata commitMetadata,
    String instantTime) {
  Map<String, List<HoodieWriteStat>> partitionToWriteStats = commitMetadata.getPartitionToWriteStats();
  // One record per partition plus the partition list record.
  List<HoodieRecord> records = new ArrayList<>(partitionToWriteStats.size() + 1);

  // Add record bearing added partitions list
  List<String> partitionsAdded = getPartitionsAdded(commitMetadata);
  records.add(HoodieMetadataPayload.createPartitionListRecord(partitionsAdded));

  // Update files listing records for each individual partition
  long newFileCount = 0L;
  for (Map.Entry<String, List<HoodieWriteStat>> entry : partitionToWriteStats.entrySet()) {
    String partitionStatName = entry.getKey();
    List<HoodieWriteStat> writeStats = entry.getValue();
    Map<String, Long> updatedFilesToSizesMapping = new HashMap<>(writeStats.size());
    for (HoodieWriteStat stat : writeStats) {
      String pathWithPartition = stat.getPath();
      if (pathWithPartition == null) {
        // Empty partition
        LOG.warn("Unable to find path in write stat to update metadata table {}", stat);
        continue;
      }
      String fileName = FSUtils.getFileName(pathWithPartition, partitionStatName);
      // Since write-stats are coming in no particular order, if the same
      // file have previously been appended to w/in the txn, we simply pick max
      // of the sizes as reported after every write, since file-sizes are
      // monotonically increasing (ie file-size never goes down, unless deleted)
      updatedFilesToSizesMapping.merge(fileName, stat.getFileSizeInBytes(), Math::max);

      // CDC files reported by the write stat are registered with their own sizes.
      Map<String, Long> cdcPathAndSizes = stat.getCdcStats();
      if (cdcPathAndSizes != null && !cdcPathAndSizes.isEmpty()) {
        cdcPathAndSizes.forEach((key, value) ->
            updatedFilesToSizesMapping.put(FSUtils.getFileName(key, partitionStatName), value));
      }
    }
    newFileCount += updatedFilesToSizesMapping.size();
    records.add(HoodieMetadataPayload.createPartitionFilesRecord(partitionStatName, updatedFilesToSizesMapping,
        Collections.emptyList()));
  }

  LOG.info("Updating at {} from Commit/{}. #partitions_updated={}, #files_added={}", instantTime, commitMetadata.getOperationType(),
      records.size(), newFileCount);
  return records;
}
/**
 * Lists the partitions touched by the commit, each mapped to the identifier used in the files partition.
 *
 * @param commitMetadata commit action metadata
 * @return identifiers of all partitions that have write stats in the commit
 */
private static List<String> getPartitionsAdded(HoodieCommitMetadata commitMetadata) {
  return commitMetadata.getPartitionToWriteStats().keySet().stream()
      // We need to make sure we properly handle case of non-partitioned tables
      .map(HoodieTableMetadataUtil::getPartitionIdentifierForFilesPartition)
      .collect(Collectors.toList());
}
/**
 * Returns all the incremental write partition paths as a set with the given commits metadata.
 *
 * @param metadataList The commits metadata
 * @return the partition path set, i.e. the union of the write partition paths of every commit
 */
public static Set<String> getWritePartitionPaths(List<HoodieCommitMetadata> metadataList) {
  return metadataList.stream()
      .map(HoodieCommitMetadata::getWritePartitionPaths)
      .flatMap(Collection::stream)
      .collect(Collectors.toSet());
}
/**
 * Convert commit action metadata to bloom filter records.
 *
 * <p>Only base files contribute records: delta write stats, write stats without a path and non-base
 * files are skipped. Failures to open a file or read its bloom filter are logged (with the causing
 * exception) and yield no record for that file instead of failing the whole conversion.
 *
 * @param context               - Engine context to use
 * @param hoodieConfig          - Hudi configs
 * @param commitMetadata        - Commit action metadata
 * @param instantTime           - Action instant time
 * @param dataMetaClient        - HoodieTableMetaClient for data
 * @param bloomFilterType       - Type of generated bloom filter records
 * @param bloomIndexParallelism - Parallelism for bloom filter record generation
 * @return HoodieData of metadata table records
 */
public static HoodieData<HoodieRecord> convertMetadataToBloomFilterRecords(HoodieEngineContext context,
    HoodieConfig hoodieConfig,
    HoodieCommitMetadata commitMetadata,
    String instantTime,
    HoodieTableMetaClient dataMetaClient,
    String bloomFilterType,
    int bloomIndexParallelism) {
  final List<HoodieWriteStat> allWriteStats = commitMetadata.getPartitionToWriteStats().values().stream()
      .flatMap(Collection::stream).collect(Collectors.toList());
  if (allWriteStats.isEmpty()) {
    return context.emptyHoodieData();
  }

  // Never more tasks than write stats, and always at least one.
  final int parallelism = Math.max(Math.min(allWriteStats.size(), bloomIndexParallelism), 1);
  HoodieData<HoodieWriteStat> allWriteStatsRDD = context.parallelize(allWriteStats, parallelism);
  return allWriteStatsRDD.flatMap(hoodieWriteStat -> {
    final String partition = hoodieWriteStat.getPartitionPath();

    // For bloom filter index, delta writes do not change the base file bloom filter entries
    if (hoodieWriteStat instanceof HoodieDeltaWriteStat) {
      return Collections.emptyListIterator();
    }

    String pathWithPartition = hoodieWriteStat.getPath();
    if (pathWithPartition == null) {
      // Empty partition
      LOG.error("Failed to find path in write stat to update metadata table {}", hoodieWriteStat);
      return Collections.emptyListIterator();
    }

    String fileName = FSUtils.getFileName(pathWithPartition, partition);
    if (!FSUtils.isBaseFile(new StoragePath(fileName))) {
      return Collections.emptyListIterator();
    }

    final StoragePath writeFilePath = new StoragePath(dataMetaClient.getBasePath(), pathWithPartition);
    try (HoodieFileReader fileReader = HoodieIOFactory.getIOFactory(dataMetaClient.getStorage())
        .getReaderFactory(HoodieRecordType.AVRO).getFileReader(hoodieConfig, writeFilePath)) {
      try {
        final BloomFilter fileBloomFilter = fileReader.readBloomFilter();
        if (fileBloomFilter == null) {
          LOG.error("Failed to read bloom filter for {}", writeFilePath);
          return Collections.emptyListIterator();
        }
        ByteBuffer bloomByteBuffer = ByteBuffer.wrap(getUTF8Bytes(fileBloomFilter.serializeToString()));
        HoodieRecord record = HoodieMetadataPayload.createBloomFilterMetadataRecord(
            partition, fileName, instantTime, bloomFilterType, bloomByteBuffer, false);
        return Collections.singletonList(record).iterator();
      } catch (Exception e) {
        // Best effort: skip this file, but keep the stack trace so the failure can be diagnosed.
        LOG.error("Failed to read bloom filter for {}", writeFilePath, e);
        return Collections.emptyListIterator();
      }
    } catch (IOException e) {
      // Opening or closing the reader failed; the trailing throwable is logged with its stack trace.
      LOG.error("Failed to get bloom filter for file: {}, write stat: {}", writeFilePath, hoodieWriteStat, e);
    }
    return Collections.emptyListIterator();
  });
}
/**
* Convert the clean action to metadata records.
*/
public static Map> convertMetadataToRecords(HoodieEngineContext engineContext,
HoodieCleanMetadata cleanMetadata,
String instantTime,
HoodieTableMetaClient dataMetaClient,
HoodieMetadataConfig metadataConfig,
List enabledPartitionTypes,
int bloomIndexParallelism) {
final Map> partitionToRecordsMap = new HashMap<>();
final HoodieData filesPartitionRecordsRDD = engineContext.parallelize(
convertMetadataToFilesPartitionRecords(cleanMetadata, instantTime), 1);
partitionToRecordsMap.put(MetadataPartitionType.FILES.getPartitionPath(), filesPartitionRecordsRDD);
if (enabledPartitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) {
final HoodieData metadataBloomFilterRecordsRDD =
convertMetadataToBloomFilterRecords(cleanMetadata, engineContext, instantTime, bloomIndexParallelism);
partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS.getPartitionPath(), metadataBloomFilterRecordsRDD);
}
if (enabledPartitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) {
final HoodieData metadataColumnStatsRDD =
convertMetadataToColumnStatsRecords(cleanMetadata, engineContext,
dataMetaClient, metadataConfig);
partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS.getPartitionPath(), metadataColumnStatsRDD);
}
if (enabledPartitionTypes.contains(MetadataPartitionType.EXPRESSION_INDEX)) {
convertMetadataToExpressionIndexRecords(engineContext, cleanMetadata, instantTime, dataMetaClient, metadataConfig, bloomIndexParallelism, partitionToRecordsMap);
}
return partitionToRecordsMap;
}
private static void convertMetadataToExpressionIndexRecords(HoodieEngineContext engineContext, HoodieCleanMetadata cleanMetadata,
String instantTime, HoodieTableMetaClient dataMetaClient,
HoodieMetadataConfig metadataConfig, int bloomIndexParallelism,
Map> partitionToRecordsMap) {
Option indexMetadata = dataMetaClient.getIndexMetadata();
if (indexMetadata.isPresent()) {
HoodieIndexMetadata metadata = indexMetadata.get();
Map indexDefinitions = metadata.getIndexDefinitions();
if (indexDefinitions.isEmpty()) {
throw new HoodieMetadataException("Expression index metadata not found");
}
// iterate over each index definition and check:
// if it is a expression index using column_stats, then follow the same approach as column_stats
// if it is a expression index using bloom_filters, then follow the same approach as bloom_filters
// else throw an exception
for (Map.Entry entry : indexDefinitions.entrySet()) {
String indexName = entry.getKey();
HoodieIndexDefinition indexDefinition = entry.getValue();
if (MetadataPartitionType.EXPRESSION_INDEX.equals(MetadataPartitionType.fromPartitionPath(indexDefinition.getIndexName()))) {
if (indexDefinition.getIndexType().equalsIgnoreCase(PARTITION_NAME_BLOOM_FILTERS)) {
partitionToRecordsMap.put(indexName, convertMetadataToBloomFilterRecords(cleanMetadata, engineContext, instantTime, bloomIndexParallelism));
} else if (indexDefinition.getIndexType().equalsIgnoreCase(PARTITION_NAME_COLUMN_STATS)) {
HoodieMetadataConfig modifiedMetadataConfig = HoodieMetadataConfig.newBuilder()
.withProperties(metadataConfig.getProps())
.withColumnStatsIndexForColumns(String.join(",", indexDefinition.getSourceFields()))
.build();
partitionToRecordsMap.put(indexName,
convertMetadataToColumnStatsRecords(cleanMetadata, engineContext, dataMetaClient, modifiedMetadataConfig));
} else {
throw new HoodieMetadataException("Unsupported expression index type");
}
}
}
} else {
throw new HoodieMetadataException("Expression index metadata not found");
}
}
/**
 * Finds all files that were deleted as part of a clean and creates metadata table records for them.
 *
 * <p>One files record is created per cleaned partition, carrying that partition's delete path
 * patterns. If the clean deleted whole partitions, a partition list record marking them as deleted is
 * appended at the end.
 *
 * @param cleanMetadata clean action metadata
 * @param instantTime   clean action instant time (used for logging only)
 * @return a list of metadata table records
 */
public static List<HoodieRecord> convertMetadataToFilesPartitionRecords(HoodieCleanMetadata cleanMetadata,
    String instantTime) {
  List<HoodieRecord> records = new LinkedList<>();
  // Single-element array so the counter can be updated from inside the lambda.
  int[] fileDeleteCount = {0};
  List<String> deletedPartitions = new ArrayList<>();
  cleanMetadata.getPartitionMetadata().forEach((partitionName, partitionMetadata) -> {
    // Files deleted from a partition
    List<String> deletedFiles = partitionMetadata.getDeletePathPatterns();
    HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partitionName, Collections.emptyMap(),
        deletedFiles);
    records.add(record);
    fileDeleteCount[0] += deletedFiles.size();
    boolean isPartitionDeleted = partitionMetadata.getIsPartitionDeleted();
    if (isPartitionDeleted) {
      deletedPartitions.add(partitionName);
    }
  });

  if (!deletedPartitions.isEmpty()) {
    // if there are partitions to be deleted, add them to delete list
    records.add(HoodieMetadataPayload.createPartitionListRecord(deletedPartitions, true));
  }
  LOG.info("Updating at {} from Clean. #partitions_updated={}, #files_deleted={}, #partitions_deleted={}",
      instantTime, records.size(), fileDeleteCount[0], deletedPartitions.size());
  return records;
}
/**
* Builds FILES partition records that re-add files found missing during a restore.
*
* One record is created per partition present in {@code filesAdded} (paired with any deletions listed for the
* same partition in {@code filesDeleted}), plus one record for every partition that appears only in
* {@code filesDeleted}. When {@code deletedPartitions} is non-empty, a partition list record for those
* partitions is appended as well.
*
* @param engineContext engine context used to parallelize the generated records
* @param deletedPartitions partitions to put into the partition list record
* @param filesAdded files to add, keyed by partition
* @param filesDeleted files to delete, keyed by partition
* @param instantTime instant time; used here only for logging
* @return single-entry map from the FILES partition path to the generated records (parallelism 1)
*/
public static Map> convertMissingPartitionRecords(HoodieEngineContext engineContext,
List deletedPartitions, Map> filesAdded,
Map> filesDeleted, String instantTime) {
List records = new LinkedList<>();
// Single-element arrays so that the lambdas below can update the counters.
int[] fileDeleteCount = {0};
int[] filesAddedCount = {0};
filesAdded.forEach((partition, filesToAdd) -> {
filesAddedCount[0] += filesToAdd.size();
// Deletions for the same partition are folded into the same record.
List filesToDelete = filesDeleted.getOrDefault(partition, Collections.emptyList());
fileDeleteCount[0] += filesToDelete.size();
HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partition, filesToAdd, filesToDelete);
records.add(record);
});
// there could be partitions which only has missing deleted files.
filesDeleted.forEach((partition, filesToDelete) -> {
// Partitions already handled in the loop above are skipped to avoid a duplicate record.
if (!filesAdded.containsKey(partition)) {
fileDeleteCount[0] += filesToDelete.size();
HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partition, Collections.emptyMap(), filesToDelete);
records.add(record);
}
});
if (!deletedPartitions.isEmpty()) {
// if there are partitions to be deleted, add them to delete list
records.add(HoodieMetadataPayload.createPartitionListRecord(deletedPartitions, true));
}
// NOTE: #partitions_updated is reported as records.size(), which also counts the partition list record added above.
LOG.info("Re-adding missing records at {} during Restore. #partitions_updated={}, #files_added={}, #files_deleted={}, #partitions_deleted={}",
instantTime, records.size(), filesAddedCount[0], fileDeleteCount[0], deletedPartitions.size());
return Collections.singletonMap(MetadataPartitionType.FILES.getPartitionPath(), engineContext.parallelize(records, 1));
}
/**
* Convert clean metadata to bloom filter index records.
*
* Only deleted paths accepted by {@code FSUtils.isBaseFile} are considered. Each of them yields a bloom filter
* record created with an empty filter type, a zero-length buffer and the deleted flag set.
*
* @param cleanMetadata - Clean action metadata
* @param engineContext - Engine context
* @param instantTime - Clean action instant time
* @param bloomIndexParallelism - Upper bound on the parallelism for bloom filter record generation
* @return Bloom filter index records for the base files removed by the clean action
*/
public static HoodieData convertMetadataToBloomFilterRecords(HoodieCleanMetadata cleanMetadata,
HoodieEngineContext engineContext,
String instantTime,
int bloomIndexParallelism) {
// (partition, base file name) pairs collected from the clean metadata
List> deleteFileList = new ArrayList<>();
cleanMetadata.getPartitionMetadata().forEach((partition, partitionMetadata) -> {
// Files deleted from a partition
List deletedFiles = partitionMetadata.getDeletePathPatterns();
deletedFiles.forEach(entry -> {
final StoragePath deletedFilePath = new StoragePath(entry);
// Non-base files are skipped; only the file name (not the full path) is kept.
if (FSUtils.isBaseFile(deletedFilePath)) {
deleteFileList.add(Pair.of(partition, deletedFilePath.getName()));
}
});
});
// At least 1, and never more than the number of files or the configured parallelism.
final int parallelism = Math.max(Math.min(deleteFileList.size(), bloomIndexParallelism), 1);
HoodieData> deleteFileListRDD = engineContext.parallelize(deleteFileList, parallelism);
return deleteFileListRDD.map(deleteFileInfoPair -> HoodieMetadataPayload.createBloomFilterMetadataRecord(
deleteFileInfoPair.getLeft(), deleteFileInfoPair.getRight(), instantTime, StringUtils.EMPTY_STRING,
ByteBuffer.allocate(0), true));
}
/**
* Convert clean metadata to column stats index records.
*
* Every delete path pattern listed in the clean metadata is passed on as-is (no base-file filtering happens
* here) and column stats records are requested for it with the deleted flag set.
*
* @param cleanMetadata - Clean action metadata
* @param engineContext - Engine context
* @param dataMetaClient - HoodieTableMetaClient for data
* @param metadataConfig - HoodieMetadataConfig
* @return Column stats index records for the clean metadata; empty when there are no columns to index
*/
public static HoodieData convertMetadataToColumnStatsRecords(HoodieCleanMetadata cleanMetadata,
HoodieEngineContext engineContext,
HoodieTableMetaClient dataMetaClient,
HoodieMetadataConfig metadataConfig) {
// (partition, deleted path pattern) pairs collected from the clean metadata
List> deleteFileList = new ArrayList<>();
cleanMetadata.getPartitionMetadata().forEach((partition, partitionMetadata) -> {
// Files deleted from a partition
List deletedFiles = partitionMetadata.getDeletePathPatterns();
deletedFiles.forEach(entry -> deleteFileList.add(Pair.of(partition, entry)));
});
// The table schema is wrapped in a Lazy and handed to getColumnsToIndex.
List columnsToIndex = getColumnsToIndex(dataMetaClient.getTableConfig(), metadataConfig,
Lazy.lazily(() -> tryResolveSchemaForTable(dataMetaClient)));
if (columnsToIndex.isEmpty()) {
// In case there are no columns to index, bail
LOG.warn("No columns to index for column stats index.");
return engineContext.emptyHoodieData();
}
// At least 1, and never more than the number of files or the configured parallelism.
int parallelism = Math.max(Math.min(deleteFileList.size(), metadataConfig.getColumnStatsIndexParallelism()), 1);
return engineContext.parallelize(deleteFileList, parallelism)
.flatMap(deleteFileInfoPair -> {
String partitionPath = deleteFileInfoPair.getLeft();
String fileName = deleteFileInfoPair.getRight();
return getColumnStatsRecords(partitionPath, fileName, dataMetaClient, columnsToIndex, true).iterator();
});
}
/**
* Generates record level index (RLI) records from the write stats of a commit.
*
* Returns empty data when the commit has no write stats or its operation type is COMPACT. Base files are handed
* to {@code BaseFileRecordParsingUtils}; log files only contribute record index deletes, and only when their
* write stat reports deletes. The resulting records are finally reduced by key (see {@code reduceByKeys}).
*
* @param engineContext engine context used to parallelize the work
* @param commitMetadata commit metadata whose write stats are processed
* @param metadataConfig metadata config (record index parallelism, reader buffer size)
* @param dataTableMetaClient meta client of the data table
* @param writesFileIdEncoding passed through to the base file record generation
* @param instantTime instant time of the commit
* @return record index records for the commit
* @throws HoodieException wrapping any failure, including the validation error raised when a log file reports inserts
*/
@VisibleForTesting
public static HoodieData convertMetadataToRecordIndexRecords(HoodieEngineContext engineContext,
HoodieCommitMetadata commitMetadata,
HoodieMetadataConfig metadataConfig,
HoodieTableMetaClient dataTableMetaClient,
int writesFileIdEncoding,
String instantTime) {
List allWriteStats = commitMetadata.getPartitionToWriteStats().values().stream()
.flatMap(Collection::stream).collect(Collectors.toList());
if (allWriteStats.isEmpty() || commitMetadata.getOperationType() == WriteOperationType.COMPACT) {
return engineContext.emptyHoodieData();
}
try {
// At least 1, and never more than the number of write stats or the configured maximum.
int parallelism = Math.max(Math.min(allWriteStats.size(), metadataConfig.getRecordIndexMaxParallelism()), 1);
String basePath = dataTableMetaClient.getBasePath().toString();
HoodieFileFormat baseFileFormat = dataTableMetaClient.getTableConfig().getBaseFileFormat();
// RLI cannot support logs having inserts with current offering. So, lets validate that.
if (allWriteStats.stream().anyMatch(writeStat -> {
String fileName = FSUtils.getFileName(writeStat.getPath(), writeStat.getPartitionPath());
return FSUtils.isLogFile(fileName) && writeStat.getNumInserts() > 0;
})) {
throw new HoodieIOException("RLI cannot support logs having inserts with current offering. Would recommend disabling Record Level Index");
}
// we might need to set some additional variables if we need to process log files.
// for RLI and MOR table, we only care about log files if they contain any deletes. If not, all entries in logs are considered as updates, for which
// we do not need to generate new RLI record.
boolean anyLogFilesWithDeletes = allWriteStats.stream().anyMatch(writeStat -> {
String fileName = FSUtils.getFileName(writeStat.getPath(), writeStat.getPartitionPath());
return FSUtils.isLogFile(fileName) && writeStat.getNumDeletes() > 0;
});
// The schema is resolved only when a log file with deletes has to be read.
Option writerSchemaOpt = Option.empty();
if (anyLogFilesWithDeletes) { // if we have a log file w/ deletes.
writerSchemaOpt = tryResolveSchemaForTable(dataTableMetaClient);
}
int maxBufferSize = metadataConfig.getMaxReaderBufferSize();
StorageConfiguration storageConfiguration = dataTableMetaClient.getStorageConf();
// Effectively-final copy so the lambda below can capture it.
Option finalWriterSchemaOpt = writerSchemaOpt;
HoodieData recordIndexRecords = engineContext.parallelize(allWriteStats, parallelism)
.flatMap(writeStat -> {
HoodieStorage storage = HoodieStorageUtils.getStorage(new StoragePath(writeStat.getPath()), storageConfiguration);
StoragePath fullFilePath = new StoragePath(dataTableMetaClient.getBasePath(), writeStat.getPath());
// handle base files
if (writeStat.getPath().endsWith(baseFileFormat.getFileExtension())) {
return BaseFileRecordParsingUtils.generateRLIMetadataHoodieRecordsForBaseFile(basePath, writeStat, writesFileIdEncoding, instantTime, storage);
} else if (FSUtils.isLogFile(fullFilePath)) {
// for logs, we only need to process log files containing deletes
if (writeStat.getNumDeletes() > 0) {
Set deletedRecordKeys = getRecordKeys(fullFilePath.toString(), dataTableMetaClient,
finalWriterSchemaOpt, maxBufferSize, instantTime, false, true);
return deletedRecordKeys.stream().map(recordKey -> HoodieMetadataPayload.createRecordIndexDelete(recordKey)).collect(toList()).iterator();
}
// ignore log file data blocks.
return new ArrayList().iterator();
} else {
throw new HoodieIOException("Unsupported file type " + fullFilePath.toString() + " while generating MDT records");
}
});
// there are chances that same record key from data table has 2 entries (1 delete from older partition and 1 insert to newer partition)
// lets do reduce by key to ignore the deleted entry.
// first deduce parallelism to avoid too few tasks for large number of records.
long totalWriteBytesForRLI = allWriteStats.stream().mapToLong(writeStat -> {
// if there are no inserts or deletes, we can ignore this write stat for RLI
if (writeStat.getNumInserts() == 0 && writeStat.getNumDeletes() == 0) {
return 0;
}
return writeStat.getTotalWriteBytes();
}).sum();
// approximate task partition size of 100MB
// (TODO: make this configurable)
long targetPartitionSize = 100 * 1024 * 1024;
// Ceiling division of the total bytes by the target size, with a floor of one task.
parallelism = (int) Math.max(1, (totalWriteBytesForRLI + targetPartitionSize - 1) / targetPartitionSize);
return reduceByKeys(recordIndexRecords, parallelism);
} catch (Exception e) {
throw new HoodieException("Failed to generate RLI records for metadata table", e);
}
}
/**
 * Collapses record index records that share the same key into a single record.
 *
 * The same record key can show up twice (a delete from an older partition and an insert into a newer one).
 * When exactly one of the two records is a delete, the non-delete record is kept; when both are deletes, the
 * first one is kept; two non-delete records for the same key are treated as an error.
 *
 * @param recordIndexRecords hoodie records after rli index lookup.
 * @param parallelism parallelism to use for the reduce.
 * @return the records with at most one entry per key.
 */
@VisibleForTesting
public static HoodieData reduceByKeys(HoodieData recordIndexRecords, int parallelism) {
  return recordIndexRecords
      .mapToPair((SerializablePairFunction) t -> Pair.of(t.getKey(), t))
      .reduceByKey((SerializableBiFunction) (record1, record2) -> {
        final boolean firstIsDelete = record1.getData() instanceof EmptyHoodieRecordPayload;
        final boolean secondIsDelete = record2.getData() instanceof EmptyHoodieRecordPayload;
        if (secondIsDelete) {
          // Either only the second record is a delete, or both are: the first record wins in both cases.
          return record1;
        }
        if (firstIsDelete) {
          // Only the first record is a delete: keep the live one.
          return record2;
        }
        throw new HoodieIOException("Two HoodieRecord updates to RLI is seen for same record key " + record2.getRecordKey() + ", record 1 : "
            + record1.getData().toString() + ", record 2 : " + record2.getData().toString());
      }, parallelism)
      .values();
}
/**
* Collects the record keys that were deleted or updated by a commit, based on its write stats.
*
* Base files are handed to {@code BaseFileRecordParsingUtils.getRecordKeysDeletedOrUpdated}; for log files all
* keys returned by {@code getRecordKeys} are included. Returns an empty list when the commit has no write stats.
*
* @param engineContext engine context used to parallelize the work
* @param commitMetadata commit metadata whose write stats are processed
* @param metadataConfig metadata config (record index parallelism, reader buffer size)
* @param dataTableMetaClient meta client of the data table
* @param instantTime instant time of the commit
* @return record keys deleted or updated by the commit
* @throws HoodieException wrapping any failure, including the validation error raised when a log file reports inserts
*/
@VisibleForTesting
public static List getRecordKeysDeletedOrUpdated(HoodieEngineContext engineContext,
HoodieCommitMetadata commitMetadata,
HoodieMetadataConfig metadataConfig,
HoodieTableMetaClient dataTableMetaClient,
String instantTime) {
List allWriteStats = commitMetadata.getPartitionToWriteStats().values().stream()
.flatMap(Collection::stream).collect(Collectors.toList());
if (allWriteStats.isEmpty()) {
return Collections.emptyList();
}
try {
// At least 1, and never more than the number of write stats or the configured maximum.
int parallelism = Math.max(Math.min(allWriteStats.size(), metadataConfig.getRecordIndexMaxParallelism()), 1);
String basePath = dataTableMetaClient.getBasePath().toString();
HoodieFileFormat baseFileFormat = dataTableMetaClient.getTableConfig().getBaseFileFormat();
// SI cannot support logs having inserts with current offering. So, lets validate that.
if (allWriteStats.stream().anyMatch(writeStat -> {
String fileName = FSUtils.getFileName(writeStat.getPath(), writeStat.getPartitionPath());
return FSUtils.isLogFile(fileName) && writeStat.getNumInserts() > 0;
})) {
throw new HoodieIOException("Secondary index cannot support logs having inserts with current offering. Can you drop secondary index.");
}
// we might need to set some additional variables if we need to process log files.
boolean anyLogFiles = allWriteStats.stream().anyMatch(writeStat -> {
String fileName = FSUtils.getFileName(writeStat.getPath(), writeStat.getPartitionPath());
return FSUtils.isLogFile(fileName);
});
// The schema is resolved only when at least one log file has to be read.
Option writerSchemaOpt = Option.empty();
if (anyLogFiles) {
writerSchemaOpt = tryResolveSchemaForTable(dataTableMetaClient);
}
int maxBufferSize = metadataConfig.getMaxReaderBufferSize();
StorageConfiguration storageConfiguration = dataTableMetaClient.getStorageConf();
// Effectively-final copy so the lambda below can capture it.
Option finalWriterSchemaOpt = writerSchemaOpt;
return engineContext.parallelize(allWriteStats, parallelism)
.flatMap(writeStat -> {
HoodieStorage storage = HoodieStorageUtils.getStorage(new StoragePath(writeStat.getPath()), storageConfiguration);
StoragePath fullFilePath = new StoragePath(dataTableMetaClient.getBasePath(), writeStat.getPath());
// handle base files
if (writeStat.getPath().endsWith(baseFileFormat.getFileExtension())) {
return BaseFileRecordParsingUtils.getRecordKeysDeletedOrUpdated(basePath, writeStat, storage).iterator();
} else if (FSUtils.isLogFile(fullFilePath)) {
// for logs, every entry is either an update or a delete
return getRecordKeys(fullFilePath.toString(), dataTableMetaClient, finalWriterSchemaOpt, maxBufferSize, instantTime, true, true)
.iterator();
} else {
throw new HoodieIOException("Found unsupported file type " + fullFilePath.toString() + ", while generating MDT records");
}
}).collectAsList();
} catch (Exception e) {
throw new HoodieException("Failed to fetch deleted record keys while preparing MDT records", e);
}
}
/**
 * Adds the log files that the rollback plan marks for rollback to {@code partitionToFilesMap}.
 *
 * The rollback plan of {@code instantTime} is read from the active timeline of the data table. Every partition
 * mentioned in the plan gets an entry in the map (possibly empty); for each log file listed under "log blocks
 * to be deleted" the bare file name is recorded with a placeholder size of 1.
 *
 * @param dataTableMetaClient meta client of the data table
 * @param instantTime instant time of the rollback
 * @param partitionToFilesMap map to fill, keyed by the FILES partition identifier
 * @throws HoodieMetadataException if the rollback plan cannot be read or deserialized
 */
private static void reAddLogFilesFromRollbackPlan(HoodieTableMetaClient dataTableMetaClient, String instantTime,
                                                  Map> partitionToFilesMap) {
  InstantGenerator factory = dataTableMetaClient.getInstantGenerator();
  HoodieInstant rollbackInstant = factory.createNewInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.ROLLBACK_ACTION, instantTime);
  HoodieInstant requested = factory.getRollbackRequestedInstant(rollbackInstant);
  try {
    HoodieRollbackPlan rollbackPlan = TimelineMetadataUtils.deserializeAvroMetadata(
        dataTableMetaClient.getActiveTimeline().readRollbackInfoAsBytes(requested).get(), HoodieRollbackPlan.class);

    rollbackPlan.getRollbackRequests().forEach(rollbackRequest -> {
      final String partitionId = getPartitionIdentifierForFilesPartition(rollbackRequest.getPartitionPath());
      // Make sure the partition has an entry even when no log file ends up being added for it.
      partitionToFilesMap.computeIfAbsent(partitionId, s -> new HashMap<>());
      // fetch only log files that are expected to be RB'd in DT as part of this rollback. these log files will not be deleted, but rendered
      // invalid once rollback is complete.
      if (!rollbackRequest.getLogBlocksToBeDeleted().isEmpty()) {
        Map logFiles = new HashMap<>();
        rollbackRequest.getLogBlocksToBeDeleted().forEach((k, v) -> {
          // Keep only the file name, i.e. everything after the last path separator.
          String fileName = k.substring(k.lastIndexOf("/") + 1);
          // rollback plan may not have size for log files to be rolled back. but while merging w/ original commits, the size will get adjusted.
          logFiles.put(fileName, 1L);
        });
        partitionToFilesMap.get(partitionId).putAll(logFiles);
      }
    });
  } catch (IOException e) {
    // Preserve the cause so the underlying I/O or deserialization failure stays visible in the stack trace.
    throw new HoodieMetadataException("Parsing rollback plan for " + rollbackInstant + " failed ", e);
  }
}
/**
* Convert rollback action metadata to files partition records.
* Consider only new log files added.
*
* The appended files are gathered from the rollback metadata first and then complemented with the log files
* listed in the rollback plan of {@code instantTime}; no deleted files are passed on (empty map).
*
* @param rollbackMetadata rollback action metadata
* @param instantTime instant time of the rollback
* @param dataTableMetaClient meta client of the data table, used to read the rollback plan
* @return files partition records for the rollback
*/
private static List convertMetadataToRollbackRecords(HoodieRollbackMetadata rollbackMetadata,
String instantTime,
HoodieTableMetaClient dataTableMetaClient) {
// Filled in place by the two calls below, keyed by FILES partition identifier.
Map> partitionToAppendedFiles = new HashMap<>();
processRollbackMetadata(rollbackMetadata, partitionToAppendedFiles);
reAddLogFilesFromRollbackPlan(dataTableMetaClient, instantTime, partitionToAppendedFiles);
return convertFilesToFilesPartitionRecords(Collections.emptyMap(), partitionToAppendedFiles, instantTime, "Rollback");
}
/**
 * Collects the log files appended by a rollback from the {@code HoodieRollbackMetadata}.
 *
 * For every partition whose metadata lists rollback log files, the file name of each listed path is recorded
 * together with its size. If a file name is already present for the partition, the larger size is kept.
 * Partitions without rollback log files are left untouched.
 *
 * @param rollbackMetadata {@code HoodieRollbackMetadata}
 * @param partitionToAppendedFiles The {@code Map} to fill with files appended per partition and their sizes.
 */
private static void processRollbackMetadata(HoodieRollbackMetadata rollbackMetadata,
                                            Map> partitionToAppendedFiles) {
  // if a file exists in both written log files and rollback log files, we want to pick the one that is higher
  // as rollback file could have been updated after written log files are computed.
  final BiFunction keepLargerSize = (existingSize, incomingSize) ->
      existingSize > incomingSize ? existingSize : incomingSize;

  rollbackMetadata.getPartitionMetadata().values().forEach(pm -> {
    // Nothing to record unless this rollback produced log files for the partition.
    if (pm.getRollbackLogFiles() == null || pm.getRollbackLogFiles().isEmpty()) {
      return;
    }
    final String partitionId = getPartitionIdentifierForFilesPartition(pm.getPartitionPath());
    if (!partitionToAppendedFiles.containsKey(partitionId)) {
      partitionToAppendedFiles.put(partitionId, new HashMap<>());
    }
    // Only the file name of each recorded path is kept.
    pm.getRollbackLogFiles().forEach((path, size) ->
        partitionToAppendedFiles.get(partitionId).merge(new StoragePath(path).getName(), size, keepLargerSize));
  });
}
/**
* Convert per-partition deleted and appended files to files partition records.
*
* Partitions with deletions get one record that also carries the appended files of the same partition, if any;
* the remaining partitions with appended files get one record each. NOTE: entries merged into a deletion record
* are removed from {@code partitionToAppendedFiles}, i.e. the argument is mutated.
*
* @param partitionToDeletedFiles deleted files, keyed by partition
* @param partitionToAppendedFiles appended files and their sizes, keyed by partition
* @param instantTime instant time; used here only for logging
* @param operation name of the operation; used here only for logging
* @return files partition records for the given changes
*/
protected static List convertFilesToFilesPartitionRecords(Map> partitionToDeletedFiles,
Map> partitionToAppendedFiles,
String instantTime, String operation) {
List records = new ArrayList<>(partitionToDeletedFiles.size() + partitionToAppendedFiles.size());
int[] fileChangeCount = {0, 0}; // deletes, appends
partitionToDeletedFiles.forEach((partitionName, deletedFiles) -> {
fileChangeCount[0] += deletedFiles.size();
Map filesAdded = Collections.emptyMap();
// Pull the appended files of this partition out of the map so the second loop does not emit them again.
// Files moved this way are not added to the appended-file counter.
if (partitionToAppendedFiles.containsKey(partitionName)) {
filesAdded = partitionToAppendedFiles.remove(partitionName);
}
HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partitionName, filesAdded,
deletedFiles);
records.add(record);
});
partitionToAppendedFiles.forEach((partitionName, appendedFileMap) -> {
final String partition = getPartitionIdentifierForFilesPartition(partitionName);
fileChangeCount[1] += appendedFileMap.size();
// Validate that no appended file has been deleted
// removeAll returns true only if it removed something, i.e. an appended file was also listed as deleted.
checkState(
!appendedFileMap.keySet().removeAll(partitionToDeletedFiles.getOrDefault(partition, Collections.emptyList())),
"Rollback file cannot both be appended and deleted");
// New files added to a partition
HoodieRecord record = HoodieMetadataPayload.createPartitionFilesRecord(partition, appendedFileMap,
Collections.emptyList());
records.add(record);
});
LOG.info("Found at {} from {}. #partitions_updated={}, #files_deleted={}, #files_appended={}",
instantTime, operation, records.size(), fileChangeCount[0], fileChangeCount[1]);
return records;
}
/**
* Returns the partition identifier for the given partition name.
* Delegates to {@link #getPartitionIdentifier(String)}.
*/
public static String getColumnStatsIndexPartitionIdentifier(String partitionName) {
return getPartitionIdentifier(partitionName);
}
/**
* Returns the partition identifier for the given partition name.
* Delegates to {@link #getPartitionIdentifier(String)}.
*/
public static String getBloomFilterIndexPartitionIdentifier(String partitionName) {
return getPartitionIdentifier(partitionName);
}
/**
* Returns the partition identifier for the given relative partition path.
* Delegates to {@link #getPartitionIdentifier(String)}.
*/
public static String getPartitionIdentifierForFilesPartition(String relativePartitionPath) {
return getPartitionIdentifier(relativePartitionPath);
}
/**
 * Returns the partition identifier for the given relative partition path.
 *
 * A path equal to {@code EMPTY_PARTITION_NAME} is mapped to {@code NON_PARTITIONED_NAME}; every other path is
 * returned unchanged.
 *
 * @param relativePartitionPath relative partition path
 * @return identifier to use for the partition
 */
public static String getPartitionIdentifier(@Nonnull String relativePartitionPath) {
  if (EMPTY_PARTITION_NAME.equals(relativePartitionPath)) {
    return NON_PARTITIONED_NAME;
  }
  return relativePartitionPath;
}
/**
* Convert added and deleted files metadata to bloom filter index records.
*
* Files that are not base files are ignored. For an added base file the bloom filter is read from the file
* itself; if none can be read, no record is produced for that file. Deleted base files get a record with a
* zero-length buffer.
*
* @param engineContext engine context used to parallelize the work
* @param partitionToDeletedFiles deleted files, keyed by partition
* @param partitionToAppendedFiles added files and their sizes, keyed by partition
* @param instantTime instant time to stamp on the records
* @param dataMetaClient meta client of the data table
* @param bloomIndexParallelism upper bound on the parallelism
* @param bloomFilterType bloom filter type to stamp on the records
* @return bloom filter index records for the given changes
*/
public static HoodieData convertFilesToBloomFilterRecords(HoodieEngineContext engineContext,
Map> partitionToDeletedFiles,
Map> partitionToAppendedFiles,
String instantTime,
HoodieTableMetaClient dataMetaClient,
int bloomIndexParallelism,
String bloomFilterType) {
// Create the tuple (partition, filename, isDeleted) to handle both deletes and appends
final List> partitionFileFlagTupleList = fetchPartitionFileInfoTriplets(partitionToDeletedFiles, partitionToAppendedFiles);
// Create records MDT
// At least 1, and never more than the number of files or the configured parallelism.
int parallelism = Math.max(Math.min(partitionFileFlagTupleList.size(), bloomIndexParallelism), 1);
return engineContext.parallelize(partitionFileFlagTupleList, parallelism).flatMap(partitionFileFlagTuple -> {
final String partitionName = partitionFileFlagTuple.f0;
final String filename = partitionFileFlagTuple.f1;
final boolean isDeleted = partitionFileFlagTuple.f2;
if (!FSUtils.isBaseFile(new StoragePath(filename))) {
LOG.warn("Ignoring file {} as it is not a base file", filename);
return Stream.empty().iterator();
}
// Read the bloom filter from the base file if the file is being added
// Deleted files keep this zero-length buffer.
ByteBuffer bloomFilterBuffer = ByteBuffer.allocate(0);
if (!isDeleted) {
final String pathWithPartition = partitionName + "/" + filename;
final StoragePath addedFilePath = new StoragePath(dataMetaClient.getBasePath(), pathWithPartition);
bloomFilterBuffer = readBloomFilter(dataMetaClient.getStorage(), addedFilePath);
// If reading the bloom filter failed then do not add a record for this file
if (bloomFilterBuffer == null) {
LOG.error("Failed to read bloom filter from {}", addedFilePath);
return Stream.empty().iterator();
}
}
// partitionFileFlagTuple.f2 is the same value as isDeleted above.
return Stream.of(HoodieMetadataPayload.createBloomFilterMetadataRecord(
partitionName, filename, instantTime, bloomFilterType, bloomFilterBuffer, partitionFileFlagTuple.f2))
.iterator();
});
}
/**
* Convert added and deleted action metadata to column stats index records.
*
* @param engineContext engine context used to parallelize the work
* @param partitionToDeletedFiles deleted files, keyed by partition
* @param partitionToAppendedFiles added files and their sizes, keyed by partition
* @param dataMetaClient meta client of the data table
* @param metadataConfig metadata config used to determine the columns to index
* @param columnStatsIndexParallelism upper bound on the parallelism
* @param maxReaderBufferSize passed through to the column stats record generation
* @return column stats index records for the given changes; empty when there are no columns to index
*/
public static HoodieData convertFilesToColumnStatsRecords(HoodieEngineContext engineContext,
Map> partitionToDeletedFiles,
Map> partitionToAppendedFiles,
HoodieTableMetaClient dataMetaClient,
HoodieMetadataConfig metadataConfig,
int columnStatsIndexParallelism,
int maxReaderBufferSize) {
// Find the columns to index
final List columnsToIndex = getColumnsToIndex(dataMetaClient.getTableConfig(),
metadataConfig, Lazy.lazily(() -> tryResolveSchemaForTable(dataMetaClient)));
if (columnsToIndex.isEmpty()) {
// In case there are no columns to index, bail
LOG.warn("No columns to index for column stats index.");
return engineContext.emptyHoodieData();
}
LOG.info("Indexing {} columns for column stats index", columnsToIndex.size());
// Create the tuple (partition, filename, isDeleted) to handle both deletes and appends
final List> partitionFileFlagTupleList = fetchPartitionFileInfoTriplets(partitionToDeletedFiles, partitionToAppendedFiles);
// Create records MDT
// At least 1, and never more than the number of files or the configured parallelism.
int parallelism = Math.max(Math.min(partitionFileFlagTupleList.size(), columnStatsIndexParallelism), 1);
return engineContext.parallelize(partitionFileFlagTupleList, parallelism).flatMap(partitionFileFlagTuple -> {
final String partitionPath = partitionFileFlagTuple.f0;
final String filename = partitionFileFlagTuple.f1;
final boolean isDeleted = partitionFileFlagTuple.f2;
return getColumnStatsRecords(partitionPath, filename, dataMetaClient, columnsToIndex, isDeleted, maxReaderBufferSize).iterator();
});
}
/**
 * Reads the bloom filter of the given file and returns it in serialized form.
 *
 * @param storage storage used to open the file
 * @param filePath path of the file to read the bloom filter from
 * @return the UTF-8 bytes of the serialized bloom filter, or {@code null} if the reader returned no bloom filter
 * @throws IOException if the file reader cannot be opened or closed
 */
private static ByteBuffer readBloomFilter(HoodieStorage storage, StoragePath filePath) throws IOException {
  final HoodieConfig readerConfig = getReaderConfigs(storage.getConf());
  try (HoodieFileReader reader = HoodieIOFactory.getIOFactory(storage)
      .getReaderFactory(HoodieRecordType.AVRO)
      .getFileReader(readerConfig, filePath)) {
    final BloomFilter bloomFilter = reader.readBloomFilter();
    // A missing bloom filter is reported as null; callers are expected to check for it.
    return bloomFilter == null
        ? null
        : ByteBuffer.wrap(getUTF8Bytes(bloomFilter.serializeToString()));
  }
}
/**
* Flattens the per-partition deleted and appended files into one list of (partition, file name, isDeleted)
* tuples. Deleted files come first (flag {@code true}), followed by appended files (flag {@code false});
* the sizes of appended files are dropped.
*
* @param partitionToDeletedFiles deleted files, keyed by partition
* @param partitionToAppendedFiles appended files and their sizes, keyed by partition
* @return list with one tuple per deleted or appended file
*/
private static List> fetchPartitionFileInfoTriplets(
Map> partitionToDeletedFiles,
Map> partitionToAppendedFiles) {
// Total number of files which are added or deleted
final int totalFiles = partitionToDeletedFiles.values().stream().mapToInt(List::size).sum()
+ partitionToAppendedFiles.values().stream().mapToInt(Map::size).sum();
// Pre-sized; both streams below collect straight into this list.
final List> partitionFileFlagTupleList = new ArrayList<>(totalFiles);
partitionToDeletedFiles.entrySet().stream()
.flatMap(entry -> entry.getValue().stream().map(deletedFile -> Tuple3.of(entry.getKey(), deletedFile, true)))
.collect(Collectors.toCollection(() -> partitionFileFlagTupleList));
partitionToAppendedFiles.entrySet().stream()
.flatMap(
entry -> entry.getValue().keySet().stream().map(addedFile -> Tuple3.of(entry.getKey(), addedFile, false)))
.collect(Collectors.toCollection(() -> partitionFileFlagTupleList));
return partitionFileFlagTupleList;
}
/**
 * Maps a record key to the index of a file group.
 *
 * The key is hashed with the same polynomial (base 31) that {@code String.hashCode()} uses. It is spelled out
 * here because the mapping must not depend on the JVM's {@code hashCode()} implementation. The hash is then
 * folded into the range {@code [0, numFileGroups)}.
 *
 * @param recordKey record key for which the file group index is looked up for.
 * @param numFileGroups number of file groups to distribute the keys over.
 * @return file group index in the range {@code [0, numFileGroups)}
 */
public static int mapRecordKeyToFileGroupIndex(String recordKey, int numFileGroups) {
  int hash = 0;
  for (char c : recordKey.toCharArray()) {
    hash = hash * 31 + c;
  }
  // Math.abs(Integer.MIN_VALUE) is still negative, so the remainder can be negative as well;
  // the second abs folds that case back into the valid range.
  final int remainder = Math.abs(hash) % numFileGroups;
  return Math.abs(remainder);
}
/**
 * Loads the latest file slices of a Metadata Table partition with merging enabled, i.e. file slices are merged
 * up to the last completed-or-compaction instant of the active timeline. The returned list is sorted by
 * file id.
 *
 * @param metaClient Instance of {@link HoodieTableMetaClient}.
 * @param fsView Metadata table filesystem view.
 * @param partition The name of the partition whose file groups are to be loaded.
 * @return List of latest merged file slices for all file groups in the given partition.
 */
public static List getPartitionLatestMergedFileSlices(
    HoodieTableMetaClient metaClient, HoodieTableFileSystemView fsView, String partition) {
  final boolean mergeFileSlices = true;
  LOG.info("Loading latest merged file slices for metadata table partition {}", partition);
  return getPartitionFileSlices(metaClient, Option.of(fsView), partition, mergeFileSlices);
}
/**
 * Loads the latest file slices of a Metadata Table partition without merging. The returned list is sorted by
 * file id.
 *
 * @param metaClient - Instance of {@link HoodieTableMetaClient}.
 * @param fsView - Metadata table filesystem view; when absent, a view is created for the duration of the call
 * @param partition - The name of the partition whose file groups are to be loaded.
 * @return List of latest file slices for all file groups in the given partition.
 */
public static List getPartitionLatestFileSlices(HoodieTableMetaClient metaClient,
                                                Option fsView, String partition) {
  final boolean mergeFileSlices = false;
  LOG.info("Loading latest file slices for metadata table partition {}", partition);
  return getPartitionFileSlices(metaClient, fsView, partition, mergeFileSlices);
}
/**
 * Creates a file system view for the metadata table.
 *
 * When the active timeline is empty, the view is built on a substitute timeline that holds a single completed
 * delta commit instant with a newly generated instant time. Otherwise the active timeline is used as is.
 *
 * @param metaClient - Metadata table meta client
 * @return Filesystem view for the metadata table
 */
public static HoodieTableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient) {
  final HoodieTimeline activeTimeline = metaClient.getActiveTimeline();
  final TimelineFactory timelineFactory = metaClient.getTimelineLayout().getTimelineFactory();
  if (!activeTimeline.empty()) {
    return new HoodieTableFileSystemView(metaClient, activeTimeline);
  }
  // If there are no commits on the metadata table then the table's
  // default FileSystemView will not return any file slices even
  // though we may have initialized them.
  final HoodieInstant placeholderInstant = metaClient.createNewInstant(
      HoodieInstant.State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, metaClient.createNewInstantTime(false));
  final HoodieTimeline placeholderTimeline = timelineFactory.createDefaultTimeline(
      Stream.of(placeholderInstant), metaClient.getActiveTimeline()::getInstantDetails);
  return new HoodieTableFileSystemView(metaClient, placeholderTimeline);
}
/**
 * Get the latest file slices for a given partition.
 *
 * If no file system view is supplied, one is created for the duration of the call and closed before returning;
 * a caller-supplied view is never closed here.
 *
 * @param metaClient - Instance of {@link HoodieTableMetaClient}.
 * @param fileSystemView - Optional file system view to use.
 * @param partition - The name of the partition whose file groups are to be loaded.
 * @param mergeFileSlices - When enabled, will merge the latest file slices with the last known
 *                        completed instant. This is useful for readers when there are pending
 *                        compactions. MergeFileSlices when disabled, will return the latest file
 *                        slices without any merging, and this is needed for the writers.
 * @return List of latest file slices for all file groups in a given partition, sorted by file id. Empty when
 *         merging is requested but the active timeline has no completed instant.
 */
private static List getPartitionFileSlices(HoodieTableMetaClient metaClient,
                                           Option fileSystemView,
                                           String partition,
                                           boolean mergeFileSlices) {
  // A view created here is owned by this method and must be closed by it.
  final boolean ownsFsView = !fileSystemView.isPresent();
  // Resolve the view BEFORE entering the try block: if creating it fails, there is nothing to close, and the
  // original failure must not be masked by a NullPointerException thrown from the finally block.
  final HoodieTableFileSystemView fsView = fileSystemView.orElseGet(() -> getFileSystemView(metaClient));
  try {
    Stream fileSliceStream;
    if (mergeFileSlices) {
      if (metaClient.getActiveTimeline().filterCompletedInstants().lastInstant().isPresent()) {
        fileSliceStream = fsView.getLatestMergedFileSlicesBeforeOrOn(
            // including pending compaction instant as the last instant so that the finished delta commits
            // that start earlier than the compaction can be queried.
            partition, metaClient.getActiveTimeline().filterCompletedAndCompactionInstants().lastInstant().get().requestedTime());
      } else {
        return Collections.emptyList();
      }
    } else {
      fileSliceStream = fsView.getLatestFileSlices(partition);
    }
    return fileSliceStream.sorted(Comparator.comparing(FileSlice::getFileId)).collect(Collectors.toList());
  } finally {
    if (ownsFsView) {
      fsView.close();
    }
  }
}
/**
 * Get the latest file slices for a given partition including the inflight ones.
 *
 * If no file system view is supplied, one is created for the duration of the call and closed before returning;
 * a caller-supplied view is never closed here.
 *
 * @param metaClient - instance of {@link HoodieTableMetaClient}
 * @param fileSystemView - hoodie table file system view, which will be fetched from meta client if not already present
 * @param partition - name of the partition whose file groups are to be loaded
 * @return latest file slices (including inflight ones) of the partition, sorted by file id
 */
public static List getPartitionLatestFileSlicesIncludingInflight(HoodieTableMetaClient metaClient,
                                                                 Option fileSystemView,
                                                                 String partition) {
  // A view created here is owned by this method and must be closed by it.
  final boolean ownsFsView = !fileSystemView.isPresent();
  // Resolve the view BEFORE entering the try block: if creating it fails, there is nothing to close, and the
  // original failure must not be masked by a NullPointerException thrown from the finally block.
  final HoodieTableFileSystemView fsView = fileSystemView.orElseGet(() -> getFileSystemView(metaClient));
  try {
    Stream fileSliceStream = fsView.getLatestFileSlicesIncludingInflight(partition);
    return fileSliceStream
        .sorted(Comparator.comparing(FileSlice::getFileId))
        .collect(Collectors.toList());
  } finally {
    if (ownsFsView) {
      fsView.close();
    }
  }
}
/**
* Convert commit metadata to column stats index records.
*
* The writer schema is taken from the commit metadata (entry {@code HoodieCommitMetadata.SCHEMA_KEY}) when it is
* present and non-empty. Returns empty data when the commit has no write stats or there are no columns to index.
*
* @param commitMetadata commit metadata whose write stats are translated
* @param engineContext engine context used to parallelize the work
* @param dataMetaClient meta client of the data table
* @param metadataConfig metadata config used to determine the columns to index and the parallelism
* @return column stats index records for the commit
* @throws HoodieException wrapping any failure raised while preparing the records
*/
public static HoodieData convertMetadataToColumnStatsRecords(HoodieCommitMetadata commitMetadata,
HoodieEngineContext engineContext,
HoodieTableMetaClient dataMetaClient,
HoodieMetadataConfig metadataConfig) {
List allWriteStats = commitMetadata.getPartitionToWriteStats().values().stream()
.flatMap(Collection::stream).collect(Collectors.toList());
if (allWriteStats.isEmpty()) {
return engineContext.emptyHoodieData();
}
try {
// A missing or empty schema string results in an empty Option rather than a parse attempt.
Option writerSchema =
Option.ofNullable(commitMetadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY))
.flatMap(writerSchemaStr ->
isNullOrEmpty(writerSchemaStr)
? Option.empty()
: Option.of(new Schema.Parser().parse(writerSchemaStr)));
HoodieTableConfig tableConfig = dataMetaClient.getTableConfig();
// NOTE: Writer schema added to commit metadata will not contain Hudi's metadata fields
Option tableSchema = writerSchema.map(schema ->
tableConfig.populateMetaFields() ? addMetadataFields(schema) : schema);
List columnsToIndex = getColumnsToIndex(dataMetaClient.getTableConfig(), metadataConfig,
Lazy.eagerly(tableSchema));
if (columnsToIndex.isEmpty()) {
// In case there are no columns to index, bail
return engineContext.emptyHoodieData();
}
// At least 1, and never more than the number of write stats or the configured parallelism.
int parallelism = Math.max(Math.min(allWriteStats.size(), metadataConfig.getColumnStatsIndexParallelism()), 1);
return engineContext.parallelize(allWriteStats, parallelism)
.flatMap(writeStat ->
translateWriteStatToColumnStats(writeStat, dataMetaClient, columnsToIndex).iterator());
} catch (Exception e) {
throw new HoodieException("Failed to generate column stats records for metadata table", e);
}
}
// Names of the commit time, record key and partition path meta fields.
// Going by the constant's name these are always indexed — the usage is outside this section, confirm there.
@VisibleForTesting
static final String[] META_COLS_TO_ALWAYS_INDEX = {COMMIT_TIME_METADATA_FIELD, RECORD_KEY_METADATA_FIELD, PARTITION_PATH_METADATA_FIELD};
// Set holding the same names as META_COLS_TO_ALWAYS_INDEX.
// NOTE(review): this is a public, mutable HashSet; callers should treat it as read-only.
@VisibleForTesting
public static final Set META_COL_SET_TO_INDEX = new HashSet<>(Arrays.asList(META_COLS_TO_ALWAYS_INDEX));
/**
* Determines the columns to index from an explicit list of column names.
* Delegates to the four-argument overload with {@code Either.left(columnNames)} and an empty {@code Option}.
*
* @param tableConfig table config of the data table
* @param metadataConfig metadata config
* @param columnNames column names to choose from
* @return columns to index
*/
public static List getColumnsToIndex(HoodieTableConfig tableConfig,
HoodieMetadataConfig metadataConfig,
List columnNames) {
return getColumnsToIndex(tableConfig, metadataConfig, Either.left(columnNames), Option.empty());
}
public static List getColumnsToIndex(HoodieTableConfig tableConfig,
HoodieMetadataConfig metadataConfig,
Lazy