/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.metadata;
import org.apache.hudi.avro.model.HoodieMetadataBloomFilter;
import org.apache.hudi.avro.model.HoodieMetadataColumnStats;
import org.apache.hudi.avro.model.HoodieMetadataFileInfo;
import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.avro.model.HoodieRecordIndexInfo;
import org.apache.hudi.avro.model.HoodieSecondaryIndexInfo;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.EmptyHoodieRecordPayload;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieColumnRangeMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordGlobalLocation;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.TimelineUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.hash.ColumnIndexID;
import org.apache.hudi.common.util.hash.FileIndexID;
import org.apache.hudi.common.util.hash.PartitionIndexID;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.storage.StoragePathInfo;
import org.apache.hudi.util.Lazy;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.avro.HoodieAvroUtils.wrapValueIntoAvro;
import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
import static org.apache.hudi.metadata.HoodieTableMetadata.RECORDKEY_PARTITION_LIST;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getBloomFilterIndexPartitionIdentifier;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getColumnStatsIndexPartitionIdentifier;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getLocationFromRecordIndexInfo;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getPartitionIdentifierForFilesPartition;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getPartitionStatsIndexKey;
/**
* MetadataTable records are persisted with the schema defined in HoodieMetadata.avsc.
* This class represents the payload for the MetadataTable.
*
* This single metadata payload is shared by all the partitions under the metadata table.
* The partition specific records are determined by the field "type" saved within the record.
* The following types are supported:
*
* METADATA_TYPE_PARTITION_LIST (1):
* -- List of all partitions. There is a single such record
* -- key = {@link HoodieTableMetadata#RECORDKEY_PARTITION_LIST}
*
* METADATA_TYPE_FILE_LIST (2):
* -- List of all files in a partition. There is one such record for each partition
* -- key = partition name
*
* METADATA_TYPE_COLUMN_STATS (3):
* -- This is an index for column stats in the table
*
* METADATA_TYPE_BLOOM_FILTER (4):
* -- This is an index for base file bloom filters. This is a map of FileID to its BloomFilter byte[].
*
* During compaction on the table, the deletions are merged with additions and hence records are pruned.
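*
* An illustrative (hypothetical) sketch of producing an ALL_PARTITIONS record through this payload,
* using made-up partition names:
* <pre>{@code
*   HoodieRecord<HoodieMetadataPayload> partitionListRecord =
*       HoodieMetadataPayload.createPartitionListRecord(Arrays.asList("2024/01/01", "2024/01/02"));
* }</pre>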
*/
public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadataPayload> {
/**
* HoodieMetadata schema field ids
*/
public static final String KEY_FIELD_NAME = HoodieAvroHFileReaderImplBase.KEY_FIELD_NAME;
public static final String SCHEMA_FIELD_NAME_TYPE = "type";
public static final String SCHEMA_FIELD_NAME_METADATA = "filesystemMetadata";
public static final String SCHEMA_FIELD_ID_COLUMN_STATS = "ColumnStatsMetadata";
public static final String SCHEMA_FIELD_ID_BLOOM_FILTER = "BloomFilterMetadata";
public static final String SCHEMA_FIELD_ID_RECORD_INDEX = "recordIndexMetadata";
public static final String SCHEMA_FIELD_ID_SECONDARY_INDEX = "SecondaryIndexMetadata";
/**
* HoodieMetadata bloom filter payload field ids
*/
public static final String FIELD_IS_DELETED = "isDeleted";
public static final String BLOOM_FILTER_FIELD_TYPE = "type";
public static final String BLOOM_FILTER_FIELD_TIMESTAMP = "timestamp";
public static final String BLOOM_FILTER_FIELD_BLOOM_FILTER = "bloomFilter";
public static final String BLOOM_FILTER_FIELD_IS_DELETED = FIELD_IS_DELETED;
/**
* HoodieMetadata column stats payload field ids
*/
public static final String COLUMN_STATS_FIELD_MIN_VALUE = "minValue";
public static final String COLUMN_STATS_FIELD_MAX_VALUE = "maxValue";
public static final String COLUMN_STATS_FIELD_NULL_COUNT = "nullCount";
public static final String COLUMN_STATS_FIELD_VALUE_COUNT = "valueCount";
public static final String COLUMN_STATS_FIELD_TOTAL_SIZE = "totalSize";
public static final String COLUMN_STATS_FIELD_FILE_NAME = "fileName";
public static final String COLUMN_STATS_FIELD_COLUMN_NAME = "columnName";
public static final String COLUMN_STATS_FIELD_TOTAL_UNCOMPRESSED_SIZE = "totalUncompressedSize";
public static final String COLUMN_STATS_FIELD_IS_DELETED = FIELD_IS_DELETED;
public static final String COLUMN_STATS_FIELD_IS_TIGHT_BOUND = "isTightBound";
/**
* HoodieMetadata record index payload field ids
*/
public static final String RECORD_INDEX_FIELD_PARTITION = "partitionName";
public static final String RECORD_INDEX_FIELD_FILEID_HIGH_BITS = "fileIdHighBits";
public static final String RECORD_INDEX_FIELD_FILEID_LOW_BITS = "fileIdLowBits";
public static final String RECORD_INDEX_FIELD_FILE_INDEX = "fileIndex";
public static final String RECORD_INDEX_FIELD_INSTANT_TIME = "instantTime";
public static final String RECORD_INDEX_FIELD_FILEID = "fileId";
public static final String RECORD_INDEX_FIELD_FILEID_ENCODING = "fileIdEncoding";
public static final int RECORD_INDEX_FIELD_FILEID_ENCODING_UUID = 0;
public static final int RECORD_INDEX_FIELD_FILEID_ENCODING_RAW_STRING = 1;
public static final String RECORD_INDEX_FIELD_POSITION = "position";
/**
* FileIndex value saved in a record index record when the fileId has no index (older base file name format)
*/
public static final int RECORD_INDEX_MISSING_FILEINDEX_FALLBACK = -1;
/**
* HoodieMetadata secondary index payload field ids
*/
public static final String SECONDARY_INDEX_RECORD_KEY_ESCAPE_CHAR = "\\";
public static final String SECONDARY_INDEX_RECORD_KEY_SEPARATOR = "$";
public static final String SECONDARY_INDEX_FIELD_IS_DELETED = FIELD_IS_DELETED;
/**
* NOTE: PLEASE READ CAREFULLY
*
* In Avro 1.10, generated builders rely on a {@code SpecificData.getForSchema} invocation that in turn
* uses reflection to load the code-gen'd class corresponding to the Avro record model. This has
* serious adverse effects on performance when executed on the hot path (both in terms of runtime
* and efficiency).
*
* To work around this, instead of using the default code-gen'd builder (which invokes
* {@code SpecificData.getForSchema}), we rely on the overloaded ctor accepting another instance of the
* builder, {@code Builder(Builder)}, which bypasses that invocation. The corresponding builder stubs
* below are statically initialized to be used exactly for that purpose.
*
* You can find more details in HUDI-3834.
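*
* An illustrative (hypothetical) sketch of how such a stub is meant to be used:
* <pre>{@code
*   // copying the statically initialized stub bypasses SpecificData.getForSchema
*   HoodieMetadataColumnStats.Builder builder =
*       HoodieMetadataColumnStats.newBuilder(METADATA_COLUMN_STATS_BUILDER_STUB.get());
*   // ... populate the builder via its setters and call build() as usual
* }</pre>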
*/
public static final Lazy<HoodieMetadataColumnStats.Builder> METADATA_COLUMN_STATS_BUILDER_STUB = Lazy.lazily(HoodieMetadataColumnStats::newBuilder);
private static final HoodieMetadataFileInfo DELETE_FILE_METADATA = new HoodieMetadataFileInfo(0L, true);
protected String key = null;
protected int type = 0;
protected Map<String, HoodieMetadataFileInfo> filesystemMetadata = null;
protected HoodieMetadataBloomFilter bloomFilterMetadata = null;
protected HoodieMetadataColumnStats columnStatMetadata = null;
protected HoodieRecordIndexInfo recordIndexMetadata;
protected HoodieSecondaryIndexInfo secondaryIndexMetadata;
private boolean isDeletedRecord = false;
public HoodieMetadataPayload(@Nullable GenericRecord record, Comparable<?> orderingVal) {
this(Option.ofNullable(record));
}
public HoodieMetadataPayload(Option<GenericRecord> recordOpt) {
if (recordOpt.isPresent()) {
GenericRecord record = recordOpt.get();
// This can be simplified using SpecificData.deepcopy once this bug is fixed
// https://issues.apache.org/jira/browse/AVRO-1811
//
// NOTE: {@code HoodieMetadataRecord} has to always carry both "key" and "type" fields
// for it to be handled appropriately, therefore these fields have to be reflected
// in any (read-)projected schema
key = record.get(KEY_FIELD_NAME).toString();
type = (int) record.get(SCHEMA_FIELD_NAME_TYPE);
MetadataPartitionType.get(type).constructMetadataPayload(this, record);
} else {
this.isDeletedRecord = true;
}
}
protected HoodieMetadataPayload(String key, int type, Map<String, HoodieMetadataFileInfo> filesystemMetadata) {
this(key, type, filesystemMetadata, null, null, null, null, false);
}
protected HoodieMetadataPayload(String key, HoodieMetadataBloomFilter metadataBloomFilter) {
this(key, MetadataPartitionType.BLOOM_FILTERS.getRecordType(), null, metadataBloomFilter, null, null, null, metadataBloomFilter.getIsDeleted());
}
protected HoodieMetadataPayload(String key, HoodieMetadataColumnStats columnStats, int recordType) {
this(key, recordType, null, null, columnStats, null, null, columnStats.getIsDeleted());
}
private HoodieMetadataPayload(String key, HoodieRecordIndexInfo recordIndexMetadata) {
this(key, MetadataPartitionType.RECORD_INDEX.getRecordType(), null, null, null, recordIndexMetadata, null, false);
}
protected HoodieMetadataPayload(String key, HoodieSecondaryIndexInfo secondaryIndexMetadata) {
this(key, MetadataPartitionType.SECONDARY_INDEX.getRecordType(), null, null, null, null, secondaryIndexMetadata, secondaryIndexMetadata.getIsDeleted());
}
protected HoodieMetadataPayload(String key, int type,
Map<String, HoodieMetadataFileInfo> filesystemMetadata,
HoodieMetadataBloomFilter metadataBloomFilter,
HoodieMetadataColumnStats columnStats,
HoodieRecordIndexInfo recordIndexMetadata,
HoodieSecondaryIndexInfo secondaryIndexMetadata,
boolean isDeletedRecord) {
this.key = key;
this.type = type;
this.filesystemMetadata = filesystemMetadata;
this.bloomFilterMetadata = metadataBloomFilter;
this.columnStatMetadata = columnStats;
this.recordIndexMetadata = recordIndexMetadata;
this.secondaryIndexMetadata = secondaryIndexMetadata;
this.isDeletedRecord = isDeletedRecord;
}
/**
* Create and return a {@code HoodieMetadataPayload} to save list of partitions.
*
* @param partitions The list of partitions
*/
public static HoodieRecord<HoodieMetadataPayload> createPartitionListRecord(List<String> partitions) {
return createPartitionListRecord(partitions, false);
}
/**
* Create and return a {@code HoodieMetadataPayload} to save list of partitions.
*
* @param partitions The list of partitions
* @param isDeleted  Whether the partition entries should be marked as deleted
*/
public static HoodieRecord<HoodieMetadataPayload> createPartitionListRecord(List<String> partitions, boolean isDeleted) {
Map<String, HoodieMetadataFileInfo> fileInfo = new HashMap<>();
partitions.forEach(partition -> fileInfo.put(getPartitionIdentifierForFilesPartition(partition), new HoodieMetadataFileInfo(0L, isDeleted)));
HoodieKey key = new HoodieKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.ALL_PARTITIONS.getPartitionPath());
HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), MetadataPartitionType.ALL_PARTITIONS.getRecordType(), fileInfo);
return new HoodieAvroRecord<>(key, payload);
}
/**
* Create and return a {@code HoodieMetadataPayload} to save list of files within a partition.
*
* @param partition The name of the partition
* @param filesAdded Mapping of files to their sizes for files which have been added to this partition
* @param filesDeleted List of files which have been deleted from this partition
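*
* An illustrative (hypothetical) sketch with made-up file names and sizes:
* <pre>{@code
*   Map<String, Long> filesAdded = new HashMap<>();
*   filesAdded.put("base-file-1.parquet", 1024L);
*   List<String> filesDeleted = Arrays.asList("base-file-0.parquet");
*   HoodieRecord<HoodieMetadataPayload> filesRecord =
*       HoodieMetadataPayload.createPartitionFilesRecord("2024/01/01", filesAdded, filesDeleted);
* }</pre>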
*/
public static HoodieRecord<HoodieMetadataPayload> createPartitionFilesRecord(String partition,
Map<String, Long> filesAdded,
List<String> filesDeleted) {
String partitionIdentifier = getPartitionIdentifierForFilesPartition(partition);
int size = filesAdded.size() + filesDeleted.size();
Map<String, HoodieMetadataFileInfo> fileInfo = new HashMap<>(size, 1);
filesAdded.forEach((fileName, fileSize) -> fileInfo.put(fileName, new HoodieMetadataFileInfo(fileSize, false)));
filesDeleted.forEach(fileName -> fileInfo.put(fileName, DELETE_FILE_METADATA));
HoodieKey key = new HoodieKey(partitionIdentifier, MetadataPartitionType.FILES.getPartitionPath());
HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), MetadataPartitionType.FILES.getRecordType(), fileInfo);
return new HoodieAvroRecord<>(key, payload);
}
public static HoodieRecord<HoodieMetadataPayload> createBloomFilterMetadataRecord(final String partitionName,
final String baseFileName,
final String timestamp,
final String bloomFilterType,
final ByteBuffer bloomFilter,
final boolean isDeleted) {
return createBloomFilterMetadataRecord(partitionName, baseFileName, timestamp, bloomFilterType, bloomFilter, isDeleted, MetadataPartitionType.BLOOM_FILTERS.getPartitionPath());
}
/**
* Create bloom filter metadata record.
*
* @param partitionName - Partition name
* @param baseFileName - Base file name for which the bloom filter needs to be persisted
* @param timestamp - Instant timestamp responsible for this record
* @param bloomFilterType - Type code of the bloom filter
* @param bloomFilter - Bloom filter for the file
* @param isDeleted - Whether the bloom filter is no longer valid
* @return Metadata payload containing the fileID and its bloom filter record
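*
* An illustrative (hypothetical) sketch with made-up partition, base file name, instant time and filter bytes:
* <pre>{@code
*   ByteBuffer serializedFilter = ByteBuffer.wrap(new byte[0]); // placeholder bytes for illustration
*   HoodieRecord<HoodieMetadataPayload> bloomRecord = HoodieMetadataPayload.createBloomFilterMetadataRecord(
*       "2024/01/01", "3f1f0a5e-0000-0000-0000-000000000000-0_1-2-3_20240101000000.parquet",
*       "20240101000000", "DYNAMIC_V0", serializedFilter, false);
* }</pre>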
*/
public static HoodieRecord<HoodieMetadataPayload> createBloomFilterMetadataRecord(final String partitionName,
final String baseFileName,
final String timestamp,
final String bloomFilterType,
final ByteBuffer bloomFilter,
final boolean isDeleted,
String metadataPartitionName) {
checkArgument(!baseFileName.contains(StoragePath.SEPARATOR)
&& FSUtils.isBaseFile(new StoragePath(baseFileName)),
"Invalid base file '" + baseFileName + "' for MetaIndexBloomFilter!");
final String bloomFilterIndexKey = getBloomFilterRecordKey(partitionName, baseFileName);
HoodieKey key = new HoodieKey(bloomFilterIndexKey, metadataPartitionName);
HoodieMetadataBloomFilter metadataBloomFilter =
new HoodieMetadataBloomFilter(bloomFilterType, timestamp, bloomFilter, isDeleted);
HoodieMetadataPayload metadataPayload = new HoodieMetadataPayload(key.getRecordKey(), metadataBloomFilter);
return new HoodieAvroRecord<>(key, metadataPayload);
}
@Override
public HoodieMetadataPayload preCombine(HoodieMetadataPayload previousRecord) {
if (this.isDeletedRecord) {
// This happens when a record has been deleted. The previous version of the record should be ignored.
return this;
}
if (previousRecord.isDeletedRecord) {
// This happens when a record with same key is added after a deletion.
return this;
}
// Validation of record merge scenario. Only records of same type and key can be combined.
checkArgument(previousRecord.type == type,
"Cannot combine " + previousRecord.type + " with " + type);
checkArgument(previousRecord.key.equals(key),
"Cannot combine " + previousRecord.key + " with " + key + " as the keys differ");
return MetadataPartitionType.get(type).combineMetadataPayloads(previousRecord, this);
}
private static String getBloomFilterRecordKey(String partitionName, String fileName) {
return new PartitionIndexID(getBloomFilterIndexPartitionIdentifier(partitionName)).asBase64EncodedString()
.concat(new FileIndexID(fileName).asBase64EncodedString());
}
public static Option<HoodieRecord<HoodieMetadataPayload>> combineSecondaryIndexRecord(
HoodieRecord<HoodieMetadataPayload> oldRecord,
HoodieRecord<HoodieMetadataPayload> newRecord) {
// If the new record is tombstone, we can discard it
if (newRecord.getData().isDeleted() || newRecord.getData().secondaryIndexMetadata.getIsDeleted()) {
return Option.empty();
}
return Option.of(newRecord);
}
@Override
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRecord, Schema schema, Properties properties) throws IOException {
HoodieMetadataPayload anotherPayload = new HoodieMetadataPayload(Option.of((GenericRecord) oldRecord));
HoodieRecordPayload combinedPayload = preCombine(anotherPayload);
return combinedPayload.getInsertValue(schema, properties);
}
@Override
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRecord, Schema schema) throws IOException {
return combineAndGetUpdateValue(oldRecord, schema, new Properties());
}
@Override
public Option<IndexedRecord> getInsertValue(Schema schemaIgnored, Properties propertiesIgnored) throws IOException {
if (key == null || this.isDeletedRecord) {
return Option.empty();
}
HoodieMetadataRecord record = new HoodieMetadataRecord(key, type, filesystemMetadata, bloomFilterMetadata,
columnStatMetadata, recordIndexMetadata, secondaryIndexMetadata);
return Option.of(record);
}
@Override
public Option<IndexedRecord> getInsertValue(Schema schema) throws IOException {
return getInsertValue(schema, new Properties());
}
/**
* Returns the list of filenames added as part of this record.
*/
public List<String> getFilenames() {
return filterFileInfoEntries(false).map(Map.Entry::getKey).sorted().collect(Collectors.toList());
}
/**
* Returns the list of filenames deleted as part of this record.
*/
public List<String> getDeletions() {
return filterFileInfoEntries(true).map(Map.Entry::getKey).sorted().collect(Collectors.toList());
}
/**
* Get the bloom filter metadata from this payload.
*/
public Option<HoodieMetadataBloomFilter> getBloomFilterMetadata() {
if (bloomFilterMetadata == null) {
return Option.empty();
}
return Option.of(bloomFilterMetadata);
}
/**
* Get the column stats metadata from this payload.
*/
public Option<HoodieMetadataColumnStats> getColumnStatMetadata() {
if (columnStatMetadata == null) {
return Option.empty();
}
return Option.of(columnStatMetadata);
}
/**
* Returns the files added as part of this record.
*/
public List<StoragePathInfo> getFileList(HoodieStorage storage, StoragePath partitionPath) {
long blockSize = storage.getDefaultBlockSize(partitionPath);
return filterFileInfoEntries(false)
.map(e -> {
// NOTE: Since we know that the Metadata Table's payload value here is simply a file name,
// we construct the StoragePath directly from the partition path and the file name
return new StoragePathInfo(new StoragePath(partitionPath, e.getKey()), e.getValue().getSize(),
false, (short) 0, blockSize, 0);
})
.collect(Collectors.toList());
}
private Stream<Map.Entry<String, HoodieMetadataFileInfo>> filterFileInfoEntries(boolean isDeleted) {
if (filesystemMetadata == null) {
return Stream.empty();
}
return filesystemMetadata.entrySet().stream().filter(e -> e.getValue().getIsDeleted() == isDeleted);
}
/**
* Get bloom filter index key.
*
* @param partitionIndexID - Partition index id
* @param fileIndexID - File index id
* @return Bloom filter index key
*/
public static String getBloomFilterIndexKey(PartitionIndexID partitionIndexID, FileIndexID fileIndexID) {
return partitionIndexID.asBase64EncodedString()
.concat(fileIndexID.asBase64EncodedString());
}
/**
* Get column stats index key.
*
* @param partitionIndexID - Partition index id
* @param fileIndexID - File index id
* @param columnIndexID - Column index id
* @return Column stats index key
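*
* An illustrative (hypothetical) sketch showing the composition order (column, then partition,
* then file), with made-up names:
* <pre>{@code
*   String key = getColumnStatsIndexKey(
*       new PartitionIndexID(getColumnStatsIndexPartitionIdentifier("2024/01/01")),
*       new FileIndexID("base-file-1.parquet"),
*       new ColumnIndexID("price"));
* }</pre>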
*/
public static String getColumnStatsIndexKey(PartitionIndexID partitionIndexID, FileIndexID fileIndexID, ColumnIndexID columnIndexID) {
return columnIndexID.asBase64EncodedString()
.concat(partitionIndexID.asBase64EncodedString())
.concat(fileIndexID.asBase64EncodedString());
}
/**
* Get column stats index key from the column range metadata.
*
* @param partitionName - Partition name
* @param columnRangeMetadata - Column range metadata
* @return Column stats index key
*/
public static String getColumnStatsIndexKey(String partitionName, HoodieColumnRangeMetadata<Comparable> columnRangeMetadata) {
final PartitionIndexID partitionIndexID = new PartitionIndexID(getColumnStatsIndexPartitionIdentifier(partitionName));
final FileIndexID fileIndexID = new FileIndexID(new StoragePath(columnRangeMetadata.getFilePath()).getName());
final ColumnIndexID columnIndexID = new ColumnIndexID(columnRangeMetadata.getColumnName());
return getColumnStatsIndexKey(partitionIndexID, fileIndexID, columnIndexID);
}
public static Stream<HoodieRecord> createColumnStatsRecords(String partitionName,
Collection<HoodieColumnRangeMetadata<Comparable>> columnRangeMetadataList,
boolean isDeleted) {
return columnRangeMetadataList.stream().map(
columnRangeMetadata -> createColumnStatsRecord(partitionName, columnRangeMetadata, isDeleted,
MetadataPartitionType.COLUMN_STATS.getPartitionPath(), MetadataPartitionType.COLUMN_STATS.getRecordType()));
}
public static Stream<HoodieRecord> createColumnStatsRecords(String partitionName,
Collection<HoodieColumnRangeMetadata<Comparable>> columnRangeMetadataList,
boolean isDeleted,
String metadataPartitionName,
int recordType) {
return columnRangeMetadataList.stream().map(
columnRangeMetadata -> createColumnStatsRecord(partitionName, columnRangeMetadata, isDeleted,
metadataPartitionName, recordType));
}
private static HoodieAvroRecord<HoodieMetadataPayload> createColumnStatsRecord(String partitionName,
HoodieColumnRangeMetadata<Comparable> columnRangeMetadata,
boolean isDeleted,
String metadataPartitionName,
int recordType) {
HoodieKey key = new HoodieKey(getColumnStatsIndexKey(partitionName, columnRangeMetadata), metadataPartitionName);
HoodieMetadataPayload payload = new HoodieMetadataPayload(
key.getRecordKey(),
HoodieMetadataColumnStats.newBuilder()
.setFileName(new StoragePath(columnRangeMetadata.getFilePath()).getName())
.setColumnName(columnRangeMetadata.getColumnName())
.setMinValue(wrapValueIntoAvro(columnRangeMetadata.getMinValue()))
.setMaxValue(wrapValueIntoAvro(columnRangeMetadata.getMaxValue()))
.setNullCount(columnRangeMetadata.getNullCount())
.setValueCount(columnRangeMetadata.getValueCount())
.setTotalSize(columnRangeMetadata.getTotalSize())
.setTotalUncompressedSize(columnRangeMetadata.getTotalUncompressedSize())
.setIsDeleted(isDeleted)
.build(),
recordType);
return new HoodieAvroRecord<>(key, payload);
}
public static Stream<HoodieRecord> createPartitionStatsRecords(String partitionPath,
Collection<HoodieColumnRangeMetadata<Comparable>> columnRangeMetadataList,
boolean isDeleted, boolean isTightBound) {
return columnRangeMetadataList.stream().map(columnRangeMetadata -> {
HoodieKey key = new HoodieKey(getPartitionStatsIndexKey(partitionPath, columnRangeMetadata.getColumnName()),
MetadataPartitionType.PARTITION_STATS.getPartitionPath());
HoodieMetadataPayload payload = new HoodieMetadataPayload(
key.getRecordKey(),
HoodieMetadataColumnStats.newBuilder()
.setFileName(columnRangeMetadata.getFilePath())
.setColumnName(columnRangeMetadata.getColumnName())
.setMinValue(wrapValueIntoAvro(columnRangeMetadata.getMinValue()))
.setMaxValue(wrapValueIntoAvro(columnRangeMetadata.getMaxValue()))
.setNullCount(columnRangeMetadata.getNullCount())
.setValueCount(columnRangeMetadata.getValueCount())
.setTotalSize(columnRangeMetadata.getTotalSize())
.setTotalUncompressedSize(columnRangeMetadata.getTotalUncompressedSize())
.setIsDeleted(isDeleted)
.setIsTightBound(isTightBound)
.build(),
MetadataPartitionType.PARTITION_STATS.getRecordType());
return new HoodieAvroRecord<>(key, payload);
});
}
/**
* Create and return a {@code HoodieMetadataPayload} to insert or update an entry for the record index.
*
* Each entry maps the key of a single record in HUDI to its location.
*
* @param recordKey Key of the record
* @param partition Name of the partition which contains the record
* @param fileId fileId which contains the record
* @param instantTime instantTime when the record was added
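*
* An illustrative (hypothetical) sketch with made-up key, partition, fileId and instant time;
* fileIdEncoding 0 expects a "uuid-index" style fileId:
* <pre>{@code
*   HoodieRecord<HoodieMetadataPayload> rliRecord = HoodieMetadataPayload.createRecordIndexUpdate(
*       "record-key-1", "2024/01/01",
*       "3f1f0a5e-0000-0000-0000-000000000000-0", "20240101000000",
*       RECORD_INDEX_FIELD_FILEID_ENCODING_UUID);
* }</pre>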
*/
public static HoodieRecord<HoodieMetadataPayload> createRecordIndexUpdate(String recordKey, String partition,
String fileId, String instantTime, int fileIdEncoding) {
HoodieKey key = new HoodieKey(recordKey, MetadataPartitionType.RECORD_INDEX.getPartitionPath());
long instantTimeMillis = -1;
try {
instantTimeMillis = TimelineUtils.parseDateFromInstantTime(instantTime).getTime();
} catch (Exception e) {
throw new HoodieMetadataException("Failed to create metadata payload for record index. Instant time parsing for " + instantTime + " failed ", e);
}
if (fileIdEncoding == 0) {
// Data file names carry a "-D" suffix, where D is an integer index of the file written;
// in older HUDI versions the file index was missing
final UUID uuid;
final int fileIndex;
try {
if (fileId.length() == 36) {
uuid = UUID.fromString(fileId);
fileIndex = RECORD_INDEX_MISSING_FILEINDEX_FALLBACK;
} else {
final int index = fileId.lastIndexOf("-");
uuid = UUID.fromString(fileId.substring(0, index));
fileIndex = Integer.parseInt(fileId.substring(index + 1));
}
} catch (Exception e) {
throw new HoodieMetadataException(String.format("Invalid UUID or index: fileID=%s, partition=%s, instantTime=%s",
fileId, partition, instantTime), e);
}
HoodieMetadataPayload payload = new HoodieMetadataPayload(recordKey,
new HoodieRecordIndexInfo(
partition,
uuid.getMostSignificantBits(),
uuid.getLeastSignificantBits(),
fileIndex,
EMPTY_STRING,
instantTimeMillis,
0,
null));
return new HoodieAvroRecord<>(key, payload);
} else {
HoodieMetadataPayload payload = new HoodieMetadataPayload(recordKey,
new HoodieRecordIndexInfo(
partition,
-1L,
-1L,
-1,
fileId,
instantTimeMillis,
1,
null));
return new HoodieAvroRecord<>(key, payload);
}
}
/**
* Create and return a {@code HoodieMetadataPayload} to insert or update an entry for the secondary index.
*
* Each entry maps the secondary key of a single record in HUDI to its record (or primary) key
*
* @param recordKey Primary key of the record
* @param secondaryKey Secondary key of the record
* @param isDeleted true if this record is deleted
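*
* An illustrative (hypothetical) sketch with made-up keys and a made-up metadata partition path
* for the target secondary index:
* <pre>{@code
*   HoodieRecord<HoodieMetadataPayload> siRecord = HoodieMetadataPayload.createSecondaryIndexRecord(
*       "record-key-1", "secondary-key-1", "secondary_index_idx_price", false);
* }</pre>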
*/
public static HoodieRecord<HoodieMetadataPayload> createSecondaryIndexRecord(String recordKey, String secondaryKey, String partitionPath, Boolean isDeleted) {
// the payload key is in the format of "secondaryKey$primaryKey"
HoodieKey key = new HoodieKey(SecondaryIndexKeyUtils.constructSecondaryIndexKey(secondaryKey, recordKey), partitionPath);
HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), new HoodieSecondaryIndexInfo(isDeleted));
return new HoodieAvroRecord<>(key, payload);
}
public boolean isSecondaryIndexDeleted() {
return secondaryIndexMetadata.getIsDeleted();
}
/**
* Create and return a {@code HoodieMetadataPayload} to delete a record in the Metadata Table's record index.
*
* @param recordKey Key of the record to be deleted
*/
public static HoodieRecord createRecordIndexDelete(String recordKey) {
HoodieKey key = new HoodieKey(recordKey, MetadataPartitionType.RECORD_INDEX.getPartitionPath());
return new HoodieAvroRecord<>(key, new EmptyHoodieRecordPayload());
}
/**
* If this is a record-level index entry, returns the file to which this is mapped.
*/
public HoodieRecordGlobalLocation getRecordGlobalLocation() {
return getLocationFromRecordIndexInfo(recordIndexMetadata);
}
public boolean isDeleted() {
return isDeletedRecord;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
} else if (!(other instanceof HoodieMetadataPayload)) {
return false;
}
HoodieMetadataPayload otherMetadataPayload = (HoodieMetadataPayload) other;
return this.type == otherMetadataPayload.type
&& Objects.equals(this.key, otherMetadataPayload.key)
&& Objects.equals(this.filesystemMetadata, otherMetadataPayload.filesystemMetadata)
&& Objects.equals(this.bloomFilterMetadata, otherMetadataPayload.bloomFilterMetadata)
&& Objects.equals(this.columnStatMetadata, otherMetadataPayload.columnStatMetadata)
&& Objects.equals(this.recordIndexMetadata, otherMetadataPayload.recordIndexMetadata);
}
@Override
public int hashCode() {
return Objects.hash(key, type, filesystemMetadata, bloomFilterMetadata, columnStatMetadata);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieMetadataPayload {");
sb.append(KEY_FIELD_NAME + "=").append(key).append(", ");
sb.append(SCHEMA_FIELD_NAME_TYPE + "=").append(type).append(", ");
if (type == MetadataPartitionType.FILES.getRecordType() || type == MetadataPartitionType.ALL_PARTITIONS.getRecordType()) {
sb.append("Files: {");
sb.append("creations=").append(Arrays.toString(getFilenames().toArray())).append(", ");
sb.append("deletions=").append(Arrays.toString(getDeletions().toArray())).append(", ");
sb.append("}");
} else if (type == MetadataPartitionType.BLOOM_FILTERS.getRecordType()) {
checkState(getBloomFilterMetadata().isPresent());
sb.append("BloomFilter: {");
sb.append("bloom size: ").append(getBloomFilterMetadata().get().getBloomFilter().array().length).append(", ");
sb.append("timestamp: ").append(getBloomFilterMetadata().get().getTimestamp()).append(", ");
sb.append("deleted: ").append(getBloomFilterMetadata().get().getIsDeleted());
sb.append("}");
} else if (type == MetadataPartitionType.COLUMN_STATS.getRecordType()) {
checkState(getColumnStatMetadata().isPresent());
sb.append("ColStats: {");
sb.append(getColumnStatMetadata().get());
sb.append("}");
} else if (type == MetadataPartitionType.RECORD_INDEX.getRecordType()) {
sb.append("RecordIndex: {");
sb.append("location=").append(getRecordGlobalLocation());
sb.append("}");
}
sb.append('}');
return sb.toString();
}
}