Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.apache.hudi.metadata.HoodieBackedTableMetadata Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.metadata;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.data.HoodieListData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.function.SerializableFunction;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.InstantComparison;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.SpillableMapUtils;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.TableNotFoundException;
import org.apache.hudi.expression.BindVisitor;
import org.apache.hudi.expression.Expression;
import org.apache.hudi.internal.schema.Types;
import org.apache.hudi.io.storage.HoodieIOFactory;
import org.apache.hudi.io.storage.HoodieSeekingFileReader;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.util.Transient;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FULL_SCAN_LOG_FILES;
import static org.apache.hudi.common.util.CollectionUtils.toStream;
import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_FILES;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getFileSystemView;
/**
* Table metadata provided by an internal DFS backed Hudi metadata table.
*/
public class HoodieBackedTableMetadata extends BaseTableMetadata {
private static final Logger LOG = LoggerFactory.getLogger(HoodieBackedTableMetadata.class);
private final String metadataBasePath;
private HoodieTableMetaClient metadataMetaClient;
private HoodieTableConfig metadataTableConfig;
private HoodieTableFileSystemView metadataFileSystemView;
// should we reuse the open file handles, across calls
private final boolean reuse;
// Readers for the latest file slice corresponding to file groups in the metadata partition
private final Transient, Pair, HoodieMetadataLogRecordReader>>> partitionReaders =
Transient.lazy(ConcurrentHashMap::new);
// Latest file slices in the metadata partitions
private final Map> partitionFileSliceMap = new ConcurrentHashMap<>();
/**
 * Creates a metadata reader that does not keep opened readers around between calls;
 * equivalent to the five-argument constructor with {@code reuse = false}.
 */
public HoodieBackedTableMetadata(HoodieEngineContext engineContext, HoodieStorage storage,
                                 HoodieMetadataConfig metadataConfig, String datasetBasePath) {
  this(engineContext, storage, metadataConfig, datasetBasePath, false);
}
/**
 * Creates a metadata reader and eagerly attempts to open the metadata table.
 *
 * @param reuse whether readers opened for a file slice are cached and reused across lookups
 */
public HoodieBackedTableMetadata(HoodieEngineContext engineContext, HoodieStorage storage,
                                 HoodieMetadataConfig metadataConfig, String datasetBasePath,
                                 boolean reuse) {
  super(engineContext, storage, metadataConfig, datasetBasePath);
  this.metadataBasePath = HoodieTableMetadata.getMetadataTableBasePath(dataBasePath.toString());
  this.reuse = reuse;
  initIfNeeded();
}
/**
 * Lazily builds the metadata table meta client, file-system view and table config.
 * Does nothing when the metadata table is flagged as not initialized or when the meta client
 * already exists. On any failure the table is flagged as not initialized and all three
 * handles are reset to null.
 */
private void initIfNeeded() {
  if (!isMetadataTableInitialized) {
    if (!HoodieTableMetadata.isMetadataTable(metadataBasePath)) {
      LOG.info("Metadata table is disabled.");
    }
    return;
  }
  if (this.metadataMetaClient != null) {
    // Already opened by an earlier call.
    return;
  }

  try {
    this.metadataMetaClient = HoodieTableMetaClient.builder()
        .setStorage(storage)
        .setBasePath(metadataBasePath)
        .build();
    this.metadataFileSystemView = getFileSystemView(metadataMetaClient);
    this.metadataTableConfig = metadataMetaClient.getTableConfig();
  } catch (TableNotFoundException e) {
    LOG.warn("Metadata table was not found at path " + metadataBasePath);
    markMetadataTableUnavailable();
  } catch (Exception e) {
    LOG.error("Failed to initialize metadata table at path " + metadataBasePath, e);
    markMetadataTableUnavailable();
  }
}

// Flags the metadata table as not initialized and drops every handle derived from it.
private void markMetadataTableUnavailable() {
  this.isMetadataTableInitialized = false;
  this.metadataMetaClient = null;
  this.metadataFileSystemView = null;
  this.metadataTableConfig = null;
}
@Override
protected Option> getRecordByKey(String key, String partitionName) {
Map> recordsByKeys = getRecordsByKeys(Collections.singletonList(key), partitionName);
return Option.ofNullable(recordsByKeys.get(key));
}
/**
 * Returns the partition paths matching any of the given prefixes, additionally pruned by
 * {@code expression} when pruning is possible.
 *
 * <p>Pruning is applied only when hive-style partitioning is enabled and the partition level
 * of the first selected path equals the number of partition fields; otherwise all
 * prefix-matched paths are returned unfiltered.
 *
 * @param relativePathPrefixes prefixes to match partition paths against
 * @param partitionFields      partition schema the expression is bound to
 * @param expression           filter evaluated against the extracted partition values
 * @return the selected partition paths; empty when no path matches the prefixes
 */
@Override
public List<String> getPartitionPathWithPathPrefixUsingFilterExpression(List<String> relativePathPrefixes,
                                                                        Types.RecordType partitionFields,
                                                                        Expression expression) throws IOException {
  Expression boundedExpr = expression.accept(new BindVisitor(partitionFields, caseSensitive));
  List<String> selectedPartitionPaths = getPartitionPathWithPathPrefixes(relativePathPrefixes);
  if (selectedPartitionPaths.isEmpty()) {
    // Nothing to prune; also avoids get(0) on an empty list below.
    return selectedPartitionPaths;
  }

  // Can only prune partitions if the number of partition levels matches partition fields
  // Here we'll check the first selected partition to see whether the numbers match.
  if (hiveStylePartitioningEnabled
      && getPathPartitionLevel(partitionFields, selectedPartitionPaths.get(0)) == partitionFields.fields().size()) {
    return selectedPartitionPaths.stream()
        .filter(p ->
            (boolean) boundedExpr.eval(extractPartitionValues(partitionFields, p, urlEncodePartitioningEnabled)))
        .collect(Collectors.toList());
  }

  return selectedPartitionPaths;
}
/**
 * Returns every partition path that matches at least one of the given relative path prefixes.
 * A path matches a prefix when the prefix is null/empty, equals the path, or is a parent
 * directory of the path.
 */
@Override
public List<String> getPartitionPathWithPathPrefixes(List<String> relativePathPrefixes) throws IOException {
  // TODO: consider skipping this method for non-partitioned table and simplify the checks
  return getAllPartitionPaths().stream()
      .filter(p -> relativePathPrefixes.stream().anyMatch(relativePathPrefix ->
          // Partition paths stored in metadata table do not have the slash at the end.
          // If the relativePathPrefix is empty, return all partition paths;
          // else if the relative path prefix is the same as the path, this is an exact match;
          // else, we need to make sure the path is a subdirectory of relativePathPrefix, by
          // checking if the path starts with relativePathPrefix appended by a slash ("/").
          StringUtils.isNullOrEmpty(relativePathPrefix)
              || p.equals(relativePathPrefix) || p.startsWith(relativePathPrefix + "/")))
      .collect(Collectors.toList());
}
@Override
public HoodieData> getRecordsByKeyPrefixes(List keyPrefixes,
String partitionName,
boolean shouldLoadInMemory) {
// Sort the prefixes so that keys are looked up in order
List sortedKeyPrefixes = new ArrayList<>(keyPrefixes);
Collections.sort(sortedKeyPrefixes);
// NOTE: Since we partition records to a particular file-group by full key, we will have
// to scan all file-groups for all key-prefixes as each of these might contain some
// records matching the key-prefix
List partitionFileSlices = partitionFileSliceMap.computeIfAbsent(partitionName,
k -> HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, metadataFileSystemView, partitionName));
checkState(!partitionFileSlices.isEmpty(), "Number of file slices for partition " + partitionName + " should be > 0");
return (shouldLoadInMemory ? HoodieListData.lazy(partitionFileSlices) :
getEngineContext().parallelize(partitionFileSlices))
.flatMap(
(SerializableFunction>>) fileSlice -> {
// NOTE: Since this will be executed by executors, we can't access previously cached
// readers, and therefore have to always open new ones
Pair, HoodieMetadataLogRecordReader> readers =
openReaders(partitionName, fileSlice);
try {
List timings = new ArrayList<>();
HoodieSeekingFileReader> baseFileReader = readers.getKey();
HoodieMetadataLogRecordReader logRecordScanner = readers.getRight();
if (baseFileReader == null && logRecordScanner == null) {
// TODO: what do we do if both does not exist? should we throw an exception and let caller do the fallback ?
return Collections.emptyIterator();
}
boolean fullKeys = false;
Map> logRecords =
readLogRecords(logRecordScanner, sortedKeyPrefixes, fullKeys, timings);
Map> mergedRecords =
readFromBaseAndMergeWithLogRecords(baseFileReader, sortedKeyPrefixes, fullKeys, logRecords, timings, partitionName);
LOG.debug(String.format("Metadata read for %s keys took [baseFileRead, logMerge] %s ms",
sortedKeyPrefixes.size(), timings));
return mergedRecords.values().iterator();
} catch (IOException ioe) {
throw new HoodieIOException("Error merging records from metadata table for " + sortedKeyPrefixes.size() + " key : ", ioe);
} finally {
closeReader(readers);
}
});
}
@Override
protected Map> getRecordsByKeys(List keys, String partitionName) {
if (keys.isEmpty()) {
return Collections.emptyMap();
}
Map> result;
// Load the file slices for the partition. Each file slice is a shard which saves a portion of the keys.
List partitionFileSlices = partitionFileSliceMap.computeIfAbsent(partitionName,
k -> HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, metadataFileSystemView, partitionName));
final int numFileSlices = partitionFileSlices.size();
checkState(numFileSlices > 0, "Number of file slices for partition " + partitionName + " should be > 0");
// Lookup keys from each file slice
if (numFileSlices == 1) {
// Optimization for a single slice for smaller metadata table partitions
result = lookupKeysFromFileSlice(partitionName, keys, partitionFileSlices.get(0));
} else {
// Parallel lookup for large sized partitions with many file slices
// Partition the keys by the file slice which contains it
ArrayList> partitionedKeys = partitionKeysByFileSlices(keys, numFileSlices);
result = new HashMap<>(keys.size());
getEngineContext().setJobStatus(this.getClass().getSimpleName(), "Reading keys from metadata table partition " + partitionName);
getEngineContext().map(partitionedKeys, keysList -> {
if (keysList.isEmpty()) {
return Collections.>emptyMap();
}
int shardIndex = HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(keysList.get(0), numFileSlices);
return lookupKeysFromFileSlice(partitionName, keysList, partitionFileSlices.get(shardIndex));
}, partitionedKeys.size()).forEach(result::putAll);
}
return result;
}
private static ArrayList> partitionKeysByFileSlices(List keys, int numFileSlices) {
ArrayList> partitionedKeys = new ArrayList<>(numFileSlices);
for (int i = 0; i < numFileSlices; ++i) {
partitionedKeys.add(new ArrayList<>());
}
keys.forEach(key -> {
int shardIndex = HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(key, numFileSlices);
partitionedKeys.get(shardIndex).add(key);
});
return partitionedKeys;
}
@Override
public Map>> getAllRecordsByKeys(List keys, String partitionName) {
if (keys.isEmpty()) {
return Collections.emptyMap();
}
Map>> result;
// Load the file slices for the partition. Each file slice is a shard which saves a portion of the keys.
List partitionFileSlices = partitionFileSliceMap.computeIfAbsent(partitionName,
k -> HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, metadataFileSystemView, partitionName));
final int numFileSlices = partitionFileSlices.size();
checkState(numFileSlices > 0, "Number of file slices for partition " + partitionName + " should be > 0");
// Lookup keys from each file slice
if (numFileSlices == 1) {
// Optimization for a single slice for smaller metadata table partitions
result = lookupAllKeysFromFileSlice(partitionName, keys, partitionFileSlices.get(0));
} else {
// Parallel lookup for large sized partitions with many file slices
// Partition the keys by the file slice which contains it
ArrayList> partitionedKeys = partitionKeysByFileSlices(keys, numFileSlices);
result = new HashMap<>(keys.size());
getEngineContext().setJobStatus(this.getClass().getSimpleName(), "Reading keys from metadata table partition " + partitionName);
getEngineContext().map(partitionedKeys, keysList -> {
if (keysList.isEmpty()) {
return Collections.>emptyMap();
}
int shardIndex = HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(keysList.get(0), numFileSlices);
return lookupAllKeysFromFileSlice(partitionName, keysList, partitionFileSlices.get(shardIndex));
}, partitionedKeys.size()).forEach(map -> result.putAll((Map>>) map));
}
return result;
}
/**
* Lookup list of keys from a single file slice.
*
* @param partitionName Name of the partition
* @param keys The list of keys to lookup
* @param fileSlice The file slice to read
* @return A {@code Map} of key name to {@code HoodieRecord} for the keys which were found in the file slice
*/
private Map> lookupKeysFromFileSlice(String partitionName, List keys, FileSlice fileSlice) {
Pair, HoodieMetadataLogRecordReader> readers = getOrCreateReaders(partitionName, fileSlice);
try {
HoodieSeekingFileReader> baseFileReader = readers.getKey();
HoodieMetadataLogRecordReader logRecordScanner = readers.getRight();
if (baseFileReader == null && logRecordScanner == null) {
return Collections.emptyMap();
}
// Sort it here once so that we don't need to sort individually for base file and for each individual log files.
List sortedKeys = new ArrayList<>(keys);
Collections.sort(sortedKeys);
boolean fullKeys = true;
List timings = new ArrayList<>(1);
Map> logRecords = readLogRecords(logRecordScanner, sortedKeys, fullKeys, timings);
return readFromBaseAndMergeWithLogRecords(baseFileReader, sortedKeys, fullKeys, logRecords, timings, partitionName);
} catch (IOException ioe) {
throw new HoodieIOException("Error merging records from metadata table for " + keys.size() + " key : ", ioe);
} finally {
if (!reuse) {
closeReader(readers);
}
}
}
private Map> readLogRecords(HoodieMetadataLogRecordReader logRecordReader,
List sortedKeys,
boolean fullKey,
List timings) {
HoodieTimer timer = HoodieTimer.start();
if (logRecordReader == null) {
timings.add(timer.endTimer());
return Collections.emptyMap();
}
try {
return fullKey ? logRecordReader.getRecordsByKeys(sortedKeys) : logRecordReader.getRecordsByKeyPrefixes(sortedKeys);
} finally {
timings.add(timer.endTimer());
}
}
private Map> readFromBaseAndMergeWithLogRecords(HoodieSeekingFileReader> reader,
List sortedKeys,
boolean fullKeys,
Map> logRecords,
List timings,
String partitionName) throws IOException {
HoodieTimer timer = HoodieTimer.start();
if (reader == null) {
// No base file at all
timings.add(timer.endTimer());
return logRecords;
}
HoodieTimer readTimer = HoodieTimer.start();
Map> records =
fetchBaseFileRecordsByKeys(reader, sortedKeys, fullKeys, partitionName);
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BASEFILE_READ_STR, readTimer.endTimer()));
// Iterate over all provided log-records, merging them into existing records
logRecords.values().forEach(logRecord ->
records.merge(
logRecord.getRecordKey(),
logRecord,
(oldRecord, newRecord) -> {
HoodieMetadataPayload mergedPayload = newRecord.getData().preCombine(oldRecord.getData());
return mergedPayload.isDeleted() ? null : new HoodieAvroRecord<>(oldRecord.getKey(), mergedPayload);
}
));
timings.add(timer.endTimer());
return records;
}
@SuppressWarnings("unchecked")
private Map> fetchBaseFileRecordsByKeys(HoodieSeekingFileReader reader,
List sortedKeys,
boolean fullKeys,
String partitionName) throws IOException {
Map> result;
try (ClosableIterator> records = fullKeys
? reader.getRecordsByKeysIterator(sortedKeys)
: reader.getRecordsByKeyPrefixIterator(sortedKeys)) {
result = toStream(records)
.map(record -> {
GenericRecord data = (GenericRecord) record.getData();
return Pair.of(
(String) (data).get(HoodieMetadataPayload.KEY_FIELD_NAME),
composeRecord(data, partitionName));
})
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
}
return result;
}
private Map>> lookupAllKeysFromFileSlice(String partitionName, List keys, FileSlice fileSlice) {
Pair, HoodieMetadataLogRecordReader> readers = getOrCreateReaders(partitionName, fileSlice);
try {
List timings = new ArrayList<>();
HoodieSeekingFileReader> baseFileReader = readers.getKey();
HoodieMetadataLogRecordReader logRecordScanner = readers.getRight();
if (baseFileReader == null && logRecordScanner == null) {
return Collections.emptyMap();
}
// Sort it here once so that we don't need to sort individually for base file and for each individual log files.
List sortedKeys = new ArrayList<>(keys);
Collections.sort(sortedKeys);
Map>> logRecords = readAllLogRecords(logRecordScanner, sortedKeys, timings);
return readFromBaseAndMergeWithAllLogRecords(baseFileReader, sortedKeys, true, logRecords, timings, partitionName);
} catch (IOException ioe) {
throw new HoodieIOException("Error merging records from metadata table for " + keys.size() + " key : ", ioe);
} finally {
if (!reuse) {
closeReader(readers);
}
}
}
private Map>> readAllLogRecords(HoodieMetadataLogRecordReader logRecordReader,
List sortedKeys,
List timings) {
HoodieTimer timer = HoodieTimer.start();
if (logRecordReader == null) {
timings.add(timer.endTimer());
return Collections.emptyMap();
}
try {
return logRecordReader.getAllRecordsByKeys(sortedKeys);
} finally {
timings.add(timer.endTimer());
}
}
private Map>> readFromBaseAndMergeWithAllLogRecords(HoodieSeekingFileReader> reader,
List sortedKeys,
boolean fullKeys,
Map>> logRecords,
List timings,
String partitionName) throws IOException {
HoodieTimer timer = HoodieTimer.start();
if (reader == null) {
// No base file at all
timings.add(timer.endTimer());
return logRecords;
}
HoodieTimer readTimer = HoodieTimer.start();
Map>> records =
fetchBaseFileAllRecordsByKeys(reader, sortedKeys, fullKeys, partitionName);
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BASEFILE_READ_STR, readTimer.endTimer()));
// Iterate over all provided log-records, merging them into existing records
logRecords.entrySet().forEach(kv -> {
records.merge(
kv.getKey(),
kv.getValue(),
(oldRecordList, newRecordList) -> {
List> mergedRecordList = new ArrayList<>();
HoodieMetadataPayload mergedPayload = null;
HoodieKey key = null;
if (!oldRecordList.isEmpty() && !newRecordList.isEmpty()) {
mergedPayload = newRecordList.get(0).getData().preCombine(oldRecordList.get(0).getData());
key = newRecordList.get(0).getKey();
} else if (!oldRecordList.isEmpty()) {
mergedPayload = oldRecordList.get(0).getData();
key = oldRecordList.get(0).getKey();
} else if (!newRecordList.isEmpty()) {
mergedPayload = newRecordList.get(0).getData();
key = newRecordList.get(0).getKey();
}
if (mergedPayload != null && !mergedPayload.isDeleted()) {
mergedRecordList.add(new HoodieAvroRecord<>(key, mergedPayload));
}
return mergedRecordList;
}
);
});
timings.add(timer.endTimer());
return records;
}
private Map>> fetchBaseFileAllRecordsByKeys(HoodieSeekingFileReader reader,
List sortedKeys,
boolean fullKeys,
String partitionName) throws IOException {
ClosableIterator> records = fullKeys
? reader.getRecordsByKeysIterator(sortedKeys)
: reader.getRecordsByKeyPrefixIterator(sortedKeys);
return toStream(records)
.map(record -> {
GenericRecord data = (GenericRecord) record.getData();
return Pair.of(
(String) (data).get(HoodieMetadataPayload.KEY_FIELD_NAME),
composeRecord(data, partitionName));
})
.collect(Collectors.groupingBy(Pair::getKey, Collectors.mapping(Pair::getValue, Collectors.toList())));
}
/**
 * Converts an Avro record read from a metadata base file into a {@code HoodieRecord}, using the
 * payload class and pre-combine field from the metadata table config.
 *
 * <p>When the table does not populate meta fields, the record key and partition field names
 * from the table config are passed along with {@code partitionName}.
 */
private HoodieRecord<HoodieMetadataPayload> composeRecord(GenericRecord avroRecord, String partitionName) {
  if (metadataTableConfig.populateMetaFields()) {
    return SpillableMapUtils.convertToHoodieRecordPayload(avroRecord,
        metadataTableConfig.getPayloadClass(), metadataTableConfig.getPreCombineField(), false);
  }
  return SpillableMapUtils.convertToHoodieRecordPayload(avroRecord,
      metadataTableConfig.getPayloadClass(), metadataTableConfig.getPreCombineField(),
      Pair.of(metadataTableConfig.getRecordKeyFieldProp(), metadataTableConfig.getPartitionFieldProp()),
      false, Option.of(partitionName), Option.empty());
}
/**
* Create a file reader and the record scanner for a given partition and file slice
* if readers are not already available.
*
* @param partitionName - Partition name
* @param slice - The file slice to open readers for
* @return File reader and the record scanner pair for the requested file slice
*/
private Pair, HoodieMetadataLogRecordReader> getOrCreateReaders(String partitionName, FileSlice slice) {
if (reuse) {
Pair key = Pair.of(partitionName, slice.getFileId());
return partitionReaders.get().computeIfAbsent(key, ignored -> openReaders(partitionName, slice));
} else {
return openReaders(partitionName, slice);
}
}
private Pair, HoodieMetadataLogRecordReader> openReaders(String partitionName, FileSlice slice) {
try {
HoodieTimer timer = HoodieTimer.start();
// Open base file reader
Pair, Long> baseFileReaderOpenTimePair = getBaseFileReader(slice, timer);
HoodieSeekingFileReader> baseFileReader = baseFileReaderOpenTimePair.getKey();
final long baseFileOpenMs = baseFileReaderOpenTimePair.getValue();
// Open the log record scanner using the log files from the latest file slice
List logFiles = slice.getLogFiles().collect(Collectors.toList());
Pair logRecordScannerOpenTimePair =
getLogRecordScanner(logFiles, partitionName, Option.empty(), Option.empty());
HoodieMetadataLogRecordReader logRecordScanner = logRecordScannerOpenTimePair.getKey();
final long logScannerOpenMs = logRecordScannerOpenTimePair.getValue();
metrics.ifPresent(metrics -> metrics.updateMetrics(HoodieMetadataMetrics.SCAN_STR,
baseFileOpenMs + logScannerOpenMs));
return Pair.of(baseFileReader, logRecordScanner);
} catch (IOException e) {
throw new HoodieIOException("Error opening readers for metadata table partition " + partitionName, e);
}
}
private Pair, Long> getBaseFileReader(FileSlice slice, HoodieTimer timer) throws IOException {
HoodieSeekingFileReader> baseFileReader;
long baseFileOpenMs;
// If the base file is present then create a reader
Option baseFile = slice.getBaseFile();
if (baseFile.isPresent()) {
StoragePath baseFilePath = baseFile.get().getStoragePath();
baseFileReader = (HoodieSeekingFileReader>) HoodieIOFactory.getIOFactory(metadataMetaClient.getStorage())
.getReaderFactory(HoodieRecordType.AVRO)
.getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, baseFilePath);
baseFileOpenMs = timer.endTimer();
LOG.info(String.format("Opened metadata base file from %s at instant %s in %d ms", baseFilePath,
baseFile.get().getCommitTime(), baseFileOpenMs));
} else {
baseFileReader = null;
baseFileOpenMs = 0L;
timer.endTimer();
}
return Pair.of(baseFileReader, baseFileOpenMs);
}
/**
 * Builds a log record reader over the given metadata log files.
 *
 * @param logFiles              log files to read; sorted with the log file comparator before use
 * @param partitionName         metadata partition the log files belong to
 * @param allowFullScanOverride explicit full-scan setting; when empty the per-partition
 *                              default is used
 * @param timeTravelInstant     optional upper bound; when present the reader's latest instant
 *                              is the minimum of this and the latest completed metadata instant
 * @return pair of (log record reader, time taken to open it in ms)
 */
public Pair<HoodieMetadataLogRecordReader, Long> getLogRecordScanner(List<HoodieLogFile> logFiles,
                                                                     String partitionName,
                                                                     Option<Boolean> allowFullScanOverride,
                                                                     Option<String> timeTravelInstant) {
  HoodieTimer timer = HoodieTimer.start();
  List<String> sortedLogFilePaths = logFiles.stream()
      .sorted(HoodieLogFile.getLogFileComparator())
      .map(o -> o.getPath().toString())
      .collect(Collectors.toList());

  // Only those log files which have a corresponding completed instant on the dataset should be read
  // This is because the metadata table is updated before the dataset instants are committed.
  Set<String> validInstantTimestamps = HoodieTableMetadataUtil
      .getValidInstantTimestamps(dataMetaClient, metadataMetaClient);

  Option<HoodieInstant> latestMetadataInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
  String latestMetadataInstantTime = latestMetadataInstant.map(HoodieInstant::requestedTime).orElse(SOLO_COMMIT_TIMESTAMP);
  if (timeTravelInstant.isPresent()) {
    latestMetadataInstantTime = InstantComparison.minTimestamp(latestMetadataInstantTime, timeTravelInstant.get());
  }

  boolean allowFullScan = allowFullScanOverride.orElseGet(() -> isFullScanAllowedForPartition(partitionName));

  // Load the schema
  Schema schema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
  HoodieCommonConfig commonConfig = HoodieCommonConfig.newBuilder().fromProperties(metadataConfig.getProps()).build();
  HoodieMetadataLogRecordReader logRecordScanner = HoodieMetadataLogRecordReader.newBuilder()
      .withStorage(metadataMetaClient.getStorage())
      .withBasePath(metadataBasePath)
      .withLogFilePaths(sortedLogFilePaths)
      .withReaderSchema(schema)
      .withLatestInstantTime(latestMetadataInstantTime)
      .withMaxMemorySizeInBytes(metadataConfig.getMaxReaderMemory())
      .withBufferSize(metadataConfig.getMaxReaderBufferSize())
      .withSpillableMapBasePath(metadataConfig.getSplliableMapDir())
      .withDiskMapType(commonConfig.getSpillableDiskMapType())
      .withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled())
      .withLogBlockTimestamps(validInstantTimestamps)
      .enableFullScan(allowFullScan)
      .withPartition(partitionName)
      .withEnableOptimizedLogBlocksScan(metadataConfig.isOptimizedLogBlocksScanEnabled())
      .withTableMetaClient(metadataMetaClient)
      .build();

  Long logScannerOpenMs = timer.endTimer();
  LOG.info(String.format("Opened %d metadata log files (dataset instant=%s, metadata instant=%s) in %d ms",
      sortedLogFilePaths.size(), getLatestDataInstantTime(), latestMetadataInstantTime, logScannerOpenMs));
  return Pair.of(logRecordScanner, logScannerOpenMs);
}
/**
 * Tells whether log files of the given metadata partition may be fully scanned eagerly.
 *
 * <p>Only the "files" partition is eligible, and then the configured default decides. Every
 * other partition (e.g. "column_stats", "bloom_filters") has to be read through point lookups.
 */
private boolean isFullScanAllowedForPartition(String partitionName) {
  // Dereferencing partitionName keeps the null behavior of a switch on the string.
  return partitionName.equals(PARTITION_NAME_FILES) && DEFAULT_METADATA_ENABLE_FULL_SCAN_LOG_FILES;
}
/**
 * Closes all cached partition readers, forgets the cached file slices and closes the metadata
 * file-system view when one is open.
 */
@Override
public void close() {
  closePartitionReaders();
  partitionFileSliceMap.clear();

  HoodieTableFileSystemView view = this.metadataFileSystemView;
  if (view == null) {
    return;
  }
  view.close();
  this.metadataFileSystemView = null;
}
/**
* Close the file reader and the record scanner for the given file slice.
*
* @param partitionFileSlicePair - Partition and FileSlice
*/
private synchronized void close(Pair partitionFileSlicePair) {
Pair, HoodieMetadataLogRecordReader> readers =
partitionReaders.get().remove(partitionFileSlicePair);
closeReader(readers);
}
/**
 * Close and clear all the partitions readers.
 */
private void closePartitionReaders() {
  for (Pair<String, String> partitionFileSlicePair : partitionReaders.get().keySet()) {
    close(partitionFileSlicePair);
  }
  partitionReaders.get().clear();
}
private void closeReader(Pair, HoodieMetadataLogRecordReader> readers) {
if (readers != null) {
try {
if (readers.getKey() != null) {
readers.getKey().close();
}
if (readers.getValue() != null) {
readers.getValue().close();
}
} catch (Exception e) {
throw new HoodieException("Error closing resources during metadata table merge", e);
}
}
}
/**
 * @return the current value of the metadata-table-initialized flag
 */
public boolean enabled() {
  return this.isMetadataTableInitialized;
}
/**
 * @return the meta client of the metadata table, or null when it has not been opened
 */
public HoodieTableMetaClient getMetadataMetaClient() {
  return this.metadataMetaClient;
}
/**
 * @return the file-system view of the metadata table, or null when it is not open
 */
public HoodieTableFileSystemView getMetadataFileSystemView() {
  return this.metadataFileSystemView;
}
/**
 * Collects metadata table statistics through the metrics helper for the partition paths of
 * all valid metadata partition types.
 *
 * @return the statistics, or a new empty map when metrics are not present
 */
public Map<String, String> stats() {
  Set<String> allMetadataPartitionPaths = Arrays.stream(MetadataPartitionType.getValidValues())
      .map(MetadataPartitionType::getPartitionPath)
      .collect(Collectors.toSet());
  return metrics.map(m -> m.getStats(true, metadataMetaClient, this, allMetadataPartitionPaths)).orElseGet(HashMap::new);
}
/**
 * @return requested time of the last completed delta commit on the metadata table's active
 *         timeline; empty when there is none or the metadata table is not open
 */
@Override
public Option<String> getSyncedInstantTime() {
  if (metadataMetaClient == null) {
    return Option.empty();
  }
  Option<HoodieInstant> latestInstant =
      metadataMetaClient.getActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants().lastInstant();
  return latestInstant.isPresent() ? Option.of(latestInstant.get().requestedTime()) : Option.empty();
}
/**
 * @return requested time of the last completed instant on the metadata table's
 *         commit-and-replace timeline; empty when there is none or the metadata table is not open
 */
@Override
public Option<String> getLatestCompactionTime() {
  if (metadataMetaClient == null) {
    return Option.empty();
  }
  Option<HoodieInstant> latestCompaction =
      metadataMetaClient.getActiveTimeline().getCommitAndReplaceTimeline().filterCompletedInstants().lastInstant();
  return latestCompaction.isPresent() ? Option.of(latestCompaction.get().requestedTime()) : Option.empty();
}
/**
 * Refreshes this reader: re-attempts initialization, reloads the data and metadata timelines,
 * rebuilds the metadata file-system view and drops all cached readers and file slices.
 *
 * <p>The previous file-system view is closed only when one exists, so calling this after
 * {@code close()} (which nulls the view but keeps the meta client) does not fail.
 */
@Override
public void reset() {
  initIfNeeded();
  dataMetaClient.reloadActiveTimeline();
  if (metadataMetaClient != null) {
    metadataMetaClient.reloadActiveTimeline();
    if (metadataFileSystemView != null) {
      metadataFileSystemView.close();
    }
    metadataFileSystemView = getFileSystemView(metadataMetaClient);
  }
  // the cached reader has max instant time restriction, they should be cleared
  // because the metadata timeline may have changed.
  closePartitionReaders();
  partitionFileSliceMap.clear();
}
/**
 * Returns the number of latest merged file slices of the given metadata partition, loading
 * and caching them on first use.
 *
 * <p>The value returned by {@code computeIfAbsent} is used directly, so a concurrent clear of
 * the cache between the load and a second lookup cannot yield a null list.
 */
@Override
public int getNumFileGroupsForPartition(MetadataPartitionType partition) {
  String partitionPath = partition.getPartitionPath();
  return partitionFileSliceMap.computeIfAbsent(partitionPath,
      k -> HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient,
          metadataFileSystemView, partitionPath)).size();
}
@Override
protected Map getSecondaryKeysForRecordKeys(List recordKeys, String partitionName) {
if (recordKeys.isEmpty()) {
return Collections.emptyMap();
}
// Load the file slices for the partition. Each file slice is a shard which saves a portion of the keys.
List partitionFileSlices =
partitionFileSliceMap.computeIfAbsent(partitionName, k -> HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, metadataFileSystemView, partitionName));
if (partitionFileSlices.isEmpty()) {
return Collections.emptyMap();
}
// Parallel lookup keys from each file slice
Map reverseSecondaryKeyMap = new HashMap<>(recordKeys.size());
getEngineContext().setJobStatus(this.getClass().getSimpleName(), "Lookup secondary keys from metadata table partition " + partitionName);
List> secondaryToRecordKeyPairList = getEngineContext().flatMap(partitionFileSlices,
(SerializableFunction>>) v1 -> reverseLookupSecondaryKeys(partitionName, recordKeys, v1)
.entrySet().stream()
.map(entry -> Pair.of(entry.getKey(), entry.getValue())).collect(Collectors.toList()).stream(), partitionFileSlices.size());
secondaryToRecordKeyPairList.forEach(pair -> reverseSecondaryKeyMap.put(pair.getKey(), pair.getValue()));
return reverseSecondaryKeyMap;
}
private Map reverseLookupSecondaryKeys(String partitionName, List recordKeys, FileSlice fileSlice) {
Map recordKeyMap = new HashMap<>();
Pair, HoodieMetadataLogRecordReader> readers = getOrCreateReaders(partitionName, fileSlice);
try {
HoodieSeekingFileReader> baseFileReader = readers.getKey();
HoodieMetadataLogRecordReader logRecordScanner = readers.getRight();
if (baseFileReader == null && logRecordScanner == null) {
return Collections.emptyMap();
}
Set keySet = new TreeSet<>(recordKeys);
Set deletedRecordsFromLogs = new HashSet<>();
// Map of recordKey (primaryKey) -> log record that is not deleted for all input recordKeys
Map> logRecordsMap = new HashMap<>();
logRecordScanner.getRecords().forEach(record -> {
String recordKey = SecondaryIndexKeyUtils.getRecordKeyFromSecondaryIndexKey(record.getRecordKey());
HoodieMetadataPayload payload = record.getData();
if (!payload.isDeleted()) { // process only valid records.
if (keySet.contains(recordKey)) {
logRecordsMap.put(recordKey, record);
}
} else {
deletedRecordsFromLogs.add(recordKey);
}
});
// Map of (record-key, secondary-index-record)
Map> baseFileRecords = fetchBaseFileAllRecordsByPayloadForSecIndex(baseFileReader, keySet, partitionName);
if (baseFileRecords == null || baseFileRecords.isEmpty()) {
logRecordsMap.forEach((key1, value1) -> {
if (!value1.getData().isDeleted()) {
recordKeyMap.put(key1, SecondaryIndexKeyUtils.getSecondaryKeyFromSecondaryIndexKey(value1.getRecordKey()));
}
});
} else {
// Iterate over all provided log-records, merging them into existing records
logRecordsMap.forEach((key1, value1) -> baseFileRecords.merge(key1, value1, (oldRecord, newRecord) -> {
Option> mergedRecord = HoodieMetadataPayload.combineSecondaryIndexRecord(oldRecord, newRecord);
return mergedRecord.orElse(null);
}));
baseFileRecords.forEach((key, value) -> {
if (!deletedRecordsFromLogs.contains(key)) {
recordKeyMap.put(key, SecondaryIndexKeyUtils.getSecondaryKeyFromSecondaryIndexKey(value.getRecordKey()));
}
});
}
} catch (IOException ioe) {
throw new HoodieIOException("Error merging records from metadata table for " + recordKeys.size() + " key : ", ioe);
} finally {
if (!reuse) {
closeReader(readers);
}
}
return recordKeyMap;
}
@Override
public Map> getSecondaryIndexRecords(List keys, String partitionName) {
if (keys.isEmpty()) {
return Collections.emptyMap();
}
return getRecordsByKeyPrefixes(keys, partitionName, false).map(
record -> {
if (!record.getData().isDeleted()) {
String recordKey = SecondaryIndexKeyUtils.getRecordKeyFromSecondaryIndexKey(record.getRecordKey());
String secondaryKey = SecondaryIndexKeyUtils.getSecondaryKeyFromSecondaryIndexKey(record.getRecordKey());
return Pair.of(secondaryKey, recordKey);
}
return null;
})
.filter(Objects::nonNull)
.collectAsList()
.stream()
.collect(Collectors.groupingBy(Pair::getKey, Collectors.mapping(Pair::getValue, Collectors.toSet())));
}
private Map> fetchBaseFileAllRecordsByPayloadForSecIndex(HoodieSeekingFileReader reader, Set keySet, String partitionName) throws IOException {
if (reader == null) {
// No base file at all
return Collections.emptyMap();
}
ClosableIterator> recordIterator = reader.getRecordIterator();
return toStream(recordIterator).map(record -> {
GenericRecord data = (GenericRecord) record.getData();
return composeRecord(data, partitionName);
}).filter(record -> {
return keySet.contains(SecondaryIndexKeyUtils.getRecordKeyFromSecondaryIndexKey(record.getRecordKey()));
}).collect(Collectors.toMap(record -> {
return SecondaryIndexKeyUtils.getRecordKeyFromSecondaryIndexKey(record.getRecordKey());
}, record -> record));
}
}