/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.table.view;
import org.apache.hudi.common.bootstrap.index.BootstrapIndex;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.BootstrapBaseFileMapping;
import org.apache.hudi.common.model.BootstrapFileMapping;
import org.apache.hudi.common.model.CompactionOperation;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.ClusteringUtils;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS;
/**
* Common thread-safe implementation for multiple TableFileSystemView implementations.
* Provides uniform handling of:
*
* - loading file-system views from the underlying file-system;
* - pending compaction operations and changing file-system views based on them;
* - thread-safety in loading and managing file-system views for this table;
* - resetting file-system views.
*
* The actual mechanism of fetching file slices from different view storages is delegated to sub-classes.
*/
public abstract class AbstractTableFileSystemView implements SyncableFileSystemView, Serializable {
private static final Logger LOG = LoggerFactory.getLogger(AbstractTableFileSystemView.class);
protected HoodieTableMetaClient metaClient;
// This is the commits timeline that will be visible for all views extending this view
// This is nothing but the write timeline, which contains both ingestion and compaction (major and minor) writers.
private HoodieTimeline visibleCommitsAndCompactionTimeline;
// Used to concurrently load and populate partition views
private final ConcurrentHashMap<String, Boolean> addedPartitions = new ConcurrentHashMap<>(4096);
// Locks to control concurrency. Sync operations use write-lock blocking all fetch operations.
// For the common-case, we allow concurrent read of single or multiple partitions
private final ReentrantReadWriteLock globalLock = new ReentrantReadWriteLock();
protected final ReadLock readLock = globalLock.readLock();
protected final WriteLock writeLock = globalLock.writeLock();
private BootstrapIndex bootstrapIndex;
private String getPartitionPathFor(HoodieBaseFile baseFile) {
return FSUtils.getRelativePartitionPath(metaClient.getBasePathV2(), baseFile.getHadoopPath().getParent());
}
/**
* Initialize the view.
*/
protected void init(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline) {
this.metaClient = metaClient;
refreshTimeline(visibleActiveTimeline);
resetFileGroupsReplaced(visibleCommitsAndCompactionTimeline);
this.bootstrapIndex = BootstrapIndex.getBootstrapIndex(metaClient);
// Load Pending Compaction Operations
resetPendingCompactionOperations(CompactionUtils.getAllPendingCompactionOperations(metaClient).values().stream()
.map(e -> Pair.of(e.getKey(), CompactionOperation.convertFromAvroRecordInstance(e.getValue()))));
// Load Pending LogCompaction Operations.
resetPendingLogCompactionOperations(CompactionUtils.getAllPendingLogCompactionOperations(metaClient).values().stream()
.map(e -> Pair.of(e.getKey(), CompactionOperation.convertFromAvroRecordInstance(e.getValue()))));
resetBootstrapBaseFileMapping(Stream.empty());
resetFileGroupsInPendingClustering(ClusteringUtils.getAllFileGroupsInPendingClusteringPlans(metaClient));
}
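// Illustrative usage sketch (assumption: a concrete subclass such as HoodieTableFileSystemView
// whose constructor takes a meta-client and a completed timeline and funnels into init() above):
//
//   HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
//       .setConf(hadoopConf).setBasePath("/tmp/hudi_table").build();
//   HoodieTimeline completedTimeline =
//       metaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
//   HoodieTableFileSystemView view = new HoodieTableFileSystemView(metaClient, completedTimeline);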
/**
* Refresh commits timeline.
*
* @param visibleActiveTimeline Visible Active Timeline
*/
protected void refreshTimeline(HoodieTimeline visibleActiveTimeline) {
this.visibleCommitsAndCompactionTimeline = visibleActiveTimeline.getWriteTimeline();
}
/**
* Adds the provided statuses into the file system view, and also caches it inside this object.
*/
public List<HoodieFileGroup> addFilesToView(FileStatus[] statuses) {
HoodieTimer timer = HoodieTimer.start();
List<HoodieFileGroup> fileGroups = buildFileGroups(statuses, visibleCommitsAndCompactionTimeline, true);
long fgBuildTimeTakenMs = timer.endTimer();
timer.startTimer();
// Group by partition for efficient updates for both InMemory and DiskBased structures.
fileGroups.stream().collect(Collectors.groupingBy(HoodieFileGroup::getPartitionPath)).forEach((partition, value) -> {
if (!isPartitionAvailableInStore(partition)) {
if (bootstrapIndex.useIndex()) {
try (BootstrapIndex.IndexReader reader = bootstrapIndex.createReader()) {
LOG.info("Bootstrap Index available for partition " + partition);
List<BootstrapFileMapping> sourceFileMappings =
reader.getSourceFileMappingForPartition(partition);
addBootstrapBaseFileMapping(sourceFileMappings.stream()
.map(s -> new BootstrapBaseFileMapping(new HoodieFileGroupId(s.getPartitionPath(),
s.getFileId()), s.getBootstrapFileStatus())));
}
}
storePartitionView(partition, value);
}
});
long storePartitionsTs = timer.endTimer();
LOG.debug("addFilesToView: NumFiles=" + statuses.length + ", NumFileGroups=" + fileGroups.size()
+ ", FileGroupsCreationTime=" + fgBuildTimeTakenMs
+ ", StoreTimeTaken=" + storePartitionsTs);
return fileGroups;
}
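// Example (sketch): priming the view with files listed straight from storage; the partition
// value is illustrative and "view" is any concrete subclass instance:
//
//   FileStatus[] statuses = metaClient.getFs()
//       .listStatus(FSUtils.getPartitionPath(metaClient.getBasePathV2(), "2023/10/01"));
//   List<HoodieFileGroup> groups = view.addFilesToView(statuses);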
/**
* Build FileGroups from passed in file-status.
*/
protected List<HoodieFileGroup> buildFileGroups(FileStatus[] statuses, HoodieTimeline timeline,
boolean addPendingCompactionFileSlice) {
return buildFileGroups(convertFileStatusesToBaseFiles(statuses), convertFileStatusesToLogFiles(statuses), timeline,
addPendingCompactionFileSlice);
}
protected List<HoodieFileGroup> buildFileGroups(Stream<HoodieBaseFile> baseFileStream,
Stream<HoodieLogFile> logFileStream, HoodieTimeline timeline, boolean addPendingCompactionFileSlice) {
Map<Pair<String, String>, List<HoodieBaseFile>> baseFiles =
baseFileStream.collect(Collectors.groupingBy(baseFile -> {
String partitionPathStr = getPartitionPathFor(baseFile);
return Pair.of(partitionPathStr, baseFile.getFileId());
}));
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = logFileStream.collect(Collectors.groupingBy((logFile) -> {
String partitionPathStr =
FSUtils.getRelativePartitionPath(metaClient.getBasePathV2(), logFile.getPath().getParent());
return Pair.of(partitionPathStr, logFile.getFileId());
}));
Set<Pair<String, String>> fileIdSet = new HashSet<>(baseFiles.keySet());
fileIdSet.addAll(logFiles.keySet());
List<HoodieFileGroup> fileGroups = new ArrayList<>();
fileIdSet.forEach(pair -> {
String fileId = pair.getValue();
String partitionPath = pair.getKey();
HoodieFileGroup group = new HoodieFileGroup(partitionPath, fileId, timeline);
if (baseFiles.containsKey(pair)) {
baseFiles.get(pair).forEach(group::addBaseFile);
}
if (logFiles.containsKey(pair)) {
logFiles.get(pair).forEach(group::addLogFile);
}
if (addPendingCompactionFileSlice) {
Option<Pair<String, CompactionOperation>> pendingCompaction =
getPendingCompactionOperationWithInstant(group.getFileGroupId());
if (pendingCompaction.isPresent()) {
// If there is no delta-commit after compaction request, this step would ensure a new file-slice appears
// so that any new ingestion uses the correct base-instant
group.addNewFileSliceAtInstant(pendingCompaction.get().getKey());
}
}
fileGroups.add(group);
});
return fileGroups;
}
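// The grouping above keys every base/log file by (partitionPath, fileId). Illustration with
// example (not real) file names, assuming Hudi's standard naming conventions:
//
//   base file: "abc-123_1-0-1_20231001101010.parquet"  -> fileId "abc-123"
//   log file : ".abc-123_20231001101010.log.1_1-0-1"   -> fileId "abc-123"
//
// Both land in the same HoodieFileGroup even though one is a base file and the other a delta log.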
/**
* Get replaced instant for each file group by looking at all commit instants.
*/
private void resetFileGroupsReplaced(HoodieTimeline timeline) {
HoodieTimer hoodieTimer = HoodieTimer.start();
// for each REPLACE instant, get map of (partitionPath -> deleteFileGroup)
HoodieTimeline replacedTimeline = timeline.getCompletedReplaceTimeline();
Stream<Map.Entry<HoodieFileGroupId, HoodieInstant>> resultStream = replacedTimeline.getInstantsAsStream().flatMap(instant -> {
try {
HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(instant).get(),
HoodieReplaceCommitMetadata.class);
// get replace instant mapping for each partition, fileId
return replaceMetadata.getPartitionToReplaceFileIds().entrySet().stream().flatMap(entry -> entry.getValue().stream().map(e ->
new AbstractMap.SimpleEntry<>(new HoodieFileGroupId(entry.getKey(), e), instant)));
} catch (HoodieIOException ex) {
if (ex.getIOException() instanceof FileNotFoundException) {
// The replace instant may have been removed by the archiver, in which case getInstantDetails
// throws a FileNotFoundException; catch it here and continue.
LOG.warn(ex.getMessage());
return Stream.empty();
} else {
throw ex;
}
} catch (IOException e) {
throw new HoodieIOException("error reading commit metadata for " + instant);
}
});
// Insert-overwrite of the same partition by multiple writers can produce duplicate keys; when
// file-group ids conflict, keep the instant with the greater timestamp.
Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups = resultStream.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue,
(instance1, instance2) -> HoodieTimeline.compareTimestamps(instance1.getTimestamp(), HoodieTimeline.LESSER_THAN, instance2.getTimestamp()) ? instance2 : instance1));
resetReplacedFileGroups(replacedFileGroups);
LOG.info("Took " + hoodieTimer.endTimer() + " ms to read " + replacedTimeline.countInstants() + " instants, "
+ replacedFileGroups.size() + " replaced file groups");
}
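// Worked example of the duplicate-key merge above (timestamps illustrative): if file group fg-1
// is replaced by both instant 20231001101010 and instant 20231002101010, the merge function keeps
// 20231002101010, since compareTimestamps(20231001101010, LESSER_THAN, 20231002101010) is true.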
@Override
public void close() {
try {
writeLock.lock();
this.metaClient = null;
this.visibleCommitsAndCompactionTimeline = null;
clear();
} finally {
writeLock.unlock();
}
}
/**
* Clears the partition Map and reset view states.
*
* NOTE: The logic MUST BE guarded by the write lock.
*/
@Override
public void reset() {
try {
writeLock.lock();
clear();
// Initialize with new Hoodie timeline.
init(metaClient, getTimeline());
} finally {
writeLock.unlock();
}
}
/**
* Clear the resource.
*/
protected void clear() {
addedPartitions.clear();
resetViewState();
bootstrapIndex = null;
}
/**
* Allows all view metadata in file system view storage to be reset by subclasses.
*/
protected abstract void resetViewState();
/**
* Batch loading all the partitions if needed.
*
* @return A list of relative partition paths of all partitions.
*/
private List<String> ensureAllPartitionsLoadedCorrectly() {
ValidationUtils.checkArgument(!isClosed(), "View is already closed");
try {
List<String> formattedPartitionList = getAllPartitionPaths().stream()
.map(this::formatPartitionKey).collect(Collectors.toList());
ensurePartitionsLoadedCorrectly(formattedPartitionList);
return formattedPartitionList;
} catch (IOException e) {
throw new HoodieIOException("Failed to get all partition paths", e);
}
}
/**
* Allows lazily loading the partitions if needed.
*
* @param partitionList list of partitions to be loaded if not present.
*/
private void ensurePartitionsLoadedCorrectly(List<String> partitionList) {
ValidationUtils.checkArgument(!isClosed(), "View is already closed");
Set<String> partitionSet = new HashSet<>();
synchronized (addedPartitions) {
partitionList.forEach(partition -> {
if (!addedPartitions.containsKey(partition) && !isPartitionAvailableInStore(partition)) {
partitionSet.add(partition);
}
});
if (!partitionSet.isEmpty()) {
long beginTs = System.currentTimeMillis();
// Not loaded yet
try {
LOG.debug("Building file system view for partitions: " + partitionSet);
// Pairs of relative partition path and absolute partition path
List<Pair<String, Path>> absolutePartitionPathList = partitionSet.stream()
.map(partition -> Pair.of(
partition, FSUtils.getPartitionPath(metaClient.getBasePathV2(), partition)))
.collect(Collectors.toList());
long beginLsTs = System.currentTimeMillis();
Map<Pair<String, Path>, FileStatus[]> statusesMap =
listPartitions(absolutePartitionPathList);
long endLsTs = System.currentTimeMillis();
LOG.debug("Time taken to list partitions " + partitionSet + " =" + (endLsTs - beginLsTs));
statusesMap.forEach((partitionPair, statuses) -> {
String relativePartitionStr = partitionPair.getLeft();
List<HoodieFileGroup> groups = addFilesToView(statuses);
if (groups.isEmpty()) {
storePartitionView(relativePartitionStr, new ArrayList<>());
}
LOG.debug("#files found in partition (" + relativePartitionStr + ") =" + statuses.length);
});
} catch (IOException e) {
throw new HoodieIOException("Failed to list base files in partitions " + partitionSet, e);
}
long endTs = System.currentTimeMillis();
LOG.debug("Time to load partition " + partitionSet + " =" + (endTs - beginTs));
}
partitionSet.forEach(partition ->
addedPartitions.computeIfAbsent(partition, partitionPathStr -> true)
);
}
}
/**
* @return A list of relative partition paths of all partitions.
* @throws IOException upon error.
*/
protected List<String> getAllPartitionPaths() throws IOException {
throw new HoodieException("Getting all partition paths with file system listing sequentially "
+ "can be very slow. This should not be invoked.");
}
/**
* @param partitionPathList A list of pairs of the relative and absolute paths of the partitions.
* @return all the files from the partitions.
* @throws IOException upon error.
*/
protected Map<Pair<String, Path>, FileStatus[]> listPartitions(
List<Pair<String, Path>> partitionPathList) throws IOException {
Map<Pair<String, Path>, FileStatus[]> fileStatusMap = new HashMap<>();
for (Pair<String, Path> partitionPair : partitionPathList) {
Path absolutePartitionPath = partitionPair.getRight();
try {
fileStatusMap.put(partitionPair, metaClient.getFs().listStatus(absolutePartitionPath));
} catch (IOException e) {
// Create the path if it does not exist already
if (!metaClient.getFs().exists(absolutePartitionPath)) {
metaClient.getFs().mkdirs(absolutePartitionPath);
fileStatusMap.put(partitionPair, new FileStatus[0]);
} else {
// in case the partition path was created by another caller
fileStatusMap.put(partitionPair, metaClient.getFs().listStatus(absolutePartitionPath));
}
}
}
return fileStatusMap;
}
/**
* Allows lazily loading the partitions if needed.
*
* @param partition partition to be loaded if not present
*/
private void ensurePartitionLoadedCorrectly(String partition) {
ValidationUtils.checkArgument(!isClosed(), "View is already closed");
// ensure we list files only once even in the face of concurrency
addedPartitions.computeIfAbsent(partition, (partitionPathStr) -> {
long beginTs = System.currentTimeMillis();
if (!isPartitionAvailableInStore(partitionPathStr)) {
// Not loaded yet
try {
LOG.info("Building file system view for partition (" + partitionPathStr + ")");
Path partitionPath = FSUtils.getPartitionPath(metaClient.getBasePathV2(), partitionPathStr);
long beginLsTs = System.currentTimeMillis();
FileStatus[] statuses = listPartition(partitionPath);
long endLsTs = System.currentTimeMillis();
LOG.debug("#files found in partition (" + partitionPathStr + ") =" + statuses.length + ", Time taken ="
+ (endLsTs - beginLsTs));
List<HoodieFileGroup> groups = addFilesToView(statuses);
if (groups.isEmpty()) {
storePartitionView(partitionPathStr, new ArrayList<>());
}
} catch (IOException e) {
throw new HoodieIOException("Failed to list base files in partition " + partitionPathStr, e);
}
} else {
LOG.debug("View already built for Partition :" + partitionPathStr + ", FOUND is ");
}
long endTs = System.currentTimeMillis();
LOG.debug("Time to load partition (" + partitionPathStr + ") =" + (endTs - beginTs));
return true;
});
}
/**
* Return all the files from the partition.
*
* @param partitionPath The absolute path of the partition
* @throws IOException
*/
protected FileStatus[] listPartition(Path partitionPath) throws IOException {
try {
return metaClient.getFs().listStatus(partitionPath);
} catch (IOException e) {
// Create the path if it does not exist already
if (!metaClient.getFs().exists(partitionPath)) {
metaClient.getFs().mkdirs(partitionPath);
return new FileStatus[0];
} else {
// in case the partition path was created by another caller
return metaClient.getFs().listStatus(partitionPath);
}
}
}
/**
* Helper to convert file-status to base-files.
*
* @param statuses List of File-Status
*/
private Stream<HoodieBaseFile> convertFileStatusesToBaseFiles(FileStatus[] statuses) {
Predicate<FileStatus> roFilePredicate = fileStatus -> fileStatus.getPath().getName()
.contains(metaClient.getTableConfig().getBaseFileFormat().getFileExtension());
return Arrays.stream(statuses).filter(roFilePredicate).map(HoodieBaseFile::new);
}
/**
* Helper to convert file-status to log-files.
*
* @param statuses List of File-Status
*/
private Stream<HoodieLogFile> convertFileStatusesToLogFiles(FileStatus[] statuses) {
Predicate<FileStatus> rtFilePredicate = fileStatus -> {
String fileName = fileStatus.getPath().getName();
Matcher matcher = FSUtils.LOG_FILE_PATTERN.matcher(fileName);
return matcher.find() && fileName.contains(metaClient.getTableConfig().getLogFileFormat().getFileExtension());
};
return Arrays.stream(statuses).filter(rtFilePredicate).map(HoodieLogFile::new);
}
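// Example (sketch) of what the two predicates select, assuming a parquet base-file format and
// the standard log-file format (file names illustrative):
//
//   "abc-123_1-0-1_20231001101010.parquet" -> base file (name contains ".parquet")
//   ".abc-123_20231001101010.log.1_1-0-1"  -> log file (matches FSUtils.LOG_FILE_PATTERN)
//   ".hoodie_partition_metadata"           -> neither; dropped by both predicates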
/**
* With async compaction, it is possible to see partial/complete base-files due to inflight compactions.
* Ignore those base-files.
*
* @param baseFile base File
*/
protected boolean isBaseFileDueToPendingCompaction(HoodieBaseFile baseFile) {
final String partitionPath = getPartitionPathFor(baseFile);
Option<Pair<String, CompactionOperation>> compactionWithInstantTime =
getPendingCompactionOperationWithInstant(new HoodieFileGroupId(partitionPath, baseFile.getFileId()));
return (compactionWithInstantTime.isPresent()) && (null != compactionWithInstantTime.get().getKey())
&& baseFile.getCommitTime().equals(compactionWithInstantTime.get().getKey());
}
/**
* With async clustering, it is possible to see partial/complete base-files due to inflight clustering.
* Ignore those base-files.
*
* @param baseFile base File
*/
protected boolean isBaseFileDueToPendingClustering(HoodieBaseFile baseFile) {
List<String> pendingReplaceInstants =
metaClient.getActiveTimeline().filterPendingReplaceTimeline().getInstantsAsStream().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
return !pendingReplaceInstants.isEmpty() && pendingReplaceInstants.contains(baseFile.getCommitTime());
}
/**
* Returns true if the file-group is under pending compaction and the file-slice's baseInstant matches the
* compaction instant.
*
* @param fileSlice File Slice
*/
protected boolean isFileSliceAfterPendingCompaction(FileSlice fileSlice) {
Option<Pair<String, CompactionOperation>> compactionWithInstantTime =
getPendingCompactionOperationWithInstant(fileSlice.getFileGroupId());
return (compactionWithInstantTime.isPresent())
&& fileSlice.getBaseInstantTime().equals(compactionWithInstantTime.get().getKey());
}
/**
* With async compaction, it is possible to see partial/complete base-files due to inflight compactions.
* Filter out such base-files from the file-slice.
*
* @param fileSlice File Slice
* @param includeEmptyFileSlice include empty file-slice
*/
protected Stream<FileSlice> filterBaseFileAfterPendingCompaction(FileSlice fileSlice, boolean includeEmptyFileSlice) {
if (isFileSliceAfterPendingCompaction(fileSlice)) {
LOG.debug("File Slice (" + fileSlice + ") is in pending compaction");
// Base file is filtered out of the file-slice as the corresponding compaction
// instant has not completed yet.
FileSlice transformed = new FileSlice(fileSlice.getPartitionPath(), fileSlice.getBaseInstantTime(), fileSlice.getFileId());
fileSlice.getLogFiles().forEach(transformed::addLogFile);
if (transformed.isEmpty() && !includeEmptyFileSlice) {
return Stream.of();
}
return Stream.of(transformed);
}
return Stream.of(fileSlice);
}
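// Example: with a compaction scheduled at instant t3 for this file group, the scheduled slice
// may look like (base@t3 written by the inflight compaction, logs l1..ln). The method above
// returns it as (no base, logs l1..ln) so readers never see the uncommitted base file; with
// includeEmptyFileSlice=false, a slice left with neither base nor logs is dropped entirely.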
protected HoodieFileGroup addBootstrapBaseFileIfPresent(HoodieFileGroup fileGroup) {
boolean hasBootstrapBaseFile = fileGroup.getAllFileSlices()
.anyMatch(fs -> fs.getBaseInstantTime().equals(METADATA_BOOTSTRAP_INSTANT_TS));
if (hasBootstrapBaseFile) {
HoodieFileGroup newFileGroup = new HoodieFileGroup(fileGroup);
newFileGroup.getAllFileSlices().filter(fs -> fs.getBaseInstantTime().equals(METADATA_BOOTSTRAP_INSTANT_TS))
.forEach(fs -> fs.setBaseFile(
addBootstrapBaseFileIfPresent(fs.getFileGroupId(), fs.getBaseFile().get())));
return newFileGroup;
}
return fileGroup;
}
protected FileSlice addBootstrapBaseFileIfPresent(FileSlice fileSlice) {
if (fileSlice.getBaseInstantTime().equals(METADATA_BOOTSTRAP_INSTANT_TS)) {
FileSlice copy = new FileSlice(fileSlice);
copy.getBaseFile().ifPresent(dataFile -> {
Option<BootstrapBaseFileMapping> edf = getBootstrapBaseFile(copy.getFileGroupId());
edf.ifPresent(e -> dataFile.setBootstrapBaseFile(e.getBootstrapBaseFile()));
});
return copy;
}
return fileSlice;
}
protected HoodieBaseFile addBootstrapBaseFileIfPresent(HoodieFileGroupId fileGroupId, HoodieBaseFile baseFile) {
if (baseFile.getCommitTime().equals(METADATA_BOOTSTRAP_INSTANT_TS)) {
HoodieBaseFile copy = new HoodieBaseFile(baseFile);
Option<BootstrapBaseFileMapping> edf = getBootstrapBaseFile(fileGroupId);
edf.ifPresent(e -> copy.setBootstrapBaseFile(e.getBootstrapBaseFile()));
return copy;
}
return baseFile;
}
@Override
public final Stream<Pair<String, CompactionOperation>> getPendingCompactionOperations() {
try {
readLock.lock();
return fetchPendingCompactionOperations();
} finally {
readLock.unlock();
}
}
public final List<Path> getPartitionPaths() {
try {
readLock.lock();
return fetchAllStoredFileGroups()
.filter(fg -> !isFileGroupReplaced(fg))
.map(HoodieFileGroup::getPartitionPath)
.distinct()
.map(name -> name.isEmpty() ? metaClient.getBasePathV2() : new Path(metaClient.getBasePathV2(), name))
.collect(Collectors.toList());
} finally {
readLock.unlock();
}
}
@Override
public final Stream<Pair<String, CompactionOperation>> getPendingLogCompactionOperations() {
try {
readLock.lock();
return fetchPendingLogCompactionOperations();
} finally {
readLock.unlock();
}
}
@Override
public final Stream<HoodieBaseFile> getLatestBaseFiles(String partitionStr) {
try {
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
return fetchLatestBaseFiles(partitionPath)
.filter(df -> !isFileGroupReplaced(partitionPath, df.getFileId()))
.map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, df.getFileId()), df));
} finally {
readLock.unlock();
}
}
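// Example (sketch): read-optimized listing of the latest base files of one partition; the
// partition value is illustrative:
//
//   view.getLatestBaseFiles("2023/10/01")
//       .forEach(baseFile -> System.out.println(baseFile.getPath()));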
@Override
public final Stream<HoodieBaseFile> getLatestBaseFiles() {
try {
readLock.lock();
return fetchLatestBaseFiles();
} finally {
readLock.unlock();
}
}
@Override
public final Stream<HoodieBaseFile> getLatestBaseFilesBeforeOrOn(String partitionStr, String maxCommitTime) {
try {
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
return getLatestBaseFilesBeforeOrOnFromCache(partitionPath, maxCommitTime);
} finally {
readLock.unlock();
}
}
@Override
public final Map<String, Stream<HoodieBaseFile>> getAllLatestBaseFilesBeforeOrOn(String maxCommitTime) {
try {
readLock.lock();
List<String> formattedPartitionList = ensureAllPartitionsLoadedCorrectly();
return formattedPartitionList.stream().collect(Collectors.toMap(
Function.identity(),
partitionPath -> getLatestBaseFilesBeforeOrOnFromCache(partitionPath, maxCommitTime)
));
} finally {
readLock.unlock();
}
}
private Stream<HoodieBaseFile> getLatestBaseFilesBeforeOrOnFromCache(String partitionPath, String maxCommitTime) {
return fetchAllStoredFileGroups(partitionPath)
.filter(fileGroup -> !isFileGroupReplacedBeforeOrOn(fileGroup.getFileGroupId(), maxCommitTime))
.map(fileGroup -> Option.fromJavaOptional(fileGroup.getAllBaseFiles()
.filter(baseFile -> HoodieTimeline.compareTimestamps(baseFile.getCommitTime(), HoodieTimeline.LESSER_THAN_OR_EQUALS, maxCommitTime
))
.filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df)).findFirst()))
.filter(Option::isPresent).map(Option::get)
.map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, df.getFileId()), df));
}
@Override
public final Option<HoodieBaseFile> getBaseFileOn(String partitionStr, String instantTime, String fileId) {
try {
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
if (isFileGroupReplacedBeforeOrOn(new HoodieFileGroupId(partitionPath, fileId), instantTime)) {
return Option.empty();
} else {
return fetchHoodieFileGroup(partitionPath, fileId).map(fileGroup -> fileGroup.getAllBaseFiles()
.filter(baseFile -> HoodieTimeline.compareTimestamps(baseFile.getCommitTime(), HoodieTimeline.EQUALS,
instantTime)).filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df)).findFirst().orElse(null))
.map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, fileId), df));
}
} finally {
readLock.unlock();
}
}
/**
* Get Latest base file for a partition and file-Id.
*/
@Override
public final Option<HoodieBaseFile> getLatestBaseFile(String partitionStr, String fileId) {
try {
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
if (isFileGroupReplaced(partitionPath, fileId)) {
return Option.empty();
} else {
return fetchLatestBaseFile(partitionPath, fileId)
.map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, fileId), df));
}
} finally {
readLock.unlock();
}
}
@Override
public final Stream<HoodieBaseFile> getLatestBaseFilesInRange(List<String> commitsToReturn) {
try {
readLock.lock();
return fetchAllStoredFileGroups()
.filter(fileGroup -> !isFileGroupReplacedBeforeAny(fileGroup.getFileGroupId(), commitsToReturn))
.map(fileGroup -> Pair.of(fileGroup.getFileGroupId(), Option.fromJavaOptional(
fileGroup.getAllBaseFiles().filter(baseFile -> commitsToReturn.contains(baseFile.getCommitTime())
&& !isBaseFileDueToPendingCompaction(baseFile) && !isBaseFileDueToPendingClustering(baseFile)).findFirst()))).filter(p -> p.getValue().isPresent())
.map(p -> addBootstrapBaseFileIfPresent(p.getKey(), p.getValue().get()));
} finally {
readLock.unlock();
}
}
@Override
public Void loadAllPartitions() {
try {
readLock.lock();
ensureAllPartitionsLoadedCorrectly();
return null;
} finally {
readLock.unlock();
}
}
@Override
public final Stream<HoodieBaseFile> getAllBaseFiles(String partitionStr) {
try {
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
return fetchAllBaseFiles(partitionPath)
.filter(df -> !isFileGroupReplaced(partitionPath, df.getFileId()))
.filter(df -> visibleCommitsAndCompactionTimeline.containsOrBeforeTimelineStarts(df.getCommitTime()))
.filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df))
.map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, df.getFileId()), df));
} finally {
readLock.unlock();
}
}
@Override
public final Stream<FileSlice> getLatestFileSlices(String partitionStr) {
try {
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
return fetchLatestFileSlices(partitionPath)
.filter(slice -> !isFileGroupReplaced(slice.getFileGroupId()))
.flatMap(slice -> this.filterBaseFileAfterPendingCompaction(slice, true))
.map(this::addBootstrapBaseFileIfPresent);
} finally {
readLock.unlock();
}
}
/**
* Get Latest File Slice for a given fileId in a given partition.
*/
@Override
public final Option<FileSlice> getLatestFileSlice(String partitionStr, String fileId) {
try {
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
if (isFileGroupReplaced(partitionPath, fileId)) {
return Option.empty();
} else {
Option<FileSlice> fs = fetchLatestFileSlice(partitionPath, fileId);
if (!fs.isPresent()) {
return Option.empty();
}
return Option.ofNullable(filterBaseFileAfterPendingCompaction(fs.get(), true).map(this::addBootstrapBaseFileIfPresent).findFirst().orElse(null));
}
} finally {
readLock.unlock();
}
}
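// Example (sketch): fetching the latest slice for one file group and walking its log files
// (partition and fileId values are illustrative):
//
//   view.getLatestFileSlice("2023/10/01", "abc-123")
//       .ifPresent(slice -> slice.getLogFiles()
//           .forEach(logFile -> System.out.println(logFile.getPath())));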
@Override
public final Stream<FileSlice> getLatestUnCompactedFileSlices(String partitionStr) {
try {
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
return fetchAllStoredFileGroups(partitionPath)
.filter(fg -> !isFileGroupReplaced(fg.getFileGroupId()))
.map(fileGroup -> {
FileSlice fileSlice = fileGroup.getLatestFileSlice().get();
// if the file-group is under compaction, pick the latest before compaction instant time.
Option<Pair<String, CompactionOperation>> compactionWithInstantPair =
getPendingCompactionOperationWithInstant(fileSlice.getFileGroupId());
if (compactionWithInstantPair.isPresent()) {
String compactionInstantTime = compactionWithInstantPair.get().getLeft();
return fileGroup.getLatestFileSliceBefore(compactionInstantTime);
}
return Option.of(fileSlice);
}).map(Option::get).map(this::addBootstrapBaseFileIfPresent);
} finally {
readLock.unlock();
}
}
@Override
public final Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionStr, String maxCommitTime,
boolean includeFileSlicesInPendingCompaction) {
try {
readLock.lock();
String partitionPath = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partitionPath);
Stream<Stream<FileSlice>> allFileSliceStream = fetchAllStoredFileGroups(partitionPath)
.filter(slice -> !isFileGroupReplacedBeforeOrOn(slice.getFileGroupId(), maxCommitTime))
.map(fg -> fg.getAllFileSlicesBeforeOn(maxCommitTime));
if (includeFileSlicesInPendingCompaction) {
return allFileSliceStream.map(sliceStream -> sliceStream.flatMap(slice -> this.filterBaseFileAfterPendingCompaction(slice, false)))
.map(sliceStream -> Option.fromJavaOptional(sliceStream.findFirst())).filter(Option::isPresent).map(Option::get)
.map(this::addBootstrapBaseFileIfPresent);
} else {
return allFileSliceStream
.map(sliceStream ->
Option.fromJavaOptional(sliceStream
.filter(slice -> !isPendingCompactionScheduledForFileId(slice.getFileGroupId()))
.filter(slice -> !slice.isEmpty())
.findFirst()))
.filter(Option::isPresent).map(Option::get).map(this::addBootstrapBaseFileIfPresent);
}
} finally {
readLock.unlock();
}
}
@Override
public final Map<String, Stream<FileSlice>> getAllLatestFileSlicesBeforeOrOn(String maxCommitTime) {
try {
readLock.lock();
List<String> formattedPartitionList = ensureAllPartitionsLoadedCorrectly();
return formattedPartitionList.stream().collect(Collectors.toMap(
Function.identity(),
partitionPath -> fetchAllStoredFileGroups(partitionPath)
.filter(slice -> !isFileGroupReplacedBeforeOrOn(slice.getFileGroupId(), maxCommitTime))
.map(fg -> fg.getAllFileSlicesBeforeOn(maxCommitTime))
.map(sliceStream -> sliceStream.flatMap(slice -> this.filterBaseFileAfterPendingCompaction(slice, false)))
.map(sliceStream -> Option.fromJavaOptional(sliceStream.findFirst())).filter(Option::isPresent).map(Option::get)
.map(this::addBootstrapBaseFileIfPresent)
));
} finally {
readLock.unlock();
}
}
@Override
public final Stream<FileSlice> getLatestMergedFileSlicesBeforeOrOn(String partitionStr, String maxInstantTime) {
try {
readLock.lock();
String partition = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partition);
return fetchAllStoredFileGroups(partition)
.filter(fg -> !isFileGroupReplacedBeforeOrOn(fg.getFileGroupId(), maxInstantTime))
.map(fileGroup -> {
Option<FileSlice> fileSlice = fileGroup.getLatestFileSliceBeforeOrOn(maxInstantTime);
// if the file-group is under compaction, pick the latest file slice before the compaction instant time.
if (fileSlice.isPresent()) {
fileSlice = Option.of(fetchMergedFileSlice(fileGroup, fileSlice.get()));
}
return fileSlice;
}).filter(Option::isPresent).map(Option::get).map(this::addBootstrapBaseFileIfPresent);
} finally {
readLock.unlock();
}
}
/**
* Stream all "merged" file-slices before or on an instant time for a MERGE_ON_READ table
* with an index that can index log files (which means it writes pure logs first).
*
* In a streaming-read scenario, for better reading efficiency, the user can choose to skip the
* base files that are produced by compaction. That is to say, we allow the users to consume only
* from these partitioned log files; the log files preserve the record sequence just like a normal
* message queue.
*
* NOTE: only the local view is supported.
*
* @param partitionStr Partition Path
* @param maxInstantTime Max Instant Time
*/
public final Stream<FileSlice> getAllLogsMergedFileSliceBeforeOrOn(String partitionStr, String maxInstantTime) {
try {
readLock.lock();
String partition = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partition);
return fetchAllStoredFileGroups(partition)
.filter(fg -> !isFileGroupReplacedBeforeOrOn(fg.getFileGroupId(), maxInstantTime))
.map(fileGroup -> fetchAllLogsMergedFileSlice(fileGroup, maxInstantTime))
.filter(Option::isPresent).map(Option::get).map(this::addBootstrapBaseFileIfPresent);
} finally {
readLock.unlock();
}
}
@Override
public final Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn) {
try {
readLock.lock();
return fetchLatestFileSliceInRange(commitsToReturn)
.filter(slice -> !isFileGroupReplacedBeforeAny(slice.getFileGroupId(), commitsToReturn))
.map(this::addBootstrapBaseFileIfPresent);
} finally {
readLock.unlock();
}
}
@Override
public final Stream<FileSlice> getAllFileSlices(String partitionStr) {
try {
readLock.lock();
String partition = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partition);
return fetchAllFileSlices(partition).filter(slice -> !isFileGroupReplaced(slice.getFileGroupId())).map(this::addBootstrapBaseFileIfPresent);
} finally {
readLock.unlock();
}
}
/**
* Ensure there is consistency in handling the trailing slash in partition-paths: always trim it, as is
* done in other places.
*/
private String formatPartitionKey(String partitionStr) {
return partitionStr.endsWith("/") ? partitionStr.substring(0, partitionStr.length() - 1) : partitionStr;
}
@Override
public final Stream<HoodieFileGroup> getAllFileGroups(String partitionStr) {
return getAllFileGroupsIncludingReplaced(partitionStr).filter(fg -> !isFileGroupReplaced(fg));
}
private Stream<HoodieFileGroup> getAllFileGroupsIncludingReplaced(final String partitionStr) {
try {
readLock.lock();
// Ensure there is consistency in handling trailing slash in partition-path. Always trim it which is what is done
// in other places.
String partition = formatPartitionKey(partitionStr);
ensurePartitionLoadedCorrectly(partition);
return fetchAllStoredFileGroups(partition).map(this::addBootstrapBaseFileIfPresent);
} finally {
readLock.unlock();
}
}
@Override
public Stream<HoodieFileGroup> getReplacedFileGroupsBeforeOrOn(String maxCommitTime, String partitionPath) {
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplacedBeforeOrOn(fg.getFileGroupId(), maxCommitTime));
}
@Override
public Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath) {
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplacedBefore(fg.getFileGroupId(), maxCommitTime));
}
@Override
public Stream<HoodieFileGroup> getReplacedFileGroupsAfterOrOn(String minCommitTime, String partitionPath) {
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplacedAfterOrOn(fg.getFileGroupId(), minCommitTime));
}
@Override
public Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath) {
return getAllFileGroupsIncludingReplaced(partitionPath).filter(fg -> isFileGroupReplaced(fg.getFileGroupId()));
}
@Override
public final Stream<Pair<HoodieFileGroupId, HoodieInstant>> getFileGroupsInPendingClustering() {
try {
readLock.lock();
return fetchFileGroupsInPendingClustering();
} finally {
readLock.unlock();
}
}
// Fetch APIs to be implemented by concrete sub-classes
/**
* Check if there is an outstanding compaction scheduled for this file.
*
* @param fgId File-Group Id
* @return true if there is a pending compaction, false otherwise
*/
protected abstract boolean isPendingCompactionScheduledForFileId(HoodieFileGroupId fgId);
/**
* resets the pending compaction operation and overwrite with the new list.
*
* @param operations Pending Compaction Operations
*/
abstract void resetPendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations);
/**
* Add pending compaction operations to store.
*
* @param operations Pending compaction operations to be added
*/
abstract void addPendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations);
/**
* Remove pending compaction operations from store.
*
* @param operations Pending compaction operations to be removed
*/
abstract void removePendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations);
/**
* Check if there is an outstanding log compaction scheduled for this file.
*
* @param fgId File-Group Id
* @return true if there is a pending log compaction, false otherwise
*/
protected abstract boolean isPendingLogCompactionScheduledForFileId(HoodieFileGroupId fgId);
/**
* resets the pending Log compaction operation and overwrite with the new list.
*
* @param operations Pending Log Compaction Operations
*/
abstract void resetPendingLogCompactionOperations(Stream<Pair<String, CompactionOperation>> operations);
/**
* Add pending Log compaction operations to store.
*
* @param operations Pending Log compaction operations to be added
*/
abstract void addPendingLogCompactionOperations(Stream<Pair<String, CompactionOperation>> operations);
/**
* Remove pending Log compaction operations from store.
*
* @param operations Pending Log compaction operations to be removed
*/
abstract void removePendingLogCompactionOperations(Stream<Pair<String, CompactionOperation>> operations);
/**
* Check if there is an outstanding clustering operation (requested/inflight) scheduled for this file.
*
* @param fgId File-Group Id
* @return true if there is a pending clustering, false otherwise
*/
protected abstract boolean isPendingClusteringScheduledForFileId(HoodieFileGroupId fgId);
/**
* Get pending clustering instant time for specified file group. Return None if file group is not in pending
* clustering operation.
*/
protected abstract Option<HoodieInstant> getPendingClusteringInstant(final HoodieFileGroupId fileGroupId);
/**
* Fetch all file groups in pending clustering.
*/
protected abstract Stream<Pair<HoodieFileGroupId, HoodieInstant>> fetchFileGroupsInPendingClustering();
/**
* resets the pending clustering operation and overwrite with the new list.
*/
abstract void resetFileGroupsInPendingClustering(Map<HoodieFileGroupId, HoodieInstant> fgIdToInstantMap);
/**
* Add metadata for file groups in pending clustering operations to the view.
*/
abstract void addFileGroupsInPendingClustering(Stream<Pair<HoodieFileGroupId, HoodieInstant>> fileGroups);
/**
* Remove metadata for file groups in pending clustering operations from the view.
*/
abstract void removeFileGroupsInPendingClustering(Stream<Pair<HoodieFileGroupId, HoodieInstant>> fileGroups);
/**
* Return pending compaction operation for a file-group.
*
* @param fileGroupId File-Group Id
*/
protected abstract Option<Pair<String, CompactionOperation>> getPendingCompactionOperationWithInstant(
HoodieFileGroupId fileGroupId);
/**
* Return pending Log compaction operation for a file-group.
*
* @param fileGroupId File-Group Id
*/
protected abstract Option<Pair<String, CompactionOperation>> getPendingLogCompactionOperationWithInstant(
HoodieFileGroupId fileGroupId);
/**
* Fetch all pending compaction operations.
*/
abstract Stream<Pair<String, CompactionOperation>> fetchPendingCompactionOperations();
/**
* Fetch all pending log compaction operations.
*/
abstract Stream<Pair<String, CompactionOperation>> fetchPendingLogCompactionOperations();
/**
* Check if there is a bootstrap base file present for this file.
*
* @param fgId File-Group Id
* @return true if there is associated bootstrap base-file, false otherwise
*/
protected abstract boolean isBootstrapBaseFilePresentForFileId(HoodieFileGroupId fgId);
/**
* Resets the bootstrap base file stream and overwrite with the new list.
*
* @param bootstrapBaseFileStream bootstrap Base File Stream
*/
abstract void resetBootstrapBaseFileMapping(Stream<BootstrapBaseFileMapping> bootstrapBaseFileStream);
/**
* Add bootstrap base file stream to store.
*
* @param bootstrapBaseFileStream bootstrap Base File Stream to be added
*/
abstract void addBootstrapBaseFileMapping(Stream<BootstrapBaseFileMapping> bootstrapBaseFileStream);
/**
* Remove bootstrap base file stream from store.
*
* @param bootstrapBaseFileStream bootstrap Base File Stream to be removed
*/
abstract void removeBootstrapBaseFileMapping(Stream<BootstrapBaseFileMapping> bootstrapBaseFileStream);
/**
* Return the bootstrap base-file mapping for a file-group, if present.
*
* @param fileGroupId File-Group Id
*/
protected abstract Option<BootstrapBaseFileMapping> getBootstrapBaseFile(HoodieFileGroupId fileGroupId);
/**
* Fetch all bootstrap data files.
*/
abstract Stream<BootstrapBaseFileMapping> fetchBootstrapBaseFiles();
/**
* Checks if partition is pre-loaded and available in store.
*
* @param partitionPath Partition Path
*/
abstract boolean isPartitionAvailableInStore(String partitionPath);
/**
* Add a complete partition view to store.
*
* @param partitionPath Partition Path
* @param fileGroups File Groups for the partition path
*/
abstract void storePartitionView(String partitionPath, List<HoodieFileGroup> fileGroups);
/**
* Fetch all file-groups stored for a partition-path.
*
* @param partitionPath Partition path for which the file-groups need to be retrieved.
* @return file-group stream
*/
abstract Stream<HoodieFileGroup> fetchAllStoredFileGroups(String partitionPath);
/**
* Fetch all Stored file-groups across all partitions loaded.
*
* @return file-group stream
*/
abstract Stream<HoodieFileGroup> fetchAllStoredFileGroups();
/**
* Track instant time for file groups replaced.
*/
protected abstract void resetReplacedFileGroups(final Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups);
/**
* Track instant time for new file groups replaced.
*/
protected abstract void addReplacedFileGroups(final Map<HoodieFileGroupId, HoodieInstant> replacedFileGroups);
/**
* Remove file groups that are replaced in any of the specified instants.
*/
protected abstract void removeReplacedFileIdsAtInstants(Set<String> instants);
/**
* Get the instant at which the given file group was replaced, if any.
*/
protected abstract Option<HoodieInstant> getReplaceInstant(final HoodieFileGroupId fileGroupId);
/**
* Check if the view is already closed.
*/
abstract boolean isClosed();
/**
* Default implementation for fetching latest file-slice in commit range.
*
* @param commitsToReturn Commits
*/
Stream<FileSlice> fetchLatestFileSliceInRange(List<String> commitsToReturn) {
return fetchAllStoredFileGroups().map(fileGroup -> fileGroup.getLatestFileSliceInRange(commitsToReturn))
.map(Option::get).map(this::addBootstrapBaseFileIfPresent);
}
/**
* Default implementation for fetching all file-slices for a partition-path.
*
* @param partitionPath Partition path
* @return file-slice stream
*/
Stream<FileSlice> fetchAllFileSlices(String partitionPath) {
return fetchAllStoredFileGroups(partitionPath).map(this::addBootstrapBaseFileIfPresent)
.flatMap(HoodieFileGroup::getAllFileSlices);
}
/**
* Default implementation for fetching latest base-files for the partition-path.
*/
public Stream<HoodieBaseFile> fetchLatestBaseFiles(final String partitionPath) {
return fetchAllStoredFileGroups(partitionPath)
.filter(fg -> !isFileGroupReplaced(fg))
.map(fg -> Pair.of(fg.getFileGroupId(), getLatestBaseFile(fg)))
.filter(p -> p.getValue().isPresent())
.map(p -> addBootstrapBaseFileIfPresent(p.getKey(), p.getValue().get()));
}
protected Option<HoodieBaseFile> getLatestBaseFile(HoodieFileGroup fileGroup) {
return Option
.fromJavaOptional(fileGroup.getAllBaseFiles().filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df)).findFirst());
}
/**
* Fetch latest base-files across all partitions.
*/
private Stream<HoodieBaseFile> fetchLatestBaseFiles() {
return fetchAllStoredFileGroups()
.filter(fg -> !isFileGroupReplaced(fg))
.map(fg -> Pair.of(fg.getFileGroupId(), getLatestBaseFile(fg)))
.filter(p -> p.getValue().isPresent())
.map(p -> addBootstrapBaseFileIfPresent(p.getKey(), p.getValue().get()));
}
/**
* Default implementation for fetching all base-files for a partition.
*
* @param partitionPath partition-path
*/
Stream<HoodieBaseFile> fetchAllBaseFiles(String partitionPath) {
return fetchAllStoredFileGroups(partitionPath).flatMap(HoodieFileGroup::getAllBaseFiles);
}
/**
* Default implementation for fetching file-group.
*/
Option<HoodieFileGroup> fetchHoodieFileGroup(String partitionPath, String fileId) {
return Option.fromJavaOptional(fetchAllStoredFileGroups(partitionPath)
.filter(fileGroup -> fileGroup.getFileGroupId().getFileId().equals(fileId)).findFirst());
}
/**
* Default implementation for fetching latest file-slices for a partition path.
*/
Stream<FileSlice> fetchLatestFileSlices(String partitionPath) {
return fetchAllStoredFileGroups(partitionPath).map(HoodieFileGroup::getLatestFileSlice).filter(Option::isPresent)
.map(Option::get);
}
/**
* Helper to merge last 2 file-slices. These 2 file-slices do not have compaction done yet.
*
* @param lastSlice Latest File slice for a file-group
* @param penultimateSlice Penultimate file slice for a file-group in commit timeline order
*/
private static FileSlice mergeCompactionPendingFileSlices(FileSlice lastSlice, FileSlice penultimateSlice) {
FileSlice merged = new FileSlice(penultimateSlice.getPartitionPath(), penultimateSlice.getBaseInstantTime(),
penultimateSlice.getFileId());
if (penultimateSlice.getBaseFile().isPresent()) {
merged.setBaseFile(penultimateSlice.getBaseFile().get());
}
// Add Log files from penultimate and last slices
penultimateSlice.getLogFiles().forEach(merged::addLogFile);
lastSlice.getLogFiles().forEach(merged::addLogFile);
return merged;
}
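// Worked example (instants illustrative): with a compaction pending at t3,
//   penultimate slice: base@t1, logs {l1, l2}
//   last slice       : base pending at t3, logs {l3}
// the merged slice is anchored at t1 with base@t1 and logs {l1, l2, l3}, so readers keep
// seeing one complete slice based on the pre-compaction base file.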
/**
* If the file-slice is because of pending compaction instant, this method merges the file-slice with the one before
* the compaction instant time.
*
* @param fileGroup File Group for which the file slice belongs to
* @param fileSlice File Slice which needs to be merged
*/
private FileSlice fetchMergedFileSlice(HoodieFileGroup fileGroup, FileSlice fileSlice) {
// if the file-group is under compaction, pick the latest file slice before the compaction instant time.
Option<Pair<String, CompactionOperation>> compactionOpWithInstant =
getPendingCompactionOperationWithInstant(fileGroup.getFileGroupId());
if (compactionOpWithInstant.isPresent()) {
String compactionInstantTime = compactionOpWithInstant.get().getKey();
if (fileSlice.getBaseInstantTime().equals(compactionInstantTime)) {
Option<FileSlice> prevFileSlice = fileGroup.getLatestFileSliceBefore(compactionInstantTime);
if (prevFileSlice.isPresent()) {
return mergeCompactionPendingFileSlices(fileSlice, prevFileSlice.get());
}
}
}
return fileSlice;
}
/**
* Returns the file slice with all the file slice log files merged.
* CAUTION: the method requires that all the file slices must only contain log files.
*
* @param fileGroup File Group for which the file slice belongs to
* @param maxInstantTime The max instant time
*/
private Option<FileSlice> fetchAllLogsMergedFileSlice(HoodieFileGroup fileGroup, String maxInstantTime) {
List<FileSlice> fileSlices = fileGroup.getAllFileSlicesBeforeOn(maxInstantTime).collect(Collectors.toList());
if (fileSlices.size() == 0) {
return Option.empty();
}
if (fileSlices.size() == 1) {
return Option.of(fileSlices.get(0));
}
final FileSlice latestSlice = fileSlices.get(0);
FileSlice merged = new FileSlice(latestSlice.getPartitionPath(), latestSlice.getBaseInstantTime(),
latestSlice.getFileId());
// add log files from the latest slice to the earliest
fileSlices.forEach(slice -> slice.getLogFiles().forEach(merged::addLogFile));
return Option.of(merged);
}
/**
* Default implementation for fetching latest base-file.
*
* @param partitionPath Partition path
* @param fileId File Id
* @return base File if present
*/
protected Option<HoodieBaseFile> fetchLatestBaseFile(String partitionPath, String fileId) {
return Option.fromJavaOptional(fetchLatestBaseFiles(partitionPath)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
}
/**
* Default implementation for fetching file-slice.
*
* @param partitionPath Partition path
* @param fileId File Id
* @return File Slice if present
*/
public Option<FileSlice> fetchLatestFileSlice(String partitionPath, String fileId) {
return Option
.fromJavaOptional(fetchLatestFileSlices(partitionPath).filter(fs -> fs.getFileId().equals(fileId)).findFirst());
}
private boolean isFileGroupReplaced(String partitionPath, String fileId) {
return isFileGroupReplaced(new HoodieFileGroupId(partitionPath, fileId));
}
private boolean isFileGroupReplaced(HoodieFileGroup fileGroup) {
return isFileGroupReplaced(fileGroup.getFileGroupId());
}
private boolean isFileGroupReplaced(HoodieFileGroupId fileGroup) {
return getReplaceInstant(fileGroup).isPresent();
}
private boolean isFileGroupReplacedBeforeAny(HoodieFileGroupId fileGroupId, List<String> instants) {
return isFileGroupReplacedBeforeOrOn(fileGroupId, instants.stream().max(Comparator.naturalOrder()).get());
}
private boolean isFileGroupReplacedBefore(HoodieFileGroupId fileGroupId, String instant) {
Option<HoodieInstant> hoodieInstantOption = getReplaceInstant(fileGroupId);
if (!hoodieInstantOption.isPresent()) {
return false;
}
return HoodieTimeline.compareTimestamps(instant, GREATER_THAN, hoodieInstantOption.get().getTimestamp());
}
private boolean isFileGroupReplacedBeforeOrOn(HoodieFileGroupId fileGroupId, String instant) {
Option<HoodieInstant> hoodieInstantOption = getReplaceInstant(fileGroupId);
if (!hoodieInstantOption.isPresent()) {
return false;
}
return HoodieTimeline.compareTimestamps(instant, GREATER_THAN_OR_EQUALS, hoodieInstantOption.get().getTimestamp());
}
private boolean isFileGroupReplacedAfterOrOn(HoodieFileGroupId fileGroupId, String instant) {
Option<HoodieInstant> hoodieInstantOption = getReplaceInstant(fileGroupId);
if (!hoodieInstantOption.isPresent()) {
return false;
}
return HoodieTimeline.compareTimestamps(instant, LESSER_THAN_OR_EQUALS, hoodieInstantOption.get().getTimestamp());
}
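// Example of the comparator semantics above (timestamps illustrative): for a file group
// replaced at instant 20231002101010,
//   isFileGroupReplacedBefore(fgId, "20231003101010")     -> true  (replace instant is earlier)
//   isFileGroupReplacedBeforeOrOn(fgId, "20231002101010") -> true  (equality counts)
//   isFileGroupReplacedAfterOrOn(fgId, "20231003101010")  -> false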
@Override
public Option<HoodieInstant> getLastInstant() {
return getTimeline().lastInstant();
}
@Override
public HoodieTimeline getTimeline() {
return visibleCommitsAndCompactionTimeline;
}
/**
* Syncs the file system view from storage to memory. Performs complete reset of file-system
* view. Subsequent partition view calls will load file slices against the latest timeline.
*
* NOTE: The logic MUST BE guarded by the write lock.
*/
@Override
public void sync() {
try {
writeLock.lock();
HoodieTimeline newTimeline = metaClient.reloadActiveTimeline().filterCompletedOrMajorOrMinorCompactionInstants();
clear();
// Initialize with new Hoodie timeline.
init(metaClient, newTimeline);
} finally {
writeLock.unlock();
}
}
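// Example (sketch): after another writer commits, a long-lived reader can re-align its cached
// view with storage before serving fresh queries:
//
//   view.sync();                            // rebuild against the reloaded timeline
//   view.getLatestBaseFiles("2023/10/01");  // now reflects the newly committed files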
/**
* Return only the commits and compaction timeline for building file-groups.
*
* @return {@code HoodieTimeline}
*/
public HoodieTimeline getVisibleCommitsAndCompactionTimeline() {
return visibleCommitsAndCompactionTimeline;
}
}