/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.source;
import org.apache.hudi.common.model.BaseFile;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.cdc.HoodieCDCExtractor;
import org.apache.hudi.common.table.cdc.HoodieCDCFileSplit;
import org.apache.hudi.common.table.cdc.HoodieCDCUtils;
import org.apache.hudi.common.table.log.InstantRange;
import org.apache.hudi.common.table.read.IncrementalQueryAnalyzer;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.InstantComparison;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.configuration.OptionsResolver;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.hudi.sink.partitioner.profile.WriteProfiles;
import org.apache.hudi.source.prune.PartitionPruners;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.storage.StoragePathInfo;
import org.apache.hudi.table.format.cdc.CdcInputSplit;
import org.apache.hudi.table.format.mor.MergeOnReadInputSplit;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.table.types.logical.RowType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Utilities to generate incremental input splits {@link MergeOnReadInputSplit}.
* The input splits are used for streaming and incremental read.
*
* <p>How to generate the input splits:
* <ol>
*   <li>first fetch all the commit metadata for the incremental instants;</li>
*   <li>resolve the incremental commit file paths;</li>
*   <li>filter the full file paths by the required partitions;</li>
*   <li>use the file paths from step 3 as the backing files of the filesystem view.</li>
* </ol>
*
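* <p>A minimal usage sketch (illustrative only; assumes the Flink {@code conf}, the table {@code path},
* the {@code rowType} and a {@code metaClient} are prepared by the caller):
* <pre>{@code
*   IncrementalInputSplits incSplits = IncrementalInputSplits.builder()
*       .conf(conf)
*       .path(path)
*       .rowType(rowType)
*       .maxCompactionMemoryInBytes(maxCompactionMemoryInBytes)
*       .build();
*   IncrementalInputSplits.Result result = incSplits.inputSplits(metaClient, false);
*   if (!result.isEmpty()) {
*     // feed the splits to the downstream input format
*     List<MergeOnReadInputSplit> splits = result.getInputSplits();
*   }
* }</pre>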
*/
public class IncrementalInputSplits implements Serializable {
private static final long serialVersionUID = 1L;
private static final Logger LOG = LoggerFactory.getLogger(IncrementalInputSplits.class);
private final Configuration conf;
private final Path path;
private final RowType rowType;
private final long maxCompactionMemoryInBytes;
// for partition pruning
private final PartitionPruners.PartitionPruner partitionPruner;
// skip compaction
private final boolean skipCompaction;
// skip clustering
private final boolean skipClustering;
// skip insert overwrite
private final boolean skipInsertOverwrite;
private IncrementalInputSplits(
Configuration conf,
Path path,
RowType rowType,
long maxCompactionMemoryInBytes,
@Nullable PartitionPruners.PartitionPruner partitionPruner,
boolean skipCompaction,
boolean skipClustering,
boolean skipInsertOverwrite) {
this.conf = conf;
this.path = path;
this.rowType = rowType;
this.maxCompactionMemoryInBytes = maxCompactionMemoryInBytes;
this.partitionPruner = partitionPruner;
this.skipCompaction = skipCompaction;
this.skipClustering = skipClustering;
this.skipInsertOverwrite = skipInsertOverwrite;
}
/**
* Returns the builder.
*/
public static Builder builder() {
return new Builder();
}
/**
* Returns the incremental input splits.
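*
* <p>The read boundaries are resolved by {@link IncrementalQueryAnalyzer} from the configured
* start/end commits; when {@code cdcEnabled} is true and no full table scan is required, the
* splits are generated from the CDC file splits instead of the regular merge-on-read file slices.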
*
* @param metaClient The meta client
* @param cdcEnabled Whether cdc is enabled
*
* @return The list of incremental input splits or empty if there are no new instants
*/
public Result inputSplits(
HoodieTableMetaClient metaClient,
boolean cdcEnabled) {
IncrementalQueryAnalyzer analyzer = IncrementalQueryAnalyzer.builder()
.metaClient(metaClient)
.startCompletionTime(this.conf.getString(FlinkOptions.READ_START_COMMIT))
.endCompletionTime(this.conf.getString(FlinkOptions.READ_END_COMMIT))
.rangeType(InstantRange.RangeType.CLOSED_CLOSED)
.skipCompaction(skipCompaction)
.skipClustering(skipClustering)
.skipInsertOverwrite(skipInsertOverwrite)
.readCdcFromChangelog(this.conf.getBoolean(FlinkOptions.READ_CDC_FROM_CHANGELOG))
.build();
IncrementalQueryAnalyzer.QueryContext analyzingResult = analyzer.analyze();
if (analyzingResult.isEmpty()) {
LOG.info("No new instant found for the table under path " + path + ", skip reading");
return Result.EMPTY;
}
final HoodieTimeline commitTimeline = analyzingResult.getActiveTimeline();
final boolean startFromEarliest = analyzingResult.isConsumingFromEarliest();
final boolean hasArchivedInstants = !analyzingResult.getArchivedInstants().isEmpty();
// A further premise worth considering: whether the endCommit has been cleaned.
boolean fullTableScan = startFromEarliest || hasArchivedInstants;
// Step1: generates the instant range
// if the specified end commit is archived, still uses the specified timestamp,
// else uses the latest filtered instant time
// (would be the latest instant time if the specified end commit is greater than the latest instant time)
final InstantRange instantRange = analyzingResult.getInstantRange().orElse(null);
// Step2: decides the read end commit
final String endInstant = analyzingResult.getLastInstant();
// Step3: find out the files to read; try to read the files from the commit metadata first,
// falling back to a full table scan if any of the following conditions match:
// 1. some files referenced in the metadata have been deleted;
// 2. the read starts from the earliest commit;
// 3. the start commit is archived;
// 4. the end commit is archived.
Set<String> readPartitions;
final List<StoragePathInfo> fileInfoList;
if (fullTableScan) {
// scans the partitions and files directly.
FileIndex fileIndex = getFileIndex();
readPartitions = new TreeSet<>(fileIndex.getOrBuildPartitionPaths());
if (readPartitions.size() == 0) {
LOG.warn("No partitions found for reading in user provided path.");
return Result.EMPTY;
}
fileInfoList = fileIndex.getFilesInPartitions();
} else {
if (cdcEnabled) {
// case1: cdc change log enabled
List<MergeOnReadInputSplit> inputSplits = getCdcInputSplits(metaClient, instantRange);
return Result.instance(inputSplits, endInstant);
}
// case2: normal incremental read
String tableName = conf.getString(FlinkOptions.TABLE_NAME);
List<HoodieInstant> instants = analyzingResult.getActiveInstants();
List<HoodieCommitMetadata> metadataList = instants.stream()
.map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, commitTimeline))
.collect(Collectors.toList());
readPartitions = getReadPartitions(metadataList);
if (readPartitions.size() == 0) {
LOG.warn("No partitions found for reading in user provided path.");
return Result.EMPTY;
}
List<StoragePathInfo> files = WriteProfiles.getFilesFromMetadata(
path, (org.apache.hadoop.conf.Configuration) metaClient.getStorageConf().unwrap(),
metadataList, metaClient.getTableType(), false);
if (files == null) {
LOG.warn("Found deleted files in metadata, fall back to full table scan.");
// fall back to a full table scan:
// scan the partitions and files directly.
FileIndex fileIndex = getFileIndex();
readPartitions = new TreeSet<>(fileIndex.getOrBuildPartitionPaths());
if (readPartitions.size() == 0) {
LOG.warn("No partitions found for reading in user provided path.");
return Result.EMPTY;
}
fileInfoList = fileIndex.getFilesInPartitions();
} else {
fileInfoList = files;
}
}
if (fileInfoList.size() == 0) {
LOG.warn("No files found for reading in user provided path.");
return Result.EMPTY;
}
List<MergeOnReadInputSplit> inputSplits = getInputSplits(metaClient, commitTimeline,
fileInfoList, readPartitions, endInstant, analyzingResult.getMaxCompletionTime(), instantRange, false);
return Result.instance(inputSplits, endInstant);
}
/**
* Returns the incremental input splits.
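*
* <p>A rough polling sketch for a streaming source (names are illustrative; a real source would
* typically persist the issued offset, e.g. in checkpointed state):
* <pre>{@code
*   String issuedOffset = null; // null on the very first poll
*   while (running) {
*     IncrementalInputSplits.Result result = incSplits.inputSplits(metaClient, issuedOffset, false);
*     if (!result.isEmpty()) {
*       // hand the splits over to the downstream readers, then remember the new offset
*       issuedOffset = result.getOffset();
*     }
*   }
* }</pre>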
*
* @param metaClient The meta client
* @param issuedOffset The last issued offset, only valid in streaming read
* @param cdcEnabled Whether cdc is enabled
*
* @return The list of incremental input splits or empty if there are no new instants
*/
public Result inputSplits(
HoodieTableMetaClient metaClient,
@Nullable String issuedOffset,
boolean cdcEnabled) {
metaClient.reloadActiveTimeline();
IncrementalQueryAnalyzer analyzer = IncrementalQueryAnalyzer.builder()
.metaClient(metaClient)
.startCompletionTime(issuedOffset != null ? issuedOffset : this.conf.getString(FlinkOptions.READ_START_COMMIT))
.endCompletionTime(this.conf.getString(FlinkOptions.READ_END_COMMIT))
.rangeType(issuedOffset != null ? InstantRange.RangeType.OPEN_CLOSED : InstantRange.RangeType.CLOSED_CLOSED)
.skipCompaction(skipCompaction)
.skipClustering(skipClustering)
.skipInsertOverwrite(skipInsertOverwrite)
.readCdcFromChangelog(this.conf.getBoolean(FlinkOptions.READ_CDC_FROM_CHANGELOG))
.limit(OptionsResolver.getReadCommitsLimit(conf))
.build();
IncrementalQueryAnalyzer.QueryContext queryContext = analyzer.analyze();
if (queryContext.isEmpty()) {
LOG.info("No new instant found for the table under path " + path + ", skip reading");
return Result.EMPTY;
}
HoodieTimeline commitTimeline = queryContext.getActiveTimeline();
// get the latest instant that satisfies condition
final String endInstant = queryContext.getLastInstant();
final Option<InstantRange> instantRange = queryContext.getInstantRange();
// the issued offset should be monotonically increasing,
// so fetch the instant offset by completion time
String offsetToIssue = queryContext.getMaxCompletionTime();
if (instantRange.isEmpty()) {
// reading from the earliest, scans the partitions and files directly.
FileIndex fileIndex = getFileIndex();
Set<String> readPartitions = new TreeSet<>(fileIndex.getOrBuildPartitionPaths());
if (readPartitions.size() == 0) {
LOG.warn("No partitions found for reading under path: " + path);
return Result.EMPTY;
}
List<StoragePathInfo> pathInfoList = fileIndex.getFilesInPartitions();
if (pathInfoList.size() == 0) {
LOG.warn("No files found for reading under path: " + path);
return Result.EMPTY;
}
List<MergeOnReadInputSplit> inputSplits = getInputSplits(metaClient, commitTimeline,
pathInfoList, readPartitions, endInstant, offsetToIssue, null, false);
return Result.instance(inputSplits, endInstant, offsetToIssue);
} else {
List<MergeOnReadInputSplit> inputSplits = getIncInputSplits(
metaClient, (org.apache.hadoop.conf.Configuration) metaClient.getStorageConf().unwrap(),
commitTimeline, queryContext, instantRange.get(), endInstant, cdcEnabled);
return Result.instance(inputSplits, endInstant, offsetToIssue);
}
}
/**
* Returns the input splits for streaming incremental read.
*/
private List<MergeOnReadInputSplit> getIncInputSplits(
HoodieTableMetaClient metaClient,
org.apache.hadoop.conf.Configuration hadoopConf,
HoodieTimeline commitTimeline,
IncrementalQueryAnalyzer.QueryContext queryContext,
InstantRange instantRange,
String endInstant,
boolean cdcEnabled) {
// streaming read
if (cdcEnabled) {
// case1: cdc change log enabled
return getCdcInputSplits(metaClient, instantRange);
}
// case2: normal streaming read
String tableName = conf.getString(FlinkOptions.TABLE_NAME);
List<HoodieCommitMetadata> activeMetadataList = queryContext.getActiveInstants().stream()
.map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, commitTimeline)).collect(Collectors.toList());
List<HoodieCommitMetadata> archivedMetadataList = queryContext.getArchivedInstants().stream()
.map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, queryContext.getArchivedTimeline())).collect(Collectors.toList());
if (archivedMetadataList.size() > 0) {
LOG.warn("\n"
+ "--------------------------------------------------------------------------------\n"
+ "---------- caution: the reader has fall behind too much from the writer,\n"
+ "---------- tweak 'read.tasks' option to add parallelism of read tasks.\n"
+ "--------------------------------------------------------------------------------");
}
// IMPORTANT: the merged metadata list must be in ascending order by instant time
List<HoodieCommitMetadata> metadataList = mergeList(archivedMetadataList, activeMetadataList);
Set<String> readPartitions = getReadPartitions(metadataList);
if (readPartitions.size() == 0) {
LOG.warn("No partitions found for reading under path: " + path);
return Collections.emptyList();
}
List<StoragePathInfo> pathInfoList = WriteProfiles.getFilesFromMetadata(
path, hadoopConf, metadataList, metaClient.getTableType());
if (pathInfoList.size() == 0) {
LOG.warn("No files found for reading under path: " + path);
return Collections.emptyList();
}
return getInputSplits(metaClient, commitTimeline,
pathInfoList, readPartitions, endInstant, queryContext.getMaxCompletionTime(), instantRange, skipCompaction);
}
private List<MergeOnReadInputSplit> getInputSplits(
HoodieTableMetaClient metaClient,
HoodieTimeline commitTimeline,
List<StoragePathInfo> pathInfoList,
Set<String> readPartitions,
String endInstant,
String maxCompletionTime,
InstantRange instantRange,
boolean skipBaseFiles) {
final HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, commitTimeline, pathInfoList);
final AtomicInteger cnt = new AtomicInteger(0);
final String mergeType = this.conf.getString(FlinkOptions.MERGE_TYPE);
return readPartitions.stream()
.map(relPartitionPath -> getFileSlices(fsView, relPartitionPath, maxCompletionTime, skipBaseFiles)
.map(fileSlice -> {
Option<List<String>> logPaths = Option.ofNullable(fileSlice.getLogFiles()
.sorted(HoodieLogFile.getLogFileComparator())
.map(logFile -> logFile.getPath().toString())
.filter(logPath -> !logPath.endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX))
.collect(Collectors.toList()));
String basePath = fileSlice.getBaseFile().map(BaseFile::getPath).orElse(null);
// the latest commit is used as the upper bound of the log reader instant threshold;
// it must be at least the latest instant time of the file slice to avoid data loss.
String latestCommit = InstantComparison.minInstant(fileSlice.getLatestInstantTime(), endInstant);
return new MergeOnReadInputSplit(cnt.getAndAdd(1),
basePath, logPaths, latestCommit,
metaClient.getBasePath().toString(), maxCompactionMemoryInBytes, mergeType, instantRange, fileSlice.getFileId());
}).collect(Collectors.toList()))
.flatMap(Collection::stream)
.sorted(Comparator.comparing(MergeOnReadInputSplit::getLatestCommit))
.collect(Collectors.toList());
}
private List<MergeOnReadInputSplit> getCdcInputSplits(
HoodieTableMetaClient metaClient,
InstantRange instantRange) {
HoodieCDCExtractor extractor = new HoodieCDCExtractor(metaClient, instantRange, OptionsResolver.readCDCFromChangelog(this.conf));
Map<HoodieFileGroupId, List<HoodieCDCFileSplit>> fileSplits = extractor.extractCDCFileSplits();
if (fileSplits.isEmpty()) {
LOG.warn("No change logs found for reading in path: " + path);
return Collections.emptyList();
}
final AtomicInteger cnt = new AtomicInteger(0);
return fileSplits.entrySet().stream()
.map(splits ->
new CdcInputSplit(cnt.getAndAdd(1), metaClient.getBasePath().toString(), maxCompactionMemoryInBytes,
splits.getKey().getFileId(), splits.getValue().stream().sorted().toArray(HoodieCDCFileSplit[]::new)))
.collect(Collectors.toList());
}
private static Stream<FileSlice> getFileSlices(
HoodieTableFileSystemView fsView,
String relPartitionPath,
String endInstant,
boolean skipBaseFiles) {
return skipBaseFiles ? fsView.getAllLogsMergedFileSliceBeforeOrOn(relPartitionPath, endInstant)
: fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, endInstant);
}
private FileIndex getFileIndex() {
return FileIndex.builder()
.path(new StoragePath(path.toUri()))
.conf(conf)
.rowType(rowType)
.partitionPruner(partitionPruner)
.build();
}
/**
* Returns the partitions to read with the given metadata list.
* The partitions are filtered by the pushed-down required partitions.
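* For example (dates are illustrative), if the metadata records writes to partitions
* {@code 2024/06/01} and {@code 2024/06/02} while the pruner only selects {@code 2024/06/01},
* only that partition is returned.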
*
* @param metadataList The metadata list
* @return the set of read partitions
*/
private Set<String> getReadPartitions(List<HoodieCommitMetadata> metadataList) {
Set<String> partitions = HoodieTableMetadataUtil.getWritePartitionPaths(metadataList);
// apply partition push down
if (this.partitionPruner != null) {
Set<String> selectedPartitions = this.partitionPruner.filter(partitions);
double total = partitions.size();
double selectedNum = selectedPartitions.size();
double percentPruned = total == 0 ? 0 : (1 - selectedNum / total) * 100;
LOG.info("Selected " + selectedNum + " partitions out of " + total
+ ", pruned " + percentPruned + "% partitions.");
return selectedPartitions;
}
return partitions;
}
private static <T> List<T> mergeList(List<T> list1, List<T> list2) {
if (list1.isEmpty()) {
return list2;
}
if (list2.isEmpty()) {
return list1;
}
List<T> merged = new ArrayList<>(list1);
merged.addAll(list2);
return merged;
}
// -------------------------------------------------------------------------
// Inner Class
// -------------------------------------------------------------------------
/**
* Represents a result of calling {@link #inputSplits}.
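*
* <p>{@code endInstant} is the instant time the splits consume up to; {@code offset} is the
* completion-time based offset that a streaming read feeds back as the issued offset on its
* next poll, and it is {@code null} on the bounded read path.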
*/
public static class Result {
private final List<MergeOnReadInputSplit> inputSplits; // input splits
private final String endInstant; // end instant to consume to
private final String offset; // monotonic increasing consumption offset
public static final Result EMPTY = instance(Collections.emptyList(), "");
public boolean isEmpty() {
return this.inputSplits.size() == 0;
}
public List<MergeOnReadInputSplit> getInputSplits() {
return this.inputSplits;
}
public String getEndInstant() {
return this.endInstant;
}
@Nullable
public String getOffset() {
return offset;
}
private Result(List<MergeOnReadInputSplit> inputSplits, String endInstant, @Nullable String offset) {
this.inputSplits = inputSplits;
this.endInstant = endInstant;
this.offset = offset;
}
public static Result instance(List<MergeOnReadInputSplit> inputSplits, String endInstant) {
return new Result(inputSplits, endInstant, null);
}
public static Result instance(List<MergeOnReadInputSplit> inputSplits, String endInstant, String offset) {
return new Result(inputSplits, endInstant, offset);
}
}
/**
* Builder for {@link IncrementalInputSplits}.
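*
* <p>A sketch of the optional knobs (all are disabled or unset by default; the values shown are illustrative):
* <pre>{@code
*   IncrementalInputSplits.builder()
*       .conf(conf)
*       .path(path)
*       .rowType(rowType)
*       .maxCompactionMemoryInBytes(maxCompactionMemoryInBytes)
*       .partitionPruner(partitionPruner) // may be null when no partition predicate is pushed down
*       .skipCompaction(true)             // skip compaction instants
*       .skipClustering(true)             // skip clustering instants
*       .skipInsertOverwrite(false)       // keep insert-overwrite instants
*       .build();
* }</pre>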
*/
public static class Builder {
private Configuration conf;
private Path path;
private RowType rowType;
private long maxCompactionMemoryInBytes;
// for partition pruning
private PartitionPruners.PartitionPruner partitionPruner;
// skip compaction
private boolean skipCompaction = false;
// skip clustering
private boolean skipClustering = false;
// skip insert overwrite
private boolean skipInsertOverwrite = false;
public Builder() {
}
public Builder conf(Configuration conf) {
this.conf = conf;
return this;
}
public Builder path(Path path) {
this.path = path;
return this;
}
public Builder rowType(RowType rowType) {
this.rowType = rowType;
return this;
}
public Builder maxCompactionMemoryInBytes(long maxCompactionMemoryInBytes) {
this.maxCompactionMemoryInBytes = maxCompactionMemoryInBytes;
return this;
}
public Builder partitionPruner(@Nullable PartitionPruners.PartitionPruner partitionPruner) {
this.partitionPruner = partitionPruner;
return this;
}
public Builder skipCompaction(boolean skipCompaction) {
this.skipCompaction = skipCompaction;
return this;
}
public Builder skipClustering(boolean skipClustering) {
this.skipClustering = skipClustering;
return this;
}
public Builder skipInsertOverwrite(boolean skipInsertOverwrite) {
this.skipInsertOverwrite = skipInsertOverwrite;
return this;
}
public IncrementalInputSplits build() {
return new IncrementalInputSplits(
Objects.requireNonNull(this.conf), Objects.requireNonNull(this.path), Objects.requireNonNull(this.rowType),
this.maxCompactionMemoryInBytes, this.partitionPruner, this.skipCompaction, this.skipClustering, this.skipInsertOverwrite);
}
}
}