/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.iceberg.ManifestEntry.Status;
import org.apache.iceberg.exceptions.RuntimeIOException;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Evaluator;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.ManifestEvaluator;
import org.apache.iceberg.expressions.Projections;
import org.apache.iceberg.expressions.StrictMetricsEvaluator;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.util.BinPacking.ListPacker;
import org.apache.iceberg.util.CharSequenceWrapper;
import org.apache.iceberg.util.ManifestFileUtil;
import org.apache.iceberg.util.StructLikeWrapper;
import org.apache.iceberg.util.Tasks;
import org.apache.iceberg.util.ThreadPools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.iceberg.TableProperties.MANIFEST_MIN_MERGE_COUNT;
import static org.apache.iceberg.TableProperties.MANIFEST_MIN_MERGE_COUNT_DEFAULT;
import static org.apache.iceberg.TableProperties.MANIFEST_TARGET_SIZE_BYTES;
import static org.apache.iceberg.TableProperties.MANIFEST_TARGET_SIZE_BYTES_DEFAULT;
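/**
* Base class for snapshot producers that rewrite manifests: it filters deleted files out of
* existing manifests, groups the results by partition spec, and bin-packs each group into
* merged manifests near the configured target size before committing.
*/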
abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
private static final Logger LOG = LoggerFactory.getLogger(MergingSnapshotProducer.class);
private static final Joiner COMMA = Joiner.on(",");
protected static class DeleteException extends ValidationException {
private final String partition;
private DeleteException(String partition) {
super("Operation would delete existing data");
this.partition = partition;
}
public String partition() {
return partition;
}
}
private final TableOperations ops;
private final PartitionSpec spec;
private final long manifestTargetSizeBytes;
private final int minManifestsCountToMerge;
private final SnapshotSummary.Builder summaryBuilder = SnapshotSummary.builder();
private final boolean mergeEnabled;
// update data
private final AtomicInteger manifestCount = new AtomicInteger(0);
private final List<DataFile> newFiles = Lists.newArrayList();
private final List<ManifestFile> appendManifests = Lists.newArrayList();
private final SnapshotSummary.Builder appendedManifestsSummary = SnapshotSummary.builder();
private final Set<CharSequenceWrapper> deletePaths = Sets.newHashSet();
private final Set<StructLikeWrapper> deleteFilePartitions = Sets.newHashSet();
private final Set<StructLikeWrapper> dropPartitions = Sets.newHashSet();
private Expression deleteExpression = Expressions.alwaysFalse();
private boolean hasPathOnlyDeletes = false;
private boolean failAnyDelete = false;
private boolean failMissingDeletePaths = false;
// cache the new manifest once it is written
private ManifestFile cachedNewManifest = null;
private boolean hasNewFiles = false;
// cache merge results to reuse when retrying
private final Map<List<ManifestFile>, ManifestFile> mergeManifests = Maps.newConcurrentMap();
// cache filtered manifests to avoid extra work when commits fail.
private final Map<ManifestFile, ManifestFile> filteredManifests = Maps.newConcurrentMap();
// tracking where files were deleted to validate retries quickly
private final Map<ManifestFile, Iterable<DataFile>> filteredManifestToDeletedFiles =
Maps.newConcurrentMap();
private boolean filterUpdated = false; // used to clear caches of filtered and merged manifests
MergingSnapshotProducer(TableOperations ops) {
super(ops);
this.ops = ops;
this.spec = ops.current().spec();
this.manifestTargetSizeBytes = ops.current()
.propertyAsLong(MANIFEST_TARGET_SIZE_BYTES, MANIFEST_TARGET_SIZE_BYTES_DEFAULT);
this.minManifestsCountToMerge = ops.current()
.propertyAsInt(MANIFEST_MIN_MERGE_COUNT, MANIFEST_MIN_MERGE_COUNT_DEFAULT);
this.mergeEnabled = ops.current()
.propertyAsBoolean(TableProperties.MANIFEST_MERGE_ENABLED, TableProperties.MANIFEST_MERGE_ENABLED_DEFAULT);
}
@Override
public ThisT set(String property, String value) {
summaryBuilder.set(property, value);
return self();
}
protected PartitionSpec writeSpec() {
// the spec is set when the write is started
return spec;
}
protected Expression rowFilter() {
return deleteExpression;
}
protected List<DataFile> addedFiles() {
return ImmutableList.copyOf(newFiles);
}
protected void failAnyDelete() {
this.failAnyDelete = true;
}
protected void failMissingDeletePaths() {
this.failMissingDeletePaths = true;
}
/**
* Add a filter to match files to delete. A file will be deleted if all of the rows it contains
* match this or any other filter passed to this method.
*
* @param expr an expression to match rows.
*/
protected void deleteByRowFilter(Expression expr) {
Preconditions.checkNotNull(expr, "Cannot delete files using filter: null");
this.filterUpdated = true;
this.deleteExpression = Expressions.or(deleteExpression, expr);
}
/**
* Add a partition tuple to drop from the table during the delete phase.
*/
protected void dropPartition(StructLike partition) {
dropPartitions.add(StructLikeWrapper.wrap(partition));
}
/**
* Add a specific path to be deleted in the new snapshot.
*/
protected void delete(DataFile file) {
Preconditions.checkNotNull(file, "Cannot delete file: null");
this.filterUpdated = true;
deletePaths.add(CharSequenceWrapper.wrap(file.path()));
deleteFilePartitions.add(StructLikeWrapper.wrap(file.partition()));
}
/**
* Add a specific path to be deleted in the new snapshot.
*/
protected void delete(CharSequence path) {
Preconditions.checkNotNull(path, "Cannot delete file path: null");
this.filterUpdated = true;
this.hasPathOnlyDeletes = true;
deletePaths.add(CharSequenceWrapper.wrap(path));
}
/**
* Add a file to the new snapshot.
*/
protected void add(DataFile file) {
hasNewFiles = true;
newFiles.add(file);
}
/**
* Add all files in a manifest to the new snapshot.
*/
protected void add(ManifestFile manifest) {
// the manifest must be rewritten with this update's snapshot ID
try (ManifestReader reader = ManifestReader.read(
ops.io().newInputFile(manifest.path()), ops.current()::spec)) {
appendManifests.add(ManifestWriter.copyAppendManifest(
reader, manifestPath(manifestCount.getAndIncrement()), snapshotId(), appendedManifestsSummary));
} catch (IOException e) {
throw new RuntimeIOException(e, "Failed to close manifest: %s", manifest);
}
}
@Override
protected Map<String, String> summary() {
return summaryBuilder.build();
}
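/**
* Applies pending changes to the base metadata: writes new files as a manifest, filters deleted
* files out of existing manifests, optionally merges manifests per partition spec, and validates
* that every required delete path was found. Intermediate results are cached for commit retries.
*/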
@Override
public List<ManifestFile> apply(TableMetadata base) {
summaryBuilder.clear();
summaryBuilder.merge(appendedManifestsSummary);
if (filterUpdated) {
cleanUncommittedFilters(SnapshotProducer.EMPTY_SET);
this.filterUpdated = false;
}
Snapshot current = base.currentSnapshot();
Map<Integer, List<ManifestFile>> groups = Maps.newTreeMap(Comparator.reverseOrder());
// use a common metrics evaluator for all manifests because it is bound to the table schema
StrictMetricsEvaluator metricsEvaluator = new StrictMetricsEvaluator(
ops.current().schema(), deleteExpression);
// add the current spec as the first group. files are added to the beginning.
try {
Iterable<ManifestFile> newManifests;
if (newFiles.size() > 0) {
// add all of the new files to the summary builder
for (DataFile file : newFiles) {
summaryBuilder.addedFile(spec, file);
}
newManifests = Iterables.concat(ImmutableList.of(newFilesAsManifest()), appendManifests);
} else {
newManifests = appendManifests;
}
// filter any existing manifests
List<ManifestFile> filtered;
if (current != null) {
List<ManifestFile> manifests = current.manifests();
filtered = Arrays.asList(filterManifests(metricsEvaluator, manifests));
} else {
filtered = ImmutableList.of();
}
Iterable<ManifestFile> unmergedManifests = Iterables.filter(
Iterables.concat(newManifests, filtered),
// only keep manifests that have live data files or that were written by this commit
manifest -> manifest.hasAddedFiles() || manifest.hasExistingFiles() || manifest.snapshotId() == snapshotId());
Set<CharSequenceWrapper> deletedFiles = deletedFiles(unmergedManifests);
List<ManifestFile> manifests = Lists.newArrayList();
if (mergeEnabled) {
groupManifestsByPartitionSpec(groups, unmergedManifests);
for (Map.Entry<Integer, List<ManifestFile>> entry : groups.entrySet()) {
Iterables.addAll(manifests, mergeGroup(entry.getKey(), entry.getValue()));
}
} else {
Iterables.addAll(manifests, unmergedManifests);
}
ValidationException.check(!failMissingDeletePaths || deletedFiles.containsAll(deletePaths),
"Missing required files to delete: %s",
COMMA.join(Iterables.transform(Iterables.filter(deletePaths,
path -> !deletedFiles.contains(path)),
CharSequenceWrapper::get)));
return manifests;
} catch (IOException e) {
throw new RuntimeIOException(e, "Failed to create snapshot manifest list");
}
}
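/**
* Filters all manifests in parallel, storing results by index to preserve manifest order.
*/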
private ManifestFile[] filterManifests(StrictMetricsEvaluator metricsEvaluator, List<ManifestFile> manifests)
throws IOException {
ManifestFile[] filtered = new ManifestFile[manifests.size()];
// open all of the manifest files in parallel, use index to avoid reordering
Tasks.range(filtered.length)
.stopOnFailure().throwFailureWhenFinished()
.executeWith(ThreadPools.getWorkerPool())
.run(index -> {
ManifestFile manifest = filterManifest(metricsEvaluator, manifests.get(index));
filtered[index] = manifest;
}, IOException.class);
return filtered;
}
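/**
* Collects the paths deleted by the filtered manifests and records each delete in the snapshot
* summary under the spec the manifest was written with.
*/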
private Set<CharSequenceWrapper> deletedFiles(Iterable<ManifestFile> manifests) {
Set<CharSequenceWrapper> deletedFiles = Sets.newHashSet();
for (ManifestFile manifest : manifests) {
PartitionSpec manifestSpec = ops.current().spec(manifest.partitionSpecId());
Iterable<DataFile> manifestDeletes = filteredManifestToDeletedFiles.get(manifest);
if (manifestDeletes != null) {
for (DataFile file : manifestDeletes) {
summaryBuilder.deletedFile(manifestSpec, file);
deletedFiles.add(CharSequenceWrapper.wrap(file.path()));
}
}
}
return deletedFiles;
}
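/**
* Groups manifests by partition spec ID; the tree map orders groups from newest spec to oldest.
*/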
private void groupManifestsByPartitionSpec(Map<Integer, List<ManifestFile>> groups, Iterable<ManifestFile> filtered) {
for (ManifestFile manifest : filtered) {
List<ManifestFile> group = groups.get(manifest.partitionSpecId());
if (group != null) {
group.add(manifest);
} else {
group = Lists.newArrayList();
group.add(manifest);
groups.put(manifest.partitionSpecId(), group);
}
}
}
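/**
* Deletes merged manifests that were written but not committed and evicts them from the merge
* cache so that a retry will rewrite them.
*/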
private void cleanUncommittedMerges(Set<ManifestFile> committed) {
// iterate over a copy of entries to avoid concurrent modification
List<Map.Entry<List<ManifestFile>, ManifestFile>> entries =
Lists.newArrayList(mergeManifests.entrySet());
for (Map.Entry<List<ManifestFile>, ManifestFile> entry : entries) {
// delete any new merged manifests that aren't in the committed list
ManifestFile merged = entry.getValue();
if (!committed.contains(merged)) {
deleteFile(merged.path());
// remove the deleted file from the cache
mergeManifests.remove(entry.getKey());
}
}
}
private void cleanUncommittedFilters(Set<ManifestFile> committed) {
// iterate over a copy of entries to avoid concurrent modification
List<Map.Entry<ManifestFile, ManifestFile>> filterEntries =
Lists.newArrayList(filteredManifests.entrySet());
for (Map.Entry<ManifestFile, ManifestFile> entry : filterEntries) {
// remove any new filtered manifests that aren't in the committed list
ManifestFile manifest = entry.getKey();
ManifestFile filtered = entry.getValue();
if (!committed.contains(filtered)) {
// only delete if the filtered copy was created
if (!manifest.equals(filtered)) {
deleteFile(filtered.path());
}
// remove the entry from the cache
filteredManifests.remove(manifest);
}
}
}
private void cleanUncommittedAppends(Set<ManifestFile> committed) {
if (cachedNewManifest != null && !committed.contains(cachedNewManifest)) {
deleteFile(cachedNewManifest.path());
this.cachedNewManifest = null;
}
for (ManifestFile manifest : appendManifests) {
if (!committed.contains(manifest)) {
deleteFile(manifest.path());
}
}
}
@Override
protected void cleanUncommitted(Set<ManifestFile> committed) {
cleanUncommittedMerges(committed);
cleanUncommittedFilters(committed);
cleanUncommittedAppends(committed);
}
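/**
* Returns whether the manifest may contain files matching the delete expression, a dropped
* partition tuple, or a deleted path; used to skip rewriting manifests that cannot be affected.
*/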
private boolean canContainDeletedFiles(ManifestFile manifest) {
boolean canContainExpressionDeletes;
if (deleteExpression != null && deleteExpression != Expressions.alwaysFalse()) {
ManifestEvaluator manifestEvaluator =
ManifestEvaluator.forRowFilter(deleteExpression, ops.current().spec(), true);
canContainExpressionDeletes = manifestEvaluator.eval(manifest);
} else {
canContainExpressionDeletes = false;
}
boolean canContainDroppedPartitions;
if (dropPartitions.size() > 0) {
canContainDroppedPartitions = ManifestFileUtil.canContainAny(
manifest,
Iterables.transform(dropPartitions, StructLikeWrapper::get),
specId -> ops.current().spec(specId));
} else {
canContainDroppedPartitions = false;
}
boolean canContainDroppedFiles;
if (hasPathOnlyDeletes) {
canContainDroppedFiles = true;
} else if (deletePaths.size() > 0) {
// because there were no path-only deletes, the set of deleted file partitions is valid
canContainDroppedFiles = ManifestFileUtil.canContainAny(
manifest,
Iterables.transform(deleteFilePartitions, StructLikeWrapper::get),
specId -> ops.current().spec(specId));
} else {
canContainDroppedFiles = false;
}
return canContainExpressionDeletes || canContainDroppedPartitions || canContainDroppedFiles;
}
/**
* @return a ManifestFile that is a filtered version of the input manifest.
*/
private ManifestFile filterManifest(StrictMetricsEvaluator metricsEvaluator,
ManifestFile manifest) throws IOException {
ManifestFile cached = filteredManifests.get(manifest);
if (cached != null) {
return cached;
}
boolean hasLiveFiles = manifest.hasAddedFiles() || manifest.hasExistingFiles();
if (!hasLiveFiles || !canContainDeletedFiles(manifest)) {
filteredManifests.put(manifest, manifest);
return manifest;
}
try (ManifestReader reader = ManifestReader.read(
ops.io().newInputFile(manifest.path()), ops.current()::spec)) {
// this is reused to compare file paths with the delete set
CharSequenceWrapper pathWrapper = CharSequenceWrapper.wrap("");
// reused to compare file partitions with the drop set
StructLikeWrapper partitionWrapper = StructLikeWrapper.wrap(null);
// this assumes that the manifest doesn't have files to remove and streams through the
// manifest without copying data. if a manifest does have a file to remove, this will break
// out of the loop and move on to filtering the manifest.
boolean hasDeletedFiles =
manifestHasDeletedFiles(metricsEvaluator, reader, pathWrapper, partitionWrapper);
if (!hasDeletedFiles) {
filteredManifests.put(manifest, manifest);
return manifest;
}
return filterManifestWithDeletedFiles(metricsEvaluator, manifest, reader, pathWrapper,
partitionWrapper);
}
}
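/**
* Scans for the first entry that must be deleted, validating that the row filter matches whole
* files, either by a strict partition match or by file metrics, before reporting a match.
*/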
private boolean manifestHasDeletedFiles(
StrictMetricsEvaluator metricsEvaluator, ManifestReader reader,
CharSequenceWrapper pathWrapper, StructLikeWrapper partitionWrapper) {
Evaluator inclusive = extractInclusiveDeleteExpression(reader);
Evaluator strict = extractStrictDeleteExpression(reader);
boolean hasDeletedFiles = false;
for (ManifestEntry entry : reader.entries()) {
DataFile file = entry.file();
boolean fileDelete = deletePaths.contains(pathWrapper.set(file.path())) ||
dropPartitions.contains(partitionWrapper.set(file.partition()));
if (fileDelete || inclusive.eval(file.partition())) {
ValidationException.check(
fileDelete || strict.eval(file.partition()) || metricsEvaluator.eval(file),
"Cannot delete file where some, but not all, rows match filter %s: %s",
this.deleteExpression, file.path());
hasDeletedFiles = true;
if (failAnyDelete) {
throw new DeleteException(writeSpec().partitionToPath(file.partition()));
}
break; // as soon as a deleted file is detected, stop scanning
}
}
return hasDeletedFiles;
}
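/**
* Writes a filtered copy of the manifest: matching entries are written as deletes and the
* remaining live entries as existing. The copy and its deleted files are cached for retries.
*/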
private ManifestFile filterManifestWithDeletedFiles(
StrictMetricsEvaluator metricsEvaluator, ManifestFile manifest, ManifestReader reader,
CharSequenceWrapper pathWrapper, StructLikeWrapper partitionWrapper) throws IOException {
Evaluator inclusive = extractInclusiveDeleteExpression(reader);
Evaluator strict = extractStrictDeleteExpression(reader);
// when this point is reached, there is at least one file that will be deleted in the
// manifest. produce a copy of the manifest with all deleted files removed.
List<DataFile> deletedFiles = Lists.newArrayList();
Set<CharSequenceWrapper> deletedPaths = Sets.newHashSet();
OutputFile filteredCopy = manifestPath(manifestCount.getAndIncrement());
ManifestWriter writer = new ManifestWriter(reader.spec(), filteredCopy, snapshotId());
try {
reader.entries().forEach(entry -> {
DataFile file = entry.file();
boolean fileDelete = deletePaths.contains(pathWrapper.set(file.path())) ||
dropPartitions.contains(partitionWrapper.set(file.partition()));
if (entry.status() != Status.DELETED) {
if (fileDelete || inclusive.eval(file.partition())) {
ValidationException.check(
fileDelete || strict.eval(file.partition()) || metricsEvaluator.eval(file),
"Cannot delete file where some, but not all, rows match filter %s: %s",
this.deleteExpression, file.path());
writer.delete(entry);
CharSequenceWrapper wrapper = CharSequenceWrapper.wrap(entry.file().path());
if (deletedPaths.contains(wrapper)) {
LOG.warn("Deleting a duplicate path from manifest {}: {}",
manifest.path(), wrapper.get());
summaryBuilder.incrementDuplicateDeletes();
} else {
// only add the file to deletes if it is a new delete
// this keeps the snapshot summary accurate for non-duplicate data
deletedFiles.add(entry.file().copyWithoutStats());
}
deletedPaths.add(wrapper);
} else {
writer.existing(entry);
}
}
});
} finally {
writer.close();
}
// return the filtered manifest
ManifestFile filtered = writer.toManifestFile();
// update caches
filteredManifests.put(manifest, filtered);
filteredManifestToDeletedFiles.put(filtered, deletedFiles);
return filtered;
}
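// the strict projection matches partitions where every row must match the delete expression;
// the inclusive projection matches partitions where at least one row may match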
private Evaluator extractStrictDeleteExpression(ManifestReader reader) {
Expression strictExpr = Projections
.strict(reader.spec())
.project(deleteExpression);
return new Evaluator(reader.spec().partitionType(), strictExpr);
}
private Evaluator extractInclusiveDeleteExpression(ManifestReader reader) {
Expression inclusiveExpr = Projections
.inclusive(reader.spec())
.project(deleteExpression);
return new Evaluator(reader.spec().partitionType(), inclusiveExpr);
}
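/**
* Bin-packs a group of manifests with the same spec into merged manifests near the target size,
* processing bins in parallel while keeping the original manifest order.
*/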
@SuppressWarnings("unchecked")
private Iterable<ManifestFile> mergeGroup(int specId, List<ManifestFile> group)
throws IOException {
// use a lookback of 1 to avoid reordering the manifests. using 1 also means this should pack
// from the end so that the manifest that gets under-filled is the first one, which will be
// merged the next time.
ListPacker<ManifestFile> packer = new ListPacker<>(manifestTargetSizeBytes, 1, false);
List<List<ManifestFile>> bins = packer.packEnd(group, manifest -> manifest.length());
// process bins in parallel, but put results in the order of the bins into an array to preserve
// the order of manifests and contents. preserving the order helps avoid random deletes when
// data files are eventually aged off.
List<ManifestFile>[] binResults = (List<ManifestFile>[])
Array.newInstance(List.class, bins.size());
Tasks.range(bins.size())
.stopOnFailure().throwFailureWhenFinished()
.executeWith(ThreadPools.getWorkerPool())
.run(index -> {
List<ManifestFile> bin = bins.get(index);
List<ManifestFile> outputManifests = Lists.newArrayList();
binResults[index] = outputManifests;
if (bin.size() == 1) {
// no need to rewrite
outputManifests.add(bin.get(0));
return;
}
// if the bin has a new manifest (the new data files) then only merge it if the number of
// manifests is above the minimum count. this is applied only to bins with an in-memory
// manifest so that large manifests don't prevent merging older groups.
if (bin.contains(cachedNewManifest) && bin.size() < minManifestsCountToMerge) {
// not enough to merge, add all manifest files to the output list
outputManifests.addAll(bin);
} else {
// merge the group
outputManifests.add(createManifest(specId, bin));
}
}, IOException.class);
return Iterables.concat(binResults);
}
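/**
* Rewrites a bin of manifests into one manifest: deletes and adds from this snapshot are kept
* as-is and all other live entries become existing entries.
*/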
private ManifestFile createManifest(int specId, List<ManifestFile> bin) throws IOException {
// if this merge was already rewritten, use the existing file.
// if the new files are in this merge, then the ManifestFile for the new files has changed and
// will be a cache miss.
if (mergeManifests.containsKey(bin)) {
return mergeManifests.get(bin);
}
OutputFile out = manifestPath(manifestCount.getAndIncrement());
ManifestWriter writer = new ManifestWriter(ops.current().spec(specId), out, snapshotId());
try {
for (ManifestFile manifest : bin) {
try (ManifestReader reader = ManifestReader.read(
ops.io().newInputFile(manifest.path()), ops.current()::spec)) {
for (ManifestEntry entry : reader.entries()) {
if (entry.status() == Status.DELETED) {
// suppress deletes from previous snapshots. only files deleted by this snapshot
// should be added to the new manifest
if (entry.snapshotId() == snapshotId()) {
writer.addEntry(entry);
}
} else if (entry.status() == Status.ADDED && entry.snapshotId() == snapshotId()) {
// adds from this snapshot are still adds, otherwise they should be existing
writer.addEntry(entry);
} else {
// add all files from the old manifest as existing files
writer.existing(entry);
}
}
}
}
} finally {
writer.close();
}
ManifestFile manifest = writer.toManifestFile();
// update the cache
mergeManifests.put(bin, manifest);
return manifest;
}
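/**
* Writes the accumulated new data files as a single manifest, rewriting the cached manifest if
* more files were added after it was written.
*/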
private ManifestFile newFilesAsManifest() throws IOException {
if (hasNewFiles && cachedNewManifest != null) {
deleteFile(cachedNewManifest.path());
cachedNewManifest = null;
}
if (cachedNewManifest == null) {
OutputFile out = manifestPath(manifestCount.getAndIncrement());
ManifestWriter writer = new ManifestWriter(spec, out, snapshotId());
try {
writer.addAll(newFiles);
} finally {
writer.close();
}
this.cachedNewManifest = writer.toManifestFile();
this.hasNewFiles = false;
}
return cachedNewManifest;
}
}