// com.netease.arctic.shade.org.apache.iceberg.MergingSnapshotProducer (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.netease.arctic.shade.org.apache.iceberg;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import com.netease.arctic.shade.org.apache.iceberg.events.CreateSnapshotEvent;
import com.netease.arctic.shade.org.apache.iceberg.exceptions.RuntimeIOException;
import com.netease.arctic.shade.org.apache.iceberg.exceptions.ValidationException;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Expression;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Expressions;
import com.netease.arctic.shade.org.apache.iceberg.io.CloseableIterator;
import com.netease.arctic.shade.org.apache.iceberg.io.InputFile;
import com.netease.arctic.shade.org.apache.iceberg.io.OutputFile;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.base.Predicate;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Iterators;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Lists;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Maps;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Sets;
import com.netease.arctic.shade.org.apache.iceberg.util.CharSequenceSet;
import com.netease.arctic.shade.org.apache.iceberg.util.Pair;
import com.netease.arctic.shade.org.apache.iceberg.util.SnapshotUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.MANIFEST_MIN_MERGE_COUNT;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.MANIFEST_MIN_MERGE_COUNT_DEFAULT;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.MANIFEST_TARGET_SIZE_BYTES;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.MANIFEST_TARGET_SIZE_BYTES_DEFAULT;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED_DEFAULT;
abstract class MergingSnapshotProducer extends SnapshotProducer {
// logger used by updateEvent() to report that the committed snapshot could not be loaded
private static final Logger LOG = LoggerFactory.getLogger(MergingSnapshotProducer.class);
// data is only added in "append" and "overwrite" operations
private static final Set VALIDATE_ADDED_FILES_OPERATIONS =
ImmutableSet.of(DataOperations.APPEND, DataOperations.OVERWRITE);
// data files are removed in "overwrite", "replace", and "delete"
private static final Set VALIDATE_DATA_FILES_EXIST_OPERATIONS =
ImmutableSet.of(DataOperations.OVERWRITE, DataOperations.REPLACE, DataOperations.DELETE);
// same as the set above without "delete"; selected by validateDataFilesExist when skipDeletes is true
private static final Set VALIDATE_DATA_FILES_EXIST_SKIP_DELETE_OPERATIONS =
ImmutableSet.of(DataOperations.OVERWRITE, DataOperations.REPLACE);
// delete files can be added in "overwrite" or "delete" operations
private static final Set VALIDATE_ADDED_DELETE_FILES_OPERATIONS =
ImmutableSet.of(DataOperations.OVERWRITE, DataOperations.DELETE);
// table name reported in the CreateSnapshotEvent built by updateEvent()
private final String tableName;
// table operations used to read current metadata and to access file IO
private final TableOperations ops;
// summary returned by summary(); cleared and rebuilt from the partial summaries on every apply()
private final SnapshotSummary.Builder summaryBuilder = SnapshotSummary.builder();
// merge and filter managers for data manifests
private final ManifestMergeManager mergeManager;
private final ManifestFilterManager filterManager;
// merge and filter managers for delete manifests
private final ManifestMergeManager deleteMergeManager;
private final ManifestFilterManager deleteFilterManager;
// read from table properties in the constructor; when true, add(ManifestFile) may append a manifest without copying
private final boolean snapshotIdInheritanceEnabled;
// update data
private final List newFiles = Lists.newArrayList();
// when non-null, each new data file is written to the new manifest with this explicit sequence number
private Long newFilesSequenceNumber;
// new delete files grouped by the ID of their partition spec
private final Map> newDeleteFilesBySpec = Maps.newHashMap();
// manifests appended as-is by add(ManifestFile)
private final List appendManifests = Lists.newArrayList();
// copies of appended manifests that were rewritten with this update's snapshot ID
private final List rewrittenAppendManifests = Lists.newArrayList();
// summary contributions of files passed to add(DataFile) and add(DeleteFile)
private final SnapshotSummary.Builder addedFilesSummary = SnapshotSummary.builder();
// summary contributions of manifests passed to add(ManifestFile), including rewritten copies
private final SnapshotSummary.Builder appendedManifestsSummary = SnapshotSummary.builder();
// expression from the most recent deleteByRowFilter call; alwaysFalse until one is set
private Expression deleteExpression = Expressions.alwaysFalse();
// spec of the first added data file; later data files must use the same spec ID (see setDataSpec)
private PartitionSpec dataSpec;
// cache new manifests after writing
private ManifestFile cachedNewManifest = null;
// true when data files were added after cachedNewManifest was written, so the cached manifest must be rewritten
private boolean hasNewFiles = false;
// cache new manifests for delete files
private final List cachedNewDeleteManifests = Lists.newLinkedList();
// true when delete files were added after cachedNewDeleteManifests was written
private boolean hasNewDeleteFiles = false;
// case sensitivity passed to the filter managers and to the validation scans
private boolean caseSensitive = true;
/**
 * Creates a producer bound to a table, reading the manifest merge settings and the snapshot ID inheritance flag
 * from the current table properties.
 *
 * @param tableName table name reported in the event built by {@link #updateEvent()}
 * @param ops table operations used to read metadata and to access files
 */
MergingSnapshotProducer(String tableName, TableOperations ops) {
  super(ops);
  this.tableName = tableName;
  this.ops = ops;
  this.dataSpec = null;

  // the same merge settings configure both the data and the delete merge manager
  long manifestTargetSize = ops.current().propertyAsLong(
      MANIFEST_TARGET_SIZE_BYTES, MANIFEST_TARGET_SIZE_BYTES_DEFAULT);
  int mergeThreshold = ops.current().propertyAsInt(
      MANIFEST_MIN_MERGE_COUNT, MANIFEST_MIN_MERGE_COUNT_DEFAULT);
  boolean manifestMergeEnabled = ops.current().propertyAsBoolean(
      TableProperties.MANIFEST_MERGE_ENABLED, TableProperties.MANIFEST_MERGE_ENABLED_DEFAULT);

  this.mergeManager = new DataFileMergeManager(manifestTargetSize, mergeThreshold, manifestMergeEnabled);
  this.filterManager = new DataFileFilterManager();
  this.deleteMergeManager = new DeleteFileMergeManager(manifestTargetSize, mergeThreshold, manifestMergeEnabled);
  this.deleteFilterManager = new DeleteFileFilterManager();

  this.snapshotIdInheritanceEnabled = ops.current().propertyAsBoolean(
      SNAPSHOT_ID_INHERITANCE_ENABLED, SNAPSHOT_ID_INHERITANCE_ENABLED_DEFAULT);
}
/**
 * Records a property in the summary builder of this update.
 *
 * @param property summary property name
 * @param value summary property value
 * @return this for method chaining
 */
@Override
public ThisT set(String property, String value) {
  this.summaryBuilder.set(property, value);
  return self();
}
/**
 * Sets case sensitivity for this producer and forwards it to the data and delete filter managers.
 *
 * @param isCaseSensitive whether column name matching should be case sensitive
 * @return this for method chaining
 */
public ThisT caseSensitive(boolean isCaseSensitive) {
  this.caseSensitive = isCaseSensitive;
  this.filterManager.caseSensitive(isCaseSensitive);
  this.deleteFilterManager.caseSensitive(isCaseSensitive);
  return self();
}
/**
 * Returns the case sensitivity currently configured for this producer (true unless changed).
 */
protected boolean isCaseSensitive() {
  return this.caseSensitive;
}
/**
 * Returns the partition spec shared by the data files added so far.
 *
 * @return the spec captured when the first data file was added
 * @throws IllegalStateException if no data file has been added yet
 */
protected PartitionSpec dataSpec() {
  // the spec is set when the write is started, i.e. by the first call to add(DataFile)
  PartitionSpec spec = this.dataSpec;
  Preconditions.checkState(spec != null, "Cannot determine partition spec: no data files have been added");
  return spec;
}
/**
 * Returns the expression most recently passed to {@link #deleteByRowFilter(Expression)}, or the always-false
 * expression if none has been set.
 */
protected Expression rowFilter() {
  return this.deleteExpression;
}
/**
 * Returns an immutable copy of the data files added to this update so far.
 */
protected List addedFiles() {
  return ImmutableList.copyOf(this.newFiles);
}
/**
 * Turns on the fail-any-delete mode of both the data and the delete filter manager.
 */
protected void failAnyDelete() {
  this.filterManager.failAnyDelete();
  this.deleteFilterManager.failAnyDelete();
}
/**
 * Turns on the fail-missing-delete-paths mode of both the data and the delete filter manager.
 */
protected void failMissingDeletePaths() {
  this.filterManager.failMissingDeletePaths();
  this.deleteFilterManager.failMissingDeletePaths();
}
/**
 * Add a filter to match files to delete. A file will be deleted if all of the rows it contains
 * match this or any other filter passed to this method.
 * <p>
 * The expression is forwarded to both the data and the delete filter manager. Note that {@link #rowFilter()}
 * only reports the expression from the most recent call.
 *
 * @param expr an expression to match rows.
 */
protected void deleteByRowFilter(Expression expr) {
  this.deleteExpression = expr;
  this.filterManager.deleteByRowFilter(expr);
  // a delete file that matches the row filter can be dropped as well: the rows it targets are deleted anyway
  this.deleteFilterManager.deleteByRowFilter(expr);
}
/**
 * Add a partition tuple to drop from the table during the delete phase.
 *
 * @param specId ID of the partition spec the tuple belongs to
 * @param partition partition tuple to drop
 */
protected void dropPartition(int specId, StructLike partition) {
  // data and deletes are dropped together: removing a partition's data also removes all deletes in it
  this.filterManager.dropPartition(specId, partition);
  this.deleteFilterManager.dropPartition(specId, partition);
}
/**
 * Add a specific data file to be deleted in the new snapshot.
 *
 * @param file data file to remove; handled by the data filter manager
 */
protected void delete(DataFile file) {
  this.filterManager.delete(file);
}
/**
 * Add a specific delete file to be deleted in the new snapshot.
 *
 * @param file delete file to remove; handled by the delete filter manager
 */
protected void delete(DeleteFile file) {
  this.deleteFilterManager.delete(file);
}
/**
 * Add a specific data path to be deleted in the new snapshot.
 *
 * @param path location of the data file to remove
 */
protected void delete(CharSequence path) {
  // legacy entry point: it never worked for delete files, so the path is only given to the data filter manager
  this.filterManager.delete(path);
}
/**
 * Add a data file to the new snapshot.
 * <p>
 * The first file added fixes the partition spec of this update; later files must use the same spec ID.
 *
 * @param file data file to add, must not be null
 */
protected void add(DataFile file) {
  Preconditions.checkNotNull(file, "Invalid data file: null");
  // resolves and validates the spec first so that dataSpec() below cannot fail
  setDataSpec(file);
  this.addedFilesSummary.addedFile(dataSpec(), file);
  this.hasNewFiles = true;
  this.newFiles.add(file);
}
/**
 * Add a delete file to the new snapshot.
 * <p>
 * The file is grouped with the other new delete files that share its partition spec ID and is counted in the
 * added-files summary.
 *
 * @param file delete file to add, must not be null
 * @throws NullPointerException if the file is null or its spec ID is not known to the current table metadata
 */
protected void add(DeleteFile file) {
  Preconditions.checkNotNull(file, "Invalid delete file: null");
  PartitionSpec fileSpec = ops.current().spec(file.specId());
  // validate before mutating anything, mirroring setDataSpec: an unknown spec must not leave the pending
  // delete files and the summary out of sync
  Preconditions.checkNotNull(fileSpec, "Cannot find partition spec for delete file: %s", file.path());
  List deleteFiles = newDeleteFilesBySpec.computeIfAbsent(file.specId(), specId -> Lists.newArrayList());
  deleteFiles.add(file);
  addedFilesSummary.addedFile(fileSpec, file);
  hasNewDeleteFiles = true;
}
/**
 * Captures the partition spec of the first data file and rejects later files that use a different spec ID.
 *
 * @param file data file being added
 * @throws NullPointerException if the file's spec ID is not known to the current table metadata
 * @throws ValidationException if the file's spec ID differs from the spec already captured
 */
private void setDataSpec(DataFile file) {
  PartitionSpec specForFile = ops.current().spec(file.specId());
  Preconditions.checkNotNull(specForFile, "Cannot find partition spec for data file: %s", file.path());

  if (dataSpec == null) {
    // first data file: its spec becomes the spec of this update
    this.dataSpec = specForFile;
    return;
  }

  if (dataSpec.specId() != file.specId()) {
    throw new ValidationException("Invalid data file, expected spec id: %d", dataSpec.specId());
  }
}
/**
 * Add all files in a manifest to the new snapshot.
 * <p>
 * The manifest is appended as-is only when snapshot ID inheritance is enabled and the manifest has no snapshot ID;
 * otherwise a copy is written with this update's snapshot ID.
 *
 * @param manifest a data manifest
 * @throws IllegalArgumentException if the manifest is not a data manifest
 */
protected void add(ManifestFile manifest) {
  Preconditions.checkArgument(manifest.content() == ManifestContent.DATA,
      "Cannot append delete manifest: %s", manifest);

  boolean mustRewrite = !snapshotIdInheritanceEnabled || manifest.snapshotId() != null;
  if (mustRewrite) {
    // the manifest must be rewritten with this update's snapshot ID
    rewrittenAppendManifests.add(copyManifest(manifest));
  } else {
    appendedManifestsSummary.addedManifest(manifest);
    appendManifests.add(manifest);
  }
}
/**
 * Writes a copy of an appended manifest to a new manifest location using this update's snapshot ID.
 * The copy reports into the appended-manifests summary.
 *
 * @param manifest manifest to copy
 * @return the newly written manifest
 */
private ManifestFile copyManifest(ManifestFile manifest) {
  TableMetadata metadata = ops.current();
  InputFile source = ops.io().newInputFile(manifest.path());
  OutputFile target = newManifestOutput();
  return ManifestFiles.copyAppendManifest(
      metadata.formatVersion(),
      source,
      metadata.specsById(),
      target,
      snapshotId(),
      appendedManifestsSummary);
}
/**
 * Validates that no files matching a filter have been added to the table since a starting snapshot.
 * <p>
 * Only snapshots whose operation is "append" or "overwrite" are inspected. Returns without checking when the
 * table has no current snapshot.
 *
 * @param base table metadata to validate
 * @param startingSnapshotId id of the snapshot current at the start of the operation
 * @param conflictDetectionFilter an expression used to find new conflicting data files
 * @throws ValidationException if a matching file was added, or if the history back to the starting snapshot
 *                             cannot be determined
 * @throws UncheckedIOException if an IOException occurs while scanning for conflicting entries
 */
protected void validateAddedDataFiles(TableMetadata base, Long startingSnapshotId,
Expression conflictDetectionFilter) {
// if there is no current table state, no files have been added
if (base.currentSnapshot() == null) {
return;
}
// data manifests written by the new "append"/"overwrite" snapshots, plus the IDs of those snapshots
Pair, Set> history =
validationHistory(base, startingSnapshotId, VALIDATE_ADDED_FILES_OPERATIONS, ManifestContent.DATA);
List manifests = history.first();
Set newSnapshots = history.second();
// restrict the scan to entries from the new snapshots that match the conflict filter
ManifestGroup conflictGroup = new ManifestGroup(ops.io(), manifests, ImmutableList.of())
.caseSensitive(caseSensitive)
.filterManifestEntries(entry -> newSnapshots.contains(entry.snapshotId()))
.filterData(conflictDetectionFilter)
.specsById(base.specsById())
.ignoreDeleted()
.ignoreExisting();
try (CloseableIterator> conflicts = conflictGroup.entries().iterator()) {
// a single remaining entry is enough to fail; the message lists the paths still left in the iterator
if (conflicts.hasNext()) {
throw new ValidationException("Found conflicting files that can contain records matching %s: %s",
conflictDetectionFilter,
Iterators.toString(Iterators.transform(conflicts, entry -> entry.file().path().toString())));
}
} catch (IOException e) {
throw new UncheckedIOException(
String.format("Failed to validate no appends matching %s", conflictDetectionFilter), e);
}
}
/**
 * Validates that no new delete files that must be applied to the given data files have been added to the table since
 * a starting snapshot.
 * <p>
 * No data filter is applied. Equality deletes are ignored when an explicit sequence number has been set for the
 * new files through {@link #setNewFilesSequenceNumber(long)}.
 *
 * @param base table metadata to validate
 * @param startingSnapshotId id of the snapshot current at the start of the operation
 * @param dataFiles data files to validate have no new row deletes
 */
protected void validateNoNewDeletesForDataFiles(TableMetadata base, Long startingSnapshotId,
                                                Iterable dataFiles) {
  boolean ignoreEqualityDeletes = newFilesSequenceNumber != null;
  validateNoNewDeletesForDataFiles(base, startingSnapshotId, null, dataFiles, ignoreEqualityDeletes);
}
/**
 * Validates that no new delete files that must be applied to the given data files have been added to the table since
 * a starting snapshot. Both position and equality deletes are considered.
 *
 * @param base table metadata to validate
 * @param startingSnapshotId id of the snapshot current at the start of the operation
 * @param dataFilter a data filter
 * @param dataFiles data files to validate have no new row deletes
 */
protected void validateNoNewDeletesForDataFiles(TableMetadata base, Long startingSnapshotId,
                                                Expression dataFilter, Iterable dataFiles) {
  final boolean ignoreEqualityDeletes = false;
  validateNoNewDeletesForDataFiles(base, startingSnapshotId, dataFilter, dataFiles, ignoreEqualityDeletes);
}
/**
 * Validates that no new delete files that must be applied to the given data files have been added to the table since
 * a starting snapshot, with the option to ignore equality deletes during the validation.
 * <p>
 * For example, in the case of rewriting data files, if the added data files have the same sequence number as the
 * replaced data files, equality deletes added at a higher sequence number are still effective against the added
 * data files, so there is no risk of commit conflict between RewriteFiles and RowDelta. In cases like this,
 * validation against equality delete files can be omitted.
 * <p>
 * Returns without checking when the table has no current snapshot or its format version is below 2.
 *
 * @param base table metadata to validate
 * @param startingSnapshotId id of the snapshot current at the start of the operation
 * @param dataFilter a data filter; may be null, in which case the delete index is built without a data filter
 * @param dataFiles data files to validate have no new row deletes
 * @param ignoreEqualityDeletes whether equality deletes should be ignored in validation
 * @throws ValidationException if a conflicting delete file is found for any of the data files
 */
private void validateNoNewDeletesForDataFiles(TableMetadata base, Long startingSnapshotId,
Expression dataFilter, Iterable dataFiles,
boolean ignoreEqualityDeletes) {
// if there is no current table state, no files have been added;
// validation is also skipped for tables with a format version below 2
if (base.currentSnapshot() == null || base.formatVersion() < 2) {
return;
}
// delete manifests written by new "overwrite"/"delete" snapshots since the starting snapshot
Pair, Set> history =
validationHistory(base, startingSnapshotId, VALIDATE_ADDED_DELETE_FILES_OPERATIONS, ManifestContent.DELETES);
List deleteManifests = history.first();
long startingSequenceNumber = startingSequenceNumber(base, startingSnapshotId);
DeleteFileIndex deletes = buildDeleteFileIndex(deleteManifests, startingSequenceNumber, dataFilter);
for (DataFile dataFile : dataFiles) {
// if any delete is found that applies to files written in or before the starting snapshot, fail
DeleteFile[] deleteFiles = deletes.forDataFile(startingSequenceNumber, dataFile);
if (ignoreEqualityDeletes) {
// only position deletes count as conflicts in this mode
ValidationException.check(
Arrays.stream(deleteFiles).noneMatch(deleteFile -> deleteFile.content() == FileContent.POSITION_DELETES),
"Cannot commit, found new position delete for replaced data file: %s", dataFile);
} else {
ValidationException.check(deleteFiles.length == 0,
"Cannot commit, found new delete for replaced data file: %s", dataFile);
}
}
}
/**
 * Validates that no delete files matching a filter have been added to the table since a starting snapshot.
 * <p>
 * Returns without checking when the table has no current snapshot or its format version is below 2.
 *
 * @param base table metadata to validate
 * @param startingSnapshotId id of the snapshot current at the start of the operation
 * @param dataFilter an expression used to find new conflicting delete files
 * @throws ValidationException if the index of new delete files matching the filter is not empty
 */
protected void validateNoNewDeleteFiles(TableMetadata base, Long startingSnapshotId, Expression dataFilter) {
// if there is no current table state, no files have been added;
// validation is also skipped for tables with a format version below 2
if (base.currentSnapshot() == null || base.formatVersion() < 2) {
return;
}
// delete manifests written by new "overwrite"/"delete" snapshots since the starting snapshot
Pair, Set> history =
validationHistory(base, startingSnapshotId, VALIDATE_ADDED_DELETE_FILES_OPERATIONS, ManifestContent.DELETES);
List deleteManifests = history.first();
long startingSequenceNumber = startingSequenceNumber(base, startingSnapshotId);
DeleteFileIndex deletes = buildDeleteFileIndex(deleteManifests, startingSequenceNumber, dataFilter);
// any delete file left in the index is a conflict; the message lists the paths of the indexed delete files
ValidationException.check(deletes.isEmpty(),
"Found new conflicting delete files that can apply to records matching %s: %s",
dataFilter, Iterables.transform(deletes.referencedDeleteFiles(), ContentFile::path));
}
/**
 * Sets the explicit sequence number that every new data file is written with when the new manifest is created.
 *
 * @param sequenceNumber sequence number to assign to the new data files
 */
protected void setNewFilesSequenceNumber(long sequenceNumber) {
  this.newFilesSequenceNumber = Long.valueOf(sequenceNumber);
}
/**
 * Returns the sequence number of the starting snapshot.
 *
 * @param metadata table metadata used to look up the snapshot
 * @param startingSnapshotId id of the starting snapshot, may be null
 * @return the starting snapshot's sequence number, or {@link TableMetadata#INITIAL_SEQUENCE_NUMBER} when the id is
 *         null or the snapshot is not present in the metadata
 */
private long startingSequenceNumber(TableMetadata metadata, Long startingSnapshotId) {
  if (startingSnapshotId == null) {
    return TableMetadata.INITIAL_SEQUENCE_NUMBER;
  }

  // look the snapshot up once and reuse the result
  Snapshot startingSnapshot = metadata.snapshot(startingSnapshotId);
  if (startingSnapshot == null) {
    return TableMetadata.INITIAL_SEQUENCE_NUMBER;
  }

  return startingSnapshot.sequenceNumber();
}
/**
 * Builds an index of the delete files in the given manifests that were added after a sequence number.
 *
 * @param deleteManifests delete manifests to index
 * @param startingSequenceNumber only deletes after this sequence number are indexed
 * @param dataFilter optional data filter; skipped when null
 * @return the delete file index
 */
private DeleteFileIndex buildDeleteFileIndex(List deleteManifests, long startingSequenceNumber,
                                             Expression dataFilter) {
  DeleteFileIndex.Builder indexBuilder = DeleteFileIndex.builderFor(ops.io(), deleteManifests)
      .afterSequenceNumber(startingSequenceNumber)
      .caseSensitive(caseSensitive)
      .specsById(ops.current().specsById());

  if (dataFilter == null) {
    return indexBuilder.build();
  }

  indexBuilder.filterData(dataFilter);
  return indexBuilder.build();
}
/**
 * Validates that none of the required data files has been removed from the table since a starting snapshot.
 * <p>
 * Returns without checking when the table has no current snapshot.
 *
 * @param base table metadata to validate
 * @param startingSnapshotId id of the snapshot current at the start of the operation
 * @param requiredDataFiles paths of the data files that must still exist
 * @param skipDeletes when true, snapshots produced by "delete" operations are not inspected
 * @param conflictDetectionFilter optional data filter applied to the scan; skipped when null
 * @throws ValidationException if a required file was removed, or if the history back to the starting snapshot
 *                             cannot be determined
 * @throws UncheckedIOException if an IOException occurs while scanning manifest entries
 */
@SuppressWarnings("CollectionUndefinedEquality")
protected void validateDataFilesExist(TableMetadata base, Long startingSnapshotId,
CharSequenceSet requiredDataFiles, boolean skipDeletes,
Expression conflictDetectionFilter) {
// if there is no current table state, no files have been removed
if (base.currentSnapshot() == null) {
return;
}
// "delete" snapshots are left out of the history when skipDeletes is set
Set matchingOperations = skipDeletes ?
VALIDATE_DATA_FILES_EXIST_SKIP_DELETE_OPERATIONS :
VALIDATE_DATA_FILES_EXIST_OPERATIONS;
Pair, Set> history =
validationHistory(base, startingSnapshotId, matchingOperations, ManifestContent.DATA);
List manifests = history.first();
Set newSnapshots = history.second();
// keep only non-ADDED entries written by the new snapshots whose path is one of the required files
ManifestGroup matchingDeletesGroup = new ManifestGroup(ops.io(), manifests, ImmutableList.of())
.filterManifestEntries(entry -> entry.status() != ManifestEntry.Status.ADDED &&
newSnapshots.contains(entry.snapshotId()) && requiredDataFiles.contains(entry.file().path()))
.specsById(base.specsById())
.ignoreExisting();
if (conflictDetectionFilter != null) {
matchingDeletesGroup.filterData(conflictDetectionFilter);
}
try (CloseableIterator> deletes = matchingDeletesGroup.entries().iterator()) {
// any remaining entry means a required file was removed; the message lists the paths left in the iterator
if (deletes.hasNext()) {
throw new ValidationException("Cannot commit, missing data files: %s",
Iterators.toString(Iterators.transform(deletes, entry -> entry.file().path().toString())));
}
} catch (IOException e) {
throw new UncheckedIOException("Failed to validate required files exist", e);
}
}
/**
 * Collects the manifests written by snapshots committed between the starting snapshot and the current snapshot.
 * <p>
 * Ancestors of the current snapshot are walked back towards the starting snapshot. For every ancestor whose
 * operation is one of the matching operations, its snapshot ID is recorded and the manifests of the requested
 * content type that carry that same snapshot ID are collected.
 *
 * @param base table metadata to read snapshots from; must have a current snapshot
 * @param startingSnapshotId id of the snapshot current at the start of the operation
 * @param matchingOperations operations of the snapshots to inspect
 * @param content DATA to collect data manifests, anything else to collect delete manifests
 * @return a pair of the collected manifests and the IDs of the matching snapshots
 * @throws ValidationException if the parent of the last ancestor visited is not the starting snapshot
 */
private Pair, Set> validationHistory(TableMetadata base, Long startingSnapshotId,
Set matchingOperations,
ManifestContent content) {
List manifests = Lists.newArrayList();
Set newSnapshots = Sets.newHashSet();
// last ancestor visited, used below to check that the walk reached the starting snapshot
Snapshot lastSnapshot = null;
Iterable snapshots = SnapshotUtil.ancestorsBetween(
base.currentSnapshot().snapshotId(), startingSnapshotId, base::snapshot);
for (Snapshot currentSnapshot : snapshots) {
lastSnapshot = currentSnapshot;
if (matchingOperations.contains(currentSnapshot.operation())) {
newSnapshots.add(currentSnapshot.snapshotId());
if (content == ManifestContent.DATA) {
for (ManifestFile manifest : currentSnapshot.dataManifests()) {
// only manifests that carry this snapshot's ID are collected
if (manifest.snapshotId() == currentSnapshot.snapshotId()) {
manifests.add(manifest);
}
}
} else {
for (ManifestFile manifest : currentSnapshot.deleteManifests()) {
// only manifests that carry this snapshot's ID are collected
if (manifest.snapshotId() == currentSnapshot.snapshotId()) {
manifests.add(manifest);
}
}
}
}
}
// passes when no ancestor was visited, or when the last ancestor's parent is the starting snapshot
ValidationException.check(lastSnapshot == null || Objects.equals(lastSnapshot.parentId(), startingSnapshotId),
"Cannot determine history between starting snapshot %s and the last known ancestor %s",
startingSnapshotId, lastSnapshot != null ? lastSnapshot.snapshotId() : null);
return Pair.of(manifests, newSnapshots);
}
/**
 * Builds the snapshot summary, applying the partition summary limit configured in the current table properties.
 *
 * @return the summary map produced by the summary builder
 */
@Override
protected Map summary() {
  int partitionSummaryLimit = ops.current().propertyAsInt(
      TableProperties.WRITE_PARTITION_SUMMARY_LIMIT, TableProperties.WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT);
  summaryBuilder.setPartitionSummaryLimit(partitionSummaryLimit);
  return summaryBuilder.build();
}
/**
 * Produces the list of manifests for the new snapshot on top of the given table metadata.
 * <p>
 * Existing data and delete manifests are filtered, new manifests are prepared for the added files, the snapshot
 * summary is rebuilt, and the resulting data and delete manifests are passed through their merge managers.
 *
 * @param base table metadata to apply this update to
 * @return data manifests followed by delete manifests for the new snapshot
 */
@Override
public List apply(TableMetadata base) {
Snapshot current = base.currentSnapshot();
// filter any existing manifests
List filtered = filterManager.filterManifests(
base.schema(), current != null ? current.dataManifests() : null);
// smallest sequence number among the filtered data manifests, starting from the table's last sequence number
long minDataSequenceNumber = filtered.stream()
.map(ManifestFile::minSequenceNumber)
.filter(seq -> seq != ManifestWriter.UNASSIGNED_SEQ) // filter out unassigned in rewritten manifests
.reduce(base.lastSequenceNumber(), Math::min);
// must be configured before the delete manifests are filtered below
deleteFilterManager.dropDeleteFilesOlderThan(minDataSequenceNumber);
List filteredDeletes = deleteFilterManager.filterManifests(
base.schema(), current != null ? current.deleteManifests() : null);
// only keep manifests that have live data files or that were written by this commit
Predicate shouldKeep = manifest ->
manifest.hasAddedFiles() || manifest.hasExistingFiles() || manifest.snapshotId() == snapshotId();
Iterable unmergedManifests = Iterables.filter(
Iterables.concat(prepareNewManifests(), filtered), shouldKeep);
Iterable unmergedDeleteManifests = Iterables.filter(
Iterables.concat(prepareDeleteManifests(), filteredDeletes), shouldKeep);
// update the snapshot summary
// the builder is cleared first so that calling apply again does not accumulate the same contributions twice
summaryBuilder.clear();
summaryBuilder.merge(addedFilesSummary);
summaryBuilder.merge(appendedManifestsSummary);
summaryBuilder.merge(filterManager.buildSummary(filtered));
summaryBuilder.merge(deleteFilterManager.buildSummary(filteredDeletes));
// data manifests are listed before delete manifests in the result
List manifests = Lists.newArrayList();
Iterables.addAll(manifests, mergeManager.mergeManifests(unmergedManifests));
Iterables.addAll(manifests, deleteMergeManager.mergeManifests(unmergedDeleteManifests));
return manifests;
}
/**
 * Builds the event describing the snapshot created by this update.
 * <p>
 * Table metadata is refreshed to load the committed snapshot. If it cannot be found, the event carries the invalid
 * sequence number and the locally built summary instead.
 *
 * @return a {@link CreateSnapshotEvent} for this update
 */
@Override
public Object updateEvent() {
  long committedSnapshotId = snapshotId();
  Snapshot committedSnapshot = ops.refresh().snapshot(committedSnapshotId);

  long sequenceNumber;
  Map eventSummary;
  if (committedSnapshot != null) {
    sequenceNumber = committedSnapshot.sequenceNumber();
    eventSummary = committedSnapshot.summary();
  } else {
    // The snapshot just saved may not be present if the latest metadata couldn't be loaded due to eventual
    // consistency problems in refresh.
    LOG.warn("Failed to load committed snapshot: omitting sequence number from notifications");
    sequenceNumber = TableMetadata.INVALID_SEQUENCE_NUMBER;
    eventSummary = summary();
  }

  return new CreateSnapshotEvent(tableName, operation(), committedSnapshotId, sequenceNumber, eventSummary);
}
/**
 * Deletes manifests created for appended files that did not end up in the committed set.
 *
 * @param committed manifests that were committed; empty when the commit did not succeed
 */
private void cleanUncommittedAppends(Set committed) {
  boolean newManifestUnused = cachedNewManifest != null && !committed.contains(cachedNewManifest);
  if (newManifestUnused) {
    deleteFile(cachedNewManifest.path());
    this.cachedNewManifest = null;
  }

  // drop cached delete manifests that were not committed, removing them from the cache as they are deleted
  for (ListIterator cachedDeletes = cachedNewDeleteManifests.listIterator(); cachedDeletes.hasNext(); ) {
    ManifestFile deleteManifest = cachedDeletes.next();
    if (!committed.contains(deleteManifest)) {
      deleteFile(deleteManifest.path());
      cachedDeletes.remove();
    }
  }

  // rewritten manifests are always owned by the table
  for (ManifestFile rewritten : rewrittenAppendManifests) {
    if (!committed.contains(rewritten)) {
      deleteFile(rewritten.path());
    }
  }

  // manifests that are not rewritten are only owned by the table if the commit succeeded,
  // and the commit succeeded if at least one manifest was committed
  if (committed.isEmpty()) {
    return;
  }

  // the table now owns appendManifests; clean up any that are not used
  for (ManifestFile appended : appendManifests) {
    if (!committed.contains(appended)) {
      deleteFile(appended.path());
    }
  }
}
/**
 * Cleans up manifests that are not in the committed set: first through the data and delete merge/filter managers,
 * then for the manifests created for appended files.
 *
 * @param committed manifests that were committed
 */
@Override
protected void cleanUncommitted(Set committed) {
  // data manifests
  this.mergeManager.cleanUncommitted(committed);
  this.filterManager.cleanUncommitted(committed);
  // delete manifests
  this.deleteMergeManager.cleanUncommitted(committed);
  this.deleteFilterManager.cleanUncommitted(committed);
  // manifests written or appended by this producer
  cleanUncommittedAppends(committed);
}
/**
 * Returns the new data manifests of this update, each copied with this update's snapshot ID.
 * <p>
 * The manifest for newly added files (if any) comes first, followed by the appended and the rewritten manifests.
 */
private Iterable prepareNewManifests() {
  Iterable unstamped;
  if (newFiles.isEmpty()) {
    unstamped = Iterables.concat(appendManifests, rewrittenAppendManifests);
  } else {
    ManifestFile addedFilesManifest = newFilesAsManifest();
    unstamped = Iterables.concat(ImmutableList.of(addedFilesManifest), appendManifests, rewrittenAppendManifests);
  }

  return Iterables.transform(
      unstamped,
      manifest -> GenericManifestFile.copyOf(manifest).withSnapshotId(snapshotId()).build());
}
/**
 * Returns the manifest containing all newly added data files, writing it if there is no valid cached copy.
 * <p>
 * A cached manifest is deleted and rewritten when data files were added after it was written.
 *
 * @return the cached or newly written manifest
 * @throws RuntimeIOException if writing or closing the manifest fails with an IOException
 */
private ManifestFile newFilesAsManifest() {
  boolean cacheIsStale = hasNewFiles && cachedNewManifest != null;
  if (cacheIsStale) {
    deleteFile(cachedNewManifest.path());
    cachedNewManifest = null;
  }

  if (cachedNewManifest != null) {
    return cachedNewManifest;
  }

  try {
    ManifestWriter writer = newManifestWriter(dataSpec());
    try {
      if (newFilesSequenceNumber != null) {
        // an explicit sequence number was requested for the new files
        newFiles.forEach(f -> writer.add(f, newFilesSequenceNumber));
      } else {
        writer.addAll(newFiles);
      }
    } finally {
      writer.close();
    }

    // the manifest file is only read from the writer after it has been closed
    this.cachedNewManifest = writer.toManifestFile();
    this.hasNewFiles = false;
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Failed to close manifest writer");
  }

  return cachedNewManifest;
}
/**
 * Returns the manifests for newly added delete files, or an empty list when no delete file was added.
 */
private Iterable prepareDeleteManifests() {
  if (!newDeleteFilesBySpec.isEmpty()) {
    return newDeleteFilesAsManifests();
  }
  return ImmutableList.of();
}
/**
 * Returns the manifests containing all newly added delete files, one per partition spec, writing them if there is
 * no valid cached copy.
 *
 * @return the cached or newly written delete manifests
 * @throws RuntimeIOException if writing or closing a manifest fails with an IOException
 */
private List newDeleteFilesAsManifests() {
  boolean cacheIsStale = hasNewDeleteFiles && !cachedNewDeleteManifests.isEmpty();
  if (cacheIsStale) {
    for (ManifestFile staleManifest : cachedNewDeleteManifests) {
      deleteFile(staleManifest.path());
    }
    // this triggers a rewrite of all delete manifests even if there is only one new delete file
    // if there is a relevant use case in the future, the behavior can be optimized
    cachedNewDeleteManifests.clear();
  }

  if (!cachedNewDeleteManifests.isEmpty()) {
    return cachedNewDeleteManifests;
  }

  newDeleteFilesBySpec.forEach((specId, deleteFiles) -> {
    PartitionSpec spec = ops.current().spec(specId);
    try {
      ManifestWriter writer = newDeleteManifestWriter(spec);
      try {
        writer.addAll(deleteFiles);
      } finally {
        writer.close();
      }
      // the manifest file is only read from the writer after it has been closed
      cachedNewDeleteManifests.add(writer.toManifestFile());
    } catch (IOException e) {
      throw new RuntimeIOException(e, "Failed to close manifest writer");
    }
  });
  this.hasNewDeleteFiles = false;

  return cachedNewDeleteManifests;
}
/**
 * Filter manager for data manifests. File deletion and manifest reading/writing are delegated to the enclosing
 * producer, using its data manifest reader and writer.
 */
private class DataFileFilterManager extends ManifestFilterManager {
  private DataFileFilterManager() {
    super(ops.current().specsById());
  }

  @Override
  protected ManifestReader newManifestReader(ManifestFile manifest) {
    return MergingSnapshotProducer.this.newManifestReader(manifest);
  }

  @Override
  protected ManifestWriter newManifestWriter(PartitionSpec manifestSpec) {
    return MergingSnapshotProducer.this.newManifestWriter(manifestSpec);
  }

  @Override
  protected void deleteFile(String location) {
    MergingSnapshotProducer.this.deleteFile(location);
  }
}
/**
 * Merge manager for data manifests. Snapshot ID, spec lookup, file deletion and manifest reading/writing are
 * delegated to the enclosing producer, using its data manifest reader and writer.
 */
private class DataFileMergeManager extends ManifestMergeManager {
  DataFileMergeManager(long targetSizeBytes, int minCountToMerge, boolean mergeEnabled) {
    super(targetSizeBytes, minCountToMerge, mergeEnabled);
  }

  @Override
  protected long snapshotId() {
    return MergingSnapshotProducer.this.snapshotId();
  }

  @Override
  protected PartitionSpec spec(int specId) {
    // specs are resolved against the current table metadata
    return ops.current().spec(specId);
  }

  @Override
  protected ManifestReader newManifestReader(ManifestFile manifest) {
    return MergingSnapshotProducer.this.newManifestReader(manifest);
  }

  @Override
  protected ManifestWriter newManifestWriter(PartitionSpec manifestSpec) {
    return MergingSnapshotProducer.this.newManifestWriter(manifestSpec);
  }

  @Override
  protected void deleteFile(String location) {
    MergingSnapshotProducer.this.deleteFile(location);
  }
}
/**
 * Filter manager for delete manifests. File deletion and manifest reading/writing are delegated to the enclosing
 * producer, using its delete manifest reader and writer.
 */
private class DeleteFileFilterManager extends ManifestFilterManager {
  private DeleteFileFilterManager() {
    super(ops.current().specsById());
  }

  @Override
  protected ManifestReader newManifestReader(ManifestFile manifest) {
    return MergingSnapshotProducer.this.newDeleteManifestReader(manifest);
  }

  @Override
  protected ManifestWriter newManifestWriter(PartitionSpec manifestSpec) {
    return MergingSnapshotProducer.this.newDeleteManifestWriter(manifestSpec);
  }

  @Override
  protected void deleteFile(String location) {
    MergingSnapshotProducer.this.deleteFile(location);
  }
}
/**
 * Merge manager for delete manifests. Snapshot ID, spec lookup, file deletion and manifest reading/writing are
 * delegated to the enclosing producer, using its delete manifest reader and writer.
 */
private class DeleteFileMergeManager extends ManifestMergeManager {
  DeleteFileMergeManager(long targetSizeBytes, int minCountToMerge, boolean mergeEnabled) {
    super(targetSizeBytes, minCountToMerge, mergeEnabled);
  }

  @Override
  protected long snapshotId() {
    return MergingSnapshotProducer.this.snapshotId();
  }

  @Override
  protected PartitionSpec spec(int specId) {
    // specs are resolved against the current table metadata
    return ops.current().spec(specId);
  }

  @Override
  protected ManifestReader newManifestReader(ManifestFile manifest) {
    return MergingSnapshotProducer.this.newDeleteManifestReader(manifest);
  }

  @Override
  protected ManifestWriter newManifestWriter(PartitionSpec manifestSpec) {
    return MergingSnapshotProducer.this.newDeleteManifestWriter(manifestSpec);
  }

  @Override
  protected void deleteFile(String location) {
    MergingSnapshotProducer.this.deleteFile(location);
  }
}
}