
// org.dinky.shaded.paimon.operation.SnapshotDeletion (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.operation;
import org.dinky.shaded.paimon.Snapshot;
import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.index.IndexFileHandler;
import org.dinky.shaded.paimon.manifest.FileKind;
import org.dinky.shaded.paimon.manifest.ManifestEntry;
import org.dinky.shaded.paimon.manifest.ManifestFile;
import org.dinky.shaded.paimon.manifest.ManifestList;
import org.dinky.shaded.paimon.utils.FileStorePathFactory;
import org.dinky.shaded.paimon.utils.Pair;
import org.dinky.shaded.paimon.utils.TagManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
/** Delete snapshot files. */
public class SnapshotDeletion extends FileDeletionBase {
private static final Logger LOG = LoggerFactory.getLogger(SnapshotDeletion.class);
/** Used to record which tag is cached in tagged snapshots list. */
private int cachedTagIndex = -1;
/** Used to cache data files used by current tag. */
private final Map>> cachedTagDataFiles = new HashMap<>();
public SnapshotDeletion(
FileIO fileIO,
FileStorePathFactory pathFactory,
ManifestFile manifestFile,
ManifestList manifestList,
IndexFileHandler indexFileHandler) {
super(fileIO, pathFactory, manifestFile, manifestList, indexFileHandler);
}
@Override
public void cleanUnusedDataFiles(Snapshot snapshot, Predicate skipper) {
// try read manifests
List manifestFileNames =
readManifestFileNames(tryReadManifestList(snapshot.deltaManifestList()));
List manifestEntries = new ArrayList<>();
// data file path -> (original manifest entry, extra file paths)
Map>> dataFileToDelete = new HashMap<>();
for (String manifest : manifestFileNames) {
try {
manifestEntries = manifestFile.read(manifest);
} catch (Exception e) {
// cancel deletion if any exception occurs
LOG.warn("Failed to read some manifest files. Cancel deletion.", e);
return;
}
getDataFileToDelete(dataFileToDelete, manifestEntries);
}
doCleanUnusedDataFile(dataFileToDelete, skipper);
}
@Override
public void cleanUnusedManifests(Snapshot snapshot, Set skippingSet) {
cleanUnusedManifests(snapshot, skippingSet, true);
}
private void getDataFileToDelete(
Map>> dataFileToDelete,
List dataFileEntries) {
// we cannot delete a data file directly when we meet a DELETE entry, because that
// file might be upgraded
for (ManifestEntry entry : dataFileEntries) {
Path bucketPath = pathFactory.bucketPath(entry.partition(), entry.bucket());
Path dataFilePath = new Path(bucketPath, entry.file().fileName());
switch (entry.kind()) {
case ADD:
dataFileToDelete.remove(dataFilePath);
break;
case DELETE:
List extraFiles = new ArrayList<>(entry.file().extraFiles().size());
for (String file : entry.file().extraFiles()) {
extraFiles.add(new Path(bucketPath, file));
}
dataFileToDelete.put(dataFilePath, Pair.of(entry, extraFiles));
break;
default:
throw new UnsupportedOperationException(
"Unknown value kind " + entry.kind().name());
}
}
}
private void doCleanUnusedDataFile(
Map>> dataFileToDelete,
Predicate skipper) {
List actualDataFileToDelete = new ArrayList<>();
dataFileToDelete.forEach(
(path, pair) -> {
ManifestEntry entry = pair.getLeft();
// check whether we should skip the data file
if (!skipper.test(entry)) {
// delete data files
actualDataFileToDelete.add(path);
actualDataFileToDelete.addAll(pair.getRight());
recordDeletionBuckets(entry);
}
});
deleteFiles(actualDataFileToDelete, fileIO::deleteQuietly);
}
@VisibleForTesting
void cleanUnusedDataFile(List dataFileLog) {
Map>> dataFileToDelete = new HashMap<>();
getDataFileToDelete(dataFileToDelete, dataFileLog);
doCleanUnusedDataFile(dataFileToDelete, f -> false);
}
/**
* Delete added file in the manifest list files. Added files marked as "ADD" in manifests.
*
* @param manifestListName name of manifest list
*/
public void deleteAddedDataFiles(String manifestListName) {
List manifestFileNames =
readManifestFileNames(tryReadManifestList(manifestListName));
for (String file : manifestFileNames) {
try {
List manifestEntries = manifestFile.read(file);
deleteAddedDataFiles(manifestEntries);
} catch (Exception e) {
// We want to delete the data file, so just ignore the unavailable files
LOG.info("Failed to read manifest " + file + ". Ignore it.", e);
}
}
}
private void deleteAddedDataFiles(List manifestEntries) {
List dataFileToDelete = new ArrayList<>();
for (ManifestEntry entry : manifestEntries) {
if (entry.kind() == FileKind.ADD) {
dataFileToDelete.add(
new Path(
pathFactory.bucketPath(entry.partition(), entry.bucket()),
entry.file().fileName()));
recordDeletionBuckets(entry);
}
}
deleteFiles(dataFileToDelete, fileIO::deleteQuietly);
}
public Predicate dataFileSkipper(
List taggedSnapshots, long expiringSnapshotId) throws Exception {
int index = TagManager.findPreviousTag(taggedSnapshots, expiringSnapshotId);
// refresh tag data files
if (index >= 0 && cachedTagIndex != index) {
cachedTagIndex = index;
cachedTagDataFiles.clear();
addMergedDataFiles(cachedTagDataFiles, taggedSnapshots.get(index));
}
return entry -> index >= 0 && containsDataFile(cachedTagDataFiles, entry);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy