All downloads are free. The search and download functionality uses the official Maven repository.

org.dinky.shaded.paimon.operation.SnapshotDeletion Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.operation;

import org.dinky.shaded.paimon.Snapshot;
import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.index.IndexFileHandler;
import org.dinky.shaded.paimon.manifest.FileKind;
import org.dinky.shaded.paimon.manifest.ManifestEntry;
import org.dinky.shaded.paimon.manifest.ManifestFile;
import org.dinky.shaded.paimon.manifest.ManifestList;
import org.dinky.shaded.paimon.utils.FileStorePathFactory;
import org.dinky.shaded.paimon.utils.Pair;
import org.dinky.shaded.paimon.utils.TagManager;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;

/** Delete snapshot files. */
public class SnapshotDeletion extends FileDeletionBase {

    private static final Logger LOG = LoggerFactory.getLogger(SnapshotDeletion.class);

    /** Used to record which tag is cached in tagged snapshots list. */
    private int cachedTagIndex = -1;

    /** Used to cache data files used by current tag. */
    private final Map>> cachedTagDataFiles = new HashMap<>();

    public SnapshotDeletion(
            FileIO fileIO,
            FileStorePathFactory pathFactory,
            ManifestFile manifestFile,
            ManifestList manifestList,
            IndexFileHandler indexFileHandler) {
        super(fileIO, pathFactory, manifestFile, manifestList, indexFileHandler);
    }

    @Override
    public void cleanUnusedDataFiles(Snapshot snapshot, Predicate skipper) {
        // try read manifests
        List manifestFileNames =
                readManifestFileNames(tryReadManifestList(snapshot.deltaManifestList()));
        List manifestEntries = new ArrayList<>();
        // data file path -> (original manifest entry, extra file paths)
        Map>> dataFileToDelete = new HashMap<>();
        for (String manifest : manifestFileNames) {
            try {
                manifestEntries = manifestFile.read(manifest);
            } catch (Exception e) {
                // cancel deletion if any exception occurs
                LOG.warn("Failed to read some manifest files. Cancel deletion.", e);
                return;
            }

            getDataFileToDelete(dataFileToDelete, manifestEntries);
        }

        doCleanUnusedDataFile(dataFileToDelete, skipper);
    }

    @Override
    public void cleanUnusedManifests(Snapshot snapshot, Set skippingSet) {
        cleanUnusedManifests(snapshot, skippingSet, true);
    }

    private void getDataFileToDelete(
            Map>> dataFileToDelete,
            List dataFileEntries) {
        // we cannot delete a data file directly when we meet a DELETE entry, because that
        // file might be upgraded
        for (ManifestEntry entry : dataFileEntries) {
            Path bucketPath = pathFactory.bucketPath(entry.partition(), entry.bucket());
            Path dataFilePath = new Path(bucketPath, entry.file().fileName());
            switch (entry.kind()) {
                case ADD:
                    dataFileToDelete.remove(dataFilePath);
                    break;
                case DELETE:
                    List extraFiles = new ArrayList<>(entry.file().extraFiles().size());
                    for (String file : entry.file().extraFiles()) {
                        extraFiles.add(new Path(bucketPath, file));
                    }
                    dataFileToDelete.put(dataFilePath, Pair.of(entry, extraFiles));
                    break;
                default:
                    throw new UnsupportedOperationException(
                            "Unknown value kind " + entry.kind().name());
            }
        }
    }

    private void doCleanUnusedDataFile(
            Map>> dataFileToDelete,
            Predicate skipper) {
        List actualDataFileToDelete = new ArrayList<>();
        dataFileToDelete.forEach(
                (path, pair) -> {
                    ManifestEntry entry = pair.getLeft();
                    // check whether we should skip the data file
                    if (!skipper.test(entry)) {
                        // delete data files
                        actualDataFileToDelete.add(path);
                        actualDataFileToDelete.addAll(pair.getRight());

                        recordDeletionBuckets(entry);
                    }
                });
        deleteFiles(actualDataFileToDelete, fileIO::deleteQuietly);
    }

    @VisibleForTesting
    void cleanUnusedDataFile(List dataFileLog) {
        Map>> dataFileToDelete = new HashMap<>();
        getDataFileToDelete(dataFileToDelete, dataFileLog);
        doCleanUnusedDataFile(dataFileToDelete, f -> false);
    }

    /**
     * Delete added file in the manifest list files. Added files marked as "ADD" in manifests.
     *
     * @param manifestListName name of manifest list
     */
    public void deleteAddedDataFiles(String manifestListName) {
        List manifestFileNames =
                readManifestFileNames(tryReadManifestList(manifestListName));
        for (String file : manifestFileNames) {
            try {
                List manifestEntries = manifestFile.read(file);
                deleteAddedDataFiles(manifestEntries);
            } catch (Exception e) {
                // We want to delete the data file, so just ignore the unavailable files
                LOG.info("Failed to read manifest " + file + ". Ignore it.", e);
            }
        }
    }

    private void deleteAddedDataFiles(List manifestEntries) {
        List dataFileToDelete = new ArrayList<>();
        for (ManifestEntry entry : manifestEntries) {
            if (entry.kind() == FileKind.ADD) {
                dataFileToDelete.add(
                        new Path(
                                pathFactory.bucketPath(entry.partition(), entry.bucket()),
                                entry.file().fileName()));
                recordDeletionBuckets(entry);
            }
        }
        deleteFiles(dataFileToDelete, fileIO::deleteQuietly);
    }

    public Predicate dataFileSkipper(
            List taggedSnapshots, long expiringSnapshotId) throws Exception {
        int index = TagManager.findPreviousTag(taggedSnapshots, expiringSnapshotId);
        // refresh tag data files
        if (index >= 0 && cachedTagIndex != index) {
            cachedTagIndex = index;
            cachedTagDataFiles.clear();
            addMergedDataFiles(cachedTagDataFiles, taggedSnapshots.get(index));
        }

        return entry -> index >= 0 && containsDataFile(cachedTagDataFiles, entry);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy