All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dinky.shaded.paimon.operation.FileDeletionBase Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.operation;

import org.dinky.shaded.paimon.Snapshot;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.index.IndexFileHandler;
import org.dinky.shaded.paimon.index.IndexFileMeta;
import org.dinky.shaded.paimon.manifest.IndexManifestEntry;
import org.dinky.shaded.paimon.manifest.ManifestEntry;
import org.dinky.shaded.paimon.manifest.ManifestFile;
import org.dinky.shaded.paimon.manifest.ManifestFileMeta;
import org.dinky.shaded.paimon.manifest.ManifestList;
import org.dinky.shaded.paimon.utils.FileStorePathFactory;
import org.dinky.shaded.paimon.utils.FileUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.stream.Collectors;

/**
 * Base class for file deletion including methods for clean data files, manifest files and empty
 * data directories.
 */
public abstract class FileDeletionBase {

    private static final Logger LOG = LoggerFactory.getLogger(FileDeletionBase.class);

    protected final FileIO fileIO;
    protected final FileStorePathFactory pathFactory;
    protected final ManifestFile manifestFile;
    protected final ManifestList manifestList;
    protected final IndexFileHandler indexFileHandler;
    protected final Map> deletionBuckets;
    protected final Executor ioExecutor;

    public FileDeletionBase(
            FileIO fileIO,
            FileStorePathFactory pathFactory,
            ManifestFile manifestFile,
            ManifestList manifestList,
            IndexFileHandler indexFileHandler) {
        this.fileIO = fileIO;
        this.pathFactory = pathFactory;
        this.manifestFile = manifestFile;
        this.manifestList = manifestList;
        this.indexFileHandler = indexFileHandler;

        this.deletionBuckets = new HashMap<>();
        this.ioExecutor = FileUtils.COMMON_IO_FORK_JOIN_POOL;
    }

    /**
     * Clean data files that will not be used anymore in the snapshot.
     *
     * @param snapshot {@link Snapshot} that will be cleaned
     * @param skipper if the test result of a data file is true, it will be skipped when deleting;
     *     else it will be deleted
     */
    public abstract void cleanUnusedDataFiles(Snapshot snapshot, Predicate skipper);

    /**
     * Clean metadata files that will not be used anymore of a snapshot, including data manifests,
     * index manifests and manifest lists.
     *
     * @param snapshot {@link Snapshot} that will be cleaned
     * @param skippingSet manifests that should not be deleted
     */
    public abstract void cleanUnusedManifests(Snapshot snapshot, Set skippingSet);

    /** Try to delete data directories that may be empty after data file deletion. */
    public void cleanDataDirectories() {
        if (deletionBuckets.isEmpty()) {
            return;
        }

        // All directory paths are deduplicated and sorted by hierarchy level
        Map> deduplicate = new HashMap<>();
        for (Map.Entry> entry : deletionBuckets.entrySet()) {
            List toDeleteEmptyDirectory = new ArrayList<>();
            // try to delete bucket directories
            for (Integer bucket : entry.getValue()) {
                toDeleteEmptyDirectory.add(pathFactory.bucketPath(entry.getKey(), bucket));
            }
            deleteFiles(toDeleteEmptyDirectory, this::tryDeleteEmptyDirectory);

            List hierarchicalPaths = pathFactory.getHierarchicalPartitionPath(entry.getKey());
            int hierarchies = hierarchicalPaths.size();
            if (hierarchies == 0) {
                continue;
            }

            if (tryDeleteEmptyDirectory(hierarchicalPaths.get(hierarchies - 1))) {
                // deduplicate high level partition directories
                for (int hierarchy = 0; hierarchy < hierarchies - 1; hierarchy++) {
                    Path path = hierarchicalPaths.get(hierarchy);
                    deduplicate.computeIfAbsent(hierarchy, i -> new HashSet<>()).add(path);
                }
            }
        }

        // from deepest to shallowest
        for (int hierarchy = deduplicate.size() - 1; hierarchy >= 0; hierarchy--) {
            deduplicate.get(hierarchy).forEach(this::tryDeleteEmptyDirectory);
        }

        deletionBuckets.clear();
    }

    protected void recordDeletionBuckets(ManifestEntry entry) {
        deletionBuckets
                .computeIfAbsent(entry.partition(), p -> new HashSet<>())
                .add(entry.bucket());
    }

    protected void cleanUnusedManifests(
            Snapshot snapshot, Set skippingSet, boolean deleteChangelog) {
        // clean base and delta manifests
        List toDeleteManifests = new ArrayList<>();
        List toExpireManifests = new ArrayList<>();
        toExpireManifests.addAll(tryReadManifestList(snapshot.baseManifestList()));
        toExpireManifests.addAll(tryReadManifestList(snapshot.deltaManifestList()));
        for (ManifestFileMeta manifest : toExpireManifests) {
            String fileName = manifest.fileName();
            if (!skippingSet.contains(fileName)) {
                toDeleteManifests.add(fileName);
                // to avoid other snapshots trying to delete again
                skippingSet.add(fileName);
            }
        }
        deleteFiles(toDeleteManifests, manifestFile::delete);

        toDeleteManifests.clear();
        if (!skippingSet.contains(snapshot.baseManifestList())) {
            toDeleteManifests.add(snapshot.baseManifestList());
        }
        if (!skippingSet.contains(snapshot.deltaManifestList())) {
            toDeleteManifests.add(snapshot.deltaManifestList());
        }
        deleteFiles(toDeleteManifests, manifestList::delete);

        // clean changelog manifests
        if (deleteChangelog && snapshot.changelogManifestList() != null) {
            deleteFiles(
                    tryReadManifestList(snapshot.changelogManifestList()),
                    manifest -> manifestFile.delete(manifest.fileName()));
            manifestList.delete(snapshot.changelogManifestList());
        }

        // clean index manifests
        String indexManifest = snapshot.indexManifest();
        // check exists, it may have been deleted by other snapshots
        if (indexManifest != null && indexFileHandler.existsManifest(indexManifest)) {
            List indexManifestEntries =
                    indexFileHandler.readManifest(indexManifest);
            indexManifestEntries.removeIf(
                    entry -> skippingSet.contains(entry.indexFile().fileName()));
            deleteFiles(indexManifestEntries, indexFileHandler::deleteIndexFile);

            if (!skippingSet.contains(indexManifest)) {
                indexFileHandler.deleteManifest(indexManifest);
            }
        }
    }

    /**
     * It is possible that a job was killed during expiration and some manifest files have been
     * deleted, so if the clean methods need to get manifests of a snapshot to be cleaned, we should
     * try to read manifests and return empty list if failed instead of calling {@link
     * Snapshot#dataManifests} directly.
     */
    protected List tryReadManifestList(String manifestListName) {
        try {
            return manifestList.read(manifestListName);
        } catch (Exception e) {
            LOG.warn("Failed to read manifest list file " + manifestListName, e);
            return Collections.emptyList();
        }
    }

    protected List tryReadDataManifests(Snapshot snapshot) {
        List manifestFileMetas = tryReadManifestList(snapshot.baseManifestList());
        manifestFileMetas.addAll(tryReadManifestList(snapshot.deltaManifestList()));
        return readManifestFileNames(manifestFileMetas);
    }

    protected List readManifestFileNames(List manifestFileMetas) {
        return manifestFileMetas.stream()
                .map(ManifestFileMeta::fileName)
                .collect(Collectors.toCollection(LinkedList::new));
    }

    /**
     * NOTE: This method is used for building data file skipping set. If failed to read some
     * manifests, it will throw exception which callers must handle.
     */
    protected void addMergedDataFiles(
            Map>> dataFiles, Snapshot snapshot)
            throws IOException {
        for (ManifestEntry entry : readMergedDataFiles(snapshot)) {
            dataFiles
                    .computeIfAbsent(entry.partition(), p -> new HashMap<>())
                    .computeIfAbsent(entry.bucket(), b -> new HashSet<>())
                    .add(entry.file().fileName());
        }
    }

    protected Collection readMergedDataFiles(Snapshot snapshot) throws IOException {
        // read data manifests
        List files = tryReadDataManifests(snapshot);

        // read and merge manifest entries
        Map map = new HashMap<>();
        for (String manifest : files) {
            List entries;
            entries = manifestFile.readWithIOException(manifest);
            ManifestEntry.mergeEntries(entries, map);
        }

        return map.values();
    }

    protected boolean containsDataFile(
            Map>> dataFiles, ManifestEntry testee) {
        Map> buckets = dataFiles.get(testee.partition());
        if (buckets != null) {
            Set fileNames = buckets.get(testee.bucket());
            if (fileNames != null) {
                return fileNames.contains(testee.file().fileName());
            }
        }
        return false;
    }

    /** Changelogs were not checked. Let the subclass determine whether to delete them. */
    public Set manifestSkippingSet(Snapshot skippingSnapshot) {
        return manifestSkippingSet(Collections.singletonList(skippingSnapshot));
    }

    public Set manifestSkippingSet(List skippingSnapshots) {
        Set skippingSet = new HashSet<>();

        for (Snapshot skippingSnapshot : skippingSnapshots) {
            // data manifests
            skippingSet.add(skippingSnapshot.baseManifestList());
            skippingSet.add(skippingSnapshot.deltaManifestList());
            skippingSnapshot.dataManifests(manifestList).stream()
                    .map(ManifestFileMeta::fileName)
                    .forEach(skippingSet::add);

            // index manifests
            String indexManifest = skippingSnapshot.indexManifest();
            if (indexManifest != null) {
                skippingSet.add(indexManifest);
                indexFileHandler.readManifest(indexManifest).stream()
                        .map(IndexManifestEntry::indexFile)
                        .map(IndexFileMeta::fileName)
                        .forEach(skippingSet::add);
            }
        }

        return skippingSet;
    }

    private boolean tryDeleteEmptyDirectory(Path path) {
        try {
            fileIO.delete(path, false);
            return true;
        } catch (IOException e) {
            LOG.debug("Failed to delete directory '{}'. Check whether it is empty.", path);
            return false;
        }
    }

    protected  void deleteFiles(Collection files, Consumer deletion) {
        if (files.isEmpty()) {
            return;
        }

        List> deletionFutures = new ArrayList<>(files.size());
        for (T file : files) {
            deletionFutures.add(
                    CompletableFuture.runAsync(() -> deletion.accept(file), ioExecutor));
        }

        try {
            CompletableFuture.allOf(deletionFutures.toArray(new CompletableFuture[0])).get();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy