All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dinky.shaded.paimon.operation.FileStoreExpireImpl Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.operation;

import org.dinky.shaded.paimon.CoreOptions;
import org.dinky.shaded.paimon.Snapshot;
import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.consumer.ConsumerManager;
import org.dinky.shaded.paimon.manifest.ManifestEntry;
import org.dinky.shaded.paimon.utils.Preconditions;
import org.dinky.shaded.paimon.utils.SnapshotManager;
import org.dinky.shaded.paimon.utils.TagManager;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.OptionalLong;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.function.Predicate;

/**
 * Default implementation of {@link FileStoreExpire}. It retains a certain number or period of
 * latest snapshots.
 *
 * 

NOTE: This implementation will keep at least one snapshot so that users will not accidentally * clear all snapshots. * *

TODO: add concurrent tests. */ public class FileStoreExpireImpl implements FileStoreExpire { private static final Logger LOG = LoggerFactory.getLogger(FileStoreExpireImpl.class); private final int numRetainedMin; // snapshots exceeding any constraint will be expired private final int numRetainedMax; private final long millisRetained; private final SnapshotManager snapshotManager; private final ConsumerManager consumerManager; private final SnapshotDeletion snapshotDeletion; private final TagManager tagManager; private final int expireLimit; private Lock lock; public FileStoreExpireImpl( int numRetainedMin, int numRetainedMax, long millisRetained, SnapshotManager snapshotManager, SnapshotDeletion snapshotDeletion, TagManager tagManager, int expireLimit) { Preconditions.checkArgument( numRetainedMin >= 1, "The minimum number of completed snapshots to retain should be >= 1."); Preconditions.checkArgument( numRetainedMax >= numRetainedMin, "The maximum number of snapshots to retain should be >= the minimum number."); Preconditions.checkArgument( expireLimit > 1, String.format("The %s should be > 1.", CoreOptions.SNAPSHOT_EXPIRE_LIMIT.key())); this.numRetainedMin = numRetainedMin; this.numRetainedMax = numRetainedMax; this.millisRetained = millisRetained; this.snapshotManager = snapshotManager; this.consumerManager = new ConsumerManager(snapshotManager.fileIO(), snapshotManager.tablePath()); this.snapshotDeletion = snapshotDeletion; this.tagManager = tagManager; this.expireLimit = expireLimit; } @Override public FileStoreExpire withLock(Lock lock) { this.lock = lock; return this; } @Override public void expire() { Long latestSnapshotId = snapshotManager.latestSnapshotId(); if (latestSnapshotId == null) { // no snapshot, nothing to expire return; } long currentMillis = System.currentTimeMillis(); Long earliest = snapshotManager.earliestSnapshotId(); if (earliest == null) { return; } // locate the first snapshot between the numRetainedMax th and (numRetainedMin+1) th latest // snapshots to be retained. This snapshot needs to be preserved because it // doesn't fulfill the time threshold condition for expiration. for (long id = Math.max(latestSnapshotId - numRetainedMax + 1, earliest); id <= latestSnapshotId - numRetainedMin; id++) { if (snapshotManager.snapshotExists(id) && currentMillis - snapshotManager.snapshot(id).timeMillis() <= millisRetained) { // within time threshold, can assume that all snapshots after it are also within // the threshold expireUntil(earliest, id); return; } } // by default, expire until there are only numRetainedMin snapshots left expireUntil(earliest, latestSnapshotId - numRetainedMin + 1); } @VisibleForTesting public void expireUntil(long earliestId, long endExclusiveId) { OptionalLong minNextSnapshot = consumerManager.minNextSnapshot(); if (minNextSnapshot.isPresent()) { endExclusiveId = Math.min(minNextSnapshot.getAsLong(), endExclusiveId); } if (endExclusiveId <= earliestId) { // No expire happens: // write the hint file in order to see the earliest snapshot directly next time // should avoid duplicate writes when the file exists if (snapshotManager.readHint(SnapshotManager.EARLIEST) == null) { writeEarliestHint(endExclusiveId); } // fast exit return; } // find first snapshot to expire long beginInclusiveId = earliestId; for (long id = endExclusiveId - 1; id >= earliestId; id--) { if (!snapshotManager.snapshotExists(id)) { // only latest snapshots are retained, as we cannot find this snapshot, we can // assume that all snapshots preceding it have been removed beginInclusiveId = id + 1; break; } } endExclusiveId = Math.min(beginInclusiveId + expireLimit, endExclusiveId); if (LOG.isDebugEnabled()) { LOG.debug( "Snapshot expire range is [" + beginInclusiveId + ", " + endExclusiveId + ")"); } List taggedSnapshots = tagManager.taggedSnapshots(); // delete merge tree files // deleted merge tree files in a snapshot are not used by the next snapshot, so the range of // id should be (beginInclusiveId, endExclusiveId] for (long id = beginInclusiveId + 1; id <= endExclusiveId; id++) { if (LOG.isDebugEnabled()) { LOG.debug("Ready to delete merge tree files not used by snapshot #" + id); } Snapshot snapshot = snapshotManager.snapshot(id); // expire merge tree files and collect changed buckets Predicate skipper; try { skipper = snapshotDeletion.dataFileSkipper(taggedSnapshots, id); } catch (Exception e) { LOG.info( String.format( "Skip cleaning data files of snapshot '%s' due to failed to build skipping set.", id), e); continue; } snapshotDeletion.cleanUnusedDataFiles(snapshot, skipper); } // delete changelog files for (long id = beginInclusiveId; id < endExclusiveId; id++) { if (LOG.isDebugEnabled()) { LOG.debug("Ready to delete changelog files from snapshot #" + id); } Snapshot snapshot = snapshotManager.snapshot(id); if (snapshot.changelogManifestList() != null) { snapshotDeletion.deleteAddedDataFiles(snapshot.changelogManifestList()); } } // data files and changelog files in bucket directories has been deleted // then delete changed bucket directories if they are empty snapshotDeletion.cleanDataDirectories(); // delete manifests and indexFiles List skippingSnapshots = TagManager.findOverlappedSnapshots( taggedSnapshots, beginInclusiveId, endExclusiveId); skippingSnapshots.add(snapshotManager.snapshot(endExclusiveId)); Set skippingSet = snapshotDeletion.manifestSkippingSet(skippingSnapshots); for (long id = beginInclusiveId; id < endExclusiveId; id++) { if (LOG.isDebugEnabled()) { LOG.debug("Ready to delete manifests in snapshot #" + id); } Snapshot snapshot = snapshotManager.snapshot(id); snapshotDeletion.cleanUnusedManifests(snapshot, skippingSet); // delete snapshot last snapshotManager.fileIO().deleteQuietly(snapshotManager.snapshotPath(id)); } writeEarliestHint(endExclusiveId); } private void writeEarliestHint(long earliest) { // update earliest hint file Callable callable = () -> { snapshotManager.commitEarliestHint(earliest); return null; }; try { if (lock != null) { lock.runWithLock(callable); } else { callable.call(); } } catch (Exception e) { throw new RuntimeException(e); } } @VisibleForTesting SnapshotDeletion snapshotDeletion() { return snapshotDeletion; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy