All Downloads are FREE. Search and download functionalities are using the official Maven repository.

software.amazon.kinesis.leases.LeaseCleanupManager Maven / Gradle / Ivy

/*
 * Copyright 2020 Amazon.com, Inc. or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package software.amazon.kinesis.leases;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.Value;
import lombok.experimental.Accessors;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.services.kinesis.model.ResourceNotFoundException;
import software.amazon.awssdk.utils.CollectionUtils;
import software.amazon.kinesis.common.StreamIdentifier;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.LeasePendingDeletion;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.retrieval.AWSExceptionManager;
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;

import java.util.HashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;

/**
 * Helper class that cleans up expired/closed shard leases. Leases are queued via
 * {@link #enqueueForDeletion(LeasePendingDeletion)} and processed asynchronously by a task that is scheduled on the
 * supplied {@link ScheduledExecutorService} every {@code leaseCleanupIntervalMillis} milliseconds once
 * {@link #start()} has been called.
 */
@Accessors(fluent=true)
@Slf4j
@RequiredArgsConstructor
@EqualsAndHashCode
public class LeaseCleanupManager {
    @NonNull
    private final LeaseCoordinator leaseCoordinator;
    @NonNull
    private final MetricsFactory metricsFactory;
    @NonNull
    private final ScheduledExecutorService deletionThreadPool;
    private final boolean cleanupLeasesUponShardCompletion;
    private final long leaseCleanupIntervalMillis;
    private final long completedLeaseCleanupIntervalMillis;
    private final long garbageLeaseCleanupIntervalMillis;
    // Measure the time since the last successful completed-lease / garbage-lease cleanup respectively.
    private final Stopwatch completedLeaseStopwatch = Stopwatch.createUnstarted();
    private final Stopwatch garbageLeaseStopwatch = Stopwatch.createUnstarted();

    // Leases waiting to be cleaned up; entries that could not be cleaned up are put back after every scan.
    private final Queue<LeasePendingDeletion> deletionQueue = new ConcurrentLinkedQueue<>();

    private static final long INITIAL_DELAY = 0L;

    @Getter
    private volatile boolean isRunning = false;

    /**
     * Starts the lease cleanup thread, which is scheduled periodically as specified by
     * {@link LeaseCleanupManager#leaseCleanupIntervalMillis}. Calling this method while the manager is already
     * running only logs a message.
     */
    public void start() {
        if (!isRunning) {
            log.info("Starting lease cleanup thread.");
            completedLeaseStopwatch.reset().start();
            garbageLeaseStopwatch.reset().start();
            deletionThreadPool.scheduleAtFixedRate(new LeaseCleanupThread(), INITIAL_DELAY, leaseCleanupIntervalMillis,
                    TimeUnit.MILLISECONDS);
            isRunning = true;
        } else {
            log.info("Lease cleanup thread already running, no need to start.");
        }
    }

    /**
     * Enqueues a lease for deletion without check for duplicate entry. Use {@link #isEnqueuedForDeletion}
     * for checking the duplicate entries. If the supplied entry carries no lease, nothing is enqueued and a
     * warning is logged.
     *
     * @param leasePendingDeletion the lease (plus shard/stream information) to clean up later
     */
    public void enqueueForDeletion(LeasePendingDeletion leasePendingDeletion) {
        final Lease lease = leasePendingDeletion.lease();
        if (lease == null) {
            // The lease is null here, so it must not be dereferenced; identify the entry by shard and stream.
            log.warn("Cannot enqueue {} for {} for deferred deletion - instance doesn't hold the lease for that shard.",
                    leasePendingDeletion.shardInfo(), leasePendingDeletion.streamIdentifier());
        } else {
            log.debug("Enqueuing lease {} for deferred deletion.", lease.leaseKey());
            if (!deletionQueue.add(leasePendingDeletion)) {
                log.warn("Unable to enqueue lease {} for deletion.", lease.leaseKey());
            }
        }
    }

    /**
     * Check if lease was already enqueued for deletion.
     * //TODO: Optimize verifying duplicate entries https://sim.amazon.com/issues/KinesisLTR-597.
     *
     * @param leasePendingDeletion the entry to look for in the deletion queue
     * @return true if enqueued for deletion; false otherwise.
     */
    public boolean isEnqueuedForDeletion(LeasePendingDeletion leasePendingDeletion) {
        return deletionQueue.contains(leasePendingDeletion);
    }

    /**
     * Returns how many leases are currently waiting in the queue pending deletion.
     * @return number of leases pending deletion.
     */
    private int leasesPendingDeletion() {
        return deletionQueue.size();
    }

    /**
     * @return true once at least {@code completedLeaseCleanupIntervalMillis} have elapsed on the completed-lease
     *         stopwatch.
     */
    private boolean timeToCheckForCompletedShard() {
        return completedLeaseStopwatch.elapsed(TimeUnit.MILLISECONDS) >= completedLeaseCleanupIntervalMillis;
    }

    /**
     * @return true once at least {@code garbageLeaseCleanupIntervalMillis} have elapsed on the garbage-lease
     *         stopwatch.
     */
    private boolean timeToCheckForGarbageShard() {
        return garbageLeaseStopwatch.elapsed(TimeUnit.MILLISECONDS) >= garbageLeaseCleanupIntervalMillis;
    }

    /**
     * Attempts to clean up a single lease, either because its shard has been completely processed or because the
     * shard is no longer present in the stream (signalled by a {@link ResourceNotFoundException}).
     *
     * @param leasePendingDeletion the lease to clean up, together with its shard and stream information
     * @param timeToCheckForCompletedShard whether the completed-shard check may run in this invocation; it only
     *         runs if {@code cleanupLeasesUponShardCompletion} is also enabled
     * @param timeToCheckForGarbageShard whether the garbage-shard check may run in this invocation; it is skipped
     *         if the completed-shard path already queried the service for child shards
     * @return a {@link LeaseCleanupResult} describing what was cleaned up and what was observed
     * @throws TimeoutException declared by the child shard lookup
     * @throws InterruptedException declared by the child shard lookup
     * @throws DependencyException if a lease table operation fails
     * @throws ProvisionedThroughputException if a lease table operation is throttled
     * @throws InvalidStateException if the lease table is in an invalid state
     */
    public LeaseCleanupResult cleanupLease(LeasePendingDeletion leasePendingDeletion,
            boolean timeToCheckForCompletedShard, boolean timeToCheckForGarbageShard) throws TimeoutException,
            InterruptedException, DependencyException, ProvisionedThroughputException, InvalidStateException {
        final Lease lease = leasePendingDeletion.lease();
        final ShardInfo shardInfo = leasePendingDeletion.shardInfo();
        final StreamIdentifier streamIdentifier = leasePendingDeletion.streamIdentifier();

        final AWSExceptionManager exceptionManager = createExceptionManager();

        boolean cleanedUpCompletedLease = false;
        boolean cleanedUpGarbageLease = false;
        boolean alreadyCheckedForGarbageCollection = false;
        boolean wereChildShardsPresent = false;
        boolean wasResourceNotFound = false;

        try {
            if (cleanupLeasesUponShardCompletion && timeToCheckForCompletedShard) {
                final Lease leaseFromDDB = leaseCoordinator.leaseRefresher().getLease(lease.leaseKey());
                if (leaseFromDDB != null) {
                    Set<String> childShardKeys = leaseFromDDB.childShardIds();
                    if (CollectionUtils.isNullOrEmpty(childShardKeys)) {
                        // The stored lease has no child shard information; ask the service instead.
                        try {
                            childShardKeys = leasePendingDeletion.getChildShardsFromService();

                            if (CollectionUtils.isNullOrEmpty(childShardKeys)) {
                                log.error(
                                        "No child shards returned from service for shard {} for {} while cleaning up lease.",
                                        shardInfo.shardId(), streamIdentifier.streamName());
                            } else {
                                wereChildShardsPresent = true;
                                updateLeaseWithChildShards(leasePendingDeletion, childShardKeys);
                            }
                        } catch (ExecutionException e) {
                            throw exceptionManager.apply(e.getCause());
                        } finally {
                            // The service was queried above, so the garbage check below need not query it again.
                            alreadyCheckedForGarbageCollection = true;
                        }
                    } else {
                        wereChildShardsPresent = true;
                    }
                    try {
                        cleanedUpCompletedLease = cleanupLeaseForCompletedShard(lease, shardInfo, childShardKeys);
                    } catch (Exception e) {
                        // Suppressing the exception here, so that we can attempt for garbage cleanup.
                        log.warn("Unable to cleanup lease for shard {} in {}", shardInfo.shardId(), streamIdentifier.streamName(), e);
                    }
                } else {
                    log.info("Lease not present in lease table while cleaning the shard {} of {}", shardInfo.shardId(), streamIdentifier.streamName());
                    cleanedUpCompletedLease = true;
                }
            }

            if (!alreadyCheckedForGarbageCollection && timeToCheckForGarbageShard) {
                try {
                    wereChildShardsPresent = !CollectionUtils
                            .isNullOrEmpty(leasePendingDeletion.getChildShardsFromService());
                } catch (ExecutionException e) {
                    throw exceptionManager.apply(e.getCause());
                }
            }
        } catch (ResourceNotFoundException e) {
            // The service no longer knows the shard/stream, so the lease is garbage and can be deleted.
            wasResourceNotFound = true;
            cleanedUpGarbageLease = cleanupLeaseForGarbageShard(lease, e);
        }

        return new LeaseCleanupResult(cleanedUpCompletedLease, cleanedUpGarbageLease, wereChildShardsPresent,
                wasResourceNotFound);
    }

    // A lease that ended with SHARD_END from ResourceNotFoundException is safe to delete if it no longer exists in the
    // stream (known explicitly from ResourceNotFound being thrown when processing this shard).
    private boolean cleanupLeaseForGarbageShard(Lease lease, Throwable e) throws DependencyException, ProvisionedThroughputException, InvalidStateException {
        log.warn("Deleting lease {} as it is not present in the stream.", lease, e);
        leaseCoordinator.leaseRefresher().deleteLease(lease);
        return true;
    }

    /**
     * Checks whether every parent shard lease of the given lease is gone from the lease table.
     *
     * @param lease the lease whose parents are inspected
     * @param shardInfo shard information used to build the parent lease keys
     * @return false as soon as one parent lease is still present; true otherwise
     */
    private boolean allParentShardLeasesDeleted(Lease lease, ShardInfo shardInfo) throws DependencyException, ProvisionedThroughputException, InvalidStateException {
        for (String parentShard : lease.parentShardIds()) {
            final Lease parentLease = leaseCoordinator.leaseRefresher().getLease(ShardInfo.getLeaseKey(shardInfo, parentShard));

            if (parentLease != null) {
                log.warn("Lease {} has a parent lease {} which is still present in the lease table, skipping deletion " +
                        "for this lease.", lease, parentLease);
                return false;
            }
        }
        return true;
    }

    // We should only be deleting the current shard's lease if
    // 1. All of its children are currently being processed, i.e their checkpoint is not TRIM_HORIZON or AT_TIMESTAMP.
    // 2. Its parent shard lease(s) have already been deleted.
    // Throws IllegalStateException if a child lease is missing from the lease table.
    private boolean cleanupLeaseForCompletedShard(Lease lease, ShardInfo shardInfo, Set<String> childShardKeys)
            throws DependencyException, ProvisionedThroughputException, InvalidStateException, IllegalStateException {
        final Set<String> processedChildShardLeaseKeys = new HashSet<>();
        final Set<String> childShardLeaseKeys = childShardKeys.stream().map(ck -> ShardInfo.getLeaseKey(shardInfo, ck))
                .collect(Collectors.toSet());

        for (String childShardLeaseKey : childShardLeaseKeys) {
            final Lease childShardLease = Optional.ofNullable(
                    leaseCoordinator.leaseRefresher().getLease(childShardLeaseKey))
                    .orElseThrow(() -> new IllegalStateException(
                            "Child lease " + childShardLeaseKey + " for completed shard not found in "
                                    + "lease table - not cleaning up lease " + lease));

            if (!childShardLease.checkpoint().equals(ExtendedSequenceNumber.TRIM_HORIZON) && !childShardLease
                    .checkpoint().equals(ExtendedSequenceNumber.AT_TIMESTAMP)) {
                processedChildShardLeaseKeys.add(childShardLease.leaseKey());
            }
        }

        if (!allParentShardLeasesDeleted(lease, shardInfo) || !Objects.equals(childShardLeaseKeys, processedChildShardLeaseKeys)) {
            return false;
        }

        log.info("Deleting lease {} as it has been completely processed and processing of child shard(s) has begun.",
                lease);
        leaseCoordinator.leaseRefresher().deleteLease(lease);

        return true;
    }

    /**
     * Stores the given child shard ids on the pending lease and persists them through the lease refresher.
     */
    private void updateLeaseWithChildShards(LeasePendingDeletion leasePendingDeletion, Set<String> childShardKeys)
            throws DependencyException, ProvisionedThroughputException, InvalidStateException {
        final Lease updatedLease = leasePendingDeletion.lease();
        updatedLease.childShardIds(childShardKeys);

        leaseCoordinator.leaseRefresher().updateLeaseWithMetaInfo(updatedLease, UpdateField.CHILD_SHARDS);
    }

    /**
     * Builds an exception manager with an identity mapping registered for {@link ResourceNotFoundException}, so
     * that exception type is handed back unchanged and can be caught in {@link #cleanupLease}.
     */
    private AWSExceptionManager createExceptionManager() {
        final AWSExceptionManager exceptionManager = new AWSExceptionManager();
        exceptionManager.add(ResourceNotFoundException.class, t -> t);

        return exceptionManager;
    }

    /**
     * Drains the deletion queue once, attempting to clean up every queued lease. Leases that were not cleaned up
     * (including those whose cleanup threw) are re-enqueued for the next scan. The scan is skipped entirely unless
     * at least one of the completed/garbage intervals has elapsed; each stopwatch is restarted only when at least
     * one lease of the corresponding kind was cleaned up.
     */
    @VisibleForTesting
    void cleanupLeases() {
        log.info("Number of pending leases to clean before the scan : {}", leasesPendingDeletion());
        if (deletionQueue.isEmpty()) {
            log.debug("No leases pending deletion.");
        } else if (timeToCheckForCompletedShard() || timeToCheckForGarbageShard()) {
            final Queue<LeasePendingDeletion> failedDeletions = new ConcurrentLinkedQueue<>();
            boolean completedLeaseCleanedUp = false;
            boolean garbageLeaseCleanedUp = false;

            log.debug("Attempting to clean up {} lease(s).", deletionQueue.size());

            while (!deletionQueue.isEmpty()) {
                final LeasePendingDeletion leasePendingDeletion = deletionQueue.poll();
                final String leaseKey = leasePendingDeletion.lease().leaseKey();
                final StreamIdentifier streamIdentifier = leasePendingDeletion.streamIdentifier();
                boolean deletionSucceeded = false;
                try {
                    final LeaseCleanupResult leaseCleanupResult = cleanupLease(leasePendingDeletion,
                            timeToCheckForCompletedShard(), timeToCheckForGarbageShard());
                    completedLeaseCleanedUp |= leaseCleanupResult.cleanedUpCompletedLease();
                    garbageLeaseCleanedUp |= leaseCleanupResult.cleanedUpGarbageLease();

                    if (leaseCleanupResult.leaseCleanedUp()) {
                        log.info("Successfully cleaned up lease {} for {} due to {}", leaseKey, streamIdentifier, leaseCleanupResult);
                        deletionSucceeded = true;
                    } else {
                        log.warn("Unable to clean up lease {} for {} due to {}", leaseKey, streamIdentifier, leaseCleanupResult);
                    }
                } catch (Exception e) {
                    log.error("Failed to cleanup lease {} for {}. Will re-enqueue for deletion and retry on next " +
                            "scheduled execution.", leaseKey, streamIdentifier, e);
                }
                if (!deletionSucceeded) {
                    log.debug("Did not cleanup lease {} for {}. Re-enqueueing for deletion.", leaseKey, streamIdentifier);
                    failedDeletions.add(leasePendingDeletion);
                }
            }
            if (completedLeaseCleanedUp) {
                log.debug("At least one completed lease was cleaned up - restarting interval");
                completedLeaseStopwatch.reset().start();
            }
            if (garbageLeaseCleanedUp) {
                log.debug("At least one garbage lease was cleaned up - restarting interval");
                garbageLeaseStopwatch.reset().start();
            }
            deletionQueue.addAll(failedDeletions);

            log.info("Number of pending leases to clean after the scan : {}", leasesPendingDeletion());
        }
    }

    /**
     * Task handed to the scheduled executor; each run performs one {@link #cleanupLeases()} scan.
     */
    private class LeaseCleanupThread implements Runnable {
        @Override
        public void run() {
            try {
                cleanupLeases();
            } catch (RuntimeException e) {
                // scheduleAtFixedRate suppresses all subsequent executions if a run throws, so log and carry on.
                log.error("Unexpected failure during lease cleanup. Will retry on next scheduled execution.", e);
            }
        }
    }

    /**
     * Outcome of a single {@link #cleanupLease} attempt.
     */
    @Value
    public static class LeaseCleanupResult {
        boolean cleanedUpCompletedLease;
        boolean cleanedUpGarbageLease;
        boolean wereChildShardsPresent;
        boolean wasResourceNotFound;

        /**
         * @return true if the lease was cleaned up either as a completed lease or as a garbage lease.
         */
        public boolean leaseCleanedUp() {
            return cleanedUpCompletedLease || cleanedUpGarbageLease;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy