All downloads are free. Search and download functionality uses the official Maven repository.

com.amazonaws.services.kinesis.leases.impl.LeaseCleanupManager Maven / Gradle / Ivy

Go to download

The Amazon Kinesis Client Library for Java enables Java developers to easily consume and process data from Amazon Kinesis.

There is a newer version: 1.15.2
Show newest version
package com.amazonaws.services.kinesis.leases.impl;

/*
 * Copyright 2020 Amazon.com, Inc. or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.ShardInfo;
import com.amazonaws.services.kinesis.clientlibrary.proxies.IKinesisProxy;
import com.amazonaws.services.kinesis.clientlibrary.types.ExtendedSequenceNumber;
import com.amazonaws.services.kinesis.leases.LeasePendingDeletion;
import com.amazonaws.services.kinesis.leases.exceptions.DependencyException;
import com.amazonaws.services.kinesis.leases.exceptions.InvalidStateException;
import com.amazonaws.services.kinesis.leases.exceptions.ProvisionedThroughputException;
import com.amazonaws.services.kinesis.leases.interfaces.ILeaseManager;
import com.amazonaws.services.kinesis.metrics.interfaces.IMetricsFactory;
import com.amazonaws.services.kinesis.model.ResourceNotFoundException;
import com.amazonaws.services.kinesis.model.ShardIteratorType;
import com.amazonaws.util.CollectionUtils;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import lombok.AccessLevel;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.Value;
import lombok.experimental.Accessors;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.util.HashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

/**
 * Helper class that cleans up any expired/closed shard leases. It cleans up leases periodically, as defined by
 * {@link KinesisClientLibConfiguration#leaseCleanupIntervalMillis()}, upon worker shutdown, following a re-shard
 * event, or after a shard expires from the service.
 */
@RequiredArgsConstructor(access= AccessLevel.PACKAGE)
@EqualsAndHashCode
public class LeaseCleanupManager {
    /** Proxy used to obtain shard iterators and records (including child-shard information) from the stream. */
    @NonNull
    private IKinesisProxy kinesisProxy;
    /** Lease table accessor used to read, update and delete leases. */
    @NonNull
    private final ILeaseManager<KinesisClientLease> leaseManager;
    /** Executor on which the periodic cleanup task is scheduled; it is shut down by {@link #shutdown()}. */
    @NonNull
    private final ScheduledExecutorService deletionThreadPool;
    // NOTE(review): not referenced by any method visible in this class - confirm whether it is still needed.
    @NonNull
    private final IMetricsFactory metricsFactory;
    /** When false, the completed-shard cleanup path of {@link #cleanupLease} is skipped. */
    private final boolean cleanupLeasesUponShardCompletion;
    /** Period, in milliseconds, between scheduled runs of the cleanup task. */
    private final long leaseCleanupIntervalMillis;
    /** Time, in milliseconds, that must elapse on the completed-lease stopwatch before completed shards are checked. */
    private final long completedLeaseCleanupIntervalMillis;
    /** Time, in milliseconds, that must elapse on the garbage-lease stopwatch before garbage shards are checked. */
    private final long garbageLeaseCleanupIntervalMillis;
    /** Record limit passed to the proxy when fetching records to discover child shards. */
    private final int maxRecords;

    /** Measures the time since the completed-lease interval was last (re)started. */
    private final Stopwatch completedLeaseStopwatch = Stopwatch.createUnstarted();
    /** Measures the time since the garbage-lease interval was last (re)started. */
    private final Stopwatch garbageLeaseStopwatch = Stopwatch.createUnstarted();
    /** Leases waiting to be evaluated for deletion by the cleanup task. */
    private final Queue<LeasePendingDeletion> deletionQueue = new ConcurrentLinkedQueue<>();

    private static final long INITIAL_DELAY = 0L;
    private static final Log LOG = LogFactory.getLog(LeaseCleanupManager.class);

    /** True between a successful {@link #start()} and the following {@link #shutdown()}. */
    @Getter
    private volatile boolean isRunning = false;

    /**
     * Static factory that builds a {@link LeaseCleanupManager} from the supplied collaborators and settings.
     *
     * @param kinesisProxy proxy used to query the stream for shard iterators and records
     * @param leaseManager accessor for the lease table
     * @param deletionThreadPool executor on which the periodic cleanup task is scheduled
     * @param metricsFactory metrics factory handed to the new instance
     * @param cleanupLeasesUponShardCompletion whether leases of completed shards should be cleaned up
     * @param leaseCleanupIntervalMillis period, in milliseconds, between cleanup runs
     * @param completedLeaseCleanupIntervalMillis interval, in milliseconds, between completed-shard checks
     * @param garbageLeaseCleanupIntervalMillis interval, in milliseconds, between garbage-shard checks
     * @param maxRecords record limit used when fetching records to discover child shards
     * @return a new {@link LeaseCleanupManager}; {@link #start()} must be called to begin cleanup
     */
    public static LeaseCleanupManager newInstance(IKinesisProxy kinesisProxy, ILeaseManager leaseManager,
                                                  ScheduledExecutorService deletionThreadPool, IMetricsFactory metricsFactory,
                                                  boolean cleanupLeasesUponShardCompletion, long leaseCleanupIntervalMillis,
                                                  long completedLeaseCleanupIntervalMillis, long garbageLeaseCleanupIntervalMillis,
                                                  int maxRecords) {
        final LeaseCleanupManager manager = new LeaseCleanupManager(
                kinesisProxy,
                leaseManager,
                deletionThreadPool,
                metricsFactory,
                cleanupLeasesUponShardCompletion,
                leaseCleanupIntervalMillis,
                completedLeaseCleanupIntervalMillis,
                garbageLeaseCleanupIntervalMillis,
                maxRecords);
        return manager;
    }

    /**
     * Schedules the periodic lease cleanup task, which then runs every
     * {@link LeaseCleanupManager#leaseCleanupIntervalMillis} milliseconds. Both interval stopwatches are started
     * first. Calling this while the manager is already running only logs a message.
     */
    public void start() {
        if (isRunning) {
            LOG.info("Lease cleanup thread already running, no need to start.");
            return;
        }

        LOG.info("Starting lease cleanup thread.");
        completedLeaseStopwatch.start();
        garbageLeaseStopwatch.start();

        final Runnable cleanupTask = new LeaseCleanupThread();
        deletionThreadPool.scheduleAtFixedRate(cleanupTask, INITIAL_DELAY, leaseCleanupIntervalMillis,
                TimeUnit.MILLISECONDS);
        isRunning = true;
    }

    /**
     * Stops the periodic lease cleanup by stopping both interval stopwatches and shutting down the deletion
     * thread pool. Calling this while the manager is not running only logs a message.
     */
    public void shutdown() {
        if (!isRunning) {
            LOG.info("Lease cleanup thread already stopped.");
            return;
        }

        LOG.info("Stopping the lease cleanup thread.");
        completedLeaseStopwatch.stop();
        garbageLeaseStopwatch.stop();
        deletionThreadPool.shutdown();

        isRunning = false;
    }

    /**
     * Enqueues a lease for deferred deletion without checking for a duplicate entry. Use
     * {@link #isEnqueuedForDeletion} to check for duplicates first.
     *
     * <p>If the supplied entry carries no lease, nothing is enqueued and a warning identifying the shard is logged.
     *
     * @param leasePendingDeletion the lease (with its shard info) to enqueue
     */
    public void enqueueForDeletion(LeasePendingDeletion leasePendingDeletion) {
        final KinesisClientLease lease = leasePendingDeletion.lease();
        if (lease == null) {
            // There is no lease key to report here; identify the entry by its shard rather than dereferencing
            // the null lease.
            final ShardInfo shardInfo = leasePendingDeletion.shardInfo();
            final String shardId = shardInfo == null ? "<unknown shard>" : shardInfo.getShardId();
            LOG.warn("Cannot enqueue lease for shard " + shardId + " for deferred deletion - instance doesn't hold " +
                    "the lease for that shard.");
            return;
        }

        LOG.debug("Enqueuing lease " + lease.getLeaseKey() + " for deferred deletion.");
        if (!deletionQueue.add(leasePendingDeletion)) {
            LOG.warn("Unable to enqueue lease " + lease.getLeaseKey() + " for deletion.");
        }
    }

    /**
     * Reports whether the given lease is already waiting in the deletion queue.
     * //TODO: Optimize verifying duplicate entries https://sim.amazon.com/issues/KinesisLTR-597.
     *
     * @param leasePendingDeletion the entry to look for in the deletion queue
     * @return true if enqueued for deletion; false otherwise.
     */
    public boolean isEnqueuedForDeletion(LeasePendingDeletion leasePendingDeletion) {
        final boolean alreadyQueued = deletionQueue.contains(leasePendingDeletion);
        return alreadyQueued;
    }

    /**
     * Counts the leases currently sitting in the deletion queue.
     *
     * @return number of leases pending deletion.
     */
    private int leasesPendingDeletion() {
        final int pendingCount = deletionQueue.size();
        return pendingCount;
    }

    /**
     * @return true once the completed-lease stopwatch has run for at least
     *         {@code completedLeaseCleanupIntervalMillis} milliseconds
     */
    private boolean timeToCheckForCompletedShard() {
        final long elapsedMillis = completedLeaseStopwatch.elapsed(TimeUnit.MILLISECONDS);
        return elapsedMillis >= completedLeaseCleanupIntervalMillis;
    }

    /**
     * @return true once the garbage-lease stopwatch has run for at least
     *         {@code garbageLeaseCleanupIntervalMillis} milliseconds
     */
    private boolean timeToCheckForGarbageShard() {
        final long elapsedMillis = garbageLeaseStopwatch.elapsed(TimeUnit.MILLISECONDS);
        return elapsedMillis >= garbageLeaseCleanupIntervalMillis;
    }

    /**
     * Attempts to clean up the lease wrapped by {@code leasePendingDeletion}: first as the lease of a completed
     * shard, and otherwise as a garbage lease whose shard the service reports as not found.
     *
     * <p>The completed-shard path runs only when {@code cleanupLeasesUponShardCompletion} is enabled and
     * {@code timeToCheckForCompletedShard} is true. If the lease stored in the lease table has no child shards
     * recorded, they are fetched from the service and written back to the lease before deletion is attempted.
     * Any {@link ResourceNotFoundException} raised while querying the service triggers garbage cleanup of the
     * lease. Other failures are logged and reported through the result's failure reason.
     *
     * @param leasePendingDeletion the lease, with its shard info, to evaluate for deletion
     * @param timeToCheckForCompletedShard whether the completed-shard check may run in this call
     * @param timeToCheckForGarbageShard whether the garbage-shard check may run in this call
     * @return a {@link LeaseCleanupResult} describing which cleanup (if any) took place, whether child shards were
     *         seen, whether the service reported the shard as not found, and why cleanup did not happen
     * @throws DependencyException declared by the lease-deletion helper invoked when the shard is not found
     * @throws ProvisionedThroughputException declared by the lease-deletion helper invoked when the shard is not found
     * @throws InvalidStateException declared by the lease-deletion helper invoked when the shard is not found
     */
    public LeaseCleanupResult cleanupLease(LeasePendingDeletion leasePendingDeletion,
                                           boolean timeToCheckForCompletedShard, boolean timeToCheckForGarbageShard)
            throws DependencyException, ProvisionedThroughputException, InvalidStateException {
        final KinesisClientLease lease = leasePendingDeletion.lease();
        final ShardInfo shardInfo = leasePendingDeletion.shardInfo();

        boolean cleanedUpCompletedLease = false;
        boolean cleanedUpGarbageLease = false;
        boolean alreadyCheckedForGarbageCollection = false;
        boolean wereChildShardsPresent = false;
        boolean wasResourceNotFound = false;
        String cleanupFailureReason = "";

        try {
            if (cleanupLeasesUponShardCompletion && timeToCheckForCompletedShard) {
                final KinesisClientLease leaseFromDDB = leaseManager.getLease(shardInfo.getShardId());
                if (leaseFromDDB != null) {
                    Set<String> childShardKeys = leaseFromDDB.getChildShardIds();
                    if (CollectionUtils.isNullOrEmpty(childShardKeys)) {
                        try {
                            // throws ResourceNotFoundException
                            childShardKeys = getChildShardsFromService(shardInfo);
                            if (CollectionUtils.isNullOrEmpty(childShardKeys)) {
                                LOG.error("No child shards returned from service for shard " + shardInfo.getShardId());
                                // If no children shard is found in DDB and from service, then do not delete the lease
                                throw new InvalidStateException("No child shards found for this supposedly " +
                                        "closed shard in both local DDB and in service " + shardInfo.getShardId());
                            } else {
                                wereChildShardsPresent = true;
                                updateLeaseWithChildShards(leasePendingDeletion, childShardKeys);
                            }
                        } finally {
                            // Garbage collection relies on whether the resource still exists in the service. The
                            // call to getChildShardsFromService above already answers that: if the resource is
                            // gone it throws ResourceNotFoundException, which the outer catch block handles.
                            alreadyCheckedForGarbageCollection = true;
                        }
                    } else {
                        wereChildShardsPresent = true;
                    }
                    try {
                        final CompletedShardResult completedShardResult = cleanupLeaseForCompletedShard(lease, childShardKeys);
                        cleanedUpCompletedLease = completedShardResult.cleanedUp();
                        cleanupFailureReason = completedShardResult.failureMsg();
                    } catch (Exception e) {
                        // Suppressing the exception here, so that we can attempt for garbage cleanup. The reason
                        // is still recorded so the returned result explains why the lease was not cleaned up.
                        LOG.warn("Unable to cleanup lease for shard " + shardInfo.getShardId() + " due to " + e.getMessage(), e);
                        cleanupFailureReason = e.getMessage() == null ? e.toString() : e.getMessage();
                    }
                } else {
                    LOG.info("Lease not present in lease table while cleaning the shard " + shardInfo.getShardId());
                    cleanedUpCompletedLease = true;
                }
            } else {
                cleanupFailureReason = "Configuration/Interval condition not satisfied to execute lease cleanup this cycle";
            }
            if (!cleanedUpCompletedLease && !alreadyCheckedForGarbageCollection && timeToCheckForGarbageShard) {
                // throws ResourceNotFoundException
                wereChildShardsPresent = !CollectionUtils
                            .isNullOrEmpty(getChildShardsFromService(shardInfo));
            }
        } catch (ResourceNotFoundException e) {
            wasResourceNotFound = true;
            cleanedUpGarbageLease = cleanupLeaseForGarbageShard(lease);
            cleanupFailureReason = cleanedUpGarbageLease ? "" : "DDB Lease Deletion Failed";
        } catch (Exception e) {
            LOG.warn("Unable to cleanup lease for shard " + shardInfo.getShardId() + " : " + e.getMessage(), e);
            // Fall back to the exception's string form so the reported reason is never null.
            cleanupFailureReason = e.getMessage() == null ? e.toString() : e.getMessage();
        }
        return new LeaseCleanupResult(cleanedUpCompletedLease, cleanedUpGarbageLease, wereChildShardsPresent,
                wasResourceNotFound, cleanupFailureReason);
    }

    /**
     * Asks the service for the child shards of the given shard by requesting a {@code LATEST} iterator and
     * reading up to {@code maxRecords} records from it.
     *
     * @param shardInfo identifies the shard whose children are requested
     * @return the shard ids of the child shards reported in the get-records result
     */
    private Set<String> getChildShardsFromService(ShardInfo shardInfo) {
        final String iterator = kinesisProxy.getIterator(shardInfo.getShardId(), ShardIteratorType.LATEST.toString());
        return kinesisProxy.get(iterator, maxRecords).getChildShards().stream()
                .map(c -> c.getShardId())
                .collect(Collectors.toSet());
    }

    /**
     * Deletes the lease of a shard that is no longer present in the stream. Callers invoke this after a
     * ResourceNotFoundException was raised for the shard, which makes the lease safe to delete.
     *
     * @param lease the lease to delete
     * @return true if the lease was deleted; false if the deletion failed (the failure is logged, not rethrown)
     */
    private boolean cleanupLeaseForGarbageShard(KinesisClientLease lease) throws DependencyException, ProvisionedThroughputException, InvalidStateException {
        final String leaseKey = lease.getLeaseKey();
        LOG.info("Deleting lease " + leaseKey + " as it is not present in the stream.");
        try {
            leaseManager.deleteLease(lease);
            return true;
        } catch (Exception e) {
            LOG.warn("Lease deletion failed for " + lease.getLeaseKey() + " due to " + e.getMessage());
            return false;
        }
    }

    /**
     * Checks that none of the lease's parent shards still has a lease in the lease table.
     *
     * @param lease the lease whose parent shard ids are looked up
     * @return false as soon as one parent lease is found (a warning is logged); true when none remain
     */
    private boolean allParentShardLeasesDeleted(KinesisClientLease lease) throws DependencyException, ProvisionedThroughputException, InvalidStateException {
        for (final String parentShardId : lease.getParentShardIds()) {
            final KinesisClientLease remainingParent = leaseManager.getLease(parentShardId);
            if (remainingParent == null) {
                continue;
            }

            LOG.warn("Lease " + lease.getLeaseKey() + " has a parent lease " + remainingParent.getLeaseKey() +
                    " which is still present in the lease table, skipping deletion for this lease.");
            return false;
        }
        return true;
    }

    /**
     * Deletes the lease of a completed shard, but only if
     * <ol>
     *   <li>all of its children are currently being processed, i.e. their checkpoint is neither TRIM_HORIZON nor
     *       AT_TIMESTAMP, and</li>
     *   <li>its parent shard lease(s) have already been deleted.</li>
     * </ol>
     *
     * @param lease the lease of the completed shard
     * @param childShardLeaseKeys lease keys of the shard's children
     * @return a result flagged as cleaned up when the lease was deleted; otherwise a result carrying the reason
     *         deletion was skipped
     * @throws IllegalStateException if a child lease is missing from the lease table
     */
    private CompletedShardResult cleanupLeaseForCompletedShard(KinesisClientLease lease, Set<String> childShardLeaseKeys)
            throws DependencyException, ProvisionedThroughputException, InvalidStateException, IllegalStateException {

        final Set<String> processedChildShardLeaseKeys = new HashSet<>();

        for (String childShardLeaseKey : childShardLeaseKeys) {
            final KinesisClientLease childShardLease = Optional.ofNullable(
                    leaseManager.getLease(childShardLeaseKey))
                    .orElseThrow(() -> new IllegalStateException(
                            "Child lease " + childShardLeaseKey + " for completed shard not found in "
                                    + "lease table - not cleaning up lease " + lease));

            // A child counts as "processing started" once its checkpoint has moved off both initial positions.
            if (!childShardLease.getCheckpoint().equals(ExtendedSequenceNumber.TRIM_HORIZON) && !childShardLease
                    .getCheckpoint().equals(ExtendedSequenceNumber.AT_TIMESTAMP)) {
                processedChildShardLeaseKeys.add(childShardLease.getLeaseKey());
            }
        }

        boolean parentShardsDeleted = allParentShardLeasesDeleted(lease);
        // Every child must appear in the processed set for the two sets to be equal.
        boolean childrenStartedProcessing = Objects.equals(childShardLeaseKeys, processedChildShardLeaseKeys);

        if (!parentShardsDeleted || !childrenStartedProcessing) {
            return new CompletedShardResult(false, !parentShardsDeleted ? "Parent shard(s) not deleted yet" : "Child shard(s) yet to begin processing");
        }

        LOG.info("Deleting lease " + lease.getLeaseKey() + " as it has been completely processed and processing of child shard(s) has begun.");
        leaseManager.deleteLease(lease);

        return new CompletedShardResult(true, "");
    }

    /**
     * Records the given child shard ids on the pending lease and persists that field through the lease manager.
     *
     * @param leasePendingDeletion entry whose lease is updated in place
     * @param childShardKeys child shard ids to store on the lease
     */
    private void updateLeaseWithChildShards(LeasePendingDeletion leasePendingDeletion, Set<String> childShardKeys)
            throws DependencyException, ProvisionedThroughputException, InvalidStateException {
        final KinesisClientLease updatedLease = leasePendingDeletion.lease();
        updatedLease.setChildShardIds(childShardKeys);

        leaseManager.updateLeaseWithMetaInfo(updatedLease, UpdateField.CHILD_SHARDS);
    }

    /**
     * Runs one cleanup pass over the deletion queue.
     *
     * <p>Nothing is attempted while the queue is empty or while neither the completed-lease nor the garbage-lease
     * interval has elapsed. Otherwise every queued lease is passed to {@link #cleanupLease}. Leases that could not
     * be cleaned up are put back on the queue for the next pass. An interval stopwatch is restarted only when at
     * least one lease of the corresponding kind was cleaned up.
     */
    @VisibleForTesting
    void cleanupLeases() {
        LOG.info("Number of pending leases to clean before the scan : " + leasesPendingDeletion());
        if (deletionQueue.isEmpty()) {
            LOG.debug("No leases pending deletion.");
        } else if (timeToCheckForCompletedShard() || timeToCheckForGarbageShard()) {
            final Queue<LeasePendingDeletion> failedDeletions = new ConcurrentLinkedQueue<>();
            boolean completedLeaseCleanedUp = false;
            boolean garbageLeaseCleanedUp = false;

            LOG.debug("Attempting to clean up " + deletionQueue.size() + " lease(s).");

            // poll() returns null once the queue is drained, so the emptiness check and the removal are a single
            // step and a null element can never be dereferenced.
            LeasePendingDeletion leasePendingDeletion;
            while ((leasePendingDeletion = deletionQueue.poll()) != null) {
                final String leaseKey = leasePendingDeletion.lease().getLeaseKey();
                boolean deletionSucceeded = false;
                try {
                    final LeaseCleanupResult leaseCleanupResult = cleanupLease(leasePendingDeletion,
                            timeToCheckForCompletedShard(), timeToCheckForGarbageShard());
                    completedLeaseCleanedUp |= leaseCleanupResult.cleanedUpCompletedLease();
                    garbageLeaseCleanedUp |= leaseCleanupResult.cleanedUpGarbageLease();

                    if (leaseCleanupResult.leaseCleanedUp()) {
                        LOG.debug("Successfully cleaned up lease " + leaseKey);
                        deletionSucceeded = true;
                    } else {
                        LOG.warn("Unable to clean up lease " + leaseKey + " due to " + leaseCleanupResult);
                    }
                } catch (Exception e) {
                    LOG.error("Failed to cleanup lease " + leaseKey + ". Will re-enqueue for deletion and retry on next " +
                            "scheduled execution.", e);
                }
                if (!deletionSucceeded) {
                    LOG.debug("Did not cleanup lease " + leaseKey + ". Re-enqueueing for deletion.");
                    // Held aside until the loop ends; adding straight back to deletionQueue would make the
                    // drain loop pick the same lease up again in this pass.
                    failedDeletions.add(leasePendingDeletion);
                }
            }
            if (completedLeaseCleanedUp) {
                LOG.debug("At least one completed lease was cleaned up - restarting interval");
                completedLeaseStopwatch.reset().start();
            }
            if (garbageLeaseCleanedUp) {
                LOG.debug("At least one garbage lease was cleaned up - restarting interval");
                garbageLeaseStopwatch.reset().start();
            }
            deletionQueue.addAll(failedDeletions);

            LOG.info("Number of pending leases to clean after the scan : " +  leasesPendingDeletion());
        }
    }

    private class LeaseCleanupThread implements Runnable {
        @Override
        public void run() {
            cleanupLeases();
        }
    }

    /**
     * Outcome of a single {@code cleanupLease} attempt, exposed through fluent accessors.
     */
    @Value
    @Accessors(fluent=true)
    public static class LeaseCleanupResult {
        /** Whether the lease was cleaned up through the completed-shard path. */
        boolean cleanedUpCompletedLease;
        /** Whether the lease was deleted as a garbage lease after the shard was reported as not found. */
        boolean cleanedUpGarbageLease;
        /** Whether child shards were found for the shard during the attempt. */
        boolean wereChildShardsPresent;
        /** Whether a ResourceNotFoundException was raised during the attempt. */
        boolean wasResourceNotFound;
        /** Why the lease was not cleaned up. */
        String cleanupFailureReason;

        /**
         * @return true when the lease was cleaned up through either the completed-shard or the garbage path
         */
        public boolean leaseCleanedUp() {
            return cleanedUpCompletedLease || cleanedUpGarbageLease;
        }
    }

    /**
     * Outcome of {@code cleanupLeaseForCompletedShard}: whether the completed shard's lease was deleted and,
     * if not, why.
     */
    @Value
    @Accessors(fluent = true)
    private static class CompletedShardResult {
        // True when the lease was deleted.
        boolean cleanedUp;
        // Reason the deletion was skipped; the empty string when the lease was deleted.
        String failureMsg;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy