// software.amazon.kinesis.lifecycle.ShutdownTask (Maven / Gradle / Ivy)
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.lifecycle;
import com.google.common.annotations.VisibleForTesting;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.services.kinesis.model.ChildShard;
import software.amazon.awssdk.utils.CollectionUtils;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
import software.amazon.kinesis.common.StreamIdentifier;
import software.amazon.kinesis.exceptions.internal.BlockedOnParentShardException;
import software.amazon.kinesis.leases.HierarchicalShardSyncer;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseCleanupManager;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.ShardDetector;
import software.amazon.kinesis.leases.ShardInfo;
import software.amazon.kinesis.leases.UpdateField;
import software.amazon.kinesis.leases.exceptions.CustomerApplicationException;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.LeasePendingDeletion;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import software.amazon.kinesis.lifecycle.events.LeaseLostInput;
import software.amazon.kinesis.lifecycle.events.ShardEndedInput;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import software.amazon.kinesis.processor.ShardRecordProcessor;
import software.amazon.kinesis.retrieval.RecordsPublisher;
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
import java.util.Random;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
 * Task for invoking the ShardRecordProcessor shutdown callbacks ({@code leaseLost} or {@code shardEnded}).
 */
@RequiredArgsConstructor
@Slf4j
@KinesisClientInternalApi
public class ShutdownTask implements ConsumerTask {
// NOTE: the class is annotated with Lombok's @RequiredArgsConstructor, so the declaration order of the
// uninitialized final fields below is the parameter order of the generated constructor. Do not reorder.

/** Operation name set on the checkpointer and on the metrics scope while this task runs. */
private static final String SHUTDOWN_TASK_OPERATION = "ShutdownTask";
/** Name of the latency metric recorded around the record processor callbacks. */
private static final String RECORD_PROCESSOR_SHUTDOWN_METRIC = "RecordProcessor.shutdown";
/**
 * Value of N handed to {@code isOneInNProbability} when only one of a merged child's two parent
 * leases is present in the lease table.
 */
@VisibleForTesting
static final int RETRY_RANDOM_MAX_RANGE = 30;

@NonNull
private final ShardInfo shardInfo;
@NonNull
private final ShardDetector shardDetector;
@NonNull
private final ShardRecordProcessor shardRecordProcessor;
@NonNull
private final ShardRecordProcessorCheckpointer recordProcessorCheckpointer;
@NonNull
private final ShutdownReason reason;
// The next three fields are not read by any method of this class; they remain because they are
// part of the generated constructor signature.
@NonNull
private final InitialPositionInStreamExtended initialPositionInStream;
private final boolean cleanupLeasesOfCompletedShards;
private final boolean ignoreUnexpectedChildShards;
@NonNull
private final LeaseCoordinator leaseCoordinator;
/** Sleep duration applied by {@code call()} after a failed shutdown attempt. */
private final long backoffTimeMillis;
@NonNull
private final RecordsPublisher recordsPublisher;
@NonNull
private final HierarchicalShardSyncer hierarchicalShardSyncer;
@NonNull
private final MetricsFactory metricsFactory;

// Initialized inline, hence not a constructor parameter.
private final TaskType taskType = TaskType.SHUTDOWN;

/** Child shards of the shard being shut down; may be null or empty. */
private final List<ChildShard> childShards;
@NonNull
private final StreamIdentifier streamIdentifier;
@NonNull
private final LeaseCleanupManager leaseCleanupManager;

/** Maps a {@link ShardInfo} to its lease key. */
private static final Function<ShardInfo, String> leaseKeyProvider = shardInfo -> ShardInfo.getLeaseKey(shardInfo);
/**
 * Runs the shutdown sequence for the shard.
 *
 * <p>When the reason is {@link ShutdownReason#SHARD_END}, shard-end handling is attempted first. If that
 * fails with an {@link InvalidStateException}, the lease is dropped and the record processor is notified
 * through {@code leaseLost} instead. For every other reason {@code leaseLost} is invoked directly.
 * Afterwards the records publisher is shut down.
 *
 * <p>Any exception is logged, followed by a sleep of {@code backoffTimeMillis}; the exception is then
 * reported through the returned {@link TaskResult}. If the sleep is interrupted, the thread's interrupt
 * status is restored so that the owner of the thread can still observe the interruption.
 *
 * @return a result wrapping {@code null} on success, or the caught exception on failure
 * @see ConsumerTask#call()
 */
@Override
public TaskResult call() {
    recordProcessorCheckpointer.checkpointer().operation(SHUTDOWN_TASK_OPERATION);
    final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, SHUTDOWN_TASK_OPERATION);

    Exception exception;
    try {
        log.debug("Invoking shutdown() for shard {} with childShards {}, concurrencyToken {}. Shutdown reason: {}",
                leaseKeyProvider.apply(shardInfo), childShards, shardInfo.concurrencyToken(), reason);

        final long startTime = System.currentTimeMillis();
        final Lease currentShardLease = leaseCoordinator.getCurrentlyHeldLease(leaseKeyProvider.apply(shardInfo));
        final Runnable leaseLostAction = () -> shardRecordProcessor.leaseLost(LeaseLostInput.builder().build());

        if (reason == ShutdownReason.SHARD_END) {
            try {
                takeShardEndAction(currentShardLease, scope, startTime);
            } catch (InvalidStateException e) {
                // If InvalidStateException happens, it indicates we have a non recoverable error in short term.
                // In this scenario, we should shutdown the shardConsumer with LEASE_LOST reason to allow other
                // worker to take the lease and retry shutting down.
                log.warn("Lease {}: Invalid state encountered while shutting down shardConsumer with SHARD_END reason. " +
                        "Dropping the lease and shutting down shardConsumer using LEASE_LOST reason. ", leaseKeyProvider.apply(shardInfo), e);
                dropLease(currentShardLease);
                throwOnApplicationException(leaseLostAction, scope, startTime);
            }
        } else {
            throwOnApplicationException(leaseLostAction, scope, startTime);
        }

        log.debug("Shutting down retrieval strategy for shard {}.", leaseKeyProvider.apply(shardInfo));
        recordsPublisher.shutdown();
        log.debug("Record processor completed shutdown() for shard {}", leaseKeyProvider.apply(shardInfo));

        return new TaskResult(null);
    } catch (Exception e) {
        if (e instanceof CustomerApplicationException) {
            log.error("Shard {}: Application exception. ", leaseKeyProvider.apply(shardInfo), e);
        } else {
            log.error("Shard {}: Caught exception: ", leaseKeyProvider.apply(shardInfo), e);
        }
        exception = e;
        // backoff if we encounter an exception.
        try {
            Thread.sleep(this.backoffTimeMillis);
        } catch (InterruptedException ie) {
            log.debug("Shard {}: Interrupted sleep", leaseKeyProvider.apply(shardInfo), ie);
            // Thread.sleep clears the interrupt status when it throws; set it again rather than
            // silently discarding the interruption request.
            Thread.currentThread().interrupt();
        }
    } finally {
        MetricsUtil.endScope(scope);
    }

    return new TaskResult(exception);
}
/**
 * Performs the shard-end work: persists child shard information, attempts the SHARD_END checkpoint and
 * enqueues the lease for cleanup.
 *
 * @param currentShardLease lease currently held for this shard, or {@code null} if this worker holds none
 * @param scope metrics scope used for the callback latency metric
 * @param startTime start timestamp (epoch millis) used for the callback latency metric
 * @throws InvalidStateException if {@code currentShardLease} is {@code null}, or raised by a callee
 */
private void takeShardEndAction(Lease currentShardLease,
        MetricsScope scope, long startTime)
        throws DependencyException, ProvisionedThroughputException, InvalidStateException,
        CustomerApplicationException {
    if (currentShardLease == null) {
        throw new InvalidStateException(leaseKeyProvider.apply(shardInfo)
                + " : Lease not owned by the current worker. Leaving ShardEnd handling to new owner.");
    }

    // Create new leases for the child shards if they don't exist.
    // There is one valid scenario in which a shutdown task is created with SHARD_END reason and an empty
    // list of childShards: KinesisDataFetcher (polling mode) or FanOutRecordsPublisher (StoS mode) caught a
    // ResourceNotFound exception and sent out a SHARD_END signal. This can happen when the customer deletes
    // the stream while leaving the KCL application running.
    final boolean hasChildShards = !CollectionUtils.isNullOrEmpty(childShards);
    if (hasChildShards) {
        createLeasesForChildShardsIfNotExist();
        updateLeaseWithChildShards(currentShardLease);
    }

    final LeasePendingDeletion pendingDeletion =
            new LeasePendingDeletion(streamIdentifier, currentShardLease, shardInfo, shardDetector);
    if (leaseCleanupManager.isEnqueuedForDeletion(pendingDeletion)) {
        // Already queued for cleanup; nothing left to do.
        return;
    }

    boolean checkpointed = false;
    try {
        checkpointed = attemptShardEndCheckpointing(scope, startTime);
    } finally {
        // Enqueue when the shard-end persist succeeded, or when there are no child shards. The latter
        // means either a completed shard is being reprocessed or the service answered with RNF.
        // This runs in a finally block, so it also applies when the checkpoint attempt threw.
        if (checkpointed || CollectionUtils.isNullOrEmpty(childShards)) {
            leaseCleanupManager.enqueueForDeletion(pendingDeletion);
        }
    }
}
/**
 * Ensures the lease is checkpointed at SHARD_END, asking the application to checkpoint if the lease
 * read through the lease refresher is not there yet.
 *
 * @param scope metrics scope used for the callback latency metric
 * @param startTime start timestamp (epoch millis) used for the callback latency metric
 * @return always {@code true}; failures surface as exceptions
 * @throws InvalidStateException if the lease refresher returns no lease for this shard
 * @throws CustomerApplicationException if the application callback or its verification fails
 */
private boolean attemptShardEndCheckpointing(MetricsScope scope, long startTime)
        throws DependencyException, ProvisionedThroughputException, InvalidStateException,
        CustomerApplicationException {
    final Lease leaseFromDdb = leaseCoordinator.leaseRefresher().getLease(leaseKeyProvider.apply(shardInfo));
    if (leaseFromDdb == null) {
        throw new InvalidStateException("Lease for shard " + leaseKeyProvider.apply(shardInfo) + " does not exist.");
    }

    final boolean alreadyAtShardEnd = leaseFromDdb.checkpoint().equals(ExtendedSequenceNumber.SHARD_END);
    if (!alreadyAtShardEnd) {
        // shardEnded is implemented by the customer, so after invoking it we verify that the SHARD_END
        // checkpoint really happened.
        throwOnApplicationException(this::applicationCheckpointAndVerification, scope, startTime);
    }
    return true;
}
/**
 * Invokes the application's {@code shardEnded} callback and verifies that it checkpointed at SHARD_END.
 *
 * @throws IllegalArgumentException if the last checkpoint value after the callback is not SHARD_END
 */
private void applicationCheckpointAndVerification() {
    // Order matters: remember the current largest permitted checkpoint as the sequence number at shard
    // end before the permitted value is replaced by SHARD_END.
    recordProcessorCheckpointer
            .sequenceNumberAtShardEnd(recordProcessorCheckpointer.largestPermittedCheckpointValue());
    recordProcessorCheckpointer.largestPermittedCheckpointValue(ExtendedSequenceNumber.SHARD_END);

    final ShardEndedInput shardEndedInput =
            ShardEndedInput.builder().checkpointer(recordProcessorCheckpointer).build();
    shardRecordProcessor.shardEnded(shardEndedInput);

    final ExtendedSequenceNumber checkpointAfterCallback = recordProcessorCheckpointer.lastCheckpointValue();
    final boolean checkpointedAtShardEnd = checkpointAfterCallback != null
            && checkpointAfterCallback.equals(ExtendedSequenceNumber.SHARD_END);
    if (!checkpointedAtShardEnd) {
        throw new IllegalArgumentException("Application didn't checkpoint at end of shard "
                + leaseKeyProvider.apply(shardInfo) + ". Application must checkpoint upon shard end. " +
                "See ShardRecordProcessor.shardEnded javadocs for more information.");
    }
}
/**
 * Runs {@code action} and converts any exception it throws into a {@link CustomerApplicationException}.
 * The {@code RecordProcessor.shutdown} latency is recorded whether or not the action succeeds.
 *
 * @param action callback to run
 * @param metricsScope scope that receives the latency metric
 * @param startTime start timestamp (epoch millis) from which the latency is measured
 * @throws CustomerApplicationException wrapping whatever exception {@code action} threw
 */
private void throwOnApplicationException(Runnable action, MetricsScope metricsScope, final long startTime) throws CustomerApplicationException {
    try {
        action.run();
    } catch (Exception cause) {
        final String message =
                "Customer application throws exception for shard " + leaseKeyProvider.apply(shardInfo) + ": ";
        throw new CustomerApplicationException(message, cause);
    } finally {
        MetricsUtil.addLatency(metricsScope, RECORD_PROCESSOR_SHUTDOWN_METRIC, startTime, MetricsLevel.SUMMARY);
    }
}
/**
 * Creates a lease for every child shard that does not have one yet.
 *
 * <p>Does nothing when there are no child shards. When there is exactly one child shard (the result of
 * a merge), the lease table state of its two parents is validated first.
 *
 * @throws InvalidStateException if the single child does not list two parents, or (with probability
 *         1 in {@code RETRY_RANDOM_MAX_RANGE}) if only one parent lease is present
 * @throws BlockedOnParentShardException otherwise, if only one parent lease is present
 */
private void createLeasesForChildShardsIfNotExist()
        throws DependencyException, InvalidStateException, ProvisionedThroughputException {
    if (CollectionUtils.isNullOrEmpty(childShards)) {
        // Nothing to create; also avoids a NullPointerException in the loop below.
        return;
    }

    if (childShards.size() == 1) {
        verifyParentLeasesOfMergedChild(childShards.get(0));
    }

    // Attempt create leases for child shards.
    for (ChildShard childShard : childShards) {
        final String leaseKey = ShardInfo.getLeaseKey(shardInfo, childShard.shardId());
        if (leaseCoordinator.leaseRefresher().getLease(leaseKey) == null) {
            final Lease leaseToCreate = hierarchicalShardSyncer.createLeaseForChildShard(childShard, shardDetector.streamIdentifier());
            leaseCoordinator.leaseRefresher().createLeaseIfNotExists(leaseToCreate);
            log.info("Shard {}: Created child shard lease: {}", shardInfo.shardId(), leaseToCreate.leaseKey());
        }
    }
}

/**
 * For a child shard resulting from the merge of two parent shards, verifies that both parent leases are
 * either present in, or absent from, the lease table before the child lease is created.
 */
private void verifyParentLeasesOfMergedChild(ChildShard childShard)
        throws DependencyException, InvalidStateException, ProvisionedThroughputException {
    final List<String> parentLeaseKeys = childShard.parentShards().stream()
            .map(parentShardId -> ShardInfo.getLeaseKey(shardInfo, parentShardId))
            .collect(Collectors.toList());
    if (parentLeaseKeys.size() != 2) {
        throw new InvalidStateException("Shard " + shardInfo.shardId() + "'s only child shard " + childShard
                + " does not contain other parent information.");
    }

    final boolean firstParentMissing =
            Objects.isNull(leaseCoordinator.leaseRefresher().getLease(parentLeaseKeys.get(0)));
    final boolean secondParentMissing =
            Objects.isNull(leaseCoordinator.leaseRefresher().getLease(parentLeaseKeys.get(1)));
    if (firstParentMissing == secondParentMissing) {
        return;
    }

    final String message = "Shard " + shardInfo.shardId() + "'s only child shard " + childShard
            + " has partial parent information in lease table. Hence deferring lease creation of child shard.";
    // Most of the time report the task as blocked. Once in RETRY_RANDOM_MAX_RANGE attempts escalate to
    // InvalidStateException, which call() handles by dropping the lease.
    if (!isOneInNProbability(RETRY_RANDOM_MAX_RANGE)) {
        throw new BlockedOnParentShardException(message);
    }
    throw new InvalidStateException(message);
}
/**
 * Returns {@code true} with a probability of 1 in {@code n}.
 *
 * @param n number of equally likely outcomes; used as the exclusive bound for {@link Random#nextInt(int)}
 * @return {@code true} when the value drawn from {@code [0, n)} is zero
 */
@VisibleForTesting
boolean isOneInNProbability(int n) {
    final int draw = new Random().nextInt(n);
    return draw == 0;
}
/**
 * Stores the ids of the child shards on a copy of the given lease and persists that copy through the
 * lease refresher, updating only the {@code CHILD_SHARDS} field.
 *
 * @param currentLease lease of the shard being shut down; it is copied, not modified
 */
private void updateLeaseWithChildShards(Lease currentLease)
        throws DependencyException, InvalidStateException, ProvisionedThroughputException {
    final Set<String> childShardIds = childShards.stream()
            .map(ChildShard::shardId)
            .collect(Collectors.toSet());

    final Lease updatedLease = currentLease.copy();
    updatedLease.childShardIds(childShardIds);

    leaseCoordinator.leaseRefresher().updateLeaseWithMetaInfo(updatedLease, UpdateField.CHILD_SHARDS);
    log.info("Shard {}: Updated current lease {} with child shard information: {}", shardInfo.shardId(), currentLease.leaseKey(), childShardIds);
}
/**
 * Returns the type of this task, which is always {@code TaskType.SHUTDOWN}.
 *
 * @see ConsumerTask#taskType()
 */
@Override
public TaskType taskType() {
    return this.taskType;
}
/**
 * Returns the reason this shutdown task was created with.
 */
@VisibleForTesting
public ShutdownReason getReason() {
    return this.reason;
}
/**
 * Drops the given lease through the lease coordinator.
 *
 * @param currentLease lease to drop; when {@code null} only a warning is logged
 */
private void dropLease(Lease currentLease) {
    if (currentLease == null) {
        log.warn("Shard {}: Unable to find the lease for shard. Will shutdown the shardConsumer directly.", leaseKeyProvider.apply(shardInfo));
        return;
    }
    leaseCoordinator.dropLease(currentLease);
    log.info("Dropped lease for shutting down ShardConsumer: {}", currentLease.leaseKey());
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy