software.amazon.kinesis.coordinator.PeriodicShardSyncManager Maven / Gradle / Ivy
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ComparisonChain;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NonNull;
import lombok.Value;
import lombok.experimental.Accessors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.Validate;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.awssdk.services.kinesis.model.Shard;
import software.amazon.awssdk.utils.CollectionUtils;
import software.amazon.kinesis.common.HashKeyRangeForLease;
import software.amazon.kinesis.common.StreamConfig;
import software.amazon.kinesis.common.StreamIdentifier;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.MultiStreamLease;
import software.amazon.kinesis.leases.ShardDetector;
import software.amazon.kinesis.leases.ShardSyncTaskManager;
import software.amazon.kinesis.leases.UpdateField;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import software.amazon.kinesis.lifecycle.TaskResult;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import java.io.Serializable;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
import static software.amazon.kinesis.common.HashKeyRangeForLease.fromHashKeyRange;
/**
* The top level orchestrator for coordinating the periodic shard sync related
* activities.
*/
@Getter
@EqualsAndHashCode
@Slf4j
class PeriodicShardSyncManager {
private static final long INITIAL_DELAY = 60 * 1000L;
@VisibleForTesting
static final BigInteger MIN_HASH_KEY = BigInteger.ZERO;
@VisibleForTesting
static final BigInteger MAX_HASH_KEY = new BigInteger("2").pow(128).subtract(BigInteger.ONE);
static final String PERIODIC_SHARD_SYNC_MANAGER = "PeriodicShardSyncManager";
private Map hashRangeHoleTrackerMap = new HashMap<>();
private final String workerId;
private final LeaderDecider leaderDecider;
private final LeaseRefresher leaseRefresher;
private final Map currentStreamConfigMap;
private final Function shardSyncTaskManagerProvider;
private final ScheduledExecutorService shardSyncThreadPool;
private final boolean isMultiStreamingMode;
private final MetricsFactory metricsFactory;
private final long leasesRecoveryAuditorExecutionFrequencyMillis;
private final int leasesRecoveryAuditorInconsistencyConfidenceThreshold;
private boolean isRunning;
PeriodicShardSyncManager(String workerId, LeaderDecider leaderDecider, LeaseRefresher leaseRefresher,
Map currentStreamConfigMap,
Function shardSyncTaskManagerProvider, boolean isMultiStreamingMode,
MetricsFactory metricsFactory,
long leasesRecoveryAuditorExecutionFrequencyMillis,
int leasesRecoveryAuditorInconsistencyConfidenceThreshold) {
this(workerId, leaderDecider, leaseRefresher, currentStreamConfigMap, shardSyncTaskManagerProvider,
Executors.newSingleThreadScheduledExecutor(), isMultiStreamingMode, metricsFactory,
leasesRecoveryAuditorExecutionFrequencyMillis, leasesRecoveryAuditorInconsistencyConfidenceThreshold);
}
PeriodicShardSyncManager(String workerId, LeaderDecider leaderDecider, LeaseRefresher leaseRefresher,
Map currentStreamConfigMap,
Function shardSyncTaskManagerProvider,
ScheduledExecutorService shardSyncThreadPool, boolean isMultiStreamingMode,
MetricsFactory metricsFactory,
long leasesRecoveryAuditorExecutionFrequencyMillis,
int leasesRecoveryAuditorInconsistencyConfidenceThreshold) {
Validate.notBlank(workerId, "WorkerID is required to initialize PeriodicShardSyncManager.");
Validate.notNull(leaderDecider, "LeaderDecider is required to initialize PeriodicShardSyncManager.");
this.workerId = workerId;
this.leaderDecider = leaderDecider;
this.leaseRefresher = leaseRefresher;
this.currentStreamConfigMap = currentStreamConfigMap;
this.shardSyncTaskManagerProvider = shardSyncTaskManagerProvider;
this.shardSyncThreadPool = shardSyncThreadPool;
this.isMultiStreamingMode = isMultiStreamingMode;
this.metricsFactory = metricsFactory;
this.leasesRecoveryAuditorExecutionFrequencyMillis = leasesRecoveryAuditorExecutionFrequencyMillis;
this.leasesRecoveryAuditorInconsistencyConfidenceThreshold = leasesRecoveryAuditorInconsistencyConfidenceThreshold;
}
public synchronized TaskResult start() {
if (!isRunning) {
final Runnable periodicShardSyncer = () -> {
try {
runShardSync();
} catch (Throwable t) {
log.error("Error during runShardSync.", t);
}
};
shardSyncThreadPool.scheduleWithFixedDelay(periodicShardSyncer, INITIAL_DELAY, leasesRecoveryAuditorExecutionFrequencyMillis,
TimeUnit.MILLISECONDS);
isRunning = true;
}
return new TaskResult(null);
}
/**
* Runs shardSync once
* Does not schedule periodic shardSync
* @return the result of the task
*/
public synchronized void syncShardsOnce() throws Exception {
// TODO: Resume the shard sync from failed stream in the next attempt, to avoid syncing
// TODO: for already synced streams
for(Map.Entry streamConfigEntry : currentStreamConfigMap.entrySet()) {
final StreamIdentifier streamIdentifier = streamConfigEntry.getKey();
log.info("Syncing Kinesis shard info for " + streamIdentifier);
final StreamConfig streamConfig = streamConfigEntry.getValue();
final ShardSyncTaskManager shardSyncTaskManager = shardSyncTaskManagerProvider.apply(streamConfig);
final TaskResult taskResult = shardSyncTaskManager.callShardSyncTask();
if (taskResult.getException() != null) {
throw taskResult.getException();
}
}
}
public void stop() {
if (isRunning) {
log.info(String.format("Shutting down leader decider on worker %s", workerId));
leaderDecider.shutdown();
log.info(String.format("Shutting down periodic shard sync task scheduler on worker %s", workerId));
shardSyncThreadPool.shutdown();
isRunning = false;
}
}
private void runShardSync() {
if (leaderDecider.isLeader(workerId)) {
log.info(String.format("WorkerId %s is leader, running the periodic shard sync task", workerId));
final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory,
PERIODIC_SHARD_SYNC_MANAGER);
int numStreamsWithPartialLeases = 0;
int numStreamsToSync = 0;
boolean isRunSuccess = false;
final long runStartMillis = System.currentTimeMillis();
try {
// Construct the stream to leases map to be used in the lease sync
final Map> streamToLeasesMap = getStreamToLeasesMap(
currentStreamConfigMap.keySet());
// For each of the stream, check if shard sync needs to be done based on the leases state.
for (Map.Entry streamConfigEntry : currentStreamConfigMap.entrySet()) {
final ShardSyncResponse shardSyncResponse = checkForShardSync(streamConfigEntry.getKey(),
streamToLeasesMap.get(streamConfigEntry.getKey()));
numStreamsWithPartialLeases += shardSyncResponse.isHoleDetected() ? 1 : 0;
numStreamsToSync += shardSyncResponse.shouldDoShardSync ? 1 : 0;
if (shardSyncResponse.shouldDoShardSync()) {
log.info("Periodic shard syncer initiating shard sync for {} due to the reason - {} ",
streamConfigEntry.getKey(), shardSyncResponse.reasonForDecision());
final ShardSyncTaskManager shardSyncTaskManager = shardSyncTaskManagerProvider
.apply(streamConfigEntry.getValue());
if (!shardSyncTaskManager.submitShardSyncTask()) {
log.warn(
"Failed to submit shard sync task for stream {}. This could be due to the previous pending shard sync task.",
shardSyncTaskManager.shardDetector().streamIdentifier().streamName());
}
} else {
log.info("Skipping shard sync for {} due to the reason - {}", streamConfigEntry.getKey(),
shardSyncResponse.reasonForDecision());
}
}
isRunSuccess = true;
} catch (Exception e) {
log.error("Caught exception while running periodic shard syncer.", e);
} finally {
scope.addData("NumStreamsWithPartialLeases", numStreamsWithPartialLeases, StandardUnit.COUNT, MetricsLevel.SUMMARY);
scope.addData("NumStreamsToSync", numStreamsToSync, StandardUnit.COUNT, MetricsLevel.SUMMARY);
MetricsUtil.addSuccessAndLatency(scope, isRunSuccess, runStartMillis, MetricsLevel.SUMMARY);
scope.end();
}
} else {
log.debug("WorkerId {} is not a leader, not running the shard sync task", workerId);
}
}
private Map> getStreamToLeasesMap(
final Set streamIdentifiersToFilter)
throws DependencyException, ProvisionedThroughputException, InvalidStateException {
final List leases = leaseRefresher.listLeases();
if (!isMultiStreamingMode) {
Validate.isTrue(streamIdentifiersToFilter.size() == 1);
return Collections.singletonMap(streamIdentifiersToFilter.iterator().next(), leases);
} else {
final Map> streamToLeasesMap = new HashMap<>();
for (Lease lease : leases) {
StreamIdentifier streamIdentifier = StreamIdentifier
.multiStreamInstance(((MultiStreamLease) lease).streamIdentifier());
if (streamIdentifiersToFilter.contains(streamIdentifier)) {
streamToLeasesMap.computeIfAbsent(streamIdentifier, s -> new ArrayList<>()).add(lease);
}
}
return streamToLeasesMap;
}
}
@VisibleForTesting
ShardSyncResponse checkForShardSync(StreamIdentifier streamIdentifier, List leases) {
if (CollectionUtils.isNullOrEmpty(leases)) {
// If the leases is null or empty then we need to do shard sync
log.info("No leases found for {}. Will be triggering shard sync", streamIdentifier);
return new ShardSyncResponse(true, false, "No leases found for " + streamIdentifier);
}
// Check if there are any holes in the leases and return the first hole if present.
Optional hashRangeHoleOpt = hasHoleInLeases(streamIdentifier, leases);
if (hashRangeHoleOpt.isPresent()) {
// If hole is present, check if the hole is detected consecutively in previous occurrences.
// If hole is determined with high confidence return true; return false otherwise
// We are using the high confidence factor to avoid shard sync on any holes during resharding and
// lease cleanups or any intermittent issues.
final HashRangeHoleTracker hashRangeHoleTracker = hashRangeHoleTrackerMap
.computeIfAbsent(streamIdentifier, s -> new HashRangeHoleTracker());
final boolean hasHoleWithHighConfidence = hashRangeHoleTracker
.hasHighConfidenceOfHoleWith(hashRangeHoleOpt.get());
return new ShardSyncResponse(hasHoleWithHighConfidence, true,
"Detected same hole for " + hashRangeHoleTracker.getNumConsecutiveHoles()
+ " times. Shard sync will be initiated when threshold reaches "
+ leasesRecoveryAuditorInconsistencyConfidenceThreshold);
} else {
// If hole is not present, clear any previous tracking for this stream and return false;
hashRangeHoleTrackerMap.remove(streamIdentifier);
return new ShardSyncResponse(false, false, "Hash Ranges are complete for " + streamIdentifier);
}
}
@Value
@Accessors(fluent = true)
@VisibleForTesting
static class ShardSyncResponse {
private final boolean shouldDoShardSync;
private final boolean isHoleDetected;
private final String reasonForDecision;
}
@VisibleForTesting
Optional hasHoleInLeases(StreamIdentifier streamIdentifier, List leases) {
// Filter the leases with any checkpoint other than shard end.
List activeLeases = leases.stream()
.filter(lease -> lease.checkpoint() != null && !lease.checkpoint().isShardEnd()).collect(Collectors.toList());
List activeLeasesWithHashRanges = fillWithHashRangesIfRequired(streamIdentifier, activeLeases);
return checkForHoleInHashKeyRanges(streamIdentifier, activeLeasesWithHashRanges);
}
// If leases are missing hashranges information, update the leases in-memory as well as in the lease storage
// by learning from kinesis shards.
private List fillWithHashRangesIfRequired(StreamIdentifier streamIdentifier, List activeLeases) {
List activeLeasesWithNoHashRanges = activeLeases.stream()
.filter(lease -> lease.hashKeyRangeForLease() == null).collect(Collectors.toList());
Optional minLeaseOpt = activeLeasesWithNoHashRanges.stream().min(Comparator.comparing(Lease::leaseKey));
if (minLeaseOpt.isPresent()) {
// TODO : use minLease for new ListShards with startingShardId
final Lease minLease = minLeaseOpt.get();
final ShardDetector shardDetector = shardSyncTaskManagerProvider
.apply(currentStreamConfigMap.get(streamIdentifier)).shardDetector();
final Map kinesisShards = shardDetector.listShards().stream()
.collect(Collectors.toMap(Shard::shardId, shard -> shard));
return activeLeases.stream().map(lease -> {
if (lease.hashKeyRangeForLease() == null) {
final String shardId = lease instanceof MultiStreamLease ?
((MultiStreamLease) lease).shardId() :
lease.leaseKey();
final Shard shard = kinesisShards.get(shardId);
if(shard == null) {
return lease;
}
lease.hashKeyRange(fromHashKeyRange(shard.hashKeyRange()));
try {
leaseRefresher.updateLeaseWithMetaInfo(lease, UpdateField.HASH_KEY_RANGE);
} catch (Exception e) {
log.warn(
"Unable to update hash range key information for lease {} of stream {}. This may result in explicit lease sync.",
lease.leaseKey(), streamIdentifier);
}
}
return lease;
}).filter(lease -> lease.hashKeyRangeForLease() != null).collect(Collectors.toList());
} else {
return activeLeases;
}
}
@VisibleForTesting
static Optional checkForHoleInHashKeyRanges(StreamIdentifier streamIdentifier,
List leasesWithHashKeyRanges) {
// Sort the hash ranges by starting hash key.
List sortedLeasesWithHashKeyRanges = sortLeasesByHashRange(leasesWithHashKeyRanges);
if(sortedLeasesWithHashKeyRanges.isEmpty()) {
log.error("No leases with valid hashranges found for stream {}", streamIdentifier);
return Optional.of(new HashRangeHole());
}
// Validate for hashranges bounds.
if (!sortedLeasesWithHashKeyRanges.get(0).hashKeyRangeForLease().startingHashKey().equals(MIN_HASH_KEY) || !sortedLeasesWithHashKeyRanges
.get(sortedLeasesWithHashKeyRanges.size() - 1).hashKeyRangeForLease().endingHashKey().equals(MAX_HASH_KEY)) {
log.error("Incomplete hash range found for stream {} between {} and {}.", streamIdentifier,
sortedLeasesWithHashKeyRanges.get(0),
sortedLeasesWithHashKeyRanges.get(sortedLeasesWithHashKeyRanges.size() - 1));
return Optional.of(new HashRangeHole(sortedLeasesWithHashKeyRanges.get(0).hashKeyRangeForLease(),
sortedLeasesWithHashKeyRanges.get(sortedLeasesWithHashKeyRanges.size() - 1).hashKeyRangeForLease()));
}
// Check for any holes in the sorted hashrange intervals.
if (sortedLeasesWithHashKeyRanges.size() > 1) {
Lease leftMostLeaseToReportInCaseOfHole = sortedLeasesWithHashKeyRanges.get(0);
HashKeyRangeForLease leftLeaseHashRange = leftMostLeaseToReportInCaseOfHole.hashKeyRangeForLease();
for (int i = 1; i < sortedLeasesWithHashKeyRanges.size(); i++) {
final HashKeyRangeForLease rightLeaseHashRange = sortedLeasesWithHashKeyRanges.get(i).hashKeyRangeForLease();
final BigInteger rangeDiff = rightLeaseHashRange.startingHashKey().subtract(leftLeaseHashRange.endingHashKey());
// Case of overlapping leases when the rangediff is 0 or negative.
// signum() will be -1 for negative and 0 if value is 0.
// Merge the range for further tracking.
if (rangeDiff.signum() <= 0) {
leftLeaseHashRange = new HashKeyRangeForLease(leftLeaseHashRange.startingHashKey(),
leftLeaseHashRange.endingHashKey().max(rightLeaseHashRange.endingHashKey()));
} else {
// Case of non overlapping leases when rangediff is positive. signum() will be 1 for positive.
// If rangeDiff is 1, then it is a case of continuous hashrange. If not, it is a hole.
if (!rangeDiff.equals(BigInteger.ONE)) {
log.error("Incomplete hash range found for {} between {} and {}.", streamIdentifier,
leftMostLeaseToReportInCaseOfHole, sortedLeasesWithHashKeyRanges.get(i));
return Optional.of(new HashRangeHole(leftMostLeaseToReportInCaseOfHole.hashKeyRangeForLease(),
sortedLeasesWithHashKeyRanges.get(i).hashKeyRangeForLease()));
}
leftMostLeaseToReportInCaseOfHole = sortedLeasesWithHashKeyRanges.get(i);
leftLeaseHashRange = rightLeaseHashRange;
}
}
}
return Optional.empty();
}
@VisibleForTesting
static List sortLeasesByHashRange(List leasesWithHashKeyRanges) {
if (leasesWithHashKeyRanges.size() == 0 || leasesWithHashKeyRanges.size() == 1)
return leasesWithHashKeyRanges;
Collections.sort(leasesWithHashKeyRanges, new HashKeyRangeComparator());
return leasesWithHashKeyRanges;
}
@Value
private static class HashRangeHole {
HashRangeHole() {
hashRangeAtStartOfPossibleHole = hashRangeAtEndOfPossibleHole = null;
}
HashRangeHole(HashKeyRangeForLease hashRangeAtStartOfPossibleHole, HashKeyRangeForLease hashRangeAtEndOfPossibleHole) {
this.hashRangeAtStartOfPossibleHole = hashRangeAtStartOfPossibleHole;
this.hashRangeAtEndOfPossibleHole = hashRangeAtEndOfPossibleHole;
}
private final HashKeyRangeForLease hashRangeAtStartOfPossibleHole;
private final HashKeyRangeForLease hashRangeAtEndOfPossibleHole;
}
private class HashRangeHoleTracker {
private HashRangeHole hashRangeHole;
@Getter
private Integer numConsecutiveHoles;
public boolean hasHighConfidenceOfHoleWith(@NonNull HashRangeHole hashRangeHole) {
if (hashRangeHole.equals(this.hashRangeHole)) {
++this.numConsecutiveHoles;
} else {
this.hashRangeHole = hashRangeHole;
this.numConsecutiveHoles = 1;
}
return numConsecutiveHoles >= leasesRecoveryAuditorInconsistencyConfidenceThreshold;
}
}
/**
* Helper class to compare leases based on their hash range.
*/
private static class HashKeyRangeComparator implements Comparator, Serializable {
private static final long serialVersionUID = 1L;
@Override
public int compare(Lease lease, Lease otherLease) {
Validate.notNull(lease);
Validate.notNull(otherLease);
Validate.notNull(lease.hashKeyRangeForLease());
Validate.notNull(otherLease.hashKeyRangeForLease());
return ComparisonChain.start()
.compare(lease.hashKeyRangeForLease().startingHashKey(), otherLease.hashKeyRangeForLease().startingHashKey())
.compare(lease.hashKeyRangeForLease().endingHashKey(), otherLease.hashKeyRangeForLease().endingHashKey())
.result();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy