org.apache.kafka.clients.consumer.internals.AbstractStickyAssignor Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.clients.consumer.internals;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.kafka.common.TopicPartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public abstract class AbstractStickyAssignor extends AbstractPartitionAssignor {
private static final Logger log = LoggerFactory.getLogger(AbstractStickyAssignor.class);
public static final int DEFAULT_GENERATION = -1;
private PartitionMovements partitionMovements;
// Keep track of the partitions being migrated from one consumer to another during assignment
// so the cooperative assignor can adjust the assignment
protected Map partitionsTransferringOwnership = new HashMap<>();
static final class ConsumerGenerationPair {
final String consumer;
final int generation;
ConsumerGenerationPair(String consumer, int generation) {
this.consumer = consumer;
this.generation = generation;
}
}
public static final class MemberData {
public final List partitions;
public final Optional generation;
public MemberData(List partitions, Optional generation) {
this.partitions = partitions;
this.generation = generation;
}
}
abstract protected MemberData memberData(Subscription subscription);
@Override
public Map> assign(Map partitionsPerTopic,
Map subscriptions) {
Map> consumerToOwnedPartitions = new HashMap<>();
if (allSubscriptionsEqual(partitionsPerTopic.keySet(), subscriptions, consumerToOwnedPartitions)) {
log.debug("Detected that all consumers were subscribed to same set of topics, invoking the "
+ "optimized assignment algorithm");
partitionsTransferringOwnership = new HashMap<>();
return constrainedAssign(partitionsPerTopic, consumerToOwnedPartitions);
} else {
log.debug("Detected that all not consumers were subscribed to same set of topics, falling back to the "
+ "general case assignment algorithm");
partitionsTransferringOwnership = null;
return generalAssign(partitionsPerTopic, subscriptions);
}
}
/**
* Returns true iff all consumers have an identical subscription. Also fills out the passed in
* {@code consumerToOwnedPartitions} with each consumer's previously owned and still-subscribed partitions
*/
private boolean allSubscriptionsEqual(Set allTopics,
Map subscriptions,
Map> consumerToOwnedPartitions) {
Set membersWithOldGeneration = new HashSet<>();
Set membersOfCurrentHighestGeneration = new HashSet<>();
int maxGeneration = DEFAULT_GENERATION;
Set subscribedTopics = new HashSet<>();
for (Map.Entry subscriptionEntry : subscriptions.entrySet()) {
String consumer = subscriptionEntry.getKey();
Subscription subscription = subscriptionEntry.getValue();
// initialize the subscribed topics set if this is the first subscription
if (subscribedTopics.isEmpty()) {
subscribedTopics.addAll(subscription.topics());
} else if (!(subscription.topics().size() == subscribedTopics.size()
&& subscribedTopics.containsAll(subscription.topics()))) {
return false;
}
MemberData memberData = memberData(subscription);
List ownedPartitions = new ArrayList<>();
consumerToOwnedPartitions.put(consumer, ownedPartitions);
// Only consider this consumer's owned partitions as valid if it is a member of the current highest
// generation, or it's generation is not present but we have not seen any known generation so far
if (memberData.generation.isPresent() && memberData.generation.get() >= maxGeneration
|| !memberData.generation.isPresent() && maxGeneration == DEFAULT_GENERATION) {
// If the current member's generation is higher, all the previously owned partitions are invalid
if (memberData.generation.isPresent() && memberData.generation.get() > maxGeneration) {
membersWithOldGeneration.addAll(membersOfCurrentHighestGeneration);
membersOfCurrentHighestGeneration.clear();
maxGeneration = memberData.generation.get();
}
membersOfCurrentHighestGeneration.add(consumer);
for (final TopicPartition tp : memberData.partitions) {
// filter out any topics that no longer exist or aren't part of the current subscription
if (allTopics.contains(tp.topic())) {
ownedPartitions.add(tp);
}
}
}
}
for (String consumer : membersWithOldGeneration) {
consumerToOwnedPartitions.get(consumer).clear();
}
return true;
}
/**
* This constrainedAssign optimizes the assignment algorithm when all consumers were subscribed to same set of topics.
* The method includes the following steps:
*
* 1. Reassign as many previously owned partitions as possible, up to the maxQuota
* 2. Fill remaining members up to minQuota
* 3. If we ran out of unassigned partitions before filling all consumers, we need to start stealing partitions
* from the over-full consumers at max capacity
* 4. Otherwise we may have run out of unfilled consumers before assigning all partitions, in which case we
* should just distribute one partition each to all consumers at min capacity
*
* @param partitionsPerTopic The number of partitions for each subscribed topic
* @param consumerToOwnedPartitions Each consumer's previously owned and still-subscribed partitions
*
* @return Map from each member to the list of partitions assigned to them.
*/
private Map> constrainedAssign(Map partitionsPerTopic,
Map> consumerToOwnedPartitions) {
SortedSet unassignedPartitions = getTopicPartitions(partitionsPerTopic);
Set allRevokedPartitions = new HashSet<>();
// Each consumer should end up in exactly one of the below
// the consumers not yet at capacity
List unfilledMembers = new LinkedList<>();
// the members with exactly maxQuota partitions assigned
Queue maxCapacityMembers = new LinkedList<>();
// the members with exactly minQuota partitions assigned
Queue minCapacityMembers = new LinkedList<>();
int numberOfConsumers = consumerToOwnedPartitions.size();
int minQuota = (int) Math.floor(((double) unassignedPartitions.size()) / numberOfConsumers);
int maxQuota = (int) Math.ceil(((double) unassignedPartitions.size()) / numberOfConsumers);
// initialize the assignment map with an empty array of size minQuota for all members
Map> assignment = new HashMap<>(
consumerToOwnedPartitions.keySet().stream().collect(Collectors.toMap(c -> c, c -> new ArrayList<>(minQuota))));
// Reassign as many previously owned partitions as possible
for (Map.Entry> consumerEntry : consumerToOwnedPartitions.entrySet()) {
String consumer = consumerEntry.getKey();
List ownedPartitions = consumerEntry.getValue();
List consumerAssignment = assignment.get(consumer);
int i = 0;
// assign the first N partitions up to the max quota, and mark the remaining as being revoked
for (TopicPartition tp : ownedPartitions) {
if (i < maxQuota) {
consumerAssignment.add(tp);
unassignedPartitions.remove(tp);
} else {
allRevokedPartitions.add(tp);
}
++i;
}
if (ownedPartitions.size() < minQuota) {
unfilledMembers.add(consumer);
} else {
// It's possible for a consumer to be at both min and max capacity if minQuota == maxQuota
if (consumerAssignment.size() == minQuota)
minCapacityMembers.add(consumer);
if (consumerAssignment.size() == maxQuota)
maxCapacityMembers.add(consumer);
}
}
Collections.sort(unfilledMembers);
Iterator unassignedPartitionsIter = unassignedPartitions.iterator();
// Fill remaining members up to minQuota
while (!unfilledMembers.isEmpty() && !unassignedPartitions.isEmpty()) {
Iterator unfilledConsumerIter = unfilledMembers.iterator();
while (unfilledConsumerIter.hasNext()) {
String consumer = unfilledConsumerIter.next();
List consumerAssignment = assignment.get(consumer);
if (unassignedPartitionsIter.hasNext()) {
TopicPartition tp = unassignedPartitionsIter.next();
consumerAssignment.add(tp);
unassignedPartitionsIter.remove();
// We already assigned all possible ownedPartitions, so we know this must be newly to this consumer
if (allRevokedPartitions.contains(tp))
partitionsTransferringOwnership.put(tp, consumer);
} else {
break;
}
if (consumerAssignment.size() == minQuota) {
minCapacityMembers.add(consumer);
unfilledConsumerIter.remove();
}
}
}
// If we ran out of unassigned partitions before filling all consumers, we need to start stealing partitions
// from the over-full consumers at max capacity
for (String consumer : unfilledMembers) {
List consumerAssignment = assignment.get(consumer);
int remainingCapacity = minQuota - consumerAssignment.size();
while (remainingCapacity > 0) {
String overloadedConsumer = maxCapacityMembers.poll();
if (overloadedConsumer == null) {
throw new IllegalStateException("Some consumers are under capacity but all partitions have been assigned");
}
TopicPartition swappedPartition = assignment.get(overloadedConsumer).remove(0);
consumerAssignment.add(swappedPartition);
--remainingCapacity;
// This partition is by definition transferring ownership, the swapped partition must have come from
// the max capacity member's owned partitions since it can only reach max capacity with owned partitions
partitionsTransferringOwnership.put(swappedPartition, consumer);
}
minCapacityMembers.add(consumer);
}
// Otherwise we may have run out of unfilled consumers before assigning all partitions, in which case we
// should just distribute one partition each to all consumers at min capacity
for (TopicPartition unassignedPartition : unassignedPartitions) {
String underCapacityConsumer = minCapacityMembers.poll();
if (underCapacityConsumer == null) {
throw new IllegalStateException("Some partitions are unassigned but all consumers are at maximum capacity");
}
// We can skip the bookkeeping of unassignedPartitions and maxCapacityMembers here since we are at the end
assignment.get(underCapacityConsumer).add(unassignedPartition);
if (allRevokedPartitions.contains(unassignedPartition))
partitionsTransferringOwnership.put(unassignedPartition, underCapacityConsumer);
}
return assignment;
}
private SortedSet getTopicPartitions(Map partitionsPerTopic) {
SortedSet allPartitions =
new TreeSet<>(Comparator.comparing(TopicPartition::topic).thenComparing(TopicPartition::partition));
for (Entry entry: partitionsPerTopic.entrySet()) {
String topic = entry.getKey();
for (int i = 0; i < entry.getValue(); ++i) {
allPartitions.add(new TopicPartition(topic, i));
}
}
return allPartitions;
}
/**
* This generalAssign algorithm guarantees the assignment that is as balanced as possible.
* This method includes the following steps:
*
* 1. Preserving all the existing partition assignments
* 2. Removing all the partition assignments that have become invalid due to the change that triggers the reassignment
* 3. Assigning the unassigned partitions in a way that balances out the overall assignments of partitions to consumers
* 4. Further balancing out the resulting assignment by finding the partitions that can be reassigned
* to another consumer towards an overall more balanced assignment.
*
* @param partitionsPerTopic The number of partitions for each subscribed topic.
* @param subscriptions Map from the member id to their respective topic subscription
*
* @return Map from each member to the list of partitions assigned to them.
*/
private Map> generalAssign(Map partitionsPerTopic,
Map subscriptions) {
Map> currentAssignment = new HashMap<>();
Map prevAssignment = new HashMap<>();
partitionMovements = new PartitionMovements();
prepopulateCurrentAssignments(subscriptions, currentAssignment, prevAssignment);
// a mapping of all topic partitions to all consumers that can be assigned to them
final Map> partition2AllPotentialConsumers = new HashMap<>();
// a mapping of all consumers to all potential topic partitions that can be assigned to them
final Map> consumer2AllPotentialPartitions = new HashMap<>();
// initialize partition2AllPotentialConsumers and consumer2AllPotentialPartitions in the following two for loops
for (Entry entry: partitionsPerTopic.entrySet()) {
for (int i = 0; i < entry.getValue(); ++i)
partition2AllPotentialConsumers.put(new TopicPartition(entry.getKey(), i), new ArrayList<>());
}
for (Entry entry: subscriptions.entrySet()) {
String consumerId = entry.getKey();
consumer2AllPotentialPartitions.put(consumerId, new ArrayList<>());
entry.getValue().topics().stream().filter(topic -> partitionsPerTopic.get(topic) != null).forEach(topic -> {
for (int i = 0; i < partitionsPerTopic.get(topic); ++i) {
TopicPartition topicPartition = new TopicPartition(topic, i);
consumer2AllPotentialPartitions.get(consumerId).add(topicPartition);
partition2AllPotentialConsumers.get(topicPartition).add(consumerId);
}
});
// add this consumer to currentAssignment (with an empty topic partition assignment) if it does not already exist
if (!currentAssignment.containsKey(consumerId))
currentAssignment.put(consumerId, new ArrayList<>());
}
// a mapping of partition to current consumer
Map currentPartitionConsumer = new HashMap<>();
for (Map.Entry> entry: currentAssignment.entrySet())
for (TopicPartition topicPartition: entry.getValue())
currentPartitionConsumer.put(topicPartition, entry.getKey());
List sortedPartitions = sortPartitions(partition2AllPotentialConsumers);
// all partitions that need to be assigned (initially set to all partitions but adjusted in the following loop)
List unassignedPartitions = new ArrayList<>(sortedPartitions);
boolean revocationRequired = false;
for (Iterator>> it = currentAssignment.entrySet().iterator(); it.hasNext();) {
Map.Entry> entry = it.next();
if (!subscriptions.containsKey(entry.getKey())) {
// if a consumer that existed before (and had some partition assignments) is now removed, remove it from currentAssignment
for (TopicPartition topicPartition: entry.getValue())
currentPartitionConsumer.remove(topicPartition);
it.remove();
} else {
// otherwise (the consumer still exists)
for (Iterator partitionIter = entry.getValue().iterator(); partitionIter.hasNext();) {
TopicPartition partition = partitionIter.next();
if (!partition2AllPotentialConsumers.containsKey(partition)) {
// if this topic partition of this consumer no longer exists remove it from currentAssignment of the consumer
partitionIter.remove();
currentPartitionConsumer.remove(partition);
} else if (!subscriptions.get(entry.getKey()).topics().contains(partition.topic())) {
// if this partition cannot remain assigned to its current consumer because the consumer
// is no longer subscribed to its topic remove it from currentAssignment of the consumer
partitionIter.remove();
revocationRequired = true;
} else
// otherwise, remove the topic partition from those that need to be assigned only if
// its current consumer is still subscribed to its topic (because it is already assigned
// and we would want to preserve that assignment as much as possible)
unassignedPartitions.remove(partition);
}
}
}
// at this point we have preserved all valid topic partition to consumer assignments and removed
// all invalid topic partitions and invalid consumers. Now we need to assign unassignedPartitions
// to consumers so that the topic partition assignments are as balanced as possible.
// an ascending sorted set of consumers based on how many topic partitions are already assigned to them
TreeSet sortedCurrentSubscriptions = new TreeSet<>(new SubscriptionComparator(currentAssignment));
sortedCurrentSubscriptions.addAll(currentAssignment.keySet());
balance(currentAssignment, prevAssignment, sortedPartitions, unassignedPartitions, sortedCurrentSubscriptions,
consumer2AllPotentialPartitions, partition2AllPotentialConsumers, currentPartitionConsumer, revocationRequired);
return currentAssignment;
}
private void prepopulateCurrentAssignments(Map subscriptions,
Map> currentAssignment,
Map prevAssignment) {
// we need to process subscriptions' user data with each consumer's reported generation in mind
// higher generations overwrite lower generations in case of a conflict
// note that a conflict could exists only if user data is for different generations
// for each partition we create a sorted map of its consumers by generation
Map> sortedPartitionConsumersByGeneration = new HashMap<>();
for (Map.Entry subscriptionEntry: subscriptions.entrySet()) {
String consumer = subscriptionEntry.getKey();
MemberData memberData = memberData(subscriptionEntry.getValue());
for (TopicPartition partition: memberData.partitions) {
if (sortedPartitionConsumersByGeneration.containsKey(partition)) {
Map consumers = sortedPartitionConsumersByGeneration.get(partition);
if (memberData.generation.isPresent() && consumers.containsKey(memberData.generation.get())) {
// same partition is assigned to two consumers during the same rebalance.
// log a warning and skip this record
log.warn("Partition '{}' is assigned to multiple consumers following sticky assignment generation {}.",
partition, memberData.generation);
} else
consumers.put(memberData.generation.orElse(DEFAULT_GENERATION), consumer);
} else {
TreeMap sortedConsumers = new TreeMap<>();
sortedConsumers.put(memberData.generation.orElse(DEFAULT_GENERATION), consumer);
sortedPartitionConsumersByGeneration.put(partition, sortedConsumers);
}
}
}
// prevAssignment holds the prior ConsumerGenerationPair (before current) of each partition
// current and previous consumers are the last two consumers of each partition in the above sorted map
for (Map.Entry> partitionConsumersEntry: sortedPartitionConsumersByGeneration.entrySet()) {
TopicPartition partition = partitionConsumersEntry.getKey();
TreeMap consumers = partitionConsumersEntry.getValue();
Iterator it = consumers.descendingKeySet().iterator();
// let's process the current (most recent) consumer first
String consumer = consumers.get(it.next());
currentAssignment.computeIfAbsent(consumer, k -> new ArrayList<>());
currentAssignment.get(consumer).add(partition);
// now update previous assignment if any
if (it.hasNext()) {
int generation = it.next();
prevAssignment.put(partition, new ConsumerGenerationPair(consumers.get(generation), generation));
}
}
}
/**
* determine if the current assignment is a balanced one
*
* @param currentAssignment: the assignment whose balance needs to be checked
* @param sortedCurrentSubscriptions: an ascending sorted set of consumers based on how many topic partitions are already assigned to them
* @param allSubscriptions: a mapping of all consumers to all potential topic partitions that can be assigned to them
* @return true if the given assignment is balanced; false otherwise
*/
private boolean isBalanced(Map> currentAssignment,
TreeSet sortedCurrentSubscriptions,
Map> allSubscriptions) {
int min = currentAssignment.get(sortedCurrentSubscriptions.first()).size();
int max = currentAssignment.get(sortedCurrentSubscriptions.last()).size();
if (min >= max - 1)
// if minimum and maximum numbers of partitions assigned to consumers differ by at most one return true
return true;
// create a mapping from partitions to the consumer assigned to them
final Map allPartitions = new HashMap<>();
Set>> assignments = currentAssignment.entrySet();
for (Map.Entry> entry: assignments) {
List topicPartitions = entry.getValue();
for (TopicPartition topicPartition: topicPartitions) {
if (allPartitions.containsKey(topicPartition))
log.error("{} is assigned to more than one consumer.", topicPartition);
allPartitions.put(topicPartition, entry.getKey());
}
}
// for each consumer that does not have all the topic partitions it can get make sure none of the topic partitions it
// could but did not get cannot be moved to it (because that would break the balance)
for (String consumer: sortedCurrentSubscriptions) {
List consumerPartitions = currentAssignment.get(consumer);
int consumerPartitionCount = consumerPartitions.size();
// skip if this consumer already has all the topic partitions it can get
if (consumerPartitionCount == allSubscriptions.get(consumer).size())
continue;
// otherwise make sure it cannot get any more
List potentialTopicPartitions = allSubscriptions.get(consumer);
for (TopicPartition topicPartition: potentialTopicPartitions) {
if (!currentAssignment.get(consumer).contains(topicPartition)) {
String otherConsumer = allPartitions.get(topicPartition);
int otherConsumerPartitionCount = currentAssignment.get(otherConsumer).size();
if (consumerPartitionCount < otherConsumerPartitionCount) {
log.debug("{} can be moved from consumer {} to consumer {} for a more balanced assignment.",
topicPartition, otherConsumer, consumer);
return false;
}
}
}
}
return true;
}
/**
* @return the balance score of the given assignment, as the sum of assigned partitions size difference of all consumer pairs.
* A perfectly balanced assignment (with all consumers getting the same number of partitions) has a balance score of 0.
* Lower balance score indicates a more balanced assignment.
*/
private int getBalanceScore(Map> assignment) {
int score = 0;
Map consumer2AssignmentSize = new HashMap<>();
for (Entry> entry: assignment.entrySet())
consumer2AssignmentSize.put(entry.getKey(), entry.getValue().size());
Iterator> it = consumer2AssignmentSize.entrySet().iterator();
while (it.hasNext()) {
Entry entry = it.next();
int consumerAssignmentSize = entry.getValue();
it.remove();
for (Entry otherEntry: consumer2AssignmentSize.entrySet())
score += Math.abs(consumerAssignmentSize - otherEntry.getValue());
}
return score;
}
/**
* Sort valid partitions so they are processed in the potential reassignment phase in the proper order
* that causes minimal partition movement among consumers (hence honoring maximal stickiness)
*
* @param partition2AllPotentialConsumers a mapping of partitions to their potential consumers
* @return an ascending sorted list of topic partitions based on how many consumers can potentially use them
*/
private List sortPartitions(Map> partition2AllPotentialConsumers) {
List sortedPartitions = new ArrayList<>(partition2AllPotentialConsumers.keySet());
Collections.sort(sortedPartitions, new PartitionComparator(partition2AllPotentialConsumers));
return sortedPartitions;
}
/**
* The assignment should improve the overall balance of the partition assignments to consumers.
*/
private void assignPartition(TopicPartition partition,
TreeSet sortedCurrentSubscriptions,
Map> currentAssignment,
Map> consumer2AllPotentialPartitions,
Map currentPartitionConsumer) {
for (String consumer: sortedCurrentSubscriptions) {
if (consumer2AllPotentialPartitions.get(consumer).contains(partition)) {
sortedCurrentSubscriptions.remove(consumer);
currentAssignment.get(consumer).add(partition);
currentPartitionConsumer.put(partition, consumer);
sortedCurrentSubscriptions.add(consumer);
break;
}
}
}
private boolean canParticipateInReassignment(TopicPartition partition,
Map> partition2AllPotentialConsumers) {
// if a partition has two or more potential consumers it is subject to reassignment.
return partition2AllPotentialConsumers.get(partition).size() >= 2;
}
private boolean canParticipateInReassignment(String consumer,
Map> currentAssignment,
Map> consumer2AllPotentialPartitions,
Map> partition2AllPotentialConsumers) {
List currentPartitions = currentAssignment.get(consumer);
int currentAssignmentSize = currentPartitions.size();
int maxAssignmentSize = consumer2AllPotentialPartitions.get(consumer).size();
if (currentAssignmentSize > maxAssignmentSize)
log.error("The consumer {} is assigned more partitions than the maximum possible.", consumer);
if (currentAssignmentSize < maxAssignmentSize)
// if a consumer is not assigned all its potential partitions it is subject to reassignment
return true;
for (TopicPartition partition: currentPartitions)
// if any of the partitions assigned to a consumer is subject to reassignment the consumer itself
// is subject to reassignment
if (canParticipateInReassignment(partition, partition2AllPotentialConsumers))
return true;
return false;
}
/**
* Balance the current assignment using the data structures created in the assign(...) method above.
*/
private void balance(Map> currentAssignment,
Map prevAssignment,
List sortedPartitions,
List unassignedPartitions,
TreeSet sortedCurrentSubscriptions,
Map> consumer2AllPotentialPartitions,
Map> partition2AllPotentialConsumers,
Map currentPartitionConsumer,
boolean revocationRequired) {
boolean initializing = currentAssignment.get(sortedCurrentSubscriptions.last()).isEmpty();
boolean reassignmentPerformed = false;
// assign all unassigned partitions
for (TopicPartition partition: unassignedPartitions) {
// skip if there is no potential consumer for the partition
if (partition2AllPotentialConsumers.get(partition).isEmpty())
continue;
assignPartition(partition, sortedCurrentSubscriptions, currentAssignment,
consumer2AllPotentialPartitions, currentPartitionConsumer);
}
// narrow down the reassignment scope to only those partitions that can actually be reassigned
Set fixedPartitions = new HashSet<>();
for (TopicPartition partition: partition2AllPotentialConsumers.keySet())
if (!canParticipateInReassignment(partition, partition2AllPotentialConsumers))
fixedPartitions.add(partition);
sortedPartitions.removeAll(fixedPartitions);
unassignedPartitions.removeAll(fixedPartitions);
// narrow down the reassignment scope to only those consumers that are subject to reassignment
Map> fixedAssignments = new HashMap<>();
for (String consumer: consumer2AllPotentialPartitions.keySet())
if (!canParticipateInReassignment(consumer, currentAssignment,
consumer2AllPotentialPartitions, partition2AllPotentialConsumers)) {
sortedCurrentSubscriptions.remove(consumer);
fixedAssignments.put(consumer, currentAssignment.remove(consumer));
}
// create a deep copy of the current assignment so we can revert to it if we do not get a more balanced assignment later
Map> preBalanceAssignment = deepCopy(currentAssignment);
Map preBalancePartitionConsumers = new HashMap<>(currentPartitionConsumer);
// if we don't already need to revoke something due to subscription changes, first try to balance by only moving newly added partitions
if (!revocationRequired) {
performReassignments(unassignedPartitions, currentAssignment, prevAssignment, sortedCurrentSubscriptions,
consumer2AllPotentialPartitions, partition2AllPotentialConsumers, currentPartitionConsumer);
}
reassignmentPerformed = performReassignments(sortedPartitions, currentAssignment, prevAssignment, sortedCurrentSubscriptions,
consumer2AllPotentialPartitions, partition2AllPotentialConsumers, currentPartitionConsumer);
// if we are not preserving existing assignments and we have made changes to the current assignment
// make sure we are getting a more balanced assignment; otherwise, revert to previous assignment
if (!initializing && reassignmentPerformed && getBalanceScore(currentAssignment) >= getBalanceScore(preBalanceAssignment)) {
deepCopy(preBalanceAssignment, currentAssignment);
currentPartitionConsumer.clear();
currentPartitionConsumer.putAll(preBalancePartitionConsumers);
}
// add the fixed assignments (those that could not change) back
for (Entry> entry: fixedAssignments.entrySet()) {
String consumer = entry.getKey();
currentAssignment.put(consumer, entry.getValue());
sortedCurrentSubscriptions.add(consumer);
}
fixedAssignments.clear();
}
private boolean performReassignments(List reassignablePartitions,
Map> currentAssignment,
Map prevAssignment,
TreeSet sortedCurrentSubscriptions,
Map> consumer2AllPotentialPartitions,
Map> partition2AllPotentialConsumers,
Map currentPartitionConsumer) {
boolean reassignmentPerformed = false;
boolean modified;
// repeat reassignment until no partition can be moved to improve the balance
do {
modified = false;
// reassign all reassignable partitions (starting from the partition with least potential consumers and if needed)
// until the full list is processed or a balance is achieved
Iterator partitionIterator = reassignablePartitions.iterator();
while (partitionIterator.hasNext() && !isBalanced(currentAssignment, sortedCurrentSubscriptions, consumer2AllPotentialPartitions)) {
TopicPartition partition = partitionIterator.next();
// the partition must have at least two consumers
if (partition2AllPotentialConsumers.get(partition).size() <= 1)
log.error("Expected more than one potential consumer for partition '{}'", partition);
// the partition must have a current consumer
String consumer = currentPartitionConsumer.get(partition);
if (consumer == null)
log.error("Expected partition '{}' to be assigned to a consumer", partition);
if (prevAssignment.containsKey(partition) &&
currentAssignment.get(consumer).size() > currentAssignment.get(prevAssignment.get(partition).consumer).size() + 1) {
reassignPartition(partition, currentAssignment, sortedCurrentSubscriptions, currentPartitionConsumer, prevAssignment.get(partition).consumer);
reassignmentPerformed = true;
modified = true;
continue;
}
// check if a better-suited consumer exist for the partition; if so, reassign it
for (String otherConsumer: partition2AllPotentialConsumers.get(partition)) {
if (currentAssignment.get(consumer).size() > currentAssignment.get(otherConsumer).size() + 1) {
reassignPartition(partition, currentAssignment, sortedCurrentSubscriptions, currentPartitionConsumer, consumer2AllPotentialPartitions);
reassignmentPerformed = true;
modified = true;
break;
}
}
}
} while (modified);
return reassignmentPerformed;
}
private void reassignPartition(TopicPartition partition,
Map> currentAssignment,
TreeSet sortedCurrentSubscriptions,
Map currentPartitionConsumer,
Map> consumer2AllPotentialPartitions) {
// find the new consumer
String newConsumer = null;
for (String anotherConsumer: sortedCurrentSubscriptions) {
if (consumer2AllPotentialPartitions.get(anotherConsumer).contains(partition)) {
newConsumer = anotherConsumer;
break;
}
}
assert newConsumer != null;
reassignPartition(partition, currentAssignment, sortedCurrentSubscriptions, currentPartitionConsumer, newConsumer);
}
private void reassignPartition(TopicPartition partition,
Map> currentAssignment,
TreeSet sortedCurrentSubscriptions,
Map currentPartitionConsumer,
String newConsumer) {
String consumer = currentPartitionConsumer.get(partition);
// find the correct partition movement considering the stickiness requirement
TopicPartition partitionToBeMoved = partitionMovements.getTheActualPartitionToBeMoved(partition, consumer, newConsumer);
processPartitionMovement(partitionToBeMoved, newConsumer, currentAssignment, sortedCurrentSubscriptions, currentPartitionConsumer);
}
private void processPartitionMovement(TopicPartition partition,
String newConsumer,
Map> currentAssignment,
TreeSet sortedCurrentSubscriptions,
Map currentPartitionConsumer) {
String oldConsumer = currentPartitionConsumer.get(partition);
sortedCurrentSubscriptions.remove(oldConsumer);
sortedCurrentSubscriptions.remove(newConsumer);
partitionMovements.movePartition(partition, oldConsumer, newConsumer);
currentAssignment.get(oldConsumer).remove(partition);
currentAssignment.get(newConsumer).add(partition);
currentPartitionConsumer.put(partition, newConsumer);
sortedCurrentSubscriptions.add(newConsumer);
sortedCurrentSubscriptions.add(oldConsumer);
}
public boolean isSticky() {
return partitionMovements.isSticky();
}
private void deepCopy(Map> source, Map> dest) {
dest.clear();
for (Entry> entry: source.entrySet())
dest.put(entry.getKey(), new ArrayList<>(entry.getValue()));
}
private Map> deepCopy(Map> assignment) {
Map> copy = new HashMap<>();
deepCopy(assignment, copy);
return copy;
}
private static class PartitionComparator implements Comparator, Serializable {
private static final long serialVersionUID = 1L;
private Map> map;
PartitionComparator(Map> map) {
this.map = map;
}
@Override
public int compare(TopicPartition o1, TopicPartition o2) {
int ret = map.get(o1).size() - map.get(o2).size();
if (ret == 0) {
ret = o1.topic().compareTo(o2.topic());
if (ret == 0)
ret = o1.partition() - o2.partition();
}
return ret;
}
}
private static class SubscriptionComparator implements Comparator, Serializable {
private static final long serialVersionUID = 1L;
private Map> map;
SubscriptionComparator(Map> map) {
this.map = map;
}
@Override
public int compare(String o1, String o2) {
int ret = map.get(o1).size() - map.get(o2).size();
if (ret == 0)
ret = o1.compareTo(o2);
return ret;
}
}
/**
* This class maintains some data structures to simplify lookup of partition movements among consumers. At each point of
* time during a partition rebalance it keeps track of partition movements corresponding to each topic, and also possible
* movement (in form a ConsumerPair
object) for each partition.
*/
private static class PartitionMovements {
private Map>> partitionMovementsByTopic = new HashMap<>();
private Map partitionMovements = new HashMap<>();
private ConsumerPair removeMovementRecordOfPartition(TopicPartition partition) {
ConsumerPair pair = partitionMovements.remove(partition);
String topic = partition.topic();
Map> partitionMovementsForThisTopic = partitionMovementsByTopic.get(topic);
partitionMovementsForThisTopic.get(pair).remove(partition);
if (partitionMovementsForThisTopic.get(pair).isEmpty())
partitionMovementsForThisTopic.remove(pair);
if (partitionMovementsByTopic.get(topic).isEmpty())
partitionMovementsByTopic.remove(topic);
return pair;
}
private void addPartitionMovementRecord(TopicPartition partition, ConsumerPair pair) {
partitionMovements.put(partition, pair);
String topic = partition.topic();
if (!partitionMovementsByTopic.containsKey(topic))
partitionMovementsByTopic.put(topic, new HashMap<>());
Map> partitionMovementsForThisTopic = partitionMovementsByTopic.get(topic);
if (!partitionMovementsForThisTopic.containsKey(pair))
partitionMovementsForThisTopic.put(pair, new HashSet<>());
partitionMovementsForThisTopic.get(pair).add(partition);
}
private void movePartition(TopicPartition partition, String oldConsumer, String newConsumer) {
ConsumerPair pair = new ConsumerPair(oldConsumer, newConsumer);
if (partitionMovements.containsKey(partition)) {
// this partition has previously moved
ConsumerPair existingPair = removeMovementRecordOfPartition(partition);
assert existingPair.dstMemberId.equals(oldConsumer);
if (!existingPair.srcMemberId.equals(newConsumer)) {
// the partition is not moving back to its previous consumer
// return new ConsumerPair2(existingPair.src, newConsumer);
addPartitionMovementRecord(partition, new ConsumerPair(existingPair.srcMemberId, newConsumer));
}
} else
addPartitionMovementRecord(partition, pair);
}
private TopicPartition getTheActualPartitionToBeMoved(TopicPartition partition, String oldConsumer, String newConsumer) {
String topic = partition.topic();
if (!partitionMovementsByTopic.containsKey(topic))
return partition;
if (partitionMovements.containsKey(partition)) {
// this partition has previously moved
assert oldConsumer.equals(partitionMovements.get(partition).dstMemberId);
oldConsumer = partitionMovements.get(partition).srcMemberId;
}
Map> partitionMovementsForThisTopic = partitionMovementsByTopic.get(topic);
ConsumerPair reversePair = new ConsumerPair(newConsumer, oldConsumer);
if (!partitionMovementsForThisTopic.containsKey(reversePair))
return partition;
return partitionMovementsForThisTopic.get(reversePair).iterator().next();
}
private boolean isLinked(String src, String dst, Set pairs, List currentPath) {
if (src.equals(dst))
return false;
if (pairs.isEmpty())
return false;
if (new ConsumerPair(src, dst).in(pairs)) {
currentPath.add(src);
currentPath.add(dst);
return true;
}
for (ConsumerPair pair: pairs)
if (pair.srcMemberId.equals(src)) {
Set reducedSet = new HashSet<>(pairs);
reducedSet.remove(pair);
currentPath.add(pair.srcMemberId);
return isLinked(pair.dstMemberId, dst, reducedSet, currentPath);
}
return false;
}
private boolean in(List cycle, Set> cycles) {
List superCycle = new ArrayList<>(cycle);
superCycle.remove(superCycle.size() - 1);
superCycle.addAll(cycle);
for (List foundCycle: cycles) {
if (foundCycle.size() == cycle.size() && Collections.indexOfSubList(superCycle, foundCycle) != -1)
return true;
}
return false;
}
private boolean hasCycles(Set pairs) {
Set> cycles = new HashSet<>();
for (ConsumerPair pair: pairs) {
Set reducedPairs = new HashSet<>(pairs);
reducedPairs.remove(pair);
List path = new ArrayList<>(Collections.singleton(pair.srcMemberId));
if (isLinked(pair.dstMemberId, pair.srcMemberId, reducedPairs, path) && !in(path, cycles)) {
cycles.add(new ArrayList<>(path));
log.error("A cycle of length {} was found: {}", path.size() - 1, path.toString());
}
}
// for now we want to make sure there is no partition movements of the same topic between a pair of consumers.
// the odds of finding a cycle among more than two consumers seem to be very low (according to various randomized
// tests with the given sticky algorithm) that it should not worth the added complexity of handling those cases.
for (List cycle: cycles)
if (cycle.size() == 3) // indicates a cycle of length 2
return true;
return false;
}
private boolean isSticky() {
for (Map.Entry>> topicMovements: this.partitionMovementsByTopic.entrySet()) {
Set topicMovementPairs = topicMovements.getValue().keySet();
if (hasCycles(topicMovementPairs)) {
log.error("Stickiness is violated for topic {}"
+ "\nPartition movements for this topic occurred among the following consumer pairs:"
+ "\n{}", topicMovements.getKey(), topicMovements.getValue().toString());
return false;
}
}
return true;
}
}
/**
* ConsumerPair
represents a pair of Kafka consumer ids involved in a partition reassignment. Each
* ConsumerPair
object, which contains a source (src
) and a destination (dst
)
* element, normally corresponds to a particular partition or topic, and indicates that the particular partition or some
* partition of the particular topic was moved from the source consumer to the destination consumer during the rebalance.
* This class is used, through the PartitionMovements
class, by the sticky assignor and helps in determining
* whether a partition reassignment results in cycles among the generated graph of consumer pairs.
*/
private static class ConsumerPair {
private final String srcMemberId;
private final String dstMemberId;
ConsumerPair(String srcMemberId, String dstMemberId) {
this.srcMemberId = srcMemberId;
this.dstMemberId = dstMemberId;
}
public String toString() {
return this.srcMemberId + "->" + this.dstMemberId;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((this.srcMemberId == null) ? 0 : this.srcMemberId.hashCode());
result = prime * result + ((this.dstMemberId == null) ? 0 : this.dstMemberId.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (obj == null)
return false;
if (!getClass().isInstance(obj))
return false;
ConsumerPair otherPair = (ConsumerPair) obj;
return this.srcMemberId.equals(otherPair.srcMemberId) && this.dstMemberId.equals(otherPair.dstMemberId);
}
private boolean in(Set pairs) {
for (ConsumerPair pair: pairs)
if (this.equals(pair))
return true;
return false;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy