/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.processor.internals;
import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.admin.ListOffsetsResult.ListOffsetsResultInfo;
import org.apache.kafka.clients.consumer.ConsumerGroupMetadata;
import org.apache.kafka.clients.consumer.ConsumerPartitionAssignor;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.Configurable;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.KafkaFuture;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.TimeoutException;
import org.apache.kafka.common.utils.LogContext;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.streams.errors.MissingSourceTopicException;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.errors.TaskAssignmentException;
import org.apache.kafka.streams.processor.TaskId;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder.TopicsInfo;
import org.apache.kafka.streams.processor.internals.assignment.AssignmentInfo;
import org.apache.kafka.streams.processor.internals.assignment.AssignorConfiguration;
import org.apache.kafka.streams.processor.internals.assignment.AssignorConfiguration.AssignmentConfigs;
import org.apache.kafka.streams.processor.internals.assignment.AssignorConfiguration.AssignmentListener;
import org.apache.kafka.streams.processor.internals.assignment.AssignorError;
import org.apache.kafka.streams.processor.internals.assignment.ClientState;
import org.apache.kafka.streams.processor.internals.assignment.CopartitionedTopicsEnforcer;
import org.apache.kafka.streams.processor.internals.assignment.FallbackPriorTaskAssignor;
import org.apache.kafka.streams.processor.internals.assignment.StickyTaskAssignor;
import org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo;
import org.apache.kafka.streams.processor.internals.assignment.TaskAssignor;
import org.apache.kafka.streams.state.HostInfo;
import org.slf4j.Logger;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import static java.util.Comparator.comparingLong;
import static java.util.UUID.randomUUID;
import static org.apache.kafka.streams.processor.internals.ClientUtils.fetchCommittedOffsets;
import static org.apache.kafka.streams.processor.internals.ClientUtils.fetchEndOffsetsFuture;
import static org.apache.kafka.streams.processor.internals.assignment.StreamsAssignmentProtocolVersions.EARLIEST_PROBEABLE_VERSION;
import static org.apache.kafka.streams.processor.internals.assignment.StreamsAssignmentProtocolVersions.LATEST_SUPPORTED_VERSION;
import static org.apache.kafka.streams.processor.internals.assignment.StreamsAssignmentProtocolVersions.UNKNOWN;
import static org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo.UNKNOWN_OFFSET_SUM;
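/**
 * Streams-specific {@link ConsumerPartitionAssignor}: decodes each member's {@link SubscriptionInfo},
 * creates any missing internal (repartition and changelog) topics, maps tasks to clients via the
 * configured {@link TaskAssignor}, and encodes the result back to each member as {@link AssignmentInfo}.
 */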
public class StreamsPartitionAssignor implements ConsumerPartitionAssignor, Configurable {
private Logger log;
private String logPrefix;
private static class AssignedPartition implements Comparable<AssignedPartition> {
private final TaskId taskId;
private final TopicPartition partition;
AssignedPartition(final TaskId taskId, final TopicPartition partition) {
this.taskId = taskId;
this.partition = partition;
}
@Override
public int compareTo(final AssignedPartition that) {
return PARTITION_COMPARATOR.compare(partition, that.partition);
}
@Override
public boolean equals(final Object o) {
if (!(o instanceof AssignedPartition)) {
return false;
}
final AssignedPartition other = (AssignedPartition) o;
return compareTo(other) == 0;
}
@Override
public int hashCode() {
// Only partition is important for compareTo, equals and hashCode.
return partition.hashCode();
}
}
private static class ClientMetadata {
private final HostInfo hostInfo;
private final ClientState state;
private final SortedSet<String> consumers;
ClientMetadata(final String endPoint) {
// get the host info, or null if no endpoint is configured (i.e. endPoint == null)
hostInfo = HostInfo.buildFromEndpoint(endPoint);
// initialize the consumer memberIds
consumers = new TreeSet<>();
// initialize the client state
state = new ClientState();
}
void addConsumer(final String consumerMemberId, final List<TopicPartition> ownedPartitions) {
consumers.add(consumerMemberId);
state.incrementCapacity();
state.addOwnedPartitions(ownedPartitions, consumerMemberId);
}
void addPreviousTasksAndOffsetSums(final String consumerId, final Map<TaskId, Long> taskOffsetSums) {
state.addPreviousTasksAndOffsetSums(consumerId, taskOffsetSums);
}
@Override
public String toString() {
return "ClientMetadata{" +
"hostInfo=" + hostInfo +
", consumers=" + consumers +
", state=" + state +
'}';
}
}
// keep track of any future consumers in a "dummy" Client since we can't decipher their subscription
private static final UUID FUTURE_ID = randomUUID();
protected static final Comparator<TopicPartition> PARTITION_COMPARATOR =
Comparator.comparing(TopicPartition::topic).thenComparingInt(TopicPartition::partition);
private String userEndPoint;
private AssignmentConfigs assignmentConfigs;
private TaskManager taskManager;
private StreamsMetadataState streamsMetadataState;
@SuppressWarnings("deprecation")
private org.apache.kafka.streams.processor.PartitionGrouper partitionGrouper;
private AtomicInteger assignmentErrorCode;
private AtomicLong nextScheduledRebalanceMs;
private Time time;
protected int usedSubscriptionMetadataVersion = LATEST_SUPPORTED_VERSION;
private Admin adminClient;
private InternalTopicManager internalTopicManager;
private CopartitionedTopicsEnforcer copartitionedTopicsEnforcer;
private RebalanceProtocol rebalanceProtocol;
private AssignmentListener assignmentListener;
private Supplier<TaskAssignor> taskAssignorSupplier;
private byte uniqueField;
/**
* We need to have the PartitionAssignor and its StreamThread to be mutually accessible since the former needs
* the latter's cached metadata while sending subscriptions, and the latter needs the former's returned assignment
* when adding tasks.
*
* @throws KafkaException if the stream thread is not specified
*/
@Override
public void configure(final Map<String, ?> configs) {
final AssignorConfiguration assignorConfiguration = new AssignorConfiguration(configs);
logPrefix = assignorConfiguration.logPrefix();
log = new LogContext(logPrefix).logger(getClass());
usedSubscriptionMetadataVersion = assignorConfiguration
.configuredMetadataVersion(usedSubscriptionMetadataVersion);
taskManager = assignorConfiguration.taskManager();
streamsMetadataState = assignorConfiguration.streamsMetadataState();
assignmentErrorCode = assignorConfiguration.assignmentErrorCode();
nextScheduledRebalanceMs = assignorConfiguration.nextScheduledRebalanceMs();
time = assignorConfiguration.time();
assignmentConfigs = assignorConfiguration.assignmentConfigs();
partitionGrouper = assignorConfiguration.partitionGrouper();
userEndPoint = assignorConfiguration.userEndPoint();
adminClient = assignorConfiguration.adminClient();
internalTopicManager = assignorConfiguration.internalTopicManager();
copartitionedTopicsEnforcer = assignorConfiguration.copartitionedTopicsEnforcer();
rebalanceProtocol = assignorConfiguration.rebalanceProtocol();
taskAssignorSupplier = assignorConfiguration::taskAssignor;
assignmentListener = assignorConfiguration.assignmentListener();
uniqueField = 0;
}
@Override
public String name() {
return "stream";
}
@Override
public List<RebalanceProtocol> supportedProtocols() {
final List<RebalanceProtocol> supportedProtocols = new ArrayList<>();
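// EAGER is always offered, presumably so the group can fall back to it (e.g. during a rolling upgrade)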
supportedProtocols.add(RebalanceProtocol.EAGER);
if (rebalanceProtocol == RebalanceProtocol.COOPERATIVE) {
supportedProtocols.add(rebalanceProtocol);
}
return supportedProtocols;
}
@Override
public ByteBuffer subscriptionUserData(final Set<String> topics) {
// Adds the following information to the subscription
// 1. Client UUID (a unique id assigned to an instance of KafkaStreams)
// 2. Map from task id to its overall lag
// 3. Unique Field to ensure a rebalance when a thread rejoins by forcing the user data to be different
handleRebalanceStart(topics);
uniqueField++;
return new SubscriptionInfo(
usedSubscriptionMetadataVersion,
LATEST_SUPPORTED_VERSION,
taskManager.processId(),
userEndPoint,
taskManager.getTaskOffsetSums(),
uniqueField)
.encode();
}
private Map<String, Assignment> errorAssignment(final Map<UUID, ClientMetadata> clientsMetadata,
final int errorCode) {
final Map<String, Assignment> assignment = new HashMap<>();
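// on an unrecoverable error, hand every consumer an empty assignment carrying the error code
// so that all members of the group surface the failure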
for (final ClientMetadata clientMetadata : clientsMetadata.values()) {
for (final String consumerId : clientMetadata.consumers) {
assignment.put(consumerId, new Assignment(
Collections.emptyList(),
new AssignmentInfo(LATEST_SUPPORTED_VERSION,
Collections.emptyList(),
Collections.emptyMap(),
Collections.emptyMap(),
Collections.emptyMap(),
errorCode).encode()
));
}
}
return assignment;
}
/*
* This assigns tasks to consumer clients in the following steps.
*
* 0. decode the subscriptions to assemble the metadata for each client and check for version probing
*
* 1. check all repartition source topics and use internal topic manager to make sure
* they have been created with the right number of partitions. Also verify and/or create
* any changelog topics with the correct number of partitions.
*
* 2. use the partition grouper to generate tasks along with their assigned partitions, then use
* the configured TaskAssignor to construct the mapping of tasks to clients.
*
* 3. construct the global mapping of host to partitions to enable query routing.
*
* 4. within each client, assign tasks to consumer clients.
*/
@Override
public GroupAssignment assign(final Cluster metadata, final GroupSubscription groupSubscription) {
final Map<String, Subscription> subscriptions = groupSubscription.groupSubscription();
// ---------------- Step Zero ---------------- //
// construct the client metadata from the decoded subscription info
final Map<UUID, ClientMetadata> clientMetadataMap = new HashMap<>();
final Set<TopicPartition> allOwnedPartitions = new HashSet<>();
int minReceivedMetadataVersion = LATEST_SUPPORTED_VERSION;
int minSupportedMetadataVersion = LATEST_SUPPORTED_VERSION;
int futureMetadataVersion = UNKNOWN;
for (final Map.Entry<String, Subscription> entry : subscriptions.entrySet()) {
final String consumerId = entry.getKey();
final Subscription subscription = entry.getValue();
final SubscriptionInfo info = SubscriptionInfo.decode(subscription.userData());
final int usedVersion = info.version();
minReceivedMetadataVersion = updateMinReceivedVersion(usedVersion, minReceivedMetadataVersion);
minSupportedMetadataVersion = updateMinSupportedVersion(info.latestSupportedVersion(), minSupportedMetadataVersion);
final UUID processId;
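// a subscription from a member on a newer version than ours cannot be fully decoded,
// so all such "future" members are grouped under the sentinel FUTURE_ID client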
if (usedVersion > LATEST_SUPPORTED_VERSION) {
futureMetadataVersion = usedVersion;
processId = FUTURE_ID;
if (!clientMetadataMap.containsKey(FUTURE_ID)) {
clientMetadataMap.put(FUTURE_ID, new ClientMetadata(null));
}
} else {
processId = info.processId();
}
ClientMetadata clientMetadata = clientMetadataMap.get(processId);
// create the new client metadata if necessary
if (clientMetadata == null) {
clientMetadata = new ClientMetadata(info.userEndPoint());
clientMetadataMap.put(info.processId(), clientMetadata);
}
// add the consumer and any info in its subscription to the client
clientMetadata.addConsumer(consumerId, subscription.ownedPartitions());
allOwnedPartitions.addAll(subscription.ownedPartitions());
clientMetadata.addPreviousTasksAndOffsetSums(consumerId, info.taskOffsetSums());
}
try {
final boolean versionProbing =
checkMetadataVersions(minReceivedMetadataVersion, minSupportedMetadataVersion, futureMetadataVersion);
log.debug("Constructed client metadata {} from the member subscriptions.", clientMetadataMap);
// ---------------- Step One ---------------- //
// parse the topology to determine the repartition source topics,
// making sure each is created with a partition count equal to the maximum
// partition count among the source topics of the sub-topologies that write into it
final Map<Integer, TopicsInfo> topicGroups = taskManager.builder().topicGroups();
final Map<TopicPartition, PartitionInfo> allRepartitionTopicPartitions = prepareRepartitionTopics(topicGroups, metadata);
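// the repartition topics may have just been created and thus be absent from the broker
// metadata, so splice their computed partition info into the cluster view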
final Cluster fullMetadata = metadata.withPartitions(allRepartitionTopicPartitions);
log.debug("Created repartition topics {} from the parsed topology.", allRepartitionTopicPartitions.values());
// ---------------- Step Two ---------------- //
// construct the assignment of tasks to clients
final Set<String> allSourceTopics = new HashSet<>();
final Map<Integer, Set<String>> sourceTopicsByGroup = new HashMap<>();
for (final Map.Entry<Integer, TopicsInfo> entry : topicGroups.entrySet()) {
allSourceTopics.addAll(entry.getValue().sourceTopics);
sourceTopicsByGroup.put(entry.getKey(), entry.getValue().sourceTopics);
}
// get the tasks as partition groups from the partition grouper
final Map<TaskId, Set<TopicPartition>> partitionsForTask =
partitionGrouper.partitionGroups(sourceTopicsByGroup, fullMetadata);
final Set<TaskId> statefulTasks = new HashSet<>();
final boolean probingRebalanceNeeded = assignTasksToClients(fullMetadata, allSourceTopics, topicGroups, clientMetadataMap, partitionsForTask, statefulTasks);
// ---------------- Step Three ---------------- //
// construct the global partition assignment per host map
final Map<HostInfo, Set<TopicPartition>> partitionsByHost = new HashMap<>();
final Map<HostInfo, Set<TopicPartition>> standbyPartitionsByHost = new HashMap<>();
if (minReceivedMetadataVersion >= 2) {
populatePartitionsByHostMaps(partitionsByHost, standbyPartitionsByHost, partitionsForTask, clientMetadataMap);
}
streamsMetadataState.onChange(partitionsByHost, standbyPartitionsByHost, fullMetadata);
// ---------------- Step Four ---------------- //
// compute the assignment of tasks to threads within each client and build the final group assignment
final Map<String, Assignment> assignment = computeNewAssignment(
statefulTasks,
clientMetadataMap,
partitionsForTask,
partitionsByHost,
standbyPartitionsByHost,
allOwnedPartitions,
minReceivedMetadataVersion,
minSupportedMetadataVersion,
versionProbing,
probingRebalanceNeeded
);
return new GroupAssignment(assignment);
} catch (final MissingSourceTopicException e) {
return new GroupAssignment(
errorAssignment(clientMetadataMap, AssignorError.INCOMPLETE_SOURCE_TOPIC_METADATA.code())
);
} catch (final TaskAssignmentException e) {
return new GroupAssignment(
errorAssignment(clientMetadataMap, AssignorError.ASSIGNMENT_ERROR.code())
);
}
}
/**
* Verify the subscription versions are within the expected bounds and check for version probing.
*
* @return whether this was a version probing rebalance
*/
private boolean checkMetadataVersions(final int minReceivedMetadataVersion,
final int minSupportedMetadataVersion,
final int futureMetadataVersion) {
final boolean versionProbing;
if (futureMetadataVersion == UNKNOWN) {
versionProbing = false;
} else if (minReceivedMetadataVersion >= EARLIEST_PROBEABLE_VERSION) {
versionProbing = true;
log.info("Received a future (version probing) subscription (version: {})."
+ " Sending assignment back (with supported version {}).",
futureMetadataVersion,
minSupportedMetadataVersion);
} else {
throw new TaskAssignmentException(
"Received a future (version probing) subscription (version: " + futureMetadataVersion
+ ") and an incompatible pre Kafka 2.0 subscription (version: " + minReceivedMetadataVersion
+ ") at the same time."
);
}
if (minReceivedMetadataVersion < LATEST_SUPPORTED_VERSION) {
log.info("Downgrade metadata to version {}. Latest supported version is {}.",
minReceivedMetadataVersion,
LATEST_SUPPORTED_VERSION);
}
if (minSupportedMetadataVersion < LATEST_SUPPORTED_VERSION) {
log.info("Downgrade latest supported metadata to version {}. Latest supported version is {}.",
minSupportedMetadataVersion,
LATEST_SUPPORTED_VERSION);
}
return versionProbing;
}
/**
* @return a map of repartition topics and their metadata
*/
private Map<String, InternalTopicConfig> computeRepartitionTopicMetadata(final Map<Integer, TopicsInfo> topicGroups,
final Cluster metadata) {
final Map<String, InternalTopicConfig> repartitionTopicMetadata = new HashMap<>();
for (final TopicsInfo topicsInfo : topicGroups.values()) {
for (final String topic : topicsInfo.sourceTopics) {
if (!topicsInfo.repartitionSourceTopics.containsKey(topic) && !metadata.topics().contains(topic)) {
log.error("Source topic {} is missing/unknown during rebalance, please make sure all source topics " +
"have been pre-created before starting the Streams application. Returning error {}",
topic, AssignorError.INCOMPLETE_SOURCE_TOPIC_METADATA.name());
throw new MissingSourceTopicException("Missing source topic during assignment.");
}
}
for (final InternalTopicConfig topic : topicsInfo.repartitionSourceTopics.values()) {
repartitionTopicMetadata.put(topic.name(), topic);
}
}
return repartitionTopicMetadata;
}
/**
* Computes and assembles all repartition topic metadata, then creates the topics if necessary.
*
* @return map from repartition topic to its partition info
*/
private Map<TopicPartition, PartitionInfo> prepareRepartitionTopics(final Map<Integer, TopicsInfo> topicGroups,
final Cluster metadata) {
final Map<String, InternalTopicConfig> repartitionTopicMetadata = computeRepartitionTopicMetadata(topicGroups, metadata);
setRepartitionTopicMetadataNumberOfPartitions(repartitionTopicMetadata, topicGroups, metadata);
// ensure the co-partitioning topics within the group have the same number of partitions,
// and enforce the number of partitions for those repartition topics to be the same if they
// are co-partitioned as well.
ensureCopartitioning(taskManager.builder().copartitionGroups(), repartitionTopicMetadata, metadata);
// make sure the repartition source topics exist with the right number of partitions,
// create these topics if necessary
internalTopicManager.makeReady(repartitionTopicMetadata);
// augment the metadata with the newly computed number of partitions for all the
// repartition source topics
final Map<TopicPartition, PartitionInfo> allRepartitionTopicPartitions = new HashMap<>();
for (final Map.Entry<String, InternalTopicConfig> entry : repartitionTopicMetadata.entrySet()) {
final String topic = entry.getKey();
final int numPartitions = entry.getValue().numberOfPartitions().orElse(-1);
for (int partition = 0; partition < numPartitions; partition++) {
allRepartitionTopicPartitions.put(
new TopicPartition(topic, partition),
new PartitionInfo(topic, partition, null, new Node[0], new Node[0])
);
}
}
return allRepartitionTopicPartitions;
}
/**
* Computes the number of partitions and sets it for each repartition topic in repartitionTopicMetadata
*/
private void setRepartitionTopicMetadataNumberOfPartitions(final Map<String, InternalTopicConfig> repartitionTopicMetadata,
final Map<Integer, TopicsInfo> topicGroups,
final Cluster metadata) {
boolean numPartitionsNeeded;
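// iterate to a fixed point: each pass resolves the repartition topics whose upstream
// partition counts are already known, and we bail out if a full pass makes no progress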
do {
numPartitionsNeeded = false;
boolean progressMadeThisIteration = false; // avoid infinitely looping without making any progress on unknown repartitions
for (final TopicsInfo topicsInfo : topicGroups.values()) {
for (final String repartitionSourceTopic : topicsInfo.repartitionSourceTopics.keySet()) {
final Optional<Integer> maybeNumPartitions = repartitionTopicMetadata.get(repartitionSourceTopic)
.numberOfPartitions();
Integer numPartitions = null;
if (!maybeNumPartitions.isPresent()) {
// try to set the number of partitions for this repartition topic if it is not set yet
for (final TopicsInfo otherTopicsInfo : topicGroups.values()) {
final Set<String> otherSinkTopics = otherTopicsInfo.sinkTopics;
if (otherSinkTopics.contains(repartitionSourceTopic)) {
// if this topic is one of the sink topics of this topology,
// use the maximum of all its source topic partitions as the number of partitions
for (final String upstreamSourceTopic : otherTopicsInfo.sourceTopics) {
Integer numPartitionsCandidate = null;
// It is possible the sourceTopic is another internal topic, e.g.,
// map().join().join(map())
if (repartitionTopicMetadata.containsKey(upstreamSourceTopic)) {
if (repartitionTopicMetadata.get(upstreamSourceTopic).numberOfPartitions().isPresent()) {
numPartitionsCandidate =
repartitionTopicMetadata.get(upstreamSourceTopic).numberOfPartitions().get();
}
} else {
final Integer count = metadata.partitionCountForTopic(upstreamSourceTopic);
if (count == null) {
throw new TaskAssignmentException(
"No partition count found for source topic "
+ upstreamSourceTopic
+ ", but it should have been."
);
}
numPartitionsCandidate = count;
}
if (numPartitionsCandidate != null) {
if (numPartitions == null || numPartitionsCandidate > numPartitions) {
numPartitions = numPartitionsCandidate;
}
}
}
}
}
if (numPartitions == null) {
numPartitionsNeeded = true;
log.trace("Unable to determine number of partitions for {}, another iteration is needed",
repartitionSourceTopic);
} else {
repartitionTopicMetadata.get(repartitionSourceTopic).setNumberOfPartitions(numPartitions);
progressMadeThisIteration = true;
}
}
}
}
if (!progressMadeThisIteration && numPartitionsNeeded) {
throw new TaskAssignmentException("Failed to compute number of partitions for all repartition topics");
}
} while (numPartitionsNeeded);
}
/**
* Populates the taskForPartition and tasksForTopicGroup maps, and checks that partitions are assigned to exactly
* one task.
*
* @param taskForPartition a map from partition to the corresponding task. Populated here.
* @param tasksForTopicGroup a map from the topicGroupId to the set of corresponding tasks. Populated here.
* @param allSourceTopics a set of all source topics in the topology
* @param partitionsForTask a map from task to the set of input partitions
* @param fullMetadata the cluster metadata
*/
private void populateTasksForMaps(final Map<TopicPartition, TaskId> taskForPartition,
final Map<Integer, Set<TaskId>> tasksForTopicGroup,
final Set<String> allSourceTopics,
final Map<TaskId, Set<TopicPartition>> partitionsForTask,
final Cluster fullMetadata) {
// check if all partitions are assigned, and there are no duplicates of partitions in multiple tasks
final Set<TopicPartition> allAssignedPartitions = new HashSet<>();
for (final Map.Entry<TaskId, Set<TopicPartition>> entry : partitionsForTask.entrySet()) {
final TaskId id = entry.getKey();
final Set<TopicPartition> partitions = entry.getValue();
for (final TopicPartition partition : partitions) {
taskForPartition.put(partition, id);
if (allAssignedPartitions.contains(partition)) {
log.warn("Partition {} is assigned to more than one tasks: {}", partition, partitionsForTask);
}
}
allAssignedPartitions.addAll(partitions);
tasksForTopicGroup.computeIfAbsent(id.topicGroupId, k -> new HashSet<>()).add(id);
}
checkAllPartitions(allSourceTopics, partitionsForTask, allAssignedPartitions, fullMetadata);
}
// Logs a warning if any partition is not assigned to a task, or if a source topic has no partitions in the metadata
private void checkAllPartitions(final Set<String> allSourceTopics,
final Map<TaskId, Set<TopicPartition>> partitionsForTask,
final Set<TopicPartition> allAssignedPartitions,
final Cluster fullMetadata) {
for (final String topic : allSourceTopics) {
final List<PartitionInfo> partitionInfoList = fullMetadata.partitionsForTopic(topic);
if (partitionInfoList.isEmpty()) {
log.warn("No partitions found for topic {}", topic);
} else {
for (final PartitionInfo partitionInfo : partitionInfoList) {
final TopicPartition partition = new TopicPartition(partitionInfo.topic(),
partitionInfo.partition());
if (!allAssignedPartitions.contains(partition)) {
log.warn("Partition {} is not assigned to any tasks: {}"
+ " Possible causes of a partition not getting assigned"
+ " is that another topic defined in the topology has not been"
+ " created when starting your streams application,"
+ " resulting in no tasks created for this topology at all.", partition,
partitionsForTask);
}
}
}
}
}
/**
* Resolve changelog topic metadata and create them if necessary. Fills in the changelogsByStatefulTask map and
* the optimizedSourceChangelogs set and returns the set of changelogs which were newly created.
*/
private Set<String> prepareChangelogTopics(final Map<Integer, TopicsInfo> topicGroups,
final Map<Integer, Set<TaskId>> tasksForTopicGroup,
final Map<TaskId, Set<TopicPartition>> changelogsByStatefulTask,
final Set<String> optimizedSourceChangelogs) {
// add tasks to state change log topic subscribers
final Map<String, InternalTopicConfig> changelogTopicMetadata = new HashMap<>();
for (final Map.Entry<Integer, TopicsInfo> entry : topicGroups.entrySet()) {
final int topicGroupId = entry.getKey();
final TopicsInfo topicsInfo = entry.getValue();
final Set<TaskId> topicGroupTasks = tasksForTopicGroup.get(topicGroupId);
if (topicGroupTasks == null) {
log.debug("No tasks found for topic group {}", topicGroupId);
continue;
} else if (topicsInfo.stateChangelogTopics.isEmpty()) {
continue;
}
for (final TaskId task : topicGroupTasks) {
changelogsByStatefulTask.put(
task,
topicsInfo.stateChangelogTopics
.keySet()
.stream()
.map(topic -> new TopicPartition(topic, task.partition))
.collect(Collectors.toSet()));
}
for (final InternalTopicConfig topicConfig : topicsInfo.nonSourceChangelogTopics()) {
// the expected number of partitions is the max value of TaskId.partition + 1
int numPartitions = UNKNOWN;
for (final TaskId task : topicGroupTasks) {
if (numPartitions < task.partition + 1) {
numPartitions = task.partition + 1;
}
}
topicConfig.setNumberOfPartitions(numPartitions);
changelogTopicMetadata.put(topicConfig.name(), topicConfig);
}
optimizedSourceChangelogs.addAll(topicsInfo.sourceTopicChangelogs());
}
final Set<String> newlyCreatedTopics = internalTopicManager.makeReady(changelogTopicMetadata);
log.debug("Created state changelog topics {} from the parsed topology.", changelogTopicMetadata.values());
return newlyCreatedTopics;
}
/**
* Assigns a set of tasks to each client (Streams instance) using the configured task assignor, and also
* populates the stateful tasks that have been assigned to the clients.
* @return true if a probing rebalance should be triggered
*/
private boolean assignTasksToClients(final Cluster fullMetadata,
final Set<String> allSourceTopics,
final Map<Integer, TopicsInfo> topicGroups,
final Map<UUID, ClientMetadata> clientMetadataMap,
final Map<TaskId, Set<TopicPartition>> partitionsForTask,
final Set<TaskId> statefulTasks) {
if (!statefulTasks.isEmpty()) {
throw new TaskAssignmentException("The stateful tasks should not be populated before assigning tasks to clients");
}
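// map each input partition to its task, and group the tasks by sub-topology (topic group)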
final Map<TopicPartition, TaskId> taskForPartition = new HashMap<>();
final Map<Integer, Set<TaskId>> tasksForTopicGroup = new HashMap<>();
populateTasksForMaps(taskForPartition, tasksForTopicGroup, allSourceTopics, partitionsForTask, fullMetadata);
final Map<TaskId, Set<TopicPartition>> changelogsByStatefulTask = new HashMap<>();
final Set<String> optimizedSourceChangelogs = new HashSet<>();
final Set<String> newlyCreatedChangelogs =
prepareChangelogTopics(topicGroups, tasksForTopicGroup, changelogsByStatefulTask, optimizedSourceChangelogs);
final Map<UUID, ClientState> clientStates = new HashMap<>();
final boolean lagComputationSuccessful =
populateClientStatesMap(clientStates,
clientMetadataMap,
taskForPartition,
changelogsByStatefulTask,
newlyCreatedChangelogs,
optimizedSourceChangelogs
);
final Set<TaskId> allTasks = partitionsForTask.keySet();
statefulTasks.addAll(changelogsByStatefulTask.keySet());
log.debug("Assigning tasks {} to clients {} with number of replicas {}",
allTasks, clientStates, numStandbyReplicas());
final TaskAssignor taskAssignor = createTaskAssignor(lagComputationSuccessful);
final boolean probingRebalanceNeeded = taskAssignor.assign(clientStates,
allTasks,
statefulTasks,
assignmentConfigs);
log.info("Assigned tasks {} including stateful {} to clients as: \n{}.",
allTasks, statefulTasks, clientStates.entrySet().stream()
.map(entry -> entry.getKey() + "=" + entry.getValue().currentAssignment())
.collect(Collectors.joining(Utils.NL)));
return probingRebalanceNeeded;
}
private TaskAssignor createTaskAssignor(final boolean lagComputationSuccessful) {
final TaskAssignor taskAssignor = taskAssignorSupplier.get();
if (taskAssignor instanceof StickyTaskAssignor) {
// special case: to preserve pre-existing behavior, we invoke the StickyTaskAssignor
// whether or not lag computation failed.
return taskAssignor;
} else if (lagComputationSuccessful) {
return taskAssignor;
} else {
log.info("Failed to fetch end offsets for changelogs, will return previous assignment to clients and "
+ "trigger another rebalance to retry.");
return new FallbackPriorTaskAssignor();
}
}
/**
* Builds a map from client to state, and readies each ClientState for assignment by adding any missing prev tasks
* and computing the per-task overall lag based on the fetched end offsets for each changelog.
*
* @param clientStates a map from each client to its state, including offset lags. Populated by this method.
* @param clientMetadataMap a map from each client to its full metadata
* @param taskForPartition map from topic partition to its corresponding task
* @param changelogsByStatefulTask map from each stateful task to its set of changelog topic partitions
*
* @return whether we were able to successfully fetch the changelog end offsets and compute each client's lag
*/
private boolean populateClientStatesMap(final Map<UUID, ClientState> clientStates,
final Map<UUID, ClientMetadata> clientMetadataMap,
final Map<TopicPartition, TaskId> taskForPartition,
final Map<TaskId, Set<TopicPartition>> changelogsByStatefulTask,
final Set<String> newlyCreatedChangelogs,
final Set<String> optimizedSourceChangelogs) {
boolean fetchEndOffsetsSuccessful;
Map<TaskId, Long> allTaskEndOffsetSums;
try {
final Collection<TopicPartition> allChangelogPartitions =
changelogsByStatefulTask.values().stream()
.flatMap(Collection::stream)
.collect(Collectors.toList());
final Set<TopicPartition> preexistingChangelogPartitions = new HashSet<>();
final Set<TopicPartition> preexistingSourceChangelogPartitions = new HashSet<>();
final Set<TopicPartition> newlyCreatedChangelogPartitions = new HashSet<>();
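// split the changelog partitions into three groups: newly created topics (nothing to fetch yet),
// optimized source-topic changelogs, and pre-existing regular changelogs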
for (final TopicPartition changelog : allChangelogPartitions) {
if (newlyCreatedChangelogs.contains(changelog.topic())) {
newlyCreatedChangelogPartitions.add(changelog);
} else if (optimizedSourceChangelogs.contains(changelog.topic())) {
preexistingSourceChangelogPartitions.add(changelog);
} else {
preexistingChangelogPartitions.add(changelog);
}
}
// Make the listOffsets request first so it can fetch the offsets for non-source changelogs
// asynchronously while we use the blocking Consumer#committed call to fetch source-changelog offsets
final KafkaFuture