All downloads are free. The search and download functionality uses the official Maven repository.

dev.responsive.kafka.internal.db.partitioning.SubPartitioner Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2023 Responsive Computing, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package dev.responsive.kafka.internal.db.partitioning;

import static dev.responsive.kafka.api.config.ResponsiveConfig.CASSANDRA_DESIRED_NUM_PARTITION_CONFIG;
import static dev.responsive.kafka.api.config.ResponsiveConfig.SUBPARTITION_HASHER_CONFIG;

import com.datastax.oss.driver.shaded.guava.common.annotations.VisibleForTesting;
import dev.responsive.kafka.api.config.ResponsiveConfig;
import java.util.List;
import java.util.OptionalInt;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.common.utils.Utils;

/**
 * {@code SubPartitioner} allows sub-partitioning a partition
 * space into additional partitions such that there is a 1:N
 * mapping of original partitions to new partitions, aka
 * "sub-partitions". If two events are in the same subpartition
 * they must have been in the same original kafka partition
 * (though the opposite is not true).
 * 

* In this context, the resulting new table partition in Cassandra * is referred to as the "sub-partition", whereas the kafka partition * may be referred to as such, or sometimes "k-partition" or the * "original partition" *

* The algorithm used simply maps each kafka partition to * {@code n} new subpartitions, beginning with {@code k-partition * n} * and ending with {@code k-partition * n + n} (exclusive). To * compute which new partition a key falls into, it applies a hash * function and mods by {@code n}, adding that value to the base * mapped partition. */ public class SubPartitioner implements TablePartitioner { /** * the number of subpartitions is: {@code kafka_partitions * n} */ private final int factor; private final Function hasher; public static SubPartitioner create( final OptionalInt actualRemoteCount, final int numKafkaPartitions, final String tableName, final ResponsiveConfig config, final String changelogTopicName ) { final int requestedNumSubPartitions = config.getInt(CASSANDRA_DESIRED_NUM_PARTITION_CONFIG); final int factor = (requestedNumSubPartitions == ResponsiveConfig.NO_SUBPARTITIONS) ? 1 : (int) Math.ceil((double) requestedNumSubPartitions / numKafkaPartitions); final int computedRemoteNum = factor * numKafkaPartitions; if (actualRemoteCount.isPresent() && actualRemoteCount.getAsInt() != computedRemoteNum) { throw new ConfigException( String.format( "%s was configured to %d, which " + "given %s partitions in kafka topic %s would result in %d remote partitions " + "for table %s (remote partitions must be a multiple of the kafka partitions). " + "The remote store is already initialized with %d partitions - it is backwards " + "incompatible to change this. 
Please set %s to %d.", CASSANDRA_DESIRED_NUM_PARTITION_CONFIG, requestedNumSubPartitions, numKafkaPartitions, changelogTopicName, computedRemoteNum, tableName, actualRemoteCount.getAsInt(), CASSANDRA_DESIRED_NUM_PARTITION_CONFIG, actualRemoteCount.getAsInt())); } final var hasher = config.getConfiguredInstance(SUBPARTITION_HASHER_CONFIG, Hasher.class); return new SubPartitioner(factor, hasher); } @VisibleForTesting public SubPartitioner(final int factor, final Function hasher) { this.factor = factor; this.hasher = hasher; } @Override public Integer tablePartition(final int kafkaPartition, final Bytes key) { return first(kafkaPartition) + Utils.toPositive(hasher.apply(key)) % factor; } @Override public Integer metadataTablePartition(final int kafkaPartition) { return first(kafkaPartition); } @Override public boolean belongs(final Bytes key, final int kafkaPartition) { throw new UnsupportedOperationException( "SubPartitioner relies on specific mapping of kafka partition to key. This " + "method should not be called as the assumption is that only keys " + "stored for the given partition will be returned. If this exception " + "is seen in production please file a ticket with the Responsive team."); } /** * @param kafkaPartition the original kafka partition * * @return all subpartitions that this partition could map to */ public List allTablePartitions(final int kafkaPartition) { return IntStream .range(first(kafkaPartition), first(kafkaPartition) + factor) .boxed() .collect(Collectors.toList()); } /** * @return the multiplication factor of this sub-partitioner */ public int getFactor() { return factor; } /** * @param kafkaPartition the original kafka partition\\ * @return the sub-partition at index 0 for this kafka partition */ private int first(final int kafkaPartition) { return kafkaPartition * factor; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy