
pl.allegro.tech.hermes.consumers.consumer.offset.OffsetCommitter

package pl.allegro.tech.hermes.consumers.consumer.offset;

import com.codahale.metrics.Timer;
import com.google.common.collect.Sets;
import org.jctools.queues.MessagePassingQueue;
import org.jctools.queues.MpscArrayQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.allegro.tech.hermes.api.SubscriptionName;
import pl.allegro.tech.hermes.common.metric.HermesMetrics;
import pl.allegro.tech.hermes.consumers.consumer.receiver.MessageCommitter;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.Function;

/**
 * Note on the algorithm used to calculate offsets to actually commit.
 * <p>
 * The idea behind this algorithm is that we would like to commit:
 * <ul>
 *     <li>the maximal offset marked as committed,</li>
 *     <li>but not larger than the smallest inflight offset (smallest inflight - 1).</li>
 * </ul>
 * <p>
 * Important note! This class is a Kafka offset committer, and so it perceives offsets the Kafka way. Most
 * importantly, the committed offset marks the message that is read first after a consumer restart (the offset is
 * inclusive for reading and exclusive for writing).
 * <p>
 * There are two queues which are used by consumers to report message state:
 * <ul>
 *     <li>inflightOffsets: message offsets that are currently being sent (inflight),</li>
 *     <li>committedOffsets: message offsets that are ready to be committed.</li>
 * </ul>
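 * <p>
 * For orientation, the reporting side could look roughly like this (a sketch only: the offer methods belong to
 * {@code OffsetQueue}, which is defined outside this file, so their exact names are an assumption here):
 * <pre>{@code
 * offsetQueue.offerInflightOffset(offset);  // assumed: reported before the message is sent
 * offsetQueue.offerCommittedOffset(offset); // assumed: reported once delivery succeeds
 * }</pre>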
 * <p>
 * This committer class holds internal state in the form of the inflightOffsets and failedToCommitOffsets sets:
 * <ul>
 *     <li>inflightOffsets are all offsets that are currently in the inflight state,</li>
 *     <li>failedToCommitOffsets are offsets that could not be committed in the previous algorithm iteration.</li>
 * </ul>
 * <p>
 * The commit algorithm runs at scheduled intervals and has three phases. The first phase drains the queues and
 * performs reductions (a sketch of this phase follows the list):
 * <ul>
 *     <li>drain the committedOffsets queue to a collection - this needs to happen before draining the inflights,
 *     so that the committed collection can no longer grow, which would otherwise leave inflight offsets unmatched
 *     by their commits; commits are incremented by 1 to match the Kafka commit definition,</li>
 *     <li>add all previously uncommitted offsets from the failedToCommitOffsets collection to committedOffsets
 *     and clear the failedToCommitOffsets collection,</li>
 *     <li>drain the inflightOffsets queue.</li>
 * </ul>
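 * <p>
 * A sketch of the first phase, mirroring {@code processCommittedOffsets()} in the class below:
 * <pre>{@code
 * // the +1 shift is applied only while draining the queue; the modifier is then reset
 * // so that offsets re-added from failedToCommitOffsets are not shifted a second time
 * ReducingConsumer committed = new ReducingConsumer(Math::max, c -> c + 1);
 * offsetQueue.drainCommittedOffsets(committed);
 * committed.resetModifierFunction();
 * failedToCommitOffsets.forEach(committed::accept);
 * failedToCommitOffsets.clear();
 * }</pre>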
 * <p>
 * The second phase calculates the offsets:
 * <ul>
 *     <li>calculate the maximal committed offset for each subscription and partition,</li>
 *     <li>calculate the minimal inflight offset for each subscription and partition.</li>
 * </ul>
 * <p>
 * The third phase chooses which offset to commit for each subscription/partition. This is the minimal value of
 * (a worked example follows the list):
 * <ul>
 *     <li>the maximum committed offset,</li>
 *     <li>the minimum inflight offset.</li>
 * </ul>
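 * <p>
 * A worked example of this rule (illustrative numbers, not taken from the source): suppose that for one partition
 * the drained committed offsets are {1, 2, 4} and the drained inflight offsets are {3, 5}. The commits are
 * incremented to {2, 3, 5}:
 * <pre>{@code
 * long maxCommitted = 5;                               // max of {2, 3, 5}
 * long minInflight = 3;                                // min of {3, 5}
 * long toCommit = Math.min(maxCommitted, minInflight); // 3: message 3 is re-read on restart, so nothing is lost
 * }</pre>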
 * <p>
 * This algorithm is very simple, memory efficient, can be performed in a single thread and introduces no locks.
 */
public class OffsetCommitter implements Runnable {

    private static final Logger logger = LoggerFactory.getLogger(OffsetCommitter.class);

    private final ScheduledExecutorService scheduledExecutor = Executors.newSingleThreadScheduledExecutor();

    private final int offsetCommitPeriodSeconds;

    private final OffsetQueue offsetQueue;

    private final List<MessageCommitter> messageCommitters;

    private final HermesMetrics metrics;

    private final Set<SubscriptionPartitionOffset> inflightOffsets = new HashSet<>();

    private final Set<SubscriptionPartitionOffset> failedToCommitOffsets = new HashSet<>();

    private final MpscArrayQueue<SubscriptionName> subscriptionsToCleanup = new MpscArrayQueue<>(1000);

    public OffsetCommitter(OffsetQueue offsetQueue,
                           List<MessageCommitter> messageCommitters,
                           int offsetCommitPeriodSeconds,
                           HermesMetrics metrics) {
        this.offsetQueue = offsetQueue;
        this.messageCommitters = messageCommitters;
        this.offsetCommitPeriodSeconds = offsetCommitPeriodSeconds;
        this.metrics = metrics;
    }

    @Override
    public void run() {
        try (Timer.Context c = metrics.timer("offset-committer.duration").time()) {
            // committed offsets need to be drained first so that there is no possibility of new committed offsets
            // showing up after the inflight queue is drained - this would lead to a stall in committing offsets
            ReducingConsumer committedOffsetsReducer = processCommittedOffsets();
            Map<SubscriptionPartition, Long> maxCommittedOffsets = committedOffsetsReducer.reduced;

            ReducingConsumer inflightOffsetReducer = processInflightOffsets(committedOffsetsReducer.all);
            Map<SubscriptionPartition, Long> minInflightOffsets = inflightOffsetReducer.reduced;

            // phase three: for each partition commit the smaller of max committed and min inflight offset
            int scheduledToCommit = 0;
            OffsetsToCommit offsetsToCommit = new OffsetsToCommit();
            for (SubscriptionPartition partition : Sets.union(minInflightOffsets.keySet(), maxCommittedOffsets.keySet())) {
                long offset = Math.min(
                        minInflightOffsets.getOrDefault(partition, Long.MAX_VALUE),
                        maxCommittedOffsets.getOrDefault(partition, Long.MAX_VALUE)
                );
                if (offset >= 0 && offset < Long.MAX_VALUE) {
                    scheduledToCommit++;
                    offsetsToCommit.add(new SubscriptionPartitionOffset(partition, offset));
                }
            }
            commit(offsetsToCommit);

            metrics.counter("offset-committer.committed").inc(scheduledToCommit - failedToCommitOffsets.size());
            metrics.counter("offset-committer.failed").inc(failedToCommitOffsets.size());

            cleanupUnusedSubscriptions();
        } catch (Exception exception) {
            logger.error("Failed to run offset committer: {}", exception.getMessage(), exception);
        }
    }

    private ReducingConsumer processCommittedOffsets() {
        // commits are incremented by 1 while draining to match the Kafka (exclusive) commit definition
        ReducingConsumer committedOffsetsReducer = new ReducingConsumer(Math::max, c -> c + 1);
        offsetQueue.drainCommittedOffsets(committedOffsetsReducer);

        // previously failed offsets were already incremented, so the modifier is reset before re-adding them
        committedOffsetsReducer.resetModifierFunction();
        failedToCommitOffsets.forEach(committedOffsetsReducer::accept);
        failedToCommitOffsets.clear();
        return committedOffsetsReducer;
    }

    private ReducingConsumer processInflightOffsets(Set<SubscriptionPartitionOffset> committedOffsets) {
        ReducingConsumer inflightOffsetReducer = new ReducingConsumer(Math::min);
        offsetQueue.drainInflightOffsets(o -> reduceIfNotCommitted(o, inflightOffsetReducer, committedOffsets));
        inflightOffsets.forEach(o -> reduceIfNotCommitted(o, inflightOffsetReducer, committedOffsets));

        // offsets that are still inflight are retained for the next iteration
        inflightOffsets.clear();
        inflightOffsets.addAll(inflightOffsetReducer.all);

        return inflightOffsetReducer;
    }

    private void reduceIfNotCommitted(SubscriptionPartitionOffset offset,
                                      ReducingConsumer inflightOffsetReducer,
                                      Set<SubscriptionPartitionOffset> committedOffsets) {
        if (!committedOffsets.contains(offset)) {
            inflightOffsetReducer.accept(offset);
        }
    }

    private void commit(OffsetsToCommit offsetsToCommit) {
        for (MessageCommitter committer : messageCommitters) {
            FailedToCommitOffsets failedOffsets = committer.commitOffsets(offsetsToCommit);
            if (failedOffsets.hasFailed()) {
                failedToCommitOffsets.addAll(failedOffsets.failedOffsets());
            }
        }
    }

    public void removeUncommittedOffsets(SubscriptionName subscriptionName) {
        subscriptionsToCleanup.offer(subscriptionName);
    }

    private void cleanupUnusedSubscriptions() {
        Set<SubscriptionName> subscriptionNames = new HashSet<>();
        subscriptionsToCleanup.drain(subscriptionNames::add);

        for (Iterator<SubscriptionPartitionOffset> iterator = inflightOffsets.iterator(); iterator.hasNext(); ) {
            if (subscriptionNames.contains(iterator.next().getSubscriptionName())) {
                iterator.remove();
            }
        }
    }

    public void start() {
        scheduledExecutor.scheduleWithFixedDelay(this,
                offsetCommitPeriodSeconds,
                offsetCommitPeriodSeconds,
                TimeUnit.SECONDS);
    }

    public void shutdown() {
        scheduledExecutor.shutdown();
    }

    private static final class ReducingConsumer implements MessagePassingQueue.Consumer<SubscriptionPartitionOffset> {

        private final BiFunction<Long, Long, Long> reductor;
        private Function<Long, Long> modifier;

        private final Map<SubscriptionPartition, Long> reduced = new HashMap<>();
        private final Set<SubscriptionPartitionOffset> all = new HashSet<>();

        private ReducingConsumer(BiFunction<Long, Long, Long> reductor, Function<Long, Long> offsetModifier) {
            this.reductor = reductor;
            this.modifier = offsetModifier;
        }

        private ReducingConsumer(BiFunction<Long, Long, Long> reductor) {
            this(reductor, Function.identity());
        }

        private void resetModifierFunction() {
            this.modifier = Function.identity();
        }

        @Override
        public void accept(SubscriptionPartitionOffset p) {
            all.add(p);
            reduced.compute(
                    p.getSubscriptionPartition(),
                    (k, v) -> {
                        long offset = modifier.apply(p.getOffset());
                        return v == null ? offset : reductor.apply(v, offset);
                    }
            );
        }
    }
}
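
A minimal usage sketch (illustrative only; the offsetQueue, messageCommitters and metrics arguments are placeholders
for collaborators that the Hermes consumer runtime normally supplies):

    // hypothetical wiring, based only on the constructor and lifecycle methods above
    OffsetCommitter offsetCommitter = new OffsetCommitter(offsetQueue, messageCommitters, 60, metrics);
    offsetCommitter.start();     // schedules run() with a fixed delay of 60 seconds
    // ... later, on consumer shutdown:
    offsetCommitter.shutdown();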




