package net.pincette.rs.kafka;

import static java.lang.Thread.sleep;
import static java.time.Duration.ofMillis;
import static java.time.Duration.ofSeconds;
import static java.util.Optional.ofNullable;
import static java.util.concurrent.CompletableFuture.completedFuture;
import static java.util.logging.Level.SEVERE;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toSet;
import static net.pincette.rs.kafka.Util.LOGGER;
import static net.pincette.rs.kafka.Util.trace;
import static net.pincette.util.Collections.consumeHead;
import static net.pincette.util.Pair.pair;
import static net.pincette.util.StreamUtil.stream;
import static net.pincette.util.Util.tryToDo;
import static net.pincette.util.Util.tryToDoSilent;
import static net.pincette.util.Util.tryToGetForever;

import java.time.Duration;
import java.util.Collection;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.Flow.Publisher;
import java.util.function.BiConsumer;
import java.util.function.Supplier;
import java.util.stream.Stream;
import net.pincette.function.SideEffect;
import net.pincette.util.State;
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

/**
 * This publisher exposes an actual reactive streams publisher for each topic it is asked to
 * consume. You give it a function to generate a Kafka consumer, which it will use to do the actual
 * consumption. The publisher manages offset commits itself, so you shouldn't use the auto-commit
 * feature of the Kafka consumer. This publisher supports "at least once" semantics, which implies
 * crashes may be followed by duplicate messages. Backpressure is managed by pausing topics when
 * needed. This will not cause a Kafka rebalance.
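 *
 * <p>A minimal usage sketch (the broker address, group ID, topic name, deserializers and
 * subscriber are illustrative, not part of this class):
 *
 * <pre>{@code
 * KafkaPublisher<String, String> publisher =
 *     KafkaPublisher.<String, String>publisher(
 *             () ->
 *                 new KafkaConsumer<>(
 *                     Map.<String, Object>of(
 *                         ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092",
 *                         ConsumerConfig.GROUP_ID_CONFIG, "my-group",
 *                         ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false),
 *                     new StringDeserializer(),
 *                     new StringDeserializer()))
 *         .withTopics(Set.of("my-topic"));
 *
 * publisher.publishers().get("my-topic").subscribe(mySubscriber); // any Flow.Subscriber
 * publisher.start(); // Blocks until stop() is called or all topic streams are cancelled.
 * }</pre>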
 *
 * @param <K> the key type.
 * @param <V> the value type.
 * @author Werner Donn\u00e9
 */
public class KafkaPublisher<K, V> {
  private static final Duration POLL_TIMEOUT = ofMillis(100);
  private static final Duration RETRY = ofSeconds(5);
  private static final int WAIT_TIMEOUT = 3000;

  private final Set<String> cancelled = new HashSet<>();
  private final Supplier<KafkaConsumer<K, V>> consumerSupplier;
  private final BiConsumer<ConsumerEvent, KafkaConsumer<K, V>> eventHandler;
  private final Map<TopicPartition, OffsetAndMetadata> pendingCommits = new ConcurrentHashMap<>();
  private final Set<String> paused = new HashSet<>();
  private final Map<String, TopicPublisher<K, V>> publishers;
  private final Deque<ConsumerRecord<K, V>> recordsToCommit = new ConcurrentLinkedDeque<>();
  private final Set<String> topics;
  private final Duration throttleTime;
  private KafkaConsumer<K, V> consumer;
  private boolean started;
  private boolean stop;

  /**
   * Creates an uninitialized publisher. Use the {@code with} methods to produce initialized
   * instances.
   */
  public KafkaPublisher() {
    this(null, null, null, null);
  }

  private KafkaPublisher(
      final Supplier<KafkaConsumer<K, V>> consumer,
      final Set<String> topics,
      final BiConsumer<ConsumerEvent, KafkaConsumer<K, V>> eventHandler,
      final Duration throttleTime) {
    this.consumerSupplier = consumer;
    this.topics = topics;
    this.eventHandler = eventHandler;
    this.throttleTime = throttleTime;
    publishers = createPublishers();
  }

  public static <K, V> KafkaPublisher<K, V> publisher(
      final Supplier<KafkaConsumer<K, V>> consumer) {
    return new KafkaPublisher<>(consumer, null, null, null);
  }

  private static Set<TopicPartition> partitions(
      final String topic, final Collection<TopicPartition> partitions) {
    return partitions.stream().filter(p -> p.topic().equals(topic)).collect(toSet());
  }

  private boolean allTopicsCancelled() {
    return cancelled.equals(topics);
  }

  private Set<TopicPartition> assigned(final String topic) {
    return partitions(topic, consumer.assignment());
  }

  private void cancelTopic(final String topic) {
    cancelled.add(topic);

    if (allTopicsCancelled()) {
      stop = true;
    }
  }

  private void close(final KafkaConsumer<K, V> consumer) {
    sendEvent(ConsumerEvent.STOPPED);
    LOGGER.finest(() -> "Closing consumer " + consumer);
    consumer.close();
  }

  private TopicPublisher<K, V> createPublisher(final String topic) {
    return new TopicPublisher<>(topic, recordsToCommit::addLast, this::cancelTopic);
  }

  private Map<String, TopicPublisher<K, V>> createPublishers() {
    return ofNullable(topics).stream()
        .flatMap(Set::stream)
        .collect(toMap(t -> t, this::createPublisher));
  }

  private void commit() {
    getForever(
        () ->
            Optional.of(offsets(consumeHead(recordsToCommit)))
                .filter(offsets -> !offsets.isEmpty())
                .map(
                    offsets ->
                        SideEffect.run(
                                () ->
                                    getConsumer()
                                        .ifPresent(
                                            c -> {
                                              c.commitSync(trace("Commit", offsets));
                                              removePendingCommits(offsets);
                                            }))
                            .andThenGet(() -> true))
                .orElse(false));
  }

  private void dispatch(final ConsumerRecords<K, V> records) {
    publishers.forEach(
        (t, p) -> dispatch(t, p, stream(records.records(t).iterator()).collect(toList())));
  }

  private void dispatch(
      final String topic,
      final TopicPublisher<K, V> publisher,
      final List<ConsumerRecord<K, V>> records) {
    if (!records.isEmpty()) {
      pendingCommits.putAll(offsets(records.stream()));
      publisher.next(records);
    }

    if (!publisher.more()) {
      pause(topic);
    } else {
      resume(topic);
    }
  }

  private int excessQueuedBatches() {
    return queuedBatches() - publishers.size();
  }

  private Optional<KafkaConsumer<K, V>> getConsumer() {
    if (consumer == null && consumerSupplier != null && topics != null) {
      consumer = consumerSupplier.get();
      consumer.subscribe(topics, new RebalanceListener());
    }

    return ofNullable(consumer);
  }

  private <T> T getForever(final Supplier<T> fn) {
    return tryToGetForever(() -> completedFuture(fn.get()), RETRY, this::panic)
        .toCompletableFuture()
        .join();
  }

  private void holdYourHorses() {
    shouldWait().ifPresent(millis -> tryToDoSilent(() -> sleep(millis)));
  }

  private Map<TopicPartition, OffsetAndMetadata> offsets(
      final Stream<ConsumerRecord<K, V>> records) {
    return records
        .map(
            r ->
                pair(
                    new TopicPartition(r.topic(), r.partition()),
                    new OffsetAndMetadata(r.offset() + 1)))
        .collect(
            toMap(
                pair -> pair.first,
                pair -> pair.second,
                (o1, o2) -> o1.offset() > o2.offset() ? o1 : o2));
  }

  private void panic(final Exception exception) {
    LOGGER.log(SEVERE, exception.getMessage(), exception);

    if (consumer != null) {
      LOGGER.finest(() -> "Closing consumer " + consumer);
      consumer.close();
      consumer = null;
    }
  }

  private void pause() {
    getConsumer().ifPresent(c -> topics.forEach(this::pause));
  }

  private void pause(final String topic) {
    if (!paused.contains(topic)) {
      paused.add(topic);
      LOGGER.fine(() -> "Pause " + topic);
      consumer.pause(assigned(topic));
    }
  }

  private Set<TopicPartition> paused(final String topic) {
    return partitions(topic, consumer.paused());
  }

  private ConsumerRecords<K, V> poll() {
    return getForever(() -> getConsumer().map(c -> c.poll(POLL_TIMEOUT)).orElse(null));
  }

  /**
   * Returns the publisher for each topic, which is the key of the map.
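   *
   * <p>A sketch (the topic name is illustrative and {@code mySubscriber} stands for any {@code
   * Flow.Subscriber} of consumer records you provide):
   *
   * <pre>{@code
   * Publisher<ConsumerRecord<String, String>> records = publisher.publishers().get("my-topic");
   * records.subscribe(mySubscriber);
   * }</pre>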
   *
   * @return The map of publishers.
   */
  public Map<String, Publisher<ConsumerRecord<K, V>>> publishers() {
    return publishers.entrySet().stream().collect(toMap(Entry::getKey, Entry::getValue));
  }

  private int queuedBatches() {
    return publishers.values().stream().mapToInt(TopicPublisher::queued).sum();
  }

  private void removePendingCommits(final Map<TopicPartition, OffsetAndMetadata> committed) {
    committed.forEach(
        (k, v) ->
            ofNullable(pendingCommits.get(k))
                .filter(o -> o.offset() <= v.offset())
                .ifPresent(o -> pendingCommits.remove(k)));
  }

  private void resume(final String topic) {
    if (paused.contains(topic)) {
      LOGGER.fine(() -> "Resume " + topic);
      consumer.resume(paused(topic));
      paused.remove(topic);
    }
  }

  private void sendEvent(final ConsumerEvent event) {
    if (eventHandler != null) {
      getConsumer().ifPresent(c -> eventHandler.accept(event, c));
    }
  }

  private Optional<Long> shouldWait() {
    return ofNullable(throttleTime)
        .map(time -> pair(time, excessQueuedBatches()))
        .filter(pair -> pair.second > 0)
        .map(pair -> pair.first)
        .map(Duration::toMillis);
  }

  /**
   * Starts the publisher. The method blocks until the publisher is stopped, which happens either
   * through an explicit request or when all the topic streams have been cancelled.
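   *
   * <p>A sketch of one way to run it (assuming {@code publisher} has been configured with a
   * consumer supplier and topics):
   *
   * <pre>{@code
   * new Thread(publisher::start).start(); // The poll loop blocks, so give it its own thread.
   * // ... later, from another thread:
   * publisher.stop();
   * }</pre>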
   */
  public void start() {
    LOGGER.finest("Starting");

    if (consumerSupplier == null) {
      throw new IllegalArgumentException("Can't run without a consumer.");
    }

    if (topics == null || topics.isEmpty()) {
      throw new IllegalArgumentException("Can't run without topics.");
    }

    pause();

    while (!stop) {
      commit();
      dispatch(poll());
      holdYourHorses();
    }

    LOGGER.finest("Stopped polling");

    getConsumer()
        .ifPresent(
            c -> {
              stopPublishers();
              waitForPendingCommits();
              close(c);
            });
  }

  /** Signals the request to stop the publisher, which will wind down in an orderly way. */
  public void stop() {
    LOGGER.finest("Stop requested");
    stop = true;
  }

  private void stopPublishers() {
    LOGGER.finest("Stopping publishers");

    publishers.entrySet().stream()
        .filter(e -> !cancelled.contains(e.getKey()))
        .forEach(
            e -> {
              pause(e.getKey());
              e.getValue().complete();
            });
  }

  private void waitForPendingCommits() {
    final State<Integer> waited = new State<>();

    waited.set(0);

    while (!allTopicsCancelled() && !pendingCommits.isEmpty() && waited.get() < WAIT_TIMEOUT) {
      tryToDo(
          () -> {
            commit();
            sleep(100);
            waited.set(waited.get() + 100);
          });
    }

    if (waited.get() >= WAIT_TIMEOUT) {
      LOGGER.info("Timeout pending commits.");
    }
  }

  /**
   * Creates a publisher with a Kafka consumer function. The function may be called several times,
   * for example
   * when there are issues with the Kafka cluster. You should not use the auto-commit feature in the
   * configuration of the consumer.
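   *
   * <p>For example (the broker address and group ID are illustrative):
   *
   * <pre>{@code
   * publisher = publisher.withConsumer(
   *     () ->
   *         new KafkaConsumer<>(
   *             Map.<String, Object>of(
   *                 ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092",
   *                 ConsumerConfig.GROUP_ID_CONFIG, "my-group",
   *                 ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false),
   *             new StringDeserializer(),
   *             new StringDeserializer()));
   * }</pre>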
   *
   * @param consumer the function to generate a new Kafka consumer.
   * @return A new publisher instance.
   */
  public KafkaPublisher<K, V> withConsumer(final Supplier<KafkaConsumer<K, V>> consumer) {
    return new KafkaPublisher<>(consumer, topics, eventHandler, throttleTime);
  }

  /**
   * Creates a publisher with an event handler that receives lifecycle events from the publisher.
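   *
   * <p>For example, to log lifecycle transitions (the {@code logger} is illustrative):
   *
   * <pre>{@code
   * publisher = publisher.withEventHandler(
   *     (event, consumer) -> {
   *       if (event == ConsumerEvent.STARTED) {
   *         logger.info("Consumer started with assignment " + consumer.assignment());
   *       }
   *     });
   * }</pre>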
   *
   * @param eventHandler the function that consumes the events. It may be null.
   * @return A new publisher instance.
   */
  public KafkaPublisher<K, V> withEventHandler(
      final BiConsumer<ConsumerEvent, KafkaConsumer<K, V>> eventHandler) {
    return new KafkaPublisher<>(consumerSupplier, topics, eventHandler, throttleTime);
  }

  /**
   * Creates a publisher with a set of topics that should be consumed.
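   *
   * <p>For example, {@code publisher.withTopics(Set.of("topic-a", "topic-b"))}, where the topic
   * names are illustrative.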
   *
   * @param topics the topic names.
   * @return A new publisher instance.
   */
  public KafkaPublisher<K, V> withTopics(final Set<String> topics) {
    return new KafkaPublisher<>(consumerSupplier, topics, eventHandler, throttleTime);
  }

  /**
   * Creates a publisher which throttles the poll loop when more batches have been queued than there
   * are topic publishers.
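   *
   * <p>For example, {@code publisher.withThrottleTime(Duration.ofMillis(500))} stalls the poll
   * loop for half a second whenever more batches are queued than there are topic publishers (the
   * value is illustrative).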
   *
   * @param throttleTime the time the poll loop is stalled.
   * @return A new publisher instance.
   */
  public KafkaPublisher<K, V> withThrottleTime(final Duration throttleTime) {
    return new KafkaPublisher<>(consumerSupplier, topics, eventHandler, throttleTime);
  }

  private class RebalanceListener implements ConsumerRebalanceListener {
    public void onPartitionsAssigned(final Collection<TopicPartition> partitions) {
      if (!started) {
        started = true;
        sendEvent(ConsumerEvent.STARTED);
      }
    }

    public void onPartitionsRevoked(final Collection<TopicPartition> partitions) {
      // Not interested.
    }
  }
}