com.metamx.tranquility.kafka.KafkaConsumer

/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.metamx.tranquility.kafka;

import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.metamx.common.logger.Logger;
import com.metamx.tranquility.config.DataSourceConfig;
import com.metamx.tranquility.kafka.model.MessageCounters;
import com.metamx.tranquility.kafka.model.PropertiesBasedKafkaConfig;
import com.metamx.tranquility.kafka.writer.WriterController;
import io.druid.concurrent.Execs;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.consumer.TopicFilter;
import kafka.consumer.Whitelist;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * Spawns a number of threads to read messages from Kafka topics and write them by calling
 * WriterController.getWriter(topic).send(). Will periodically call WriterController.flushAll() and when this completes
 * will call ConsumerConnector.commitOffsets() to save the last written offset to ZooKeeper. This implementation
 * guarantees that any events in Kafka will be read at least once even in case of a failure condition but does not
 * guarantee that duplication will not occur.
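 * <p>
 * A minimal lifecycle sketch (construction of the configuration objects and the WriterController is
 * elided here; in the Tranquility Kafka application they are built from its runtime configuration):
 * <pre>{@code
 * KafkaConsumer consumer = new KafkaConsumer(globalConfig, kafkaProperties, dataSourceConfigs, writerController);
 * consumer.start();  // spawn the consumer threads and the offset-commit thread
 * // ... later, e.g. from a shutdown hook ...
 * consumer.stop();   // flush buffered events, commit final offsets, stop all threads
 * consumer.join();   // wait for the commit thread to exit
 * }</pre>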
 */
public class KafkaConsumer
{
  private static final Logger log = new Logger(KafkaConsumer.class);

  private final ExecutorService consumerExec;
  private final Thread commitThread;
  private final AtomicBoolean shutdown = new AtomicBoolean();

  // prevents reading the next event from Kafka while events are being flushed and offset is being committed to ZK
  private final ReentrantReadWriteLock commitLock = new ReentrantReadWriteLock();

  private final ConsumerConnector consumerConnector;
  private final TopicFilter topicFilter;
  private final int numThreads;
  private final int commitMillis;
  private final WriterController writerController;

  private Map<String, MessageCounters> previousMessageCounters = new HashMap<>();

  public KafkaConsumer(
      final PropertiesBasedKafkaConfig globalConfig,
      final Properties kafkaProperties,
      final Map<String, DataSourceConfig<PropertiesBasedKafkaConfig>> dataSourceConfigs,
      final WriterController writerController
  )
  {
    this.consumerConnector = getConsumerConnector(kafkaProperties);
    this.topicFilter = new Whitelist(buildTopicFilter(dataSourceConfigs));

    log.info("Kafka topic filter [%s]", this.topicFilter);

    int defaultNumThreads = Math.max(1, Runtime.getRuntime().availableProcessors() - 1);
    this.numThreads = globalConfig.getConsumerNumThreads() > 0
                      ? globalConfig.getConsumerNumThreads()
                      : defaultNumThreads;

    this.commitMillis = globalConfig.getCommitPeriodMillis();
    this.writerController = writerController;
    this.consumerExec = Execs.multiThreaded(numThreads, "KafkaConsumer-%d");
    this.commitThread = new Thread(createCommitRunnable());
    this.commitThread.setName("KafkaConsumer-CommitThread");
    this.commitThread.setDaemon(true);
  }

  public void start()
  {
    commitThread.start();
    startConsumers();
  }

  public void stop()
  {
    if (shutdown.compareAndSet(false, true)) {
      log.info("Shutting down - attempting to flush buffers and commit final offsets");

      try {
        commitLock.writeLock().lockInterruptibly(); // prevent Kafka from consuming any more events
        try {
          writerController.flushAll(); // try to flush the remaining events to Druid
          writerController.stop();
          consumerConnector.commitOffsets(); // update commit offset
        }
        finally {
          commitLock.writeLock().unlock();
          consumerConnector.shutdown();
          commitThread.interrupt();
          consumerExec.shutdownNow();
        }
      }
      catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        Throwables.propagate(e);
      }

      log.info("Finished clean shutdown.");
    }
  }

  public void join() throws InterruptedException
  {
    commitThread.join();
  }

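  /**
   * Flushes all pending messages through Tranquility and, once the flush completes, commits Kafka
   * offsets to ZooKeeper. Holding the write lock blocks every consumer thread from reading further
   * messages until the commit has finished, which is what preserves the at-least-once guarantee.
   */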
  void commit() throws InterruptedException
  {
    commitLock.writeLock().lockInterruptibly();
    try {
      final long flushStartTime = System.currentTimeMillis();
      final Map<String, MessageCounters> messageCounters = writerController.flushAll(); // blocks until complete

      final long commitStartTime = System.currentTimeMillis();
      consumerConnector.commitOffsets();

      final long finishedTime = System.currentTimeMillis();
      Map<String, MessageCounters> countsSinceLastCommit = new HashMap<>();
      for (Map.Entry<String, MessageCounters> entry : messageCounters.entrySet()) {
        countsSinceLastCommit.put(
            entry.getKey(),
            entry.getValue().difference(previousMessageCounters.get(entry.getKey()))
        );
      }

      previousMessageCounters = messageCounters;

      log.info(
          "Flushed %s pending messages in %sms and committed offsets in %sms.",
          countsSinceLastCommit.isEmpty() ? "0" : countsSinceLastCommit,
          commitStartTime - flushStartTime,
          finishedTime - commitStartTime
      );
    }
    finally {
      commitLock.writeLock().unlock();
    }
  }

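  /**
   * Returns the Runnable run by the commit thread: it sleeps until roughly {@code commitMillis}
   * has elapsed since the last flush, calls {@link #commit()}, and repeats until interrupted.
   * Any failure (or interruption) causes {@link #stop()} to be invoked.
   */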
  private Runnable createCommitRunnable()
  {
    return new Runnable()
    {
      @Override
      public void run()
      {
        long lastFlushTime = System.currentTimeMillis();
        try {
          while (!Thread.currentThread().isInterrupted()) {
            Thread.sleep(Math.max(commitMillis - (System.currentTimeMillis() - lastFlushTime), 0));
            commit();
            lastFlushTime = System.currentTimeMillis();
          }
        }
        catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          log.info("Commit thread interrupted.");
        }
        catch (Throwable e) {
          log.error(e, "Commit thread failed!");
          throw Throwables.propagate(e);
        }
        finally {
          stop();
        }
      }
    };
  }

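  /**
   * Creates one KafkaStream per consumer thread for the topic whitelist and submits a worker per
   * stream. Each worker acquires the commit read lock around {@code kafkaIterator.next()} and the
   * subsequent send, so a commit can never run between reading a message and handing it to a writer.
   */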
  private void startConsumers()
  {
    final List<KafkaStream<byte[], byte[]>> kafkaStreams = consumerConnector.createMessageStreamsByFilter(
        topicFilter,
        numThreads
    );

    for (final KafkaStream<byte[], byte[]> kafkaStream : kafkaStreams) {
      consumerExec.submit(
          new Runnable()
          {
            @Override
            public void run()
            {
              try {
                final Iterator<MessageAndMetadata<byte[], byte[]>> kafkaIterator = kafkaStream.iterator();

                while (kafkaIterator.hasNext()) {
                  if (Thread.currentThread().isInterrupted()) {
                    throw new InterruptedException();
                  }

                  // Kafka consumer treats messages as consumed and updates in-memory last offset when the message
                  // is returned by next(). In order to guarantee at-least-once message delivery, we need to a) set
                  // autocommit enable to false so the consumer will not automatically commit offsets to Zookeeper, and
                  // b) synchronize calls of kafkaIterator.next() with the commit thread so that we don't read messages
                  // and then call consumerConnector.commitOffsets() before those messages have been flushed through
                  // Tranquility into the indexing service.
                  commitLock.readLock().lockInterruptibly();

                  try {
                    MessageAndMetadata<byte[], byte[]> data = kafkaIterator.next();
                    writerController.getWriter(data.topic()).send(data.message());
                  }
                  finally {
                    commitLock.readLock().unlock();
                  }
                }
              }
              catch (InterruptedException e) {
                log.info("Consumer thread interrupted.");
              }
              catch (Throwable e) {
                log.error(e, "Exception: ");
                throw Throwables.propagate(e);
              }
              finally {
                stop();
              }
            }
          }
      );
    }
  }

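  /**
   * Builds the high-level consumer connector, forcing {@code auto.commit.enable=false} so that
   * offsets are only ever committed explicitly after a successful flush.
   */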
  private static ConsumerConnector getConsumerConnector(final Properties props)
  {
    props.setProperty("auto.commit.enable", "false");

    final ConsumerConfig config = new ConsumerConfig(props);
    Preconditions.checkState(!config.autoCommitEnable(), "autocommit must be off");

    return Consumer.createJavaConsumerConnector(config);
  }

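  /**
   * Builds a regex whitelist by OR-ing together each data source's topic pattern. For example
   * (illustrative patterns), topicPattern values "twitter.*" and "wiki.*" would produce the
   * filter string "(twitter.*)|(wiki.*)".
   */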
  private static String buildTopicFilter(Map<String, DataSourceConfig<PropertiesBasedKafkaConfig>> dataSourceConfigs)
  {
    StringBuilder topicFilter = new StringBuilder();
    for (Map.Entry<String, DataSourceConfig<PropertiesBasedKafkaConfig>> entry : dataSourceConfigs.entrySet()) {
      topicFilter.append(String.format("(%s)|", entry.getValue().propertiesBasedConfig().getTopicPattern()));
    }

    return topicFilter.length() > 0 ? topicFilter.substring(0, topicFilter.length() - 1) : "";
  }
}