All Downloads are FREE. Search and download functionalities are using the official Maven repository.

kafka.tools.MirrorMaker.scala Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package kafka.tools

import kafka.consumer._
import kafka.metrics.KafkaMetricsGroup
import kafka.producer.{BaseProducer, NewShinyProducer, OldProducer}
import kafka.serializer._
import kafka.utils._
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}

import java.util.Random
import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger}
import java.util.concurrent.{BlockingQueue, ArrayBlockingQueue, CountDownLatch, TimeUnit}

import scala.collection.JavaConversions._

import joptsimple.OptionParser

object MirrorMaker extends Logging {

  // Consumer connectors, one per --consumer.config file; assigned in main().
  private var connectors: Seq[ZookeeperConsumerConnector] = null
  // Threads that move messages from the consumer streams into the data channel; assigned in main().
  private var consumerThreads: Seq[ConsumerThread] = null
  // Threads that drain the data channel into the target cluster; assigned in main().
  private var producerThreads: Seq[ProducerThread] = null
  // Guards cleanShutdown() so the shutdown sequence runs at most once
  // (it can be invoked by both the main flow and the JVM shutdown hook).
  private val isShuttingdown: AtomicBoolean = new AtomicBoolean(false)

  // Sentinel record enqueued by ProducerThread.shutdown and recognized by
  // ProducerThread.run via reference comparison (eq) to end the send loop;
  // the topic/value payload itself is never sent anywhere.
  private val shutdownMessage : ProducerRecord[Array[Byte],Array[Byte]] = new ProducerRecord[Array[Byte],Array[Byte]]("shutdown", "shutdown".getBytes)

  /**
   * Entry point. Parses the command line, creates one consumer connector per
   * --consumer.config file, a bounded data channel, and the requested number
   * of producer threads, then blocks until the producer threads shut down.
   */
  def main(args: Array[String]) {

    info("Starting mirror maker")
    val parser = new OptionParser

    val consumerConfigOpt = parser.accepts("consumer.config",
      "Consumer config to consume from a source cluster. " +
      "You may specify multiple of these.")
      .withRequiredArg()
      .describedAs("config file")
      .ofType(classOf[String])

    val producerConfigOpt = parser.accepts("producer.config",
      "Embedded producer config.")
      .withRequiredArg()
      .describedAs("config file")
      .ofType(classOf[String])

    val useNewProducerOpt = parser.accepts("new.producer",
      "Use the new producer implementation.")

    val numProducersOpt = parser.accepts("num.producers",
      "Number of producer instances")
      .withRequiredArg()
      .describedAs("Number of producers")
      .ofType(classOf[java.lang.Integer])
      .defaultsTo(1)

    val numStreamsOpt = parser.accepts("num.streams",
      "Number of consumption streams.")
      .withRequiredArg()
      .describedAs("Number of threads")
      .ofType(classOf[java.lang.Integer])
      .defaultsTo(1)

    val bufferSizeOpt = parser.accepts("queue.size",
      "Number of messages that are buffered between the consumer and producer")
      .withRequiredArg()
      .describedAs("Queue size in terms of number of messages")
      .ofType(classOf[java.lang.Integer])
      .defaultsTo(10000)

    val whitelistOpt = parser.accepts("whitelist",
      "Whitelist of topics to mirror.")
      .withRequiredArg()
      .describedAs("Java regex (String)")
      .ofType(classOf[String])

    val blacklistOpt = parser.accepts("blacklist",
      "Blacklist of topics to mirror.")
      .withRequiredArg()
      .describedAs("Java regex (String)")
      .ofType(classOf[String])

    val helpOpt = parser.accepts("help", "Print this message.")

    if (args.length == 0)
      CommandLineUtils.printUsageAndDie(parser, "Continuously copy data between two Kafka clusters.")

    val options = parser.parse(args : _*)

    if (options.has(helpOpt)) {
      parser.printHelpOn(System.out)
      System.exit(0)
    }

    CommandLineUtils.checkRequiredArgs(parser, options, consumerConfigOpt, producerConfigOpt)
    // Exactly one topic filter (whitelist XOR blacklist) must be supplied.
    if (List(whitelistOpt, blacklistOpt).count(options.has) != 1) {
      println("Exactly one of whitelist or blacklist is required.")
      System.exit(1)
    }

    val numProducers = options.valueOf(numProducersOpt).intValue()
    val numStreams = options.valueOf(numStreamsOpt).intValue()
    val bufferSize = options.valueOf(bufferSizeOpt).intValue()

    // One consumer connector per consumer config file.
    connectors = options.valuesOf(consumerConfigOpt).toList
      .map(cfg => new ConsumerConfig(Utils.loadProps(cfg)))
      .map(new ZookeeperConsumerConnector(_))
    val numConsumers = connectors.size * numStreams

    // Bounded hand-off between the consumer and producer threads.
    val mirrorDataChannel = new DataChannel(bufferSize, numConsumers, numProducers)

    // Create the producer threads. The shared Properties object is mutated on
    // each iteration; this is safe only because Range.map is eager, so each
    // producer is fully constructed before the next client.id is written.
    val useNewProducer = options.has(useNewProducerOpt)
    val producerProps = Utils.loadProps(options.valueOf(producerConfigOpt))
    val clientId = producerProps.getProperty("client.id", "")
    producerThreads = (0 until numProducers).map(i => {
      producerProps.setProperty("client.id", clientId + "-" + i)
      val producer =
        if (useNewProducer) {
          producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer")
          producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer")
          new NewShinyProducer(producerProps)
        } else {
          new OldProducer(producerProps)
        }
      new ProducerThread(mirrorDataChannel, producer, i)
    })

    // Create the consumer streams from the requested topic filter.
    val filterSpec = if (options.has(whitelistOpt))
      new Whitelist(options.valueOf(whitelistOpt))
    else
      new Blacklist(options.valueOf(blacklistOpt))

    var streams: Seq[KafkaStream[Array[Byte], Array[Byte]]] = Nil
    try {
      streams = connectors.map(_.createMessageStreamsByFilter(filterSpec, numStreams, new DefaultDecoder(), new DefaultDecoder())).flatten
    } catch {
      case t: Throwable =>
        // Fix: log the actual cause and exit immediately. Previously the
        // exception was not logged and execution fell through to the assert
        // below, killing the process with an opaque AssertionError.
        fatal("Unable to create stream - shutting down mirror maker.", t)
        connectors.foreach(_.shutdown)
        System.exit(1)
    }
    consumerThreads = streams.zipWithIndex.map(streamAndIndex => new ConsumerThread(streamAndIndex._1, mirrorDataChannel, streamAndIndex._2))
    assert(consumerThreads.size == numConsumers)

    // Shut both sides down cleanly on SIGTERM/SIGINT.
    Runtime.getRuntime.addShutdownHook(new Thread() {
      override def run() {
        cleanShutdown()
      }
    })

    consumerThreads.foreach(_.start)
    producerThreads.foreach(_.start)

    // We wait on the producers' shutdown latches instead of the consumers'
    // since the consumer threads can hit a timeout/other exception; but in
    // that case the producers can still shut down based on the shutdown
    // message in the channel.
    producerThreads.foreach(_.awaitShutdown)
  }

  /**
   * Performs a one-shot orderly shutdown: stop the consumer connectors first,
   * wait for the consumer threads to drain, then signal the producer threads
   * and wait for them to finish.
   */
  def cleanShutdown() {
    // compareAndSet guarantees the sequence runs at most once, even when both
    // the main flow and the JVM shutdown hook call in concurrently.
    if (isShuttingdown.compareAndSet(false, true)) {
      // Each field may still be null if startup failed before it was assigned.
      Option(connectors).foreach(_.foreach(_.shutdown))
      Option(consumerThreads).foreach(_.foreach(_.awaitShutdown))
      Option(producerThreads).foreach { threads =>
        threads.foreach(_.shutdown)
        threads.foreach(_.awaitShutdown)
      }
      info("Kafka mirror maker shutdown successfully")
    }
  }

  /**
   * Bounded hand-off between the consumer threads and the producer threads.
   * Holds one queue per producer thread so that all records sharing a key
   * land in the same queue and therefore keep their relative order.
   */
  class DataChannel(capacity: Int, numInputs: Int, numOutputs: Int) extends KafkaMetricsGroup {

    // One bounded queue per producer thread.
    val queues: Array[BlockingQueue[ProducerRecord[Array[Byte],Array[Byte]]]] =
      Array.tabulate[BlockingQueue[ProducerRecord[Array[Byte],Array[Byte]]]](numOutputs) { _ =>
        new ArrayBlockingQueue[ProducerRecord[Array[Byte],Array[Byte]]](capacity)
      }

    // Round-robin cursor for records without a key; randomly seeded so that
    // restarts do not always begin at the same queue.
    private val unkeyedCursor = new AtomicInteger(new Random().nextInt())

    // A single meter aggregates the wait percentage for the whole channel.
    // Since a meter's rate is total_recorded_value / time_window and the time
    // window is independent of the thread count, every recorded wait time is
    // discounted by the number of threads.
    private val waitPut = newMeter("MirrorMaker-DataChannel-WaitOnPut", "percent", TimeUnit.NANOSECONDS)
    private val waitTake = newMeter("MirrorMaker-DataChannel-WaitOnTake", "percent", TimeUnit.NANOSECONDS)
    private val channelSizeHist = newHistogram("MirrorMaker-DataChannel-Size")

    /**
     * Routes a record to a queue: keyed records always hash to the same queue
     * (preserving per-key ordering); unkeyed records are spread round-robin.
     */
    def put(record: ProducerRecord[Array[Byte],Array[Byte]]) {
      val queueId = record.key() match {
        case null => Utils.abs(unkeyedCursor.getAndIncrement()) % numOutputs
        case key  => Utils.abs(java.util.Arrays.hashCode(key)) % numOutputs
      }
      put(record, queueId)
    }

    /** Blocks until the record fits into the target queue, metering the wait. */
    def put(record: ProducerRecord[Array[Byte],Array[Byte]], queueId: Int) {
      val queue = queues(queueId)
      var accepted = false
      while (!accepted) {
        val blockedAt = SystemTime.nanoseconds
        accepted = queue.offer(record, 500, TimeUnit.MILLISECONDS)
        waitPut.mark((SystemTime.nanoseconds - blockedAt) / numInputs)
      }
      channelSizeHist.update(queue.size)
    }

    /** Blocks until a record is available on the given queue, metering the wait. */
    def take(queueId: Int): ProducerRecord[Array[Byte],Array[Byte]] = {
      val queue = queues(queueId)
      var record: ProducerRecord[Array[Byte],Array[Byte]] = null
      while (record == null) {
        val blockedAt = SystemTime.nanoseconds
        record = queue.poll(500, TimeUnit.MILLISECONDS)
        waitTake.mark((SystemTime.nanoseconds - blockedAt) / numOutputs)
      }
      channelSizeHist.update(queue.size)
      record
    }
  }

  /**
   * Pulls messages off a single consumer stream and pushes them into the
   * shared data channel. If the stream exits abnormally, the whole mirror
   * maker process is terminated.
   */
  class ConsumerThread(stream: KafkaStream[Array[Byte], Array[Byte]],
                       mirrorDataChannel: DataChannel,
                       threadId: Int)
          extends Thread with Logging with KafkaMetricsGroup {

    private val shutdownLatch = new CountDownLatch(1)
    private val threadName = "mirrormaker-consumer-" + threadId
    // Flipped to false when the stream loop dies with an exception.
    private var exitedCleanly = true
    this.logIdent = "[%s] ".format(threadName)

    this.setName(threadName)

    override def run() {
      info("Starting mirror maker consumer thread " + threadName)
      try {
        stream.foreach { msgAndMetadata =>
          mirrorDataChannel.put(
            new ProducerRecord[Array[Byte],Array[Byte]](msgAndMetadata.topic, msgAndMetadata.key, msgAndMetadata.message))
        }
      } catch {
        // Throwable is caught deliberately: any stream failure must bring the
        // whole process down (see the finally block).
        case e: Throwable =>
          fatal("Stream unexpectedly exited.", e)
          exitedCleanly = false
      } finally {
        shutdownLatch.countDown()
        info("Consumer thread stopped")
        // If it exits accidentally, stop the entire mirror maker.
        if (!exitedCleanly) {
          fatal("Consumer thread exited abnormally, stopping the whole mirror maker.")
          System.exit(-1)
        }
      }
    }

    /** Blocks until run() has finished, whether cleanly or not. */
    def awaitShutdown() {
      try {
        shutdownLatch.await()
        info("Consumer thread shutdown complete")
      } catch {
        case _: InterruptedException =>
          fatal("Shutdown of the consumer thread interrupted. This might leak data!")
      }
    }
  }

  /**
   * Drains one queue of the data channel and forwards every record to the
   * target cluster, until the shared shutdown sentinel is observed. If the
   * send loop dies abnormally, the whole mirror maker process is terminated.
   */
  class ProducerThread (val dataChannel: DataChannel,
                        val producer: BaseProducer,
                        val threadId: Int) extends Thread with Logging with KafkaMetricsGroup {
    private val threadName = "mirrormaker-producer-" + threadId
    private val shutdownComplete: CountDownLatch = new CountDownLatch(1)
    // Flipped to false when the send loop dies with an exception.
    private var exitedCleanly = true
    this.logIdent = "[%s] ".format(threadName)

    setName(threadName)

    override def run {
      info("Starting mirror maker producer thread " + threadName)
      try {
        var sawShutdownMessage = false
        while (!sawShutdownMessage) {
          val record: ProducerRecord[Array[Byte],Array[Byte]] = dataChannel.take(threadId)
          trace("Sending message with value size %d".format(record.value().size))
          // Reference comparison: the sentinel is recognized by identity, not content.
          if (record eq shutdownMessage) {
            info("Received shutdown message")
            sawShutdownMessage = true
          } else {
            producer.send(record.topic(), record.key(), record.value())
          }
        }
      } catch {
        // Throwable is caught deliberately: any producer failure must bring
        // the whole process down (see the finally block).
        case t: Throwable =>
          fatal("Producer thread failure due to ", t)
          exitedCleanly = false
      } finally {
        shutdownComplete.countDown()
        info("Producer thread stopped")
        // If it exits accidentally, stop the entire mirror maker.
        if (!exitedCleanly) {
          fatal("Producer thread exited abnormally, stopping the whole mirror maker.")
          System.exit(-1)
        }
      }
    }

    /** Enqueues the shutdown sentinel on this thread's queue; run() exits when it sees it. */
    def shutdown {
      try {
        info("Producer thread " + threadName + " shutting down")
        dataChannel.put(shutdownMessage, threadId)
      } catch {
        case _: InterruptedException =>
          warn("Interrupt during shutdown of ProducerThread")
      }
    }

    /** Blocks until run() has finished, then closes the underlying producer. */
    def awaitShutdown {
      try {
        shutdownComplete.await()
        producer.close()
        info("Producer thread shutdown complete")
      } catch {
        case _: InterruptedException =>
          warn("Shutdown of the producer thread interrupted")
      }
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy