All Downloads are FREE. Search and download functionalities are using the official Maven repository.

kafka.tools.MirrorMaker.scala Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package kafka.tools

import joptsimple.OptionParser
import kafka.utils.{Utils, CommandLineUtils, Logging}
import kafka.producer.{KeyedMessage, ProducerConfig, Producer}
import scala.collection.JavaConversions._
import java.util.concurrent.CountDownLatch
import java.nio.ByteBuffer
import kafka.consumer._
import kafka.serializer._
import collection.mutable.ListBuffer
import kafka.tools.KafkaMigrationTool.{ProducerThread, ProducerDataChannel}
import kafka.javaapi

object MirrorMaker extends Logging {

  private var connectors: Seq[ZookeeperConsumerConnector] = null
  private var consumerThreads: Seq[MirrorMakerThread] = null
  private var producerThreads: ListBuffer[ProducerThread] = null

  def main(args: Array[String]) {
    
    info ("Starting mirror maker")
    val parser = new OptionParser

    val consumerConfigOpt = parser.accepts("consumer.config",
      "Consumer config to consume from a source cluster. " +
      "You may specify multiple of these.")
      .withRequiredArg()
      .describedAs("config file")
      .ofType(classOf[String])

    val producerConfigOpt = parser.accepts("producer.config",
      "Embedded producer config.")
      .withRequiredArg()
      .describedAs("config file")
      .ofType(classOf[String])

    val numProducersOpt = parser.accepts("num.producers",
      "Number of producer instances")
      .withRequiredArg()
      .describedAs("Number of producers")
      .ofType(classOf[java.lang.Integer])
      .defaultsTo(1)
    
    val numStreamsOpt = parser.accepts("num.streams",
      "Number of consumption streams.")
      .withRequiredArg()
      .describedAs("Number of threads")
      .ofType(classOf[java.lang.Integer])
      .defaultsTo(1)

    val bufferSizeOpt =  parser.accepts("queue.size", "Number of messages that are buffered between the consumer and producer")
      .withRequiredArg()
      .describedAs("Queue size in terms of number of messages")
      .ofType(classOf[java.lang.Integer])
      .defaultsTo(10000);

    val whitelistOpt = parser.accepts("whitelist",
      "Whitelist of topics to mirror.")
      .withRequiredArg()
      .describedAs("Java regex (String)")
      .ofType(classOf[String])

    val blacklistOpt = parser.accepts("blacklist",
            "Blacklist of topics to mirror.")
            .withRequiredArg()
            .describedAs("Java regex (String)")
            .ofType(classOf[String])

    val helpOpt = parser.accepts("help", "Print this message.")

    val options = parser.parse(args : _*)

    if (options.has(helpOpt)) {
      parser.printHelpOn(System.out)
      System.exit(0)
    }

    CommandLineUtils.checkRequiredArgs(parser, options, consumerConfigOpt, producerConfigOpt)
    if (List(whitelistOpt, blacklistOpt).count(options.has) != 1) {
      println("Exactly one of whitelist or blacklist is required.")
      System.exit(1)
    }

    val numStreams = options.valueOf(numStreamsOpt)
    val bufferSize = options.valueOf(bufferSizeOpt).intValue()

    val producers = (1 to options.valueOf(numProducersOpt).intValue()).map(_ => {
      val props = Utils.loadProps(options.valueOf(producerConfigOpt))
      val config = props.getProperty("partitioner.class") match {
        case null =>
          new ProducerConfig(props) {
            override val partitionerClass = "kafka.producer.ByteArrayPartitioner"
          }
        case pClass : String =>
          new ProducerConfig(props)
      }
      new Producer[Array[Byte], Array[Byte]](config)
    })

    connectors = options.valuesOf(consumerConfigOpt).toList
            .map(cfg => new ConsumerConfig(Utils.loadProps(cfg.toString)))
            .map(new ZookeeperConsumerConnector(_))

    val filterSpec = if (options.has(whitelistOpt))
      new Whitelist(options.valueOf(whitelistOpt))
    else
      new Blacklist(options.valueOf(blacklistOpt))

    var streams: Seq[KafkaStream[Array[Byte], Array[Byte]]] = Nil
    try {
      streams = connectors.map(_.createMessageStreamsByFilter(filterSpec, numStreams.intValue(), new DefaultDecoder(), new DefaultDecoder())).flatten
    } catch {
      case t: Throwable =>
        fatal("Unable to create stream - shutting down mirror maker.")
        connectors.foreach(_.shutdown)
    }

    val producerDataChannel = new ProducerDataChannel[KeyedMessage[Array[Byte], Array[Byte]]](bufferSize);

    consumerThreads = streams.zipWithIndex.map(streamAndIndex => new MirrorMakerThread(streamAndIndex._1, producerDataChannel, producers, streamAndIndex._2))

    producerThreads = new ListBuffer[ProducerThread]()

    Runtime.getRuntime.addShutdownHook(new Thread() {
      override def run() {
        cleanShutdown()
      }
    })

    // create producer threads
    var i: Int = 1
    for(producer <- producers) {
      val producerThread: KafkaMigrationTool.ProducerThread = new KafkaMigrationTool.ProducerThread(producerDataChannel,
        new javaapi.producer.Producer[Array[Byte], Array[Byte]](producer), i)
      producerThreads += producerThread
      i += 1
    }

    consumerThreads.foreach(_.start)
    producerThreads.foreach(_.start)

    // in case the consumer threads hit a timeout/other exception
    consumerThreads.foreach(_.awaitShutdown)
    cleanShutdown()
  }

  def cleanShutdown() {
    if (connectors != null) connectors.foreach(_.shutdown)
    if (consumerThreads != null) consumerThreads.foreach(_.awaitShutdown)
    if (producerThreads != null) {
      producerThreads.foreach(_.shutdown)
      producerThreads.foreach(_.awaitShutdown)
    }
    info("Kafka mirror maker shutdown successfully")
  }

  class MirrorMakerThread(stream: KafkaStream[Array[Byte], Array[Byte]],
                          producerDataChannel: ProducerDataChannel[KeyedMessage[Array[Byte], Array[Byte]]],
                          producers: Seq[Producer[Array[Byte], Array[Byte]]],
                          threadId: Int)
          extends Thread with Logging {

    private val shutdownLatch = new CountDownLatch(1)
    private val threadName = "mirrormaker-" + threadId

    this.setName(threadName)

    override def run() {
      info("Starting mirror maker thread " + threadName)
      try {
        for (msgAndMetadata <- stream) {
          // If the key of the message is empty, put it into the universal channel
          // Otherwise use a pre-assigned producer to send the message
          if (msgAndMetadata.key == null) {
            trace("Send the non-keyed message the producer channel.")
            val pd = new KeyedMessage[Array[Byte], Array[Byte]](msgAndMetadata.topic, msgAndMetadata.message)
            producerDataChannel.sendRequest(pd)
          } else {
            val producerId = Utils.abs(java.util.Arrays.hashCode(msgAndMetadata.key)) % producers.size()
            trace("Send message with key %s to producer %d.".format(java.util.Arrays.toString(msgAndMetadata.key), producerId))
            val producer = producers(producerId)
            val pd = new KeyedMessage[Array[Byte], Array[Byte]](msgAndMetadata.topic, msgAndMetadata.key, msgAndMetadata.message)
            producer.send(pd)
          }
        }
      } catch {
        case e: Exception =>
          fatal("%s stream unexpectedly exited.", e)
      } finally {
        shutdownLatch.countDown()
        info("Stopped thread %s.".format(threadName))
      }
    }

    def awaitShutdown() {
      try {
        shutdownLatch.await()
      } catch {
        case e: InterruptedException => fatal("Shutdown of thread %s interrupted. This might leak data!".format(threadName))
      }
    }
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy