All Downloads are FREE. Search and download functionalities are using the official Maven repository.

kafka.tools.KafkaMigrationTool Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package kafka.tools;

import joptsimple.*;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import kafka.serializer.DefaultEncoder;
import kafka.utils.Utils;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;


/**
 * This is a  kafka 0.7 to 0.8 online migration tool used for migrating data from 0.7 to 0.8 cluster. Internally,
 * it's composed of a kafka 0.7 consumer and kafka 0.8 producer. The kafka 0.7 consumer consumes data from the
 * 0.7 cluster, and the kafka 0.8 producer produces data to the 0.8 cluster.
 *
 * The 0.7 consumer is loaded from kafka 0.7 jar using a "parent last, child first" java class loader.
 * Ordinary class loader is "parent first, child last", and kafka 0.8 and 0.7 both have classes for a lot of
 * class names like "kafka.consumer.Consumer", etc., so ordinary java URLClassLoader with kafka 0.7 jar will
 * will still load the 0.8 version class.
 *
 * As kafka 0.7 and kafka 0.8 used different version of zkClient, the zkClient jar used by kafka 0.7 should
 * also be used by the class loader.
 *
 * The user need to provide the configuration file for 0.7 consumer and 0.8 producer. For 0.8 producer,
 * the "serializer.class" config is set to "kafka.serializer.DefaultEncoder" by the code.
 */
@SuppressWarnings({"unchecked", "rawtypes"})
public class KafkaMigrationTool
{
  private static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(KafkaMigrationTool.class.getName());
  private static final String KAFKA_07_STATIC_CONSUMER_CLASS_NAME = "kafka.consumer.Consumer";
  private static final String KAFKA_07_CONSUMER_CONFIG_CLASS_NAME = "kafka.consumer.ConsumerConfig";
  private static final String KAFKA_07_CONSUMER_STREAM_CLASS_NAME = "kafka.consumer.KafkaStream";
  private static final String KAFKA_07_CONSUMER_ITERATOR_CLASS_NAME = "kafka.consumer.ConsumerIterator";
  private static final String KAFKA_07_CONSUMER_CONNECTOR_CLASS_NAME = "kafka.javaapi.consumer.ConsumerConnector";
  private static final String KAFKA_07_MESSAGE_AND_METADATA_CLASS_NAME = "kafka.message.MessageAndMetadata";
  private static final String KAFKA_07_MESSAGE_CLASS_NAME = "kafka.message.Message";
  private static final String KAFKA_07_WHITE_LIST_CLASS_NAME = "kafka.consumer.Whitelist";
  private static final String KAFKA_07_TOPIC_FILTER_CLASS_NAME = "kafka.consumer.TopicFilter";
  private static final String KAFKA_07_BLACK_LIST_CLASS_NAME = "kafka.consumer.Blacklist";

  private static Class KafkaStaticConsumer_07 = null;
  private static Class ConsumerConfig_07 = null;
  private static Class ConsumerConnector_07 = null;
  private static Class KafkaStream_07 = null;
  private static Class TopicFilter_07 = null;
  private static Class WhiteList_07 = null;
  private static Class BlackList_07 = null;
  private static Class KafkaConsumerIteratorClass_07 = null;
  private static Class KafkaMessageAndMetatDataClass_07 = null;
  private static Class KafkaMessageClass_07 = null;

  public static void main(String[] args) throws InterruptedException, IOException {
    OptionParser parser = new OptionParser();
    ArgumentAcceptingOptionSpec consumerConfigOpt
      = parser.accepts("consumer.config", "Kafka 0.7 consumer config to consume from the source 0.7 cluster. " + "You man specify multiple of these.")
      .withRequiredArg()
      .describedAs("config file")
      .ofType(String.class);

    ArgumentAcceptingOptionSpec producerConfigOpt
      =  parser.accepts("producer.config", "Producer config.")
      .withRequiredArg()
      .describedAs("config file")
      .ofType(String.class);

    ArgumentAcceptingOptionSpec numProducersOpt
      =  parser.accepts("num.producers", "Number of producer instances")
      .withRequiredArg()
      .describedAs("Number of producers")
      .ofType(Integer.class)
      .defaultsTo(1);

    ArgumentAcceptingOptionSpec zkClient01JarOpt
      = parser.accepts("zkclient.01.jar", "zkClient 0.1 jar file")
      .withRequiredArg()
      .describedAs("zkClient 0.1 jar file required by Kafka 0.7")
      .ofType(String.class);

    ArgumentAcceptingOptionSpec kafka07JarOpt
      = parser.accepts("kafka.07.jar", "Kafka 0.7 jar file")
      .withRequiredArg()
      .describedAs("kafka 0.7 jar")
      .ofType(String.class);

    ArgumentAcceptingOptionSpec numStreamsOpt
      = parser.accepts("num.streams", "Number of consumer streams")
      .withRequiredArg()
      .describedAs("Number of consumer threads")
      .ofType(Integer.class)
      .defaultsTo(1);

    ArgumentAcceptingOptionSpec whitelistOpt
      = parser.accepts("whitelist", "Whitelist of topics to migrate from the 0.7 cluster")
      .withRequiredArg()
      .describedAs("Java regex (String)")
      .ofType(String.class);

    ArgumentAcceptingOptionSpec blacklistOpt
      = parser.accepts("blacklist", "Blacklist of topics to migrate from the 0.7 cluster")
      .withRequiredArg()
      .describedAs("Java regex (String)")
      .ofType(String.class);

    ArgumentAcceptingOptionSpec queueSizeOpt
      =  parser.accepts("queue.size", "Number of messages that are buffered between the 0.7 consumer and 0.8 producer")
      .withRequiredArg()
      .describedAs("Queue size in terms of number of messages")
      .ofType(Integer.class)
      .defaultsTo(10000);

    OptionSpecBuilder helpOpt
      = parser.accepts("help", "Print this message.");

    OptionSet options = parser.parse(args);

    if (options.has(helpOpt)) {
      parser.printHelpOn(System.out);
      System.exit(0);
    }

    checkRequiredArgs(parser, options, new OptionSpec[]{consumerConfigOpt, producerConfigOpt, zkClient01JarOpt, kafka07JarOpt});
    int whiteListCount = options.has(whitelistOpt) ? 1 : 0;
    int blackListCount = options.has(blacklistOpt) ? 1 : 0;
    if(whiteListCount + blackListCount != 1) {
      System.err.println("Exactly one of whitelist or blacklist is required.");
      System.exit(1);
    }

    String kafkaJarFile_07 = options.valueOf(kafka07JarOpt);
    String zkClientJarFile = options.valueOf(zkClient01JarOpt);
    String consumerConfigFile_07 = options.valueOf(consumerConfigOpt);
    int numConsumers = options.valueOf(numStreamsOpt);
    String producerConfigFile_08 = options.valueOf(producerConfigOpt);
    int numProducers = options.valueOf(numProducersOpt);
    final List migrationThreads = new ArrayList(numConsumers);
    final List producerThreads = new ArrayList(numProducers);

    try {
      File kafkaJar_07 = new File(kafkaJarFile_07);
      File zkClientJar = new File(zkClientJarFile);
      ParentLastURLClassLoader c1 = new ParentLastURLClassLoader(new URL[] {
        kafkaJar_07.toURI().toURL(),
        zkClientJar.toURI().toURL()
      });

      /** Construct the 07 consumer config **/
      ConsumerConfig_07 = c1.loadClass(KAFKA_07_CONSUMER_CONFIG_CLASS_NAME);
      KafkaStaticConsumer_07 = c1.loadClass(KAFKA_07_STATIC_CONSUMER_CLASS_NAME);
      ConsumerConnector_07 = c1.loadClass(KAFKA_07_CONSUMER_CONNECTOR_CLASS_NAME);
      KafkaStream_07 = c1.loadClass(KAFKA_07_CONSUMER_STREAM_CLASS_NAME);
      TopicFilter_07 = c1.loadClass(KAFKA_07_TOPIC_FILTER_CLASS_NAME);
      WhiteList_07 = c1.loadClass(KAFKA_07_WHITE_LIST_CLASS_NAME);
      BlackList_07 = c1.loadClass(KAFKA_07_BLACK_LIST_CLASS_NAME);
      KafkaMessageClass_07 = c1.loadClass(KAFKA_07_MESSAGE_CLASS_NAME);
      KafkaConsumerIteratorClass_07 = c1.loadClass(KAFKA_07_CONSUMER_ITERATOR_CLASS_NAME);
      KafkaMessageAndMetatDataClass_07 = c1.loadClass(KAFKA_07_MESSAGE_AND_METADATA_CLASS_NAME);

      Constructor ConsumerConfigConstructor_07 = ConsumerConfig_07.getConstructor(Properties.class);
      Properties kafkaConsumerProperties_07 = new Properties();
      kafkaConsumerProperties_07.load(new FileInputStream(consumerConfigFile_07));
      /** Disable shallow iteration because the message format is different between 07 and 08, we have to get each individual message **/
      if(kafkaConsumerProperties_07.getProperty("shallow.iterator.enable", "").equals("true")) {
        logger.warn("Shallow iterator should not be used in the migration tool");
        kafkaConsumerProperties_07.setProperty("shallow.iterator.enable", "false");
      }
      Object consumerConfig_07 = ConsumerConfigConstructor_07.newInstance(kafkaConsumerProperties_07);

      /** Construct the 07 consumer connector **/
      Method ConsumerConnectorCreationMethod_07 = KafkaStaticConsumer_07.getMethod("createJavaConsumerConnector", ConsumerConfig_07);
      final Object consumerConnector_07 = ConsumerConnectorCreationMethod_07.invoke(null, consumerConfig_07);
      Method ConsumerConnectorCreateMessageStreamsMethod_07 = ConsumerConnector_07.getMethod(
        "createMessageStreamsByFilter",
        TopicFilter_07, int.class);
      final Method ConsumerConnectorShutdownMethod_07 = ConsumerConnector_07.getMethod("shutdown");
      Constructor WhiteListConstructor_07 = WhiteList_07.getConstructor(String.class);
      Constructor BlackListConstructor_07 = BlackList_07.getConstructor(String.class);
      Object filterSpec = null;
      if(options.has(whitelistOpt))
        filterSpec = WhiteListConstructor_07.newInstance(options.valueOf(whitelistOpt));
      else
        filterSpec = BlackListConstructor_07.newInstance(options.valueOf(blacklistOpt));

      Object retKafkaStreams = ConsumerConnectorCreateMessageStreamsMethod_07.invoke(consumerConnector_07, filterSpec, numConsumers);

      Properties kafkaProducerProperties_08 = new Properties();
      kafkaProducerProperties_08.load(new FileInputStream(producerConfigFile_08));
      kafkaProducerProperties_08.setProperty("serializer.class", "kafka.serializer.DefaultEncoder");
      // create a producer channel instead
      int queueSize = options.valueOf(queueSizeOpt);
      ProducerDataChannel> producerDataChannel = new ProducerDataChannel>(queueSize);
      int threadId = 0;

      Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
          try {
            ConsumerConnectorShutdownMethod_07.invoke(consumerConnector_07);
          } catch(Exception e) {
            logger.error("Error while shutting down Kafka consumer", e);
          }
          for(MigrationThread migrationThread : migrationThreads) {
            migrationThread.shutdown();
          }
          for(ProducerThread producerThread : producerThreads) {
            producerThread.shutdown();
          }
          for(ProducerThread producerThread : producerThreads) {
            producerThread.awaitShutdown();
          }
          logger.info("Kafka migration tool shutdown successfully");
        }
      });

      // start consumer threads
      for(Object stream: (List)retKafkaStreams) {
        MigrationThread thread = new MigrationThread(stream, producerDataChannel, threadId);
        threadId ++;
        thread.start();
        migrationThreads.add(thread);
      }

      String clientId = kafkaProducerProperties_08.getProperty("client.id");
      // start producer threads
      for (int i = 0; i < numProducers; i++) {
        kafkaProducerProperties_08.put("client.id", clientId + "-" + i);
        ProducerConfig producerConfig_08 = new ProducerConfig(kafkaProducerProperties_08);
        DefaultEncoder encoder = new DefaultEncoder(null);
        Producer producer = new Producer(producerConfig_08, encoder, encoder);
        ProducerThread producerThread = new ProducerThread(producerDataChannel, producer, i);
        producerThread.start();
        producerThreads.add(producerThread);
      }
    }
    catch (Throwable e){
      System.out.println("Kafka migration tool failed due to: " + Utils.stackTrace(e));
      logger.error("Kafka migration tool failed: ", e);
    }
  }

  private static void checkRequiredArgs(OptionParser parser, OptionSet options, OptionSpec[] required) throws IOException {
    for(OptionSpec arg : required) {
      if(!options.has(arg)) {
        System.err.println("Missing required argument \"" + arg + "\"");
        parser.printHelpOn(System.err);
        System.exit(1);
      }
    }
  }

  static class ProducerDataChannel {
    private final int producerQueueSize;
    private final BlockingQueue producerRequestQueue;

    public ProducerDataChannel(int queueSize) {
      producerQueueSize = queueSize;
      producerRequestQueue = new ArrayBlockingQueue(producerQueueSize);
    }

    public void sendRequest(T data) throws InterruptedException {
      producerRequestQueue.put(data);
    }

    public T receiveRequest() throws InterruptedException {
      return producerRequestQueue.take();
    }
  }

  private static class MigrationThread extends Thread {
    private final Object stream;
    private final ProducerDataChannel> producerDataChannel;
    private final int threadId;
    private final String threadName;
    private final org.apache.log4j.Logger logger;
    private CountDownLatch shutdownComplete = new CountDownLatch(1);
    private final AtomicBoolean isRunning = new AtomicBoolean(true);

    MigrationThread(Object _stream, ProducerDataChannel> _producerDataChannel, int _threadId) {
      stream = _stream;
      producerDataChannel = _producerDataChannel;
      threadId = _threadId;
      threadName = "MigrationThread-" + threadId;
      logger = org.apache.log4j.Logger.getLogger(MigrationThread.class.getName());
      this.setName(threadName);
    }

    public void run() {
      try {
        Method MessageGetPayloadMethod_07 = KafkaMessageClass_07.getMethod("payload");
        Method KafkaGetMessageMethod_07 = KafkaMessageAndMetatDataClass_07.getMethod("message");
        Method KafkaGetTopicMethod_07 = KafkaMessageAndMetatDataClass_07.getMethod("topic");
        Method ConsumerIteratorMethod = KafkaStream_07.getMethod("iterator");
        Method KafkaStreamHasNextMethod_07 = KafkaConsumerIteratorClass_07.getMethod("hasNext");
        Method KafkaStreamNextMethod_07 = KafkaConsumerIteratorClass_07.getMethod("next");
        Object iterator = ConsumerIteratorMethod.invoke(stream);

        while (((Boolean) KafkaStreamHasNextMethod_07.invoke(iterator)).booleanValue()) {
          Object messageAndMetaData_07 = KafkaStreamNextMethod_07.invoke(iterator);
          Object message_07 = KafkaGetMessageMethod_07.invoke(messageAndMetaData_07);
          Object topic = KafkaGetTopicMethod_07.invoke(messageAndMetaData_07);
          Object payload_07 = MessageGetPayloadMethod_07.invoke(message_07);
          int size = ((ByteBuffer)payload_07).remaining();
          byte[] bytes = new byte[size];
          ((ByteBuffer)payload_07).get(bytes);
          if(logger.isDebugEnabled())
            logger.debug("Migration thread " + threadId + " sending message of size " + bytes.length + " to topic "+ topic);
          KeyedMessage producerData = new KeyedMessage((String)topic, null, bytes);
          producerDataChannel.sendRequest(producerData);
        }
        logger.info("Migration thread " + threadName + " finished running");
      } catch (InvocationTargetException t){
        logger.fatal("Migration thread failure due to root cause ", t.getCause());
      } catch (Throwable t){
        logger.fatal("Migration thread failure due to ", t);
      } finally {
        shutdownComplete.countDown();
      }
    }

    public void shutdown() {
      logger.info("Migration thread " + threadName + " shutting down");
      isRunning.set(false);
      interrupt();
      try {
        shutdownComplete.await();
      } catch(InterruptedException ie) {
        logger.warn("Interrupt during shutdown of MigrationThread", ie);
      }
      logger.info("Migration thread " + threadName + " shutdown complete");
    }
  }

  static class ProducerThread extends Thread {
    private final ProducerDataChannel> producerDataChannel;
    private final Producer producer;
    private final int threadId;
    private String threadName;
    private org.apache.log4j.Logger logger;
    private CountDownLatch shutdownComplete = new CountDownLatch(1);
    private KeyedMessage shutdownMessage = new KeyedMessage("shutdown", null, null);

    public ProducerThread(ProducerDataChannel> _producerDataChannel,
                          Producer _producer,
                          int _threadId) {
      producerDataChannel = _producerDataChannel;
      producer = _producer;
      threadId = _threadId;
      threadName = "ProducerThread-" + threadId;
      logger = org.apache.log4j.Logger.getLogger(ProducerThread.class.getName());
      this.setName(threadName);
    }

    public void run() {
      try{
        while(true) {
          KeyedMessage data = producerDataChannel.receiveRequest();
          if(!data.equals(shutdownMessage)) {
            producer.send(data);
            if(logger.isDebugEnabled()) logger.debug("Sending message %s".format(new String(data.message())));
          }
          else
            break;
        }
        logger.info("Producer thread " + threadName + " finished running");
      } catch (Throwable t){
        logger.fatal("Producer thread failure due to ", t);
      } finally {
        shutdownComplete.countDown();
      }
    }

    public void shutdown() {
      try {
        logger.info("Producer thread " + threadName + " shutting down");
        producerDataChannel.sendRequest(shutdownMessage);
      } catch(InterruptedException ie) {
        logger.warn("Interrupt during shutdown of ProducerThread", ie);
      }
    }

    public void awaitShutdown() {
      try {
        shutdownComplete.await();
        producer.close();
        logger.info("Producer thread " + threadName + " shutdown complete");
      } catch(InterruptedException ie) {
        logger.warn("Interrupt during shutdown of ProducerThread", ie);
      }
    }
  }

  /**
   * A parent-last class loader that will try the child class loader first and then the parent.
   * This takes a fair bit of doing because java really prefers parent-first.
   */
  private static class ParentLastURLClassLoader extends ClassLoader {
    private ChildURLClassLoader childClassLoader;

    /**
     * This class allows me to call findClass on a class loader
     */
    private static class FindClassClassLoader extends ClassLoader {
      public FindClassClassLoader(ClassLoader parent) {
        super(parent);
      }
      @Override
      public Class findClass(String name) throws ClassNotFoundException {
        return super.findClass(name);
      }
    }

    /**
     * This class delegates (child then parent) for the findClass method for a URLClassLoader.
     * We need this because findClass is protected in URLClassLoader
     */
    private static class ChildURLClassLoader extends URLClassLoader {
      private FindClassClassLoader realParent;
      public ChildURLClassLoader( URL[] urls, FindClassClassLoader realParent) {
        super(urls, null);
        this.realParent = realParent;
      }

      @Override
      public Class findClass(String name) throws ClassNotFoundException {
        try{
          // first try to use the URLClassLoader findClass
          return super.findClass(name);
        }
        catch( ClassNotFoundException e ) {
          // if that fails, we ask our real parent class loader to load the class (we give up)
          return realParent.loadClass(name);
        }
      }
    }

    public ParentLastURLClassLoader(URL[] urls) {
      super(Thread.currentThread().getContextClassLoader());
      childClassLoader = new ChildURLClassLoader(urls, new FindClassClassLoader(this.getParent()));
    }

    @Override
    protected synchronized Class loadClass(String name, boolean resolve) throws ClassNotFoundException {
      try {
        // first we try to find a class inside the child class loader
        return childClassLoader.findClass(name);
      }
      catch( ClassNotFoundException e ) {
        // didn't find it, try the parent
        return super.loadClass(name, resolve);
      }
    }
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy