All Downloads are FREE. Search and download functionalities are using the official Maven repository.

sc.ala.kafka.utils.consumer.CountingConsumer.scala Maven / Gradle / Ivy

package sc.ala.kafka.utils.consumer

import kafka.api._
import kafka.utils._
import kafka.consumer._
import kafka.consumer.ConsumerConfig.{SocketTimeout, SocketBufferSize}
import kafka.client.ClientUtils
import kafka.cluster.Broker
import kafka.common.TopicAndPartition
import scala.collection.JavaConversions._
import org.apache.kafka.common.utils.Utils

import sc.ala.rubyist.Using._
import pl.project13.scala.rainbow._

class CountingConsumer(host: String, port: Int, topic: String, partition: Int) extends Logging {
  private def clientId = "api-count"
  private def newSimpleConsumer = new SimpleConsumer(host, port, SocketTimeout, SocketBufferSize, clientId)

  def count(): Long = {
    using(newSimpleConsumer)(countingImpl)
  }

  // derived from "kafka.tools.SimpleConsumerShell"
  private def countingImpl(consumer: SimpleConsumer): Long = {
    var numMessagesConsumed = 0

    val maxMessages = 100000000
    val skipMessageOnError = true
    val replicaId = -1

    val printDebug = false
    
    def buildFetchRequest(offset: Long, fetchSize: Int) = {
      val requestInfos = Map(
        TopicAndPartition(topic, partition) -> PartitionFetchInfo(offset, fetchSize)
      )

      new FetchRequest(
        clientId    = clientId,
        replicaId   = Request.DebuggingConsumerId,  // Request.OrdinaryConsumerId
        maxWait     = 100,
        minBytes    = FetchRequest.DefaultMinBytes,
        requestInfo = requestInfos
      )
    }

    val thread = Utils.newThread(s"$clientId-thread", new Runnable() {
      def run() {
        var offset = resetOffset(topic, partition)
        try {
          while(numMessagesConsumed < maxMessages) {
            val fetchRequest  = buildFetchRequest(offset, 1000000)
            val fetchResponse = consumer.fetch(fetchRequest)
            val messageSet    = fetchResponse.messageSet(topic, partition)
            if (messageSet.validBytes <= 0) {
              if (printDebug) println(s"messageSet not found. exiting".yellow)
              return
            } else {
              if (printDebug) println(s"fetchResponse=$fetchResponse, $messageSet".green)
            }
            if (printDebug) println("multi fetched " + messageSet.sizeInBytes + " bytes from offset " + offset)
            for(messageAndOffset <- messageSet if(numMessagesConsumed < maxMessages)) {
              try {
                offset = messageAndOffset.nextOffset
                if(printDebug) println("next offset = " + offset)
                val message = messageAndOffset.message
                val key = if(message.hasKey) Utils.readBytes(message.key) else null
                numMessagesConsumed += 1
              } catch {
                case e: Throwable =>
                  if (skipMessageOnError)
                    error("Error processing message, skipping this message: ", e)
                  else
                    throw e
              }
              if(System.out.checkError()) {
                // This means no one is listening to our output stream any more, time to shutdown
                System.err.println("Unable to write to standard out, closing consumer.")
                consumer.close()
                throw new RuntimeException("")
              }
            }
          }
        } catch {
          case e: Throwable =>
            error("Error consuming topic, partition, replica (%s, %d, %d) with offset [%d]".format(topic, partition, replicaId, offset), e)
        }finally {
          info("Consumed " + numMessagesConsumed + " messages")
        }
      }
    }, false)

    thread.start()
    thread.join()
    System.out.flush()
    consumer.close()

    numMessagesConsumed
  }

  private def resetOffset(topic: String, partition: Int): Long =
    using(newSimpleConsumer) { _.earliestOrLatestOffset(TopicAndPartition(topic, partition), -2, Request.DebuggingConsumerId) }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy