com.softwaremill.react.kafka.ConsumerProperties.scala Maven / Gradle / Ivy
package com.softwaremill.react.kafka
import java.util.Properties
import kafka.consumer.ConsumerConfig
import kafka.serializer.{DefaultDecoder, Decoder}
object ConsumerProperties {
/**
* Consumer Properties
*
* brokerList
* This is for bootstrapping and the producer will only use it for getting metadata (topics, partitions and replicas).
* The socket connections for sending the actual data will be established based on the broker information returned in
* the metadata. The format is host1:port1,host2:port2, and the list can be a subset of brokers or a VIP pointing to a
* subset of brokers.
*
* zooKeeperHost
* Specifies the zookeeper connection string in the form hostname:port where host and port are the host and port of
* a zookeeper server. To allow connecting through other zookeeper nodes when that zookeeper machine is down you can also
* specify multiple hosts in the form hostname1:port1,hostname2:port2,hostname3:port3. The server may also have a zookeeper
* chroot path as part of it's zookeeper connection string which puts its data under some path in the global zookeeper namespace.
* If so the consumer should use the same chroot path in its connection string. For example to give a chroot path of /chroot/path
* you would give the connection string as hostname1:port1,hostname2:port2,hostname3:port3/chroot/path.
*
* topic
* The high-level API hides the details of brokers from the consumer and allows consuming off the cluster of machines
* without concern for the underlying topology. It also maintains the state of what has been consumed. The high-level API
* also provides the ability to subscribe to topics that match a filter expression (i.e., either a whitelist or a blacklist
* regular expression). This topic is a whitelist only but can change with re-factoring below on the filterSpec
*
* groupId
* A string that uniquely identifies the group of consumer processes to which this consumer belongs. By setting the same
* group id multiple processes indicate that they are all part of the same consumer group.
*
*/
def apply[K, V](
brokerList: String,
zooKeeperHost: String,
topic: String,
groupId: String,
decoder: Decoder[V],
keyDecoder: Decoder[K] = new Decoder[Array[Byte]] {
override def fromBytes(bytes: Array[Byte]): Array[Byte] = bytes
}
): ConsumerProperties[K, V] = {
val props = Map[String, String](
"metadata.broker.list" -> brokerList,
"group.id" -> groupId,
"zookeeper.connect" -> zooKeeperHost,
// defaults
"auto.offset.reset" -> "smallest",
"consumer.timeout.ms" -> "1500",
"offsets.storage" -> "zookeeper"
)
new ConsumerProperties(props, topic, groupId, decoder, keyDecoder)
}
}
case class ConsumerProperties[K, V](
params: Map[String, String],
topic: String,
groupId: String,
decoder: Decoder[V],
keyDecoder: Decoder[K]
) {
/**
* Consumer Timeout
* Throw a timeout exception to the consumer if no message is available for consumption after the specified interval
*/
def consumerTimeoutMs(timeInMs: Long): ConsumerProperties[K, V] =
copy(params = params + ("consumer.timeout.ms" -> timeInMs.toString))
/**
* What to do when there is no initial offset in Zookeeper or if an offset is out of range:
* 1) smallest : automatically reset the offset to the smallest offset
* 2) largest : automatically reset the offset to the largest offset
* 3) anything else: throw exception to the consumer. If this is set to largest, the consumer may lose some
* messages when the number of partitions, for the topics it subscribes to, changes on the broker.
*
* ***************************************************************************************
* To prevent data loss during partition addition, set auto.offset.reset to smallest
*
* This make sense to change to true if you know you are listening for new data only as of
* after you connect to the stream new things are coming out. you can audit/reconcile in
* another consumer which this flag allows you to toggle if it is catch-up and new stuff or
* just new stuff coming out of the stream. This will also block waiting for new stuff so
* it makes a good listener.
*
* //readFromStartOfStream: Boolean = true
* readFromStartOfStream: Boolean = false
* ***************************************************************************************
*
*/
def readFromEndOfStream(): ConsumerProperties[K, V] = copy(params = params + ("auto.offset.reset" -> "largest"))
/**
* Store offsets in Kafka and/or ZooKeeper. NOTE: Server instance must be 8.2 or higher
*
* dualCommit = true means store in both ZooKeeper(legacy) and Kafka(new) places.
*/
def kafkaOffsetsStorage(dualCommit: Boolean): ConsumerProperties[K, V] = {
val p = params + (
"offsets.storage" -> "kafka",
"dual.commit.enabled" -> dualCommit.toString
)
copy(params = p)
}
/**
* Set any additional properties as needed
*/
def setProperty(key: String, value: String): ConsumerProperties[K, V] = copy(params = params + (key -> value))
def setProperties(values: (String, String)*): ConsumerProperties[K, V] = copy(params = params ++ values)
/**
* Generate the Kafka ConsumerConfig object
*
*/
def toConsumerConfig: ConsumerConfig = {
new ConsumerConfig(params.foldLeft(new Properties()) { (props, param) => props.put(param._1, param._2); props })
}
// accessors
def zookeeperConnect: Option[String] = params.get("zookeeper.connect")
/**
* Dump current props for debugging
*/
def dump: String = params.map { e => f"${e._1}%-20s : ${e._2.toString}" }.mkString("\n")
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy