io.keen.client.scala.BatchWriterClient.scala
Keen IO SDK/client library for Scala
package io.keen.client.scala
import java.util.concurrent.{ Executors, ScheduledThreadPoolExecutor, TimeUnit }
import scala.collection.mutable.ListBuffer
import scala.concurrent.Await
import scala.concurrent.duration._
import com.typesafe.config.{ Config, ConfigFactory }
object BatchWriterClient {
val MinSendIntervalEvents: Long = 100
val MaxSendIntervalEvents: Long = 10000
val MinSendInterval: FiniteDuration = 60.seconds
val MaxSendInterval: FiniteDuration = 1.hour
}
/**
* A BatchWriterClient is a [[Client]] specialized with the capability to write
* events to the Keen API in batches per request.
*
* Each instance creates a threadpool and schedules flush operations on it, which
* will make bulk write calls to the Keen IO API for batches of events until the
* queue is drained.
*
* Events are queued for batch submission with [[queueEvent]]; other operations
* like [[addEvent]] behave as they do on an ordinary `Client with Writer`: they
* are non-blocking, but each invocation results in its own API call.
*
* Batch size, flush scheduling, queue bounds, etc. can be tuned via the settings
* under the `keen.queue` property tree.
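*
* A minimal sketch of overriding a couple of these settings programmatically;
* the key names below are assumptions inferred from the fields read off
* `settings` and may not match the shipped `reference.conf` exactly:
* {{{
* import com.typesafe.config.ConfigFactory
*
* val tuned = ConfigFactory.parseString(
*   """
*   keen.queue.batch.size = 500
*   keen.queue.send-interval.events = 1000
*   """).withFallback(ConfigFactory.load())
*
* val client = new BatchWriterClient(tuned)
* }}}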
*
* @note If the configured send interval is zero, no periodic background flush is
* scheduled; queued events are then sent only when the event-count threshold is
* reached or when [[shutdown]] is called.
*
* @param config Client configuration, by default loaded from `application.conf`.
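*
* @example A minimal end-to-end sketch; the collection name and event JSON are
* illustrative, and the project id and write key are assumed to come from
* configuration:
* {{{
* val client = new BatchWriterClient()
*
* // queued locally, sent to the API in bulk
* client.queueEvent("purchases", """{"item": "golden gadget", "price": 25.0}""")
*
* // stop the background scheduler and flush anything still queued
* client.shutdown()
* }}}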
*/
class BatchWriterClient(config: Config = ConfigFactory.load())
extends Client(config) with Writer {
import BatchWriterClient._
/** @see [[Settings#batchSize]] */
val batchSize: Integer = settings.batchSize
/** @see [[Settings#batchTimeout]] */
val batchTimeout: FiniteDuration = settings.batchTimeout
/** @see [[Settings#sendIntervalEvents]] */
val sendIntervalEvents: Integer = settings.sendIntervalEvents
/** @see [[Settings#sendIntervalDuration]] */
val sendInterval: FiniteDuration = settings.sendIntervalDuration
/** @see [[Settings#shutdownDelay]] */
val shutdownDelay: FiniteDuration = settings.shutdownDelay
// initialize and configure our local event store queue
// FIXME: should be protected, but tests need to be updated; the EventStore
// should be an injectable component like HttpAdapter (though the injection
// pattern still needs work on that too)
val eventStore: EventStore = new RamEventStore
eventStore.maxEventsPerCollection = settings.maxEventsPerCollection
// Schedule sending of queued events.
protected val scheduledThreadPool: Option[ScheduledThreadPoolExecutor] = scheduleSendQueuedEvents()
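// This is None when the configured send interval is zero, i.e. no periodic
// background flush is scheduled; see scheduleSendQueuedEvents below.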
/**
* Queue an event for batched publishing.
*
* @param collection The collection to which the event will be added.
* @param event The event, as a JSON-encoded string.
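*
* @example A sketch with an illustrative collection name and event body:
* {{{
* client.queueEvent("purchases", """{"item": "golden widget", "price": 4.99}""")
* }}}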
*/
def queueEvent(collection: String, event: String): Unit = {
// bypass min/max intervals for testing
// FIXME: There are less kludgey ways to achieve testability here
environment match {
case Some("test") if Some("test").get matches "(?i)test" =>
case _ =>
require(
sendIntervalEvents == 0 || (sendIntervalEvents >= MinSendIntervalEvents && sendIntervalEvents <= MaxSendIntervalEvents),
s"Send events interval must be between $MinSendIntervalEvents and $MaxSendIntervalEvents"
)
}
eventStore.store(projectId, collection, event)
// If we've met a configured event count threshold, flush the queue.
if (sendIntervalEvents != 0 && eventStore.size >= sendIntervalEvents) {
sendQueuedEventsAsync()
}
}
/**
* Schedule periodic sending of queued events, on a threadpool.
*/
private def scheduleSendQueuedEvents(): Option[ScheduledThreadPoolExecutor] = {
// bypass min/max intervals for testing
environment match {
case Some("test") if Some("test").get matches "(?i)test" =>
case _ =>
require(
sendInterval.toSeconds == 0 || (sendInterval >= MinSendInterval && sendInterval <= MaxSendInterval),
s"Send interval must be between $MinSendInterval and $MaxSendInterval"
)
}
// send queued events every n seconds
sendInterval.toSeconds match {
case n if n <= 0 => None // a zero send interval disables the periodic flush entirely
case _ =>
// use a thread pool for our scheduled threads so we can use daemon threads
val tp = Executors.newScheduledThreadPool(1, new ClientThreadFactory).asInstanceOf[ScheduledThreadPoolExecutor]
// schedule sending from our thread pool at a specific interval
tp.scheduleWithFixedDelay(new Runnable {
def run(): Unit = {
try {
sendQueuedEvents()
} catch {
case ex: Throwable =>
error("Failed to send queued events")
error(s"$ex")
}
}
}, 1, sendInterval.toMillis, TimeUnit.MILLISECONDS)
Some(tp)
}
}
/**
* Flush queued events, removing them from the queue as they are successfully sent.
*/
def sendQueuedEvents(): Unit = {
val handleMap = eventStore.getHandles(projectId)
// iterate over all of the event handles in the queue, by collection
for ((collection, eventHandles) <- handleMap) {
// use fresh handle and event buffers for each collection so that events from a
// previously sent collection are not re-sent under the wrong collection name
val handles = ListBuffer.empty[Long]
val events = ListBuffer.empty[String]
// get each event, and its handle, then add it to a buffer so we can group the events
// into smaller batches
for (handle <- eventHandles) {
handles += handle
events += eventStore.get(handle)
}
// group handles separately so we can use them to remove events from the queue once they've
// been successfully added
val handleGroup: List[ListBuffer[Long]] = handles.grouped(batchSize).toList
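// note: handleGroup(index) lines up with the index-th event batch below, because
// handles and events were appended in the same order and grouped with the same batchSize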
// group the events by batch size, then publish them
for ((batch, index) <- events.grouped(batchSize).zipWithIndex) {
// publish this batch
// TODO: Try to avoid this blocking, see https://github.com/keenlabs/KeenClient-Scala/pull/45
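// each batch is spliced into one bulk payload, e.g. {"purchases": [{"price": 4.99}, {"price": 5.99}]}
// (collection name and event bodies here are illustrative)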
val response = Await.result(
addEvents(s"""{"$collection": [${batch.mkString(",")}]}"""),
batchTimeout
)
// handle addEvents responses properly
response.statusCode match {
case 200 | 201 =>
info(s"""${response.statusCode} ${response.body} | Sent ${batch.size} queued events""")
// remove all of the handles for this batch
for (handle <- handleGroup(index)) {
eventStore.remove(handle)
}
info(s"""Removed ${handleGroup(index).size} events from the queue""")
// log but DO NOT remove events from queue
case _ => error(s"""${response.statusCode} ${response.body} | Failed to send ${batch.size} queued events""")
}
}
}
}
/**
* Flush queued events, sending them to Keen IO on a background thread.
*/
def sendQueuedEventsAsync(): Unit = {
// use a thread pool for our async thread so we can use daemon threads
// TODO: Do we really want a daemon thread?
val tp = Executors.newSingleThreadExecutor(new ClientThreadFactory)
// send our queued events in a separate thread
tp.execute(new Runnable {
def run(): Unit = sendQueuedEvents()
})
}
/**
* Shuts down the threadpool used for scheduled batch flushes, then performs a
* final flush of all remaining queued events on the calling thread.
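*
* @example A sketch of flushing queued events at JVM exit; registering the
* shutdown hook yourself is an illustrative pattern:
* {{{
* val client = new BatchWriterClient()
* sys.addShutdownHook(client.shutdown())
* }}}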
*/
override def shutdown() = {
// Shut down the threadpool, if there is one.
scheduledThreadPool foreach { pool =>
pool.shutdown()
val terminated = pool.awaitTermination(shutdownDelay.toMillis, TimeUnit.MILLISECONDS)
if (!terminated) { error("Failed to shutdown scheduled thread pool") }
}
sendQueuedEvents() // flush the queue, on the main thread
super.shutdown()
}
}