All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sumologic.elasticsearch.akkahelpers.BulkIndexerActor.scala Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.sumologic.elasticsearch.akkahelpers

import akka.actor.{Actor, ActorRef, Props}
import akka.pattern.pipe
import com.sumologic.elasticsearch.restlastic.RestlasticSearchClient
import com.sumologic.elasticsearch.restlastic.RestlasticSearchClient.ReturnTypes.BulkItem
import com.sumologic.elasticsearch.restlastic.dsl.Dsl._
import com.sumologic.elasticsearch.util.InstrumentationContext
import org.slf4j.LoggerFactory

import scala.concurrent.duration.FiniteDuration
import scala.util.Random

case class BulkConfig(flushDuration: () => FiniteDuration, maxDocuments: () => Int)

/**
 * The BulkIndexerActor provides a non-bulk (message-by-message) abstraction on top of the ElasticSearch Bulk API.
 *
 * Every message sent will either return `IndexingComplete` or `IndexingFailure`
 *
 * Note: The BulkIndexerActor does not implement it's own backpressure. The client must account for the number of messages
 * in flight to control memory usage.
 * @param restlasticSearchClient Underlying ElasticsearchClient supporting the bulk API
 * @param bulkConfig Flushing configuration
 */
class BulkIndexerActor(restlasticSearchClient: RestlasticSearchClient, bulkConfig: BulkConfig) extends Actor {
  import BulkIndexerActor._

  val logger = LoggerFactory.getLogger(BulkIndexerActor.getClass)

  case class OpWithTarget(operation: BulkOperation, target: ActorRef, sessionId: BulkSession)
  var queue = List.empty[OpWithTarget]

  import scala.concurrent.ExecutionContext.Implicits.global

  private def resetTimer() = context.system.scheduler.scheduleOnce(bulkConfig.flushDuration(), self, ForceFlush)
  var flushTimer = resetTimer()

  override def receive: Receive = {

    case CreateRequest(sess, index, tpe, doc) =>
      queue = OpWithTarget(BulkOperation(create, Some(index -> tpe), doc), sender(), sess) :: queue
      if (queueFull) flush()

    case IndexRequest(sess, indexName, tpe, doc) =>
      queue = OpWithTarget(BulkOperation(index, Some(indexName -> tpe), doc), sender(), sess) :: queue
      if (queueFull) flush()

    case UpdateRequest(sess, index, tpe, doc) =>
      queue = OpWithTarget(BulkOperation(update, Some(index -> tpe), doc), sender(), sess) :: queue
      if (queueFull) flush()

    case ForceFlush => flush()

    case IndexingComplete(items) =>
        items.foreach { case (item, target, sess) =>
          target ! DocumentIndexed(sess, item)
        }

    case IndexingFailure(cause, actorsAndSessions) =>
        actorsAndSessions.foreach { case (target, sess) =>
          target ! DocumentNotIndexed(sess, cause)
        }
  }

  private def now = System.currentTimeMillis()
  private def ago(millis: Long) = now - millis

  private def flush(): Unit = {
    if (queue.isEmpty) {
      flushTimer = resetTimer()
      return
    }

    val instrument = new InstrumentationContext
    flushTimer.cancel()
    val messages = queue

    queue = List.empty[OpWithTarget]
    val bulkRequest = instrument.measureAction("createRequest") {
      Bulk(messages.map(_.operation))
    }
    val startTime = now
    val respFuture = instrument.measureAction("sendRequest") {
      val respMb = bulkRequest.toJsonStr.length / 1024.0 / 1024
      logger.info(f"Flushing ${messages.length} messages ($respMb%.2fMB).")
      restlasticSearchClient.bulkIndex(bulkRequest).map { items =>
        if (items.length == messages.length) {
          val zipped = items.zip(messages).map { case (item, optarget) => (item, optarget.target, optarget.sessionId) }
          logger.info(f"Flushed ${messages.length} messages ($respMb%.2fMB). esTimeMs=${ago(startTime)}; ${instrument.measurementsString}")
          IndexingComplete(zipped)
        } else {
          logger.error(s"Number of responses ${items.length} != requests(${messages.length})")
          IndexingFailure(new IllegalStateException("Bad response count"), messages.map(op => op.target -> op.sessionId))
        }
      }.recover { case ex: Throwable =>
        logger.warn(s"Failed to flush ${messages.length} messages. esTimeMs=${ago(startTime)}; ${instrument.measurementsString}", ex)
        IndexingFailure(ex, messages.map(op => op.target -> op.sessionId))
      }
    }

    flushTimer = resetTimer()
    respFuture pipeTo self
  }

  private def queueFull: Boolean = queue.size >= bulkConfig.maxDocuments()
}

object BulkIndexerActor {

  def props(searchClient: RestlasticSearchClient, config: BulkConfig) = Props(new BulkIndexerActor(searchClient, config))

  // Messages
  case class CreateRequest(sessionId: BulkSession, index: Index, tpe: Type, doc: Document)
  case class IndexRequest(sessionId: BulkSession, index: Index, tpe: Type, doc: Document)
  case class UpdateRequest(sessionId: BulkSession, index: Index, tpe: Type, doc: Document)
  case object ForceFlush

  // Replies
  case class DocumentIndexed(sessionId: BulkSession, doc: BulkItem)
  case class DocumentNotIndexed(sessionId: BulkSession, cause: Throwable)

  case class BulkSession(id: Long)
  case object BulkSession {
    val r = new Random()
    def create() = BulkSession(r.nextLong())
  }

  private case class IndexingComplete(result: Seq[(BulkItem, ActorRef, BulkSession)])
  private case class IndexingFailure(cause: Throwable, replyInfo: Seq[(ActorRef, BulkSession)])
}






© 2015 - 2025 Weber Informatics LLC | Privacy Policy