Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.sumologic.elasticsearch.akkahelpers
import akka.actor.FSM.Failure
import akka.actor.{FSM, Props}
import akka.pattern.pipe
import akka.stream.actor.ActorPublisher
import com.sumologic.elasticsearch.akkahelpers.ScanAndScrollSource.{ScanData, ScanState}
import com.sumologic.elasticsearch.restlastic.RestlasticSearchClient.ReturnTypes.{ScrollId, SearchResponse}
import com.sumologic.elasticsearch.restlastic.ScrollClient
import com.sumologic.elasticsearch.restlastic.dsl.Dsl._
import org.slf4j.LoggerFactory
import scala.concurrent.ExecutionContext
/**
 * ScanAndScrollSource wraps Elasticsearch's Scroll API as an akka-streams source. By creating and subscribing to this
 * source, you will get a stream of every message in Elasticsearch matching your query. Internally, the messages are
 * batched. The batch size is configurable by setting a size parameter on the query. Externally, each batch is emitted
 * downstream as one `SearchResponse` element.
 *
 * The source only fetches more results from Elasticsearch while downstream is consuming: the next scroll request is
 * issued right after a batch has been pushed downstream. In the future, we may need two enhancements:
 *  - Keep alive messages to keep the source alive >1m
 *  - Buffering of more messages
 *
 * @param index            Index to search
 * @param tpe              Type to search
 * @param query            Query -- probably want MatchAll. You can also specify the batch size
 * @param scrollSource     Raw ES scroll interface
 * @param sizeOpt          Optional size, passed through unchanged to `scrollSource.startScrollRequest`
 * @param executionContext Execution context on which the scroll futures are mapped, recovered and piped back to
 *                         this actor
 */
class ScanAndScrollSource(index: Index, tpe: Type,
                          query: QueryRoot,
                          scrollSource: ScrollClient,
                          sizeOpt: Option[Int] = None,
                          executionContext: ExecutionContext = ExecutionContext.Implicits.global)
  extends ActorPublisher[SearchResponse]
  with FSM[ScanState, ScanData] {

  import akka.stream.actor.ActorPublisherMessage
  import ScanAndScrollSource._

  implicit val ec: ExecutionContext = executionContext

  val logger = LoggerFactory.getLogger(ScanAndScrollSource.getClass)

  // Turns any failure of a scroll future into a ScrollFailure message, so that the actor
  // receives it as a regular FSM event once it is piped to self.
  private val recovery: PartialFunction[Throwable, ScrollFailure] = {
    case ex => ScrollFailure(ex)
  }

  /**
   * Sets up the FSM in its initial state and then issues the first scroll request.
   * The outcome of that request comes back to this actor as either GotData or ScrollFailure.
   */
  override def preStart(): Unit = {
    // Establish the initial state before any asynchronous work is started.
    startWith(Starting, FirstScroll)
    initialize()

    scrollSource.startScrollRequest(index, tpe, query, sizeOpt = sizeOpt).map { case (scrollId, newData) =>
      GotData(scrollId, newData)
    }.recover(recovery).pipeTo(self)
  }

  // Starting: the initial scroll request is in flight and no batch has been received yet.
  when(Starting) {
    case Event(ActorPublisherMessage.Request(_), _) =>
      // No data yet; demand is checked again (via totalDemand) when the first batch arrives.
      stay()
    case Event(ActorPublisherMessage.Cancel, _) =>
      stop()
    case Event(GotData(nextId, data), FirstScroll) =>
      consumeIfNotComplete(nextId, data)
    case Event(ScrollFailure(ex), _) =>
      onError(ScrollFailureException("Failed to start the scroll", ex))
      stop(Failure(ex))
  }

  // Running: either holding a batch until downstream asks for it (WithIdAndData) or
  // waiting for the next batch from Elasticsearch (WaitingForDataWithId).
  when(Running) {
    case Event(ActorPublisherMessage.Request(_), WithIdAndData(id, data)) =>
      consumeIfPossible(id, data)
    case Event(ActorPublisherMessage.Request(_), _: WaitingForDataWithId) =>
      // Nothing to do, just waiting for data
      stay()
    case Event(ActorPublisherMessage.Cancel, _) =>
      // TODO: cancel the scroll
      stop()
    case Event(ScrollFailure(ex), _) =>
      onError(ScrollFailureException("Failure while running the scroll", ex))
      stop(Failure(ex))
    case Event(GotData(nextId, data), WaitingForDataWithId(_)) =>
      consumeIfNotComplete(nextId, data)
  }

  // Anything not matched above is logged and otherwise ignored.
  whenUnhandled {
    case Event(otherEvent, otherData) =>
      logger.warn(s"Unhandled event: $otherEvent, $otherData")
      stay()
  }

  /**
   * Handles a freshly received batch: an empty batch completes the stream and stops the actor,
   * a non-empty batch is handed to consumeIfPossible.
   */
  private def consumeIfNotComplete(nextId: ScrollId, data: SearchResponse): State = {
    if (data.length == 0) {
      onComplete()
      stop()
    } else {
      consumeIfPossible(nextId, data)
    }
  }

  /**
   * Pushes the batch downstream if there is outstanding demand and immediately requests the next
   * batch; otherwise parks the batch in the state data until the next Request arrives.
   */
  private def consumeIfPossible(id: ScrollId, data: SearchResponse): State = {
    if (totalDemand > 0) {
      onNext(data)
      requestMore(id)
      goto(Running) using WaitingForDataWithId(id)
    } else {
      goto(Running) using WithIdAndData(id, data)
    }
  }

  /**
   * Asks the scroll client for the next batch and pipes the outcome (GotData or ScrollFailure)
   * back to this actor. The state transition is the caller's responsibility.
   */
  private def requestMore(id: ScrollId): Unit = {
    scrollSource.scroll(id).map { case (scrollId, newData) =>
      GotData(scrollId, newData)
    }.recover(recovery).pipeTo(self)
  }
}
/**
 * Companion of the ScanAndScrollSource actor: holds the FSM state and state-data types,
 * the messages the actor pipes to itself, the exception reported downstream, and a
 * Props factory.
 */
object ScanAndScrollSource {

  /** States of the scroll FSM. */
  sealed trait ScanState

  /** Initial state, used with FirstScroll until the first batch arrives. */
  case object Starting extends ScanState

  /** State entered once a non-empty batch has been received. */
  case object Running extends ScanState

  /** State data carried by the scroll FSM. */
  sealed trait ScanData

  /** State data used while in Starting: no scroll id and no batch yet. */
  case object FirstScroll extends ScanData

  /** A scroll request for `scrollId` has been issued and its result is pending. */
  case class WaitingForDataWithId(scrollId: ScrollId) extends ScanData

  /** A batch is being held until downstream signals demand. */
  case class WithIdAndData(scrollId: ScrollId, data: SearchResponse) extends ScanData

  /** Internal message: a scroll request succeeded with the given batch and next scroll id. */
  case class GotData(nextScrollId: ScrollId, data: SearchResponse)

  /** Internal message: a scroll request failed with `cause`. */
  case class ScrollFailure(cause: Throwable)

  /** Error signalled to the stream when a scroll request fails; wraps the original cause. */
  case class ScrollFailureException(message: String, cause: Throwable) extends Exception(message, cause)

  /**
   * Builds the Props for a ScanAndScrollSource actor; every argument is forwarded
   * unchanged to the actor's constructor.
   */
  def props(index: Index,
            tpe: Type,
            query: QueryRoot,
            scrollSource: ScrollClient,
            sizeOpt: Option[Int] = None,
            executionContext: ExecutionContext = ExecutionContext.Implicits.global): Props =
    Props(new ScanAndScrollSource(index, tpe, query, scrollSource, sizeOpt, executionContext))
}