// com.yahoo.maha.service.MahaService.scala Maven / Gradle / Ivy
// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.service
import java.nio.charset.StandardCharsets
import java.util.concurrent.Callable
import com.google.common.io.Closer
import com.yahoo.maha.core._
import com.yahoo.maha.core.bucketing.{BucketParams, BucketSelector, BucketingConfig}
import com.yahoo.maha.core.query._
import com.yahoo.maha.core.registry.{DimensionRegistrationFactory, FactRegistrationFactory, Registry, RegistryBuilder}
import com.yahoo.maha.core.request.ReportingRequest
import com.yahoo.maha.log.MahaRequestLogWriter
import com.yahoo.maha.parrequest2.future.{ParRequest, ParallelServiceExecutor}
import com.yahoo.maha.parrequest2.{GeneralError, ParCallable}
import com.yahoo.maha.service.config.JsonMahaServiceConfig._
import com.yahoo.maha.service.config._
import com.yahoo.maha.service.config.dynamic.DynamicConfigurationUtils._
import com.yahoo.maha.service.config.dynamic.{DynamicConfigurations, DynamicPropertyInfo}
import com.yahoo.maha.service.curators.Curator
import com.yahoo.maha.service.error._
import com.yahoo.maha.service.factory._
import com.yahoo.maha.service.request._
import com.yahoo.maha.service.utils.BaseMahaRequestLogBuilder
import grizzled.slf4j.Logging
import javax.sql.DataSource
import org.json4s.JValue
import org.json4s.JsonAST.JObject
import org.json4s.jackson.JsonMethods.parse
import org.json4s.scalaz.JsonScalaz._
import scalaz.Validation.FlatMap._
import scalaz.syntax.validation._
import scalaz.{Failure, ValidationNel, _}
import scala.collection.mutable
import scala.util.Try
/**
 * Created by hiral on 5/26/17.
 *
 * A [[RegistryConfig]] whose components can be replaced after construction through the
 * `update*` methods; the accessors always return the most recently assigned value.
 * No update method exists for the name.
 *
 * NOTE(review): the backing fields are plain `var`s with no synchronization or `@volatile`,
 * so this class by itself gives no cross-thread visibility guarantee for updates.
 */
class DynamicRegistryConfig(private var _name: String,
                            private var _registry: Registry,
                            private var _queryPipelineFactory: QueryPipelineFactory,
                            private var _queryExecutorContext: QueryExecutorContext,
                            private var _bucketSelector: BucketSelector,
                            private var _utcTimeProvider: UTCTimeProvider,
                            private var _parallelServiceExecutor: ParallelServiceExecutor,
                            private var _userTimeZoneProvider: UserTimeZoneProvider
                           ) extends RegistryConfig {

  // ---- current values ----
  override def name: String = _name
  override def registry: Registry = _registry
  override def queryPipelineFactory: QueryPipelineFactory = _queryPipelineFactory
  override def queryExecutorContext: QueryExecutorContext = _queryExecutorContext
  override def bucketSelector: BucketSelector = _bucketSelector
  override def utcTimeProvider: UTCTimeProvider = _utcTimeProvider
  override def parallelServiceExecutor: ParallelServiceExecutor = _parallelServiceExecutor
  override def userTimeZoneProvider: UserTimeZoneProvider = _userTimeZoneProvider

  // ---- replacement of individual components ----

  /** Replaces the registry returned by [[registry]]. */
  def updateRegistry(registry: Registry): Unit = { _registry = registry }

  /** Replaces the factory returned by [[queryPipelineFactory]]. */
  def updateQueryPipelineFactory(queryPipelineFactory: QueryPipelineFactory): Unit = { _queryPipelineFactory = queryPipelineFactory }

  /** Replaces the context returned by [[queryExecutorContext]]. */
  def updateQueryExecutorContext(queryExecutorContext: QueryExecutorContext): Unit = { _queryExecutorContext = queryExecutorContext }

  /** Replaces the selector returned by [[bucketSelector]]. */
  def updateBucketSelector(bucketSelector: BucketSelector): Unit = { _bucketSelector = bucketSelector }

  /** Replaces the provider returned by [[utcTimeProvider]]. */
  def updateUTCTimeProvider(utcTimeProvider: UTCTimeProvider): Unit = { _utcTimeProvider = utcTimeProvider }

  /** Replaces the executor returned by [[parallelServiceExecutor]]. */
  def updateParallelServiceExecutor(parallelServiceExecutor: ParallelServiceExecutor): Unit = { _parallelServiceExecutor = parallelServiceExecutor }

  /** Replaces the provider returned by [[userTimeZoneProvider]]. */
  def updateUserTimeZoneProvider(userTimeZoneProvider: UserTimeZoneProvider): Unit = { _userTimeZoneProvider = userTimeZoneProvider }
}
/**
 * Everything the service needs to serve requests against one named registry.
 */
trait RegistryConfig {
  /** Name under which this configuration is registered. */
  def name: String

  /** Registry used for request model generation and for the cube/domain JSON lookups. */
  def registry: Registry

  /** Factory used to build query pipelines from a request model. */
  def queryPipelineFactory: QueryPipelineFactory

  /** Executor context handed to a query pipeline when it is executed. */
  def queryExecutorContext: QueryExecutorContext

  /** Bucket selector used for request model generation and query pipeline selection. */
  def bucketSelector: BucketSelector

  /** User time zone provider passed to request model generation. */
  def userTimeZoneProvider: UserTimeZoneProvider

  /** UTC time provider passed to request model generation. */
  def utcTimeProvider: UTCTimeProvider

  /** Executor used to build the ParRequests for this registry. */
  def parallelServiceExecutor: ParallelServiceExecutor
}
/**
 * Top-level configuration consumed by a [[MahaService]].
 */
trait MahaServiceConfig {
  /** Context holding the named components built while loading the configuration. */
  def context: MahaServiceConfigContext

  /** Per-registry configuration keyed by registry name. */
  def registry: Map[String, RegistryConfig]

  /** Writer used for maha request logs. */
  def mahaRequestLogWriter: MahaRequestLogWriter

  /** Curators keyed by name. */
  def curatorMap: Map[String, Curator]
}
/**
 * Immutable [[RegistryConfig]]; every member is supplied once at construction.
 */
case class DefaultRegistryConfig(name: String,
                                 registry: Registry,
                                 queryPipelineFactory: QueryPipelineFactory,
                                 queryExecutorContext: QueryExecutorContext,
                                 bucketSelector: BucketSelector,
                                 utcTimeProvider: UTCTimeProvider,
                                 parallelServiceExecutor: ParallelServiceExecutor,
                                 userTimeZoneProvider: UserTimeZoneProvider
                                ) extends RegistryConfig
/**
 * Immutable [[MahaServiceConfig]]; every member is supplied once at construction.
 */
case class DefaultMahaServiceConfig(context: MahaServiceConfigContext,
                                    registry: Map[String, RegistryConfig],
                                    mahaRequestLogWriter: MahaRequestLogWriter,
                                    curatorMap: Map[String, Curator]
                                   ) extends MahaServiceConfig
/**
 * Successful outcome of executing a request.
 *
 * @param queryPipelineResult result returned by the executed query pipeline
 */
case class RequestResult(
  queryPipelineResult: QueryPipelineResult
)
/**
 * Handles for a request prepared for asynchronous execution.
 *
 * @param queryPipeline outcome of building the query pipeline for the production model
 * @param prodRun       ParRequest yielding the production result
 * @param dryRunOption  ParRequest for the dry-run model, when one was built
 */
case class ParRequestResult(
  queryPipeline: Try[QueryPipeline],
  prodRun: ParRequest[RequestResult],
  dryRunOption: Option[ParRequest[RequestResult]]
)
/**
 * Service API for turning reporting requests into request models and query pipelines and for
 * executing them, plus lookups of registry metadata as JSON strings.
 */
trait MahaService {

  /**
   * Validates a registry name.
   *
   * @param name registry name to check
   * @return true when the name refers to a known registry
   */
  def isValidRegistry(name: String): Boolean

  /** Kafka logger for every Maha reporting request. */
  def mahaRequestLogWriter: MahaRequestLogWriter

  /**
   * Generates the request model itself and executes the request; failures are logged with the
   * given request log builder.
   */
  def processRequest(registryName: String
                     , reportingRequest: ReportingRequest
                     , bucketParams: BucketParams
                     , mahaRequestLogBuilder: BaseMahaRequestLogBuilder): Either[GeneralError, RequestResult]

  /**
   * Generates the request model itself and creates a [[ParRequestResult]]; failures are logged
   * with the given request log builder.
   */
  def executeRequest(registryName: String
                     , reportingRequest: ReportingRequest
                     , bucketParams: BucketParams
                     , mahaRequestLogBuilder: BaseMahaRequestLogBuilder): ParRequestResult

  /** Generates the [[RequestModelResult]] for the given reporting request. */
  def generateRequestModel(registryName: String,
                           reportingRequest: ReportingRequest,
                           bucketParams: BucketParams): Try[RequestModelResult]

  /**
   * Generates query pipelines from a request model and the registry's bucket selector.
   *
   * @param registryName name of the registry to use
   * @param requestModel model to build pipelines for
   * @param bucketParams bucket parameters applied to pipeline selection
   * @return the primary pipeline attempt and an optional secondary pipeline attempt
   */
  def generateQueryPipelines(registryName: String
                             , requestModel: RequestModel, bucketParams: BucketParams): (Try[QueryPipeline], Option[Try[QueryPipeline]])

  /**
   * Executes the request model and provides the [[RequestResult]]; failures are logged with the
   * given request log builder.
   */
  def processRequestModel(registryName: String,
                          requestModel: RequestModel,
                          mahaRequestLogBuilder: BaseMahaRequestLogBuilder): Either[GeneralError, RequestResult]

  /**
   * Asynchronous variant: returns ParRequests for the given request model result; failures are
   * logged with the given request log builder.
   */
  def executeRequestModelResult(registryName: String,
                                requestModelResult: RequestModelResult,
                                mahaRequestLogBuilder: BaseMahaRequestLogBuilder): ParRequestResult

  // ---- registry metadata lookups; each returns an Option of a String ----

  def getCubes(registryName: String) : Option[String]

  def getDomain(registryName: String) : Option[String]

  def getDomainForCube(registryName: String, cube : String) : Option[String]

  def getFlattenDomain(registryName: String) : Option[String]

  def getFlattenDomainForCube(registryName: String, cube : String, revision: Option[Int]= None) : Option[String]

  def getRevisionedDomain(registryName: String): Option[String]

  /** Engines which are not capable of calculating the totalRowCount in one run. */
  def rowCountIncomputableEngineSet: Set[Engine]

  /** The configuration this service runs with. */
  def getMahaServiceConfig: MahaServiceConfig

  /**
   * Note, it is assumed the context has been validated already, e.g. registry name is valid.
   *
   * @param mahaRequestContext request context naming the registry
   * @return the parallel service executor for that registry
   */
  def getParallelServiceExecutor(mahaRequestContext: MahaRequestContext) : ParallelServiceExecutor

  /** Validates the registry named by the request context; see the String overload. */
  protected def validateRegistry(mahaRequestContext: MahaRequestContext): Unit =
    validateRegistry(mahaRequestContext.registryName)

  /**
   * Fails via `require` (IllegalArgumentException) when [[isValidRegistry]] rejects the name.
   */
  protected def validateRegistry(name: String): Unit =
    require(isValidRegistry(name), s"Unknown registry : $name")
}
/**
 * Default [[MahaService]] implementation. Every operation looks up the [[RegistryConfig]]
 * registered under the requested registry name in `config` and uses its registry, query pipeline
 * factory, executor context, bucket selector, time providers and parallel service executor.
 *
 * @param config service configuration holding the per-registry configs and the request log writer
 */
case class DefaultMahaService(config: MahaServiceConfig) extends MahaService with Logging {

  override def isValidRegistry(name: String): Boolean = config.registry.contains(name)

  override val mahaRequestLogWriter: MahaRequestLogWriter = config.mahaRequestLogWriter

  val rowCountIncomputableEngineSet: Set[Engine] = Set(DruidEngine)

  override def getMahaServiceConfig: MahaServiceConfig = config

  /**
   * Generates the request model, executes the production model synchronously and, when a dry-run
   * model was generated successfully, additionally builds a ParRequest for it on a best-effort
   * basis (its result is neither awaited nor returned).
   *
   * The dry run is given `mahaRequestLogBuilder.dryRun()`, matching [[executeRequest]] and
   * [[executeRequestModelResult]], so it does not write into the production request log builder.
   *
   * @throws IllegalArgumentException       when the registry name is unknown
   * @throws MahaServiceBadRequestException when the request model cannot be generated
   */
  override def processRequest(registryName: String
                              , reportingRequest: ReportingRequest
                              , bucketParams: BucketParams
                              , mahaRequestLogBuilder: BaseMahaRequestLogBuilder): Either[GeneralError, RequestResult] = {
    val requestModelResult = requestModelOrThrow(registryName, reportingRequest, bucketParams, mahaRequestLogBuilder)
    val result = processRequestModel(registryName, requestModelResult.model, mahaRequestLogBuilder)

    // Best effort: a failure to set up the dry run must never affect the production result,
    // but it is logged instead of being dropped silently.
    Try {
      for {
        dryRunModelTry <- requestModelResult.dryRunModelTry
        dryRunModel <- dryRunModelTry.toOption
      } asParRequest(registryName, dryRunModel, "dryRunProcessRequest", mahaRequestLogBuilder.dryRun())
    }.failed.foreach(error => logger.warn("Failed to create dry run request", error))

    result
  }

  /**
   * Generates the request model and prepares ParRequests for the production model and, when
   * available, the dry-run model. Invocation is left to the caller.
   *
   * @throws IllegalArgumentException       when the registry name is unknown
   * @throws MahaServiceBadRequestException when the request model cannot be generated
   */
  override def executeRequest(registryName: String
                              , reportingRequest: ReportingRequest
                              , bucketParams: BucketParams
                              , mahaRequestLogBuilder: BaseMahaRequestLogBuilder): ParRequestResult = {
    val requestModelResult = requestModelOrThrow(registryName, reportingRequest, bucketParams, mahaRequestLogBuilder)
    asParRequestResult(registryName, requestModelResult, "executeRequest", mahaRequestLogBuilder)
  }

  /**
   * Validates the registry name and delegates to `RequestModelFactory.fromBucketSelector` with
   * the registry, bucket selector and time providers of that registry's configuration.
   */
  override def generateRequestModel(registryName: String, reportingRequest: ReportingRequest, bucketParams: BucketParams): Try[RequestModelResult] = {
    validateRegistry(registryName)
    val registryConfig = config.registry(registryName)
    RequestModelFactory.fromBucketSelector(
      reportingRequest, bucketParams, registryConfig.registry, registryConfig.bucketSelector
      , utcTimeProvider = registryConfig.utcTimeProvider, userTimeZoneProvider = registryConfig.userTimeZoneProvider
    )
  }

  /**
   * Executes the request model synchronously and provides the [[RequestResult]].
   */
  override def processRequestModel(registryName: String, requestModel: RequestModel, mahaRequestLogBuilder: BaseMahaRequestLogBuilder): Either[GeneralError, RequestResult] =
    asRequest(registryName, requestModel, "processRequestModel", mahaRequestLogBuilder)

  /**
   * Prepares the ParRequests for an already generated request model result. Invocation is left
   * to the caller.
   */
  override def executeRequestModelResult(registryName: String, requestModelResult: RequestModelResult, mahaRequestLogBuilder: BaseMahaRequestLogBuilder): ParRequestResult =
    asParRequestResult(registryName, requestModelResult, "executeRequestModelResult", mahaRequestLogBuilder)

  /**
   * Builds the query pipelines for a request model.
   *
   * When `bucketParams.forceQueryGenVersion` is set to something other than `Version.DEFAULT`,
   * a single pipeline is built for that query generator version and the second tuple element is
   * None; otherwise the pipeline factory decides through the registry's bucket selector.
   *
   * The registry name is not validated here; the configuration is looked up with `Map.apply`,
   * so callers are expected to pass a known registry name.
   */
  override def generateQueryPipelines(registryName: String,
                                      requestModel: RequestModel, bucketParams: BucketParams): (Try[QueryPipeline], Option[Try[QueryPipeline]]) = {
    val registryConfig = config.registry(registryName)
    val queryPipelineFactory = registryConfig.queryPipelineFactory
    bucketParams.forceQueryGenVersion.filter(_ != Version.DEFAULT) match {
      case Some(forcedVersion) =>
        (queryPipelineFactory.fromQueryGenVersion(requestModel, QueryAttributes.empty, forcedVersion), None)
      case None =>
        queryPipelineFactory.fromBucketSelector(requestModel, QueryAttributes.empty, registryConfig.bucketSelector, bucketParams)
    }
  }

  /**
   * Generates the request model or fails the request: the failure is recorded on the request log
   * builder, logged, and rethrown wrapped in a [[MahaServiceBadRequestException]].
   */
  private def requestModelOrThrow(registryName: String,
                                  reportingRequest: ReportingRequest,
                                  bucketParams: BucketParams,
                                  mahaRequestLogBuilder: BaseMahaRequestLogBuilder): RequestModelResult = {
    generateRequestModel(registryName, reportingRequest, bucketParams) match {
      case scala.util.Success(requestModelResult) => requestModelResult
      case scala.util.Failure(error) =>
        val message = "Failed to create Report Model:"
        mahaRequestLogBuilder.logFailed(message)
        logger.error(message, error)
        throw new MahaServiceBadRequestException(message, Option(error))
    }
  }

  /**
   * Builds the production ParRequest and, only when the dry-run model exists and was generated
   * successfully, a dry-run ParRequest that logs through `mahaRequestLogBuilder.dryRun()`.
   */
  private def asParRequestResult(registryName: String,
                                 requestModelResult: RequestModelResult,
                                 parLabel: String,
                                 mahaRequestLogBuilder: BaseMahaRequestLogBuilder): ParRequestResult = {
    val (queryPipelineTry, prodRun) = asParRequest(registryName, requestModelResult.model, parLabel, mahaRequestLogBuilder)
    val dryRunOption = for {
      dryRunModelTry <- requestModelResult.dryRunModelTry
      dryRunModel <- dryRunModelTry.toOption
    } yield asParRequest(registryName, dryRunModel, parLabel, mahaRequestLogBuilder.dryRun())._2
    ParRequestResult(queryPipelineTry, prodRun, dryRunOption)
  }

  /**
   * Records the pipeline on the request log builder, executes it and, on success, records the
   * resulting query attributes as query stats.
   */
  private def processQueryPipeline(registryConfig: RegistryConfig,
                                   queryPipeline: QueryPipeline,
                                   mahaRequestLogBuilder: BaseMahaRequestLogBuilder): Try[QueryPipelineResult] = {
    mahaRequestLogBuilder.logQueryPipeline(queryPipeline)
    val result = queryPipeline.execute(registryConfig.queryExecutorContext, QueryAttributes.empty)
    result.foreach(queryPipelineResult => mahaRequestLogBuilder.logQueryStats(queryPipelineResult.queryAttributes))
    result
  }

  /** Logs a query pipeline build failure (logger and request log builder); returns the message used. */
  private def logQueryPipelineCompileFailure(error: Throwable, mahaRequestLogBuilder: BaseMahaRequestLogBuilder): String = {
    val message = s"Failed to compile the query pipeline ${error.getMessage}"
    logger.error(message, error)
    mahaRequestLogBuilder.logFailed(message)
    message
  }

  /**
   * Executes a built pipeline and maps the outcome to Either: execution failures are logged and
   * returned as a GeneralError carrying a [[MahaServiceExecutionException]].
   */
  private def executeQueryPipeline(registryConfig: RegistryConfig,
                                   queryPipeline: QueryPipeline,
                                   parRequestLabel: String,
                                   mahaRequestLogBuilder: BaseMahaRequestLogBuilder): Either[GeneralError, RequestResult] = {
    processQueryPipeline(registryConfig, queryPipeline, mahaRequestLogBuilder) match {
      case scala.util.Success(queryPipelineResult) =>
        new Right[GeneralError, RequestResult](RequestResult(queryPipelineResult))
      case scala.util.Failure(error) =>
        val message = "Failed to execute the query pipeline"
        logger.error(message, error)
        mahaRequestLogBuilder.logFailed(message)
        GeneralError.either[RequestResult](parRequestLabel, message, new MahaServiceExecutionException(message, Some(error)))
    }
  }

  /**
   * Synchronous path: builds the primary pipeline and executes it on the calling thread.
   *
   * NOTE(review): no caller in this class passes `bucketParams`, so pipeline generation always
   * sees the default `BucketParams()` — confirm this is intended.
   */
  private def asRequest(registryName: String, requestModel: RequestModel, parRequestLabel: String, mahaRequestLogBuilder: BaseMahaRequestLogBuilder, bucketParams: BucketParams = BucketParams()): Either[GeneralError, RequestResult] = {
    validateRegistry(registryName)
    val registryConfig = config.registry(registryName)
    generateQueryPipelines(registryName, requestModel, bucketParams)._1 match {
      case scala.util.Failure(error) =>
        val message = logQueryPipelineCompileFailure(error, mahaRequestLogBuilder)
        GeneralError.either[RequestResult]("createQueryPipeline", message, error)
      case scala.util.Success(queryPipeline) =>
        executeQueryPipeline(registryConfig, queryPipeline, parRequestLabel, mahaRequestLogBuilder)
    }
  }

  /**
   * Asynchronous path: builds the primary pipeline and wraps its execution in a ParRequest built
   * by the registry's parallel service executor. When the pipeline cannot be built, the failure
   * is returned as an immediate ParRequest result.
   *
   * NOTE(review): as with `asRequest`, callers in this class never pass `bucketParams`.
   */
  private def asParRequest(registryName: String, requestModel: RequestModel, parRequestLabel: String, mahaRequestLogBuilder: BaseMahaRequestLogBuilder, bucketParams: BucketParams = BucketParams()): (Try[QueryPipeline], ParRequest[RequestResult]) = {
    validateRegistry(registryName)
    val registryConfig = config.registry(registryName)
    val parallelServiceExecutor = registryConfig.parallelServiceExecutor
    val queryPipelineTry = generateQueryPipelines(registryName, requestModel, bucketParams)._1
    val parRequest: ParRequest[RequestResult] = queryPipelineTry match {
      case scala.util.Failure(error) =>
        val message = logQueryPipelineCompileFailure(error, mahaRequestLogBuilder)
        parallelServiceExecutor.immediateResult("createParRequest", GeneralError.either[RequestResult]("createQueryPipeline", message, error))
      case scala.util.Success(queryPipeline) =>
        parallelServiceExecutor.parRequestBuilder[RequestResult].setLabel(parRequestLabel).
          setParCallable(ParCallable.from[Either[GeneralError, RequestResult]](
            new Callable[Either[GeneralError, RequestResult]]() {
              override def call(): Either[GeneralError, RequestResult] =
                executeQueryPipeline(registryConfig, queryPipeline, parRequestLabel, mahaRequestLogBuilder)
            }
          )).build()
    }
    (queryPipelineTry, parRequest)
  }

  /** Applies `f` to the registry configured under `registryName`; None when the name is unknown. */
  private def withRegistry[T](registryName: String)(f: Registry => T): Option[T] =
    config.registry.get(registryName).map(registryConfig => f(registryConfig.registry))

  override def getCubes(registryName: String) : Option[String] =
    withRegistry(registryName)(_.cubesJson)

  override def getDomain(registryName: String): Option[String] =
    withRegistry(registryName)(_.domainJsonAsString)

  override def getFlattenDomain(registryName: String): Option[String] =
    withRegistry(registryName)(_.flattenDomainJsonAsString)

  override def getDomainForCube(registryName: String, cube: String): Option[String] =
    withRegistry(registryName)(_.getCubeJsonAsStringForCube(cube))

  override def getFlattenDomainForCube(registryName: String, cube: String, revision: Option[Int]): Option[String] =
    withRegistry(registryName) { registry =>
      revision match {
        case Some(cubeRevision) => registry.getFlattenCubeJsonAsStringForCube(cube, cubeRevision)
        case None => registry.getFlattenCubeJsonAsStringForCube(cube)
      }
    }

  override def getRevisionedDomain(registryName: String): Option[String] =
    withRegistry(registryName)(_.versionedDomainJsonAsString)

  /**
   * Note, it is assumed the context has been validated already, e.g. registry name is valid.
   * The name is nevertheless re-validated here before the lookup.
   *
   * @param mahaRequestContext request context naming the registry
   * @return the parallel service executor configured for that registry
   */
  override def getParallelServiceExecutor(mahaRequestContext: MahaRequestContext) : ParallelServiceExecutor = {
    validateRegistry(mahaRequestContext)
    config.registry(mahaRequestContext.registryName).parallelServiceExecutor
  }
}
/**
 * Named components made available while the service configuration is being built.
 * Every map is keyed by the component's name.
 */
trait MahaServiceConfigContext {
  /** Bucketing configurations by name. */
  def bucketConfigMap: Map[String, BucketingConfig]

  /** Data sources by name. */
  def dataSourceMap: Map[String, DataSource]

  /** User time zone providers by name. */
  def userTimeZoneProviderMap: Map[String, UserTimeZoneProvider]

  /** UTC time providers by name. */
  def utcTimeProviderMap: Map[String, UTCTimeProvider]

  /** Query generators by name. */
  def generatorMap: Map[String, QueryGenerator[_ <: EngineRequirement]]

  /** Query executors by name. */
  def executorMap: Map[String, QueryExecutor]

  /** Registries by name. */
  def registryMap: Map[String, Registry]

  /** Parallel service executors by name. */
  def parallelServiceExecutorMap: Map[String, ParallelServiceExecutor]

  /** Curators by name. */
  def curatorMap: Map[String, Curator]
}
/**
 * Immutable [[MahaServiceConfigContext]]. Every map defaults to empty so a context can be grown
 * one component family at a time with `copy`.
 */
case class DefaultMahaServiceConfigContext(
  bucketConfigMap: Map[String, BucketingConfig] = Map.empty,
  dataSourceMap: Map[String, DataSource] = Map.empty,
  userTimeZoneProviderMap: Map[String, UserTimeZoneProvider] = Map.empty,
  utcTimeProviderMap: Map[String, UTCTimeProvider] = Map.empty,
  generatorMap: Map[String, QueryGenerator[_ <: EngineRequirement]] = Map.empty,
  executorMap: Map[String, QueryExecutor] = Map.empty,
  registryMap: Map[String, Registry] = Map.empty,
  parallelServiceExecutorMap: Map[String, ParallelServiceExecutor] = Map.empty,
  curatorMap: Map[String, Curator] = Map.empty
) extends MahaServiceConfigContext
object MahaServiceConfig {
// Single Closer instance handed to every getFactory call made by the init* methods below.
// NOTE(review): presumably getFactory registers closeable factories with it — confirm; no
// close() call on it is visible in this part of the file.
private[this] val closer: Closer = Closer.create()
// Result type of all configuration construction in this object: either the built value or a
// non-empty list of MahaServiceError.
type MahaConfigResult[+A] = scalaz.ValidationNel[MahaServiceError, A]
/**
 * Adds `toMahaConfigResult` to any Option so a missing value can be reported as a configuration
 * error.
 */
implicit class Option2MahaConfigResult[A](option: Option[A]) {
  /**
   * @param errFn error to report when the option is empty; only evaluated in that case
   * @return success holding the option's value, or a single-error failure built from `errFn`
   */
  def toMahaConfigResult(errFn: => MahaServiceError) : MahaConfigResult[A] = option match {
    case Some(value) => value.successNel[MahaServiceError]
    case None => errFn.failureNel[A]
  }
}
/**
 * Builds a [[MahaServiceConfig]] from a UTF-8 encoded JSON document.
 *
 * The bytes are parsed and decoded into a JsonMahaServiceConfig, references by name are
 * validated, and then the component families are initialised in order: bucketing configs,
 * data sources, user time zone providers, UTC time providers, generators, executors, parallel
 * service executors, curators, registries and the request log writer. Each step receives a
 * context containing everything built by the previous steps. The steps are chained with
 * `flatMap`, so the errors of the first failing step are the ones returned.
 *
 * The context stored in the returned config includes the built registries.
 *
 * @param ba UTF-8 encoded JSON configuration
 * @return the service configuration, or the errors that prevented building it
 */
def fromJson(ba: Array[Byte]): MahaServiceConfig.MahaConfigResult[_<:MahaServiceConfig] = {
  val json: JValue = Try(parse(new String(ba, StandardCharsets.UTF_8))) match {
    case scala.util.Success(parsed) => parsed
    case scala.util.Failure(cause) =>
      return Failure(JsonParseError(s"invalidInputJson : ${cause.getMessage}", Option(cause))).toValidationNel
  }

  val jsonMahaServiceConfigResult: ValidationNel[MahaServiceError, JsonMahaServiceConfig] = fromJSON[JsonMahaServiceConfig](json).leftMap {
    nel => nel.map(err => JsonParseError(err.toString))
  }

  val defaultContext = DefaultMahaServiceConfigContext()

  for {
    jsonMahaServiceConfig <- jsonMahaServiceConfigResult
    _ <- validateReferenceByName(jsonMahaServiceConfig)
    bucketConfigMap <- initBucketingConfig(jsonMahaServiceConfig.bucketingConfigMap)(defaultContext)
    postBucketContext = defaultContext.copy(bucketConfigMap = bucketConfigMap)
    dataSourceMap <- initDataSources(jsonMahaServiceConfig.datasourceMap)(postBucketContext)
    postDataSourceContext = postBucketContext.copy(dataSourceMap = dataSourceMap)
    userTimeZoneProviderMap <- initUserTimeZoneProvider(jsonMahaServiceConfig.userTimeZoneProviderMap)(postDataSourceContext)
    postTimeZoneProviderContext = postDataSourceContext.copy(userTimeZoneProviderMap = userTimeZoneProviderMap)
    utcTimeProviderMap <- initUTCTimeProvider(jsonMahaServiceConfig.utcTimeProviderMap)(postTimeZoneProviderContext)
    postTimeProviderContext = postTimeZoneProviderContext.copy(utcTimeProviderMap = utcTimeProviderMap)
    generatorMap <- initGenerators(jsonMahaServiceConfig.generatorMap)(postTimeProviderContext)
    postGeneratorContext = postTimeProviderContext.copy(generatorMap = generatorMap)
    executorMap <- initExecutors(jsonMahaServiceConfig.executorMap)(postGeneratorContext)
    postExecutorContext = postGeneratorContext.copy(executorMap = executorMap)
    parallelServiceExecutorConfig <- initParallelServiceExecutors(jsonMahaServiceConfig.parallelServiceExecutorConfigMap)(postExecutorContext)
    postParallelServiceExecutorContext = postExecutorContext.copy(parallelServiceExecutorMap = parallelServiceExecutorConfig)
    curatorMap <- initCurators(jsonMahaServiceConfig.curatorMap)(postParallelServiceExecutorContext)
    postCuratorContext = postParallelServiceExecutorContext.copy(curatorMap = curatorMap)
    registryMap <- initRegistry(jsonMahaServiceConfig.registryMap)(postCuratorContext)
    postRegistryContext = postCuratorContext.copy(registryMap = registryMap)
    // Use the fully populated context from here on; the registry-less one used to leak into the result.
    mahaRequestLogWriter <- initKafkaLogWriter(jsonMahaServiceConfig.jsonMahaRequestLogConfig)(postRegistryContext)
  } yield {
    val resultMap: Map[String, _<:RegistryConfig] = registryMap.map {
      case (regName, registry) =>
        val registryConfig = jsonMahaServiceConfig.registryMap(regName.toLowerCase)

        // Register only the generators this registry lists. The registry is implicit because
        // DefaultQueryPipelineFactory below picks it up implicitly.
        implicit val queryGeneratorRegistry: QueryGeneratorRegistry = new QueryGeneratorRegistry
        generatorMap.filter(g => registryConfig.generators.contains(g._1)).foreach {
          case (_, generator) =>
            queryGeneratorRegistry.register(generator.engine, generator, generator.version)
        }

        // Likewise register only the executors this registry lists.
        val queryExecutorContext = new QueryExecutorContext
        executorMap.filter(e => registryConfig.executors.contains(e._1)).foreach {
          case (_, executor) =>
            queryExecutorContext.register(executor)
        }

        regName -> DefaultRegistryConfig(regName,
          registry,
          new DefaultQueryPipelineFactory(defaultFactEngine = registry.defaultFactEngine, druidMultiQueryEngineList = registry.druidMultiQueryEngineList),
          queryExecutorContext,
          new BucketSelector(registry, bucketConfigMap(registryConfig.bucketConfigName)),
          utcTimeProviderMap(registryConfig.utcTimeProviderName),
          parallelServiceExecutorConfig(registryConfig.parallelServiceExecutorName),
          userTimeZoneProviderMap(registryConfig.userTimeZoneProviderName)
        )
    }
    DefaultMahaServiceConfig(postRegistryContext, resultMap, mahaRequestLogWriter, curatorMap)
  }
}
/**
 * Checks that every name a registry configuration refers to exists in the corresponding
 * top-level map: bucketing config, parallel service executor, UTC time provider, user time zone
 * provider, and each listed generator and executor.
 *
 * @param config decoded JSON service configuration
 * @return success(true) when every reference resolves; otherwise a failure whose non-empty list
 *         holds one ServiceConfigurationError per distinct missing reference, in discovery order
 */
def validateReferenceByName(config: JsonMahaServiceConfig): MahaConfigResult[Boolean] = {
  val errors: List[MahaServiceError] = config.registryMap.values.toList.flatMap { regConfig =>
    // (reference resolves?, message to report when it does not)
    val namedReferenceErrors: List[MahaServiceError] = List(
      (config.bucketingConfigMap.contains(regConfig.bucketConfigName),
        s"Unable to find bucket config name ${regConfig.bucketConfigName} in map"),
      (config.parallelServiceExecutorConfigMap.contains(regConfig.parallelServiceExecutorName),
        s"Unable to find parallelServiceExecutor name ${regConfig.parallelServiceExecutorName} in map"),
      (config.utcTimeProviderMap.contains(regConfig.utcTimeProviderName),
        s"Unable to find utcTimeProvider name ${regConfig.utcTimeProviderName} in map"),
      (config.userTimeZoneProviderMap.contains(regConfig.userTimeZoneProviderName),
        s"Unable to find userTimeZoneProvider name ${regConfig.userTimeZoneProviderName} in map")
    ).collect { case (false, message) => ServiceConfigurationError(message) }

    val generatorErrors: List[MahaServiceError] = regConfig.generators.toList
      .filterNot(generator => config.generatorMap.contains(generator))
      .map(generator => ServiceConfigurationError(s"Unable to find generator config name $generator in map"))

    val executorErrors: List[MahaServiceError] = regConfig.executors.toList
      .filterNot(executor => config.executorMap.contains(executor))
      .map(executor => ServiceConfigurationError(s"Unable to find executor config name $executor in map"))

    namedReferenceErrors ++ generatorErrors ++ executorErrors
  }.distinct // several registries may miss the same reference; report it once

  // Build a real NonEmptyList of errors. Wrapping the whole collection as a single element and
  // casting would make any consumer that reads an element as MahaServiceError fail.
  errors match {
    case Nil => true.successNel[MahaServiceError]
    case first :: rest => NonEmptyList[MahaServiceError](first, rest: _*).failure[Boolean]
  }
}
/**
 * Builds every named bucketing config: the factory class named in each entry is obtained through
 * `getFactory` and asked to build from the entry's JSON.
 *
 * @return the built configs by name, or the errors of the entries that could not be built
 */
def initBucketingConfig(bucketConfigMap: Map[String, JsonBucketingConfig])(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[Map[String, BucketingConfig]] = {
  import Scalaz._
  val builtEntries: List[MahaServiceConfig.MahaConfigResult[(String, BucketingConfig)]] =
    bucketConfigMap.toList.map { case (configName, jsonConfig) =>
      for {
        bucketingConfigFactory <- getFactory[BucketingConfigFactory](jsonConfig.className, closer)
        bucketingConfig <- bucketingConfigFactory.fromJson(jsonConfig.json)
      } yield (configName, bucketingConfig)
    }
  // Turn the list of per-entry results into one result holding all entries.
  builtEntries
    .sequence[MahaServiceConfig.MahaConfigResult, (String, BucketingConfig)]
    .map(_.toMap)
}
/**
 * Builds every named user time zone provider: the factory class named in each entry is obtained
 * through `getFactory` and asked to build from the entry's JSON.
 *
 * @return the built providers by name, or the errors of the entries that could not be built
 */
def initUserTimeZoneProvider(userTimeZoneProviderMap: Map[String, JsonUserTimeZoneProviderConfig])(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[Map[String, UserTimeZoneProvider]] = {
  import Scalaz._
  val builtEntries: List[MahaServiceConfig.MahaConfigResult[(String, UserTimeZoneProvider)]] =
    userTimeZoneProviderMap.toList.map { case (providerName, jsonConfig) =>
      for {
        providerFactory <- getFactory[UserTimeZoneProviderFactory](jsonConfig.className, closer)
        provider <- providerFactory.fromJson(jsonConfig.json)
      } yield (providerName, provider)
    }
  // Turn the list of per-entry results into one result holding all entries.
  builtEntries
    .sequence[MahaServiceConfig.MahaConfigResult, (String, UserTimeZoneProvider)]
    .map(_.toMap)
}
/**
 * Builds every named UTC time provider: the factory class named in each entry is obtained
 * through `getFactory` and asked to build from the entry's JSON.
 *
 * @return the built providers by name, or the errors of the entries that could not be built
 */
def initUTCTimeProvider(utcTimeProviderMap: Map[String, JsonUTCTimeProviderConfig])(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[Map[String, UTCTimeProvider]] = {
  import Scalaz._
  val builtEntries: List[MahaServiceConfig.MahaConfigResult[(String, UTCTimeProvider)]] =
    utcTimeProviderMap.toList.map { case (providerName, jsonConfig) =>
      for {
        providerFactory <- getFactory[UTCTimeProviderFactory](jsonConfig.className, closer)
        provider <- providerFactory.fromJson(jsonConfig.json)
      } yield (providerName, provider)
    }
  // Turn the list of per-entry results into one result holding all entries.
  builtEntries
    .sequence[MahaServiceConfig.MahaConfigResult, (String, UTCTimeProvider)]
    .map(_.toMap)
}
/**
 * Builds every named query executor: the factory class named in each entry is obtained through
 * `getFactory` and asked to build from the entry's JSON.
 *
 * @return the built executors by name, or the errors of the entries that could not be built
 */
def initExecutors(executorMap: Map[String, JsonQueryExecutorConfig])(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[Map[String, QueryExecutor]] = {
  import Scalaz._
  val builtEntries: List[MahaServiceConfig.MahaConfigResult[(String, QueryExecutor)]] =
    executorMap.toList.map { case (executorName, jsonConfig) =>
      for {
        executorFactory <- getFactory[QueryExecutoryFactory](jsonConfig.className, closer)
        executor <- executorFactory.fromJson(jsonConfig.json)
      } yield (executorName, executor)
    }
  // Turn the list of per-entry results into one result holding all entries.
  builtEntries
    .sequence[MahaServiceConfig.MahaConfigResult, (String, QueryExecutor)]
    .map(_.toMap)
}
/**
 * Builds every named query generator: the factory class named in each entry is obtained through
 * `getFactory` and asked to build from the entry's JSON.
 *
 * @return the built generators by name, or the errors of the entries that could not be built
 */
def initGenerators(generatorMap: Map[String, JsonQueryGeneratorConfig])(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[Map[String, QueryGenerator[_ <: EngineRequirement]]] = {
  import Scalaz._
  val builtEntries: List[MahaServiceConfig.MahaConfigResult[(String, QueryGenerator[_ <: EngineRequirement])]] =
    generatorMap.toList.map { case (generatorName, jsonConfig) =>
      // Intermediate results are annotated explicitly to pin the existential generator type.
      val generatorFactory: MahaConfigResult[QueryGeneratorFactory] = getFactory[QueryGeneratorFactory](jsonConfig.className, closer)
      val generator: MahaConfigResult[QueryGenerator[_ <: EngineRequirement]] = generatorFactory.flatMap(_.fromJson(jsonConfig.json))
      generator.map(built => (generatorName, built))
    }
  // Turn the list of per-entry results into one result holding all entries.
  builtEntries
    .sequence[MahaServiceConfig.MahaConfigResult, (String, QueryGenerator[_ <: EngineRequirement])]
    .map(_.toMap)
}
/**
 * Builds every named registry.
 *
 * For each entry the fact/dimension registration factories and the two cost estimator factories
 * are obtained through `getFactory`, the estimators are built from their JSON, and the entry's
 * default fact engine and its references into the context (UTC time provider, user time zone
 * provider, parallel service executor, bucket config) are checked together so that all unknown
 * names of an entry are reported at once. Facts and dimensions are then registered on a fresh
 * RegistryBuilder and the registry is built.
 *
 * @return the built registries by name, or the errors of the entries that could not be built
 */
def initRegistry(registryMap: Map[String, JsonRegistryConfig])(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[Map[String, Registry]] = {
  import Scalaz._
  val builtRegistries: List[MahaServiceConfig.MahaConfigResult[(String, Registry)]] =
    registryMap.toList.map { case (registryName, jsonConfig) =>
      for {
        factRegistrations <- getFactory[FactRegistrationFactory](jsonConfig.factRegistrationFactoryClass, closer)
        dimRegistrations <- getFactory[DimensionRegistrationFactory](jsonConfig.dimensionRegistrationFactoryClass, closer)
        dimCostEstimatorFactory <- getFactory[DimCostEstimatorFactory](jsonConfig.dimEstimatorFactoryClass, closer)
        factCostEstimatorFactory <- getFactory[FactCostEstimatorFactory](jsonConfig.factEstimatorFactoryClass, closer)
        dimEstimator <- dimCostEstimatorFactory.fromJson(jsonConfig.dimEstimatorFactoryConfig)
        factEstimator <- factCostEstimatorFactory.fromJson(jsonConfig.factEstimatorFactoryConfig)
        engineCheck = Engine.from(jsonConfig.defaultFactEngine).toMahaConfigResult(
          ServiceConfigurationError(s"Unknown default fact engine : ${jsonConfig.defaultFactEngine}"))
        utcProviderCheck = context.utcTimeProviderMap.get(jsonConfig.utcTimeProviderName).toMahaConfigResult(
          ServiceConfigurationError(s"Unknown utcTimeProviderName: ${jsonConfig.utcTimeProviderName}"))
        userTimeZoneProviderCheck = context.userTimeZoneProviderMap.get(jsonConfig.userTimeZoneProviderName).toMahaConfigResult(
          ServiceConfigurationError(s"Unknown userTimeZoneProviderName: ${jsonConfig.userTimeZoneProviderName}"))
        parallelExecutorCheck = context.parallelServiceExecutorMap.get(jsonConfig.parallelServiceExecutorName).toMahaConfigResult(
          ServiceConfigurationError(s"Unknown parallelServiceExecutorName: ${jsonConfig.parallelServiceExecutorName}"))
        bucketConfigCheck = context.bucketConfigMap.get(jsonConfig.bucketConfigName).toMahaConfigResult(
          ServiceConfigurationError(s"Unknown bucketConfigName: ${jsonConfig.bucketConfigName}"))
        // Applicative combination: accumulates the errors of all five checks; only the engine is kept.
        defaultFactEngine <- (engineCheck |@| utcProviderCheck |@| userTimeZoneProviderCheck |@| parallelExecutorCheck |@| bucketConfigCheck)((engine, _, _, _, _) => engine)
        // Names that Engine.from does not recognise are dropped.
        druidMultiEngineQueryList = jsonConfig.druidMultiEngineQueryList.map(Engine.from).flatten
      } yield {
        val registryBuilder = new RegistryBuilder
        factRegistrations.register(registryBuilder)
        dimRegistrations.register(registryBuilder)
        val registry = registryBuilder.build(dimEstimator, factEstimator, jsonConfig.defaultPublicFactRevisionMap
          , jsonConfig.defaultPublicDimRevisionMap, defaultFactEngine, druidMultiEngineQueryList)
        (registryName, registry)
      }
    }
  // Turn the list of per-entry results into one result holding all entries.
  builtRegistries
    .sequence[MahaServiceConfig.MahaConfigResult, (String, Registry)]
    .map(_.toMap)
}
/**
 * Builds every named parallel service executor: the factory class named in each entry is
 * obtained through `getFactory` and asked to build from the entry's JSON.
 *
 * @return the built executors by name, or the errors of the entries that could not be built
 */
def initParallelServiceExecutors(parallelServiceExecutorConfigMap: Map[String, JsonParallelServiceExecutorConfig])(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[Map[String, ParallelServiceExecutor]] = {
  import Scalaz._
  val builtEntries: List[MahaServiceConfig.MahaConfigResult[(String, ParallelServiceExecutor)]] =
    parallelServiceExecutorConfigMap.toList.map { case (executorName, jsonConfig) =>
      for {
        executorFactory <- getFactory[ParallelServiceExecutoryFactory](jsonConfig.className, closer)
        parallelExecutor <- executorFactory.fromJson(jsonConfig.json)
      } yield (executorName, parallelExecutor)
    }
  // Turn the list of per-entry results into one result holding all entries.
  builtEntries
    .sequence[MahaServiceConfig.MahaConfigResult, (String, ParallelServiceExecutor)]
    .map(_.toMap)
}
/**
 * Builds the request log writer described by the given configuration.
 *
 * The factory class named by `className` is obtained through `getFactory`; the factory is
 * then asked to build the writer from `kafkaConfig` and the `isLoggingEnabled` flag.
 *
 * @param jsonMahaRequestLogConfig request log configuration (factory class, kafka config, enabled flag)
 * @param context implicit service config context in scope while the factory is invoked
 * @return the built writer, or the configuration error(s)
 */
def initKafkaLogWriter(jsonMahaRequestLogConfig: JsonMahaRequestLogConfig)(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[MahaRequestLogWriter] = {
  getFactory[MahaRequestLogWriterFactory](jsonMahaRequestLogConfig.className, closer).flatMap { writerFactory =>
    writerFactory.fromJson(jsonMahaRequestLogConfig.kafkaConfig, jsonMahaRequestLogConfig.isLoggingEnabled)
  }
}
/**
 * Instantiates every configured Curator.
 *
 * Each entry's factory class is obtained through `getFactory` and used to build the curator
 * from the entry's `json`; the individual results are combined with scalaz `sequence` into
 * one result holding a name -> curator map.
 *
 * @param curatorConfigMap curator name -> JSON configuration of that curator
 * @param context implicit service config context in scope while the factories are invoked
 * @return the built curators keyed by name, or the configuration error(s)
 */
def initCurators(curatorConfigMap: Map[String, JsonCuratorConfig])(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[Map[String, Curator]] = {
  import Scalaz._
  // One validation result per configured curator, in the map's iteration order.
  val builtCurators: List[MahaServiceConfig.MahaConfigResult[(String, Curator)]] =
    curatorConfigMap.toList.map {
      case (curatorName, curatorConfig) =>
        getFactory[CuratorFactory](curatorConfig.className, closer).flatMap { factory =>
          factory.fromJson(curatorConfig.json).map(curator => (curatorName, curator))
        }
    }
  builtCurators
    .sequence[MahaServiceConfig.MahaConfigResult, (String, Curator)]
    .map(_.toMap)
}
/**
 * Instantiates every configured DataSource.
 *
 * Each entry's factory class is obtained through `getFactory` and used to build the data
 * source from the entry's `json`; the individual results are combined with scalaz `sequence`
 * into one result holding a name -> data source map.
 *
 * @param curatorConfigMap data source name -> JSON configuration of that data source
 *                         (despite its name this map carries data source configs; the name is
 *                         kept so callers using named arguments keep compiling)
 * @param context implicit service config context in scope while the factories are invoked
 * @return the built data sources keyed by name, or the configuration error(s)
 */
def initDataSources(curatorConfigMap: Map[String, JsonDataSourceConfig])(implicit context: MahaServiceConfigContext) : MahaServiceConfig.MahaConfigResult[Map[String, DataSource]] = {
  import Scalaz._
  // One validation result per configured data source, in the map's iteration order.
  val builtDataSources: List[MahaServiceConfig.MahaConfigResult[(String, DataSource)]] =
    curatorConfigMap.toList.map {
      case (dataSourceName, dataSourceConfig) =>
        getFactory[DataSourceFactory](dataSourceConfig.className, closer).flatMap { factory =>
          factory.fromJson(dataSourceConfig.json).map(dataSource => (dataSourceName, dataSource))
        }
    }
  builtDataSources
    .sequence[MahaServiceConfig.MahaConfigResult, (String, DataSource)]
    .map(_.toMap)
}
}
/**
 * A [[MahaServiceConfig]] whose registries are held as [[DynamicRegistryConfig]] instances and
 * which additionally records the dynamic properties found in the source JSON.
 *
 * @param dynamicProperties dynamic property key -> info about that property
 * @param context service config context carried alongside the built components
 * @param registry registry name -> dynamic registry configuration
 * @param mahaRequestLogWriter writer used for request logging
 * @param curatorMap curator name -> curator
 * @param jsonMahaServiceConfig the parsed JSON service configuration this instance was built from
 */
case class DynamicMahaServiceConfig(dynamicProperties: Map[String, DynamicPropertyInfo],
context: MahaServiceConfigContext,
registry: Map[String, DynamicRegistryConfig],
mahaRequestLogWriter: MahaRequestLogWriter,
curatorMap: Map[String, Curator],
jsonMahaServiceConfig: JsonMahaServiceConfig) extends MahaServiceConfig
/**
 * Companion of [[DynamicMahaServiceConfig]]: builds an instance from a JSON document and
 * locates the dynamic properties declared inside that document.
 */
object DynamicMahaServiceConfig {
  import MahaServiceConfig._

  /**
   * Placeholder: ignores both arguments and always returns `None`.
   */
  def createObject(ba: Array[Byte], objectName: String): Option[Object] = {
    None
  }

  /**
   * Builds a [[DynamicMahaServiceConfig]] from a UTF-8 encoded JSON document.
   *
   * The given dynamic configurations are installed through `setDynamicConfigurations` before
   * anything else happens. If the bytes cannot be parsed as JSON a `JsonParseError` carrying
   * the parser's message and exception is returned; otherwise the parsed document is turned
   * into the service configuration.
   *
   * @param ba the JSON document as bytes, decoded as UTF-8
   * @param dynamicConfigurations dynamic configuration source handed to `setDynamicConfigurations`
   * @return the built configuration, or the error(s) found while parsing / initializing
   */
  def fromJson(ba: Array[Byte], dynamicConfigurations: DynamicConfigurations): MahaServiceConfig.MahaConfigResult[DynamicMahaServiceConfig] = {
    setDynamicConfigurations(dynamicConfigurations)
    // scala.util.Success/Failure are spelled out in full: the unqualified names refer to scalaz here.
    Try(parse(new String(ba, StandardCharsets.UTF_8))) match {
      case scala.util.Success(json) =>
        fromParsedJson(json)
      case scala.util.Failure(parseError) =>
        Failure(JsonParseError(s"invalidInputJson : ${parseError.getMessage}", Option(parseError))).toValidationNel
    }
  }

  /**
   * Turns an already parsed JSON document into a [[DynamicMahaServiceConfig]].
   *
   * Components are initialized one after another; each step receives a context that has been
   * extended (via `copy`) with the results of the previous steps.
   */
  private def fromParsedJson(json: JValue): MahaServiceConfig.MahaConfigResult[DynamicMahaServiceConfig] = {
    val jsonMahaServiceConfigResult: ValidationNel[MahaServiceError, JsonMahaServiceConfig] = fromJSON[JsonMahaServiceConfig](json).leftMap {
      nel => nel.map(err => {
        JsonParseError(err.toString)
      })
    }
    val defaultContext = DefaultMahaServiceConfigContext()
    for {
      jsonMahaServiceConfig <- jsonMahaServiceConfigResult
      // Evaluated only for its failure case; the successful value is not used.
      validationResult <- validateReferenceByName(jsonMahaServiceConfig)
      bucketConfigMap <- initBucketingConfig(jsonMahaServiceConfig.bucketingConfigMap)(defaultContext)
      postBucketContext = defaultContext.copy(bucketConfigMap = bucketConfigMap)
      dataSourceMap <- initDataSources(jsonMahaServiceConfig.datasourceMap)(postBucketContext)
      postDataSourceContext = postBucketContext.copy(dataSourceMap = dataSourceMap)
      userTimeZoneProviderMap <- initUserTimeZoneProvider(jsonMahaServiceConfig.userTimeZoneProviderMap)(postDataSourceContext)
      postTimeZoneProviderContext = postDataSourceContext.copy(userTimeZoneProviderMap = userTimeZoneProviderMap)
      utcTimeProviderMap <- initUTCTimeProvider(jsonMahaServiceConfig.utcTimeProviderMap)(postTimeZoneProviderContext)
      postTimeProviderContext = postTimeZoneProviderContext.copy(utcTimeProviderMap = utcTimeProviderMap)
      generatorMap <- initGenerators(jsonMahaServiceConfig.generatorMap)(postTimeProviderContext)
      postGeneratorContext = postTimeProviderContext.copy(generatorMap = generatorMap)
      executorMap <- initExecutors(jsonMahaServiceConfig.executorMap)(postGeneratorContext)
      postExecutorContext = postGeneratorContext.copy(executorMap = executorMap)
      parallelServiceExecutorConfig <- initParallelServiceExecutors(jsonMahaServiceConfig.parallelServiceExecutorConfigMap)(postExecutorContext)
      postParallelServiceExecutorContext = postExecutorContext.copy(parallelServiceExecutorMap = parallelServiceExecutorConfig)
      curatorMap <- initCurators(jsonMahaServiceConfig.curatorMap)(postParallelServiceExecutorContext)
      postCuratorContext = postParallelServiceExecutorContext.copy(curatorMap = curatorMap)
      registryMap <- initRegistry(jsonMahaServiceConfig.registryMap)(postCuratorContext)
      // NOTE(review): postRegistryContext is computed but never used; the log writer and the
      // resulting config both receive postCuratorContext (without registryMap) - confirm intended.
      postRegistryContext = postCuratorContext.copy(registryMap = registryMap)
      mahaRequestLogWriter <- initKafkaLogWriter(jsonMahaServiceConfig.jsonMahaRequestLogConfig)(postCuratorContext)
    } yield {
      // Section name -> built objects of that section, used to attach dynamic properties.
      // NOTE(review): the data source and user time zone provider maps are not registered here,
      // so a dynamic property under those sections would presumably trip the `require` in
      // findDynamicProperties - confirm this is intended.
      val objectNameMapNew = Map(
        BUCKETING_CONFIG_MAP -> bucketConfigMap,
        UTC_TIME_PROVIDER_MAP -> utcTimeProviderMap,
        GENERATOR_MAP -> generatorMap,
        EXECUTOR_MAP -> executorMap,
        REGISTRY_MAP -> registryMap,
        PSE_CONFIG_MAP -> parallelServiceExecutorConfig,
        CURATOR_MAP -> curatorMap
      )
      val dynamicProperties = findDynamicProperties(json, objectNameMapNew)
      val resultMap: Map[String, DynamicRegistryConfig] = registryMap.map {
        case (regName, registry) => {
          // The `.get` calls below throw if a referenced name is missing; presumably
          // validateReferenceByName has already ruled that out - TODO confirm.
          val registryConfig = jsonMahaServiceConfig.registryMap.get(regName.toLowerCase).get
          implicit val queryGeneratorRegistry = new QueryGeneratorRegistry
          // Register only the generators this registry lists.
          generatorMap.filter(g => registryConfig.generators.contains(g._1)).foreach {
            case (name, generator) =>
              queryGeneratorRegistry.register(generator.engine, generator, generator.version)
          }
          val queryExecutorContext = new QueryExecutorContext
          // Register only the executors this registry lists.
          executorMap.filter(e => registryConfig.executors.contains(e._1)).foreach {
            case (_, executor) =>
              queryExecutorContext.register(executor)
          }
          (regName -> new DynamicRegistryConfig(regName,
            registry,
            new DefaultQueryPipelineFactory(defaultFactEngine = registry.defaultFactEngine, druidMultiQueryEngineList = registry.druidMultiQueryEngineList),
            queryExecutorContext,
            new BucketSelector(registry, bucketConfigMap.get(registryConfig.bucketConfigName).get),
            utcTimeProviderMap.get(registryConfig.utcTimeProviderName).get,
            parallelServiceExecutorConfig.get(registryConfig.parallelServiceExecutorName).get,
            userTimeZoneProviderMap.get(registryConfig.userTimeZoneProviderName).get
          ))
        }
      }
      new DynamicMahaServiceConfig(dynamicProperties, postCuratorContext, resultMap, mahaRequestLogWriter, curatorMap, jsonMahaServiceConfig)
    }
  }

  /**
   * Merges the given maps into one new mutable map; when a key occurs in several maps the
   * value from the later map wins.
   *
   * @param maps the maps to merge, in order
   * @return a fresh mutable map containing all entries
   */
  def mergeMaps(maps: Map[String, Object]*): mutable.Map[String, Object] = {
    val mergedMap = new scala.collection.mutable.HashMap[String, Object]()
    maps.foreach(map => mergedMap ++= map)
    mergedMap
  }

  /**
   * Collects the dynamic properties declared anywhere in the configuration document.
   *
   * The document is walked as: top-level field (section) -> field inside that section
   * (entry). `extractDynamicFields` is applied to every entry value, and each dynamic property
   * it reports is associated with the object registered in `objectNameMap` under the section's
   * name.
   *
   * The section name is taken directly from the enclosing top-level field, so entries that
   * share a name across different sections are each attributed to their own section.
   *
   * @param json the configuration document; it and each of its top-level values are cast to
   *             `JObject`, so a `ClassCastException` is thrown if either is not a JSON object
   * @param objectNameMap section name -> object built for that section
   * @return dynamic property key -> info (default value and the objects depending on it)
   * @throws IllegalArgumentException if a section declaring a dynamic property has no entry
   *                                  in `objectNameMap`
   */
  def findDynamicProperties(json: JValue, objectNameMap: Map[String, Object]): Map[String, DynamicPropertyInfo] = {
    val dynamicProperties = new mutable.HashMap[String, DynamicPropertyInfo]()
    implicit val formats = org.json4s.DefaultFormats
    json.asInstanceOf[JObject].obj.foreach {
      case (objectName, sectionJson) =>
        sectionJson.asInstanceOf[JObject].obj.foreach {
          case (_, entryJson) =>
            val dynamicFields = extractDynamicFields(entryJson)
            for ((propertyKey, (_, defaultValue)) <- dynamicFields) {
              require(objectNameMap.contains(objectName), s"Dynamic object with name $objectName not present in objectMap: $objectNameMap")
              if (dynamicProperties.contains(propertyKey)) {
                // Property already known: just record one more object that depends on it.
                dynamicProperties(propertyKey).objects.put(objectName, objectNameMap(objectName))
              } else {
                val dynamicObjects = new mutable.HashMap[String, Object]()
                dynamicObjects.put(objectName, objectNameMap(objectName))
                dynamicProperties.put(propertyKey, new DynamicPropertyInfo(propertyKey, defaultValue, dynamicObjects))
              }
            }
        }
    }
    dynamicProperties.toMap
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy