// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// com.yahoo.maha.service.curators.Curator.scala Maven / Gradle / Ivy

// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.service.curators

import com.yahoo.maha.core.RequestModelResult
import com.yahoo.maha.core.request.{CuratorJsonConfig, Field, ReportingRequest, RowCountQuery, fieldExtended}
import com.yahoo.maha.parrequest2.GeneralError
import com.yahoo.maha.parrequest2.future.{ParFunction, ParRequest, ParallelServiceExecutor}
import com.yahoo.maha.service.error.MahaServiceBadRequestException
import com.yahoo.maha.service.utils.CuratorMahaRequestLogBuilder
import com.yahoo.maha.service.{CuratorInjector, MahaRequestContext, MahaService, ParRequestResult, RequestResult}
import grizzled.slf4j.Logging
import org.json4s.{DefaultFormats, JValue}
import org.json4s.scalaz.JsonScalaz

import scala.util.Try
import scalaz.{NonEmptyList, Validation}

/**
 * Error raised on behalf of a curator.
 *
 * Wraps an underlying [[GeneralError]] and forwards its stage, message and throwable option to the
 * parent constructor, so a CuratorError can be used wherever a GeneralError is expected.
 *
 * @param curator       curator the error is attributed to
 * @param curatorConfig configuration the curator was invoked with
 * @param error         underlying error being wrapped
 * @param index         optional index; None unless the creator supplies one
 */
case class CuratorError(curator: Curator, curatorConfig: CuratorConfig, error: GeneralError, index: Option[Int] = None)
  extends GeneralError(error.stage, error.message, error.throwableOption)
/**
 * Outcome of a curator's processing step.
 *
 * @param curator                curator that produced this result
 * @param curatorConfig          configuration the curator was invoked with
 * @param parRequestResultOption result of an executed request, if the curator executed one
 *                               (the curators in this file pass None when only an estimate was computed)
 * @param requestModelReference  request model result associated with this curator result
 * @param index                  optional index; None by default
 */
case class CuratorResult(curator: Curator
                         , curatorConfig: CuratorConfig
                         , parRequestResultOption: Option[ParRequestResult]
                         , requestModelReference: RequestModelResult
                         , index: Option[Int] = None
                        )

/** Marker trait for curator specific configuration objects. */
trait CuratorConfig
/** Configuration used when a curator has no configuration; this is what the default `Curator.parseConfig` returns. */
object NoConfig extends CuratorConfig

/**
 * Contract implemented by every curator.
 *
 * Curators are ordered by `level` first and by `priority` when the levels are equal.
 * The protected `with...` helpers build the `Either` values returned from `process`
 * and from parallel request callbacks.
 */
trait Curator extends Ordered[Curator] {
  /** Name identifying this curator. */
  def name: String

  /** Primary ordering key, see `compare`. */
  def level: Int

  /** Secondary ordering key, only consulted when two curators share the same level. */
  def priority: Int

  /**
   * Runs this curator.
   *
   * @param resultMap             results (or errors) keyed by name that are already available
   * @param mahaRequestContext    context of the request being served
   * @param mahaService           service used to build and execute request models
   * @param mahaRequestLogBuilder log builder used to record success or failure
   * @param curatorConfig         configuration for this invocation
   * @param curatorInjector       hook allowing a curator to inject another curator
   * @return Left with the error, or Right with the parallel requests producing the curator results
   */
  def process(resultMap: Map[String, Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]]]
              , mahaRequestContext: MahaRequestContext
              , mahaService: MahaService
              , mahaRequestLogBuilder: CuratorMahaRequestLogBuilder
              , curatorConfig: CuratorConfig
              , curatorInjector: CuratorInjector
             ): Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]]

  /** Orders by level; ties on level are broken by priority. */
  def compare(that: Curator): Int = {
    val levelOrder = Integer.compare(this.level, that.level)
    if (levelOrder == 0) Integer.compare(this.priority, that.priority) else levelOrder
  }

  def isSingleton: Boolean

  def requiresDefaultCurator: Boolean

  /** Parses the json configuration of this curator; the default implementation ignores it and yields [[NoConfig]]. */
  def parseConfig(config: CuratorJsonConfig): Validation[NonEmptyList[JsonScalaz.Error], CuratorConfig] = {
    import scalaz.syntax.validation._
    NoConfig.successNel[JsonScalaz.Error]
  }

  protected def requestModelValidator: CuratorRequestModelValidator

  protected def curatorResultPostProcessor: CuratorResultPostProcessor = NoopCuratorResultPostProcessor

  /** Wraps an already computed curator result into an immediately available parallel request. */
  protected def withResult(label: String, parallelServiceExecutor: ParallelServiceExecutor
                           , curatorResult: CuratorResult): Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]] = {
    val immediate: ParRequest[CuratorResult] = parallelServiceExecutor.immediateResult(label, Right(curatorResult))
    Right(IndexedSeq(immediate))
  }

  /** Builds the Left returned from `process` for the given error. */
  protected def withError(curatorConfig: CuratorConfig, error: GeneralError): Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]] =
    Left(CuratorError(this, curatorConfig, error))

  /** Same as `withError`, typed for callers producing a RequestResult. */
  protected def withRequestResultError(curatorConfig: CuratorConfig, error: GeneralError): Either[CuratorError, RequestResult] =
    Left(CuratorError(this, curatorConfig, error))

  /** Wraps a single parallel request as a successful result; `label` is not used by this implementation. */
  protected def withParResult(label: String
                              , parResult: ParRequest[CuratorResult]): Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]] =
    Right(IndexedSeq(parResult))

  /** Builds a Left carrying a CuratorError without an index. */
  protected def withParRequestError[T](curatorConfig: CuratorConfig, error: GeneralError): Either[GeneralError, T] =
    Left(CuratorError(this, curatorConfig, error))

  /** Builds a Left carrying a CuratorError tagged with the given index. */
  protected def withParRequestError[T](curatorConfig: CuratorConfig, error: GeneralError, idx: Int): Either[GeneralError, T] =
    Left(CuratorError(this, curatorConfig, error, Some(idx)))
}

/** Companion holding the name ("default") that the DefaultCurator case class reports as its `name`. */
object DefaultCurator {
  val name: String = "default"
}

/**
 * Hook for checking a generated request model before a curator executes it.
 *
 * The method returns Unit; the curators in this file call it inside a try block and turn any
 * Exception it throws into a curator error.
 */
trait CuratorRequestModelValidator {
  def validate(mahaRequestContext: MahaRequestContext, requestModelResult: RequestModelResult) : Unit
}

/** Validator that accepts every request model without inspecting it. */
object NoopCuratorRequestModelValidator extends CuratorRequestModelValidator {
  def validate(mahaRequestContext: MahaRequestContext, requestModelResult: RequestModelResult): Unit = ()
}

/**
 * Hook applied to a request result after execution.
 *
 * Returns Right with the result to hand on, or Left with an error.
 */
trait CuratorResultPostProcessor {
  def process(mahaRequestContext: MahaRequestContext, requestResult: RequestResult) : Either[GeneralError, RequestResult]
}

/** Post processor that hands the request result back untouched. */
object NoopCuratorResultPostProcessor extends CuratorResultPostProcessor {
  override def process(mahaRequestContext: MahaRequestContext,
                       requestResult: RequestResult): Either[GeneralError, RequestResult] =
    Right(requestResult)
}

/**
 * Curator that generates the request model for the incoming request, executes it and runs the
 * configured post processor on the result. When the request asks for a row count it may also
 * inject the row count curator.
 *
 * @param requestModelValidator      validator invoked on the generated request model
 * @param curatorResultPostProcessor post processor applied to the executed request result
 */
case class DefaultCurator(protected val requestModelValidator: CuratorRequestModelValidator = NoopCuratorRequestModelValidator,
                          override val curatorResultPostProcessor: CuratorResultPostProcessor = NoopCuratorResultPostProcessor) extends Curator with Logging {

  override val name: String = DefaultCurator.name
  override val level: Int = 0
  override val priority: Int = 0
  override val isSingleton: Boolean = false
  override val requiresDefaultCurator: Boolean = false

  /**
   * Generates and executes the request model of the current request.
   *
   * A request model that cannot be generated is logged as failed with status 400. Any exception
   * thrown while validating or executing is logged and returned as an error as well.
   */
  override def process(resultMap: Map[String, Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]]]
                       , mahaRequestContext: MahaRequestContext
                       , mahaService: MahaService
                       , mahaRequestLogBuilder: CuratorMahaRequestLogBuilder
                       , curatorConfig: CuratorConfig
                       , curatorInjector: CuratorInjector
                      ) : Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]] = {

    val executor = mahaService.getParallelServiceExecutor(mahaRequestContext)
    val label = "processDefaultCurator"

    val modelTry = mahaService.generateRequestModel(mahaRequestContext.registryName
      , mahaRequestContext.reportingRequest
      , mahaRequestContext.bucketParams)

    modelTry match {
      case scala.util.Failure(cause) =>
        val message = cause.getMessage
        mahaRequestLogBuilder.logFailed(message, Some(400))
        withError(curatorConfig,
          GeneralError.from(label, message, new MahaServiceBadRequestException(message, modelTry.failed.toOption)))

      case scala.util.Success(requestModelResult) =>
        try {
          requestModelValidator.validate(mahaRequestContext, requestModelResult)
          val parRequestResult: ParRequestResult = mahaService.executeRequestModelResult(mahaRequestContext.registryName
            , requestModelResult, mahaRequestLogBuilder)

          // Row count injection is only considered when a query pipeline was built successfully.
          parRequestResult.queryPipeline.foreach { pipeline =>
            val request = mahaRequestContext.reportingRequest
            // dim driven: inject only for multi engine (best dim candidate engine differs from driving query engine)
            // non dim driven: inject whenever the row count was requested
            val shouldInjectRowCount =
              request.includeRowCount && (
                !request.forceDimensionDriven ||
                  pipeline.bestDimCandidates.headOption.exists { candidate =>
                    candidate.dim.engine != pipeline.queryChain.drivingQuery.engine
                  }
                )

            if (shouldInjectRowCount) {
              curatorInjector.injectCurator(RowCountCurator.name, resultMap, mahaRequestContext, NoConfig)
            }
          }

          val postProcessed = parRequestResult.prodRun.map("postProcess", ParFunction.fromScala {
            requestResult =>
              try {
                val processed = curatorResultPostProcessor.process(mahaRequestContext, requestResult)
                processed match {
                  case Right(_) =>
                    mahaRequestLogBuilder.logSuccess()
                  case Left(ge) =>
                    mahaRequestLogBuilder.logFailed(ge.throwableOption.map(_.getMessage).getOrElse(ge.message))
                }
                processed
              } catch {
                case e: Exception =>
                  // best effort: a failing post processor must not lose the original result
                  val message = "error in post processor, returning original result"
                  logger.error(message, e)
                  mahaRequestLogBuilder.logFailed(s"$message - ${e.getMessage}")
                  new Right(requestResult)
              }
          })

          val curatorResult = CuratorResult(this
            , curatorConfig
            , Option(parRequestResult.copy(prodRun = postProcessed))
            , requestModelResult)
          withResult(label, executor, curatorResult)
        } catch {
          case e: Exception =>
            mahaRequestLogBuilder.logFailed(e.getMessage)
            withError(curatorConfig, GeneralError.from(label
              , e.getMessage, new MahaServiceBadRequestException(e.getMessage, Option(e))))
        }
    }
  }
}

/** Parser for the json configuration of the row count curator. */
object RowCountConfig extends Logging {
  implicit val formats: DefaultFormats.type = DefaultFormats

  /**
   * Reads the "isFactDriven" field from the curator json config.
   *
   * @param curatorJsonConfig json configuration supplied for the curator
   * @return the parsed RowCountConfig, or the json errors reported for the field
   */
  def parse(curatorJsonConfig: CuratorJsonConfig) : JsonScalaz.Result[RowCountConfig] = {
    val json: JValue = curatorJsonConfig.json
    val factDrivenFlag: JsonScalaz.Result[Option[Boolean]] = fieldExtended[Option[Boolean]]("isFactDriven")(json)
    factDrivenFlag.map(flag => RowCountConfig(flag))
  }
}

/**
 * Configuration of the row count curator.
 *
 * @param isFactDriven value of the "isFactDriven" json field when present; for requests that are not
 *                     forced dimension driven, RowCountCurator runs a fact driven row count query only
 *                     when this is Some(true) and otherwise falls back to the cardinality estimate
 */
case class RowCountConfig(isFactDriven: Option[Boolean]) extends CuratorConfig

/** Companion holding the curator name, which is also the key under which the row count is stored in the request's mutable state. */
object RowCountCurator {
  val name: String = "rowcount"

  /**
   * Looks up the row count stored for the given request.
   *
   * @return the stored count when an Int is present under the curator name, None otherwise
   */
  def getRowCount(mahaRequestContext: MahaRequestContext) : Option[Int] =
    mahaRequestContext.mutableState.get(name).collect { case rowCount: Int => rowCount }
}

/**
 * Curator that determines the total row count of a request and stores it in the request's
 * mutable state under [[RowCountCurator.name]].
 *
 * Three strategies are used:
 *  - forced dimension driven requests: a derived request selecting only the primary keys of the
 *    best dim candidates is executed and its total row count is stored;
 *  - `isFactDriven = true` in the [[RowCountConfig]]: a derived fact driven request with query type
 *    RowCountQuery is executed and its total row count is stored;
 *  - otherwise: the dim cardinality estimate of the request model is stored, capped at FACT_ONLY_LIMIT.
 *
 * @param requestModelValidator      validator invoked on the generated request model
 * @param curatorResultPostProcessor post processor (not invoked by this curator's own code)
 * @param FACT_ONLY_LIMIT            upper bound applied to the cardinality estimate
 */
case class RowCountCurator(protected val requestModelValidator: CuratorRequestModelValidator = NoopCuratorRequestModelValidator,
                           override val curatorResultPostProcessor: CuratorResultPostProcessor = NoopCuratorResultPostProcessor,
                           private val FACT_ONLY_LIMIT: Int = 5000
                                ) extends Curator with Logging {
  override def name: String = RowCountCurator.name

  override def level: Int = 1

  override def priority: Int = 1

  /**
   * Parses the json config into a [[RowCountConfig]].
   *
   * Any parse failure is reported as a single uncategorized json error whose message contains the
   * failed parse result.
   */
  override def parseConfig(config: CuratorJsonConfig): Validation[NonEmptyList[JsonScalaz.Error], CuratorConfig] = {
    val rowCountConfigTry : JsonScalaz.Result[RowCountConfig] = RowCountConfig.parse(config)
    Validation
      .fromTryCatchNonFatal{
        require(rowCountConfigTry.isSuccess, "Must succeed in creating a rowCountConfig " + rowCountConfigTry)
        rowCountConfigTry.toOption.get}
      .leftMap[JsonScalaz.Error](t => JsonScalaz.UncategorizedError("parseRowCountConfigValidation", t.getMessage, List.empty)).toValidationNel
  }

  /**
   * Computes the row count for the current request using one of the strategies described on the class.
   *
   * A request model that cannot be generated is logged as failed with status 400. A source pipeline
   * that cannot be generated, and any exception thrown along the way, is logged and returned as an error.
   */
  override def process(resultMap: Map[String, Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]]]
                         , mahaRequestContext: MahaRequestContext
                         , mahaService: MahaService
                         , mahaRequestLogBuilder: CuratorMahaRequestLogBuilder
                         , curatorConfig: CuratorConfig
                         , curatorInjector: CuratorInjector
                        ) : Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]] = {
    val parallelServiceExecutor = mahaService.getParallelServiceExecutor(mahaRequestContext)
    val parRequestLabel = "processTotalRows"

    val requestModelResultTry = mahaService.generateRequestModel(mahaRequestContext.registryName
      , mahaRequestContext.reportingRequest
      , mahaRequestContext.bucketParams)

    if(requestModelResultTry.isFailure) {
      val message = requestModelResultTry.failed.get.getMessage
      mahaRequestLogBuilder.logFailed(message, Some(400))
      withError(curatorConfig,
        GeneralError.from(parRequestLabel
          , message, new MahaServiceBadRequestException(message, requestModelResultTry.failed.toOption))
      )
    } else {
      try {
        val requestModelResult = requestModelResultTry.get
        requestModelValidator.validate(mahaRequestContext, requestModelResult)

        // The source pipeline is needed by every strategy, so it is generated once up front.
        val sourcePipelineTry = mahaService.generateQueryPipelines(mahaRequestContext.registryName
          , requestModelResult.model
          , mahaRequestContext.bucketParams)._1

        if (sourcePipelineTry.isFailure) {
          val exception = sourcePipelineTry.failed.get
          val message = "source pipeline failed"
          mahaRequestLogBuilder.logFailed(s"$message - ${exception.getMessage}")
          withError(curatorConfig, GeneralError.from(parRequestLabel, message, exception))
        } else {
          val sourcePipeline = sourcePipelineTry.get

          if (mahaRequestContext.reportingRequest.forceDimensionDriven) {
            //no filters except fk filters
            val totalRowsCountRequestTry =
              Try {
                require(
                  sourcePipeline.bestDimCandidates.nonEmpty
                  , s"Invalid total rows request, no best dim candidates! : ${sourcePipeline.requestModel}")

                //force dim driven
                //remove all fields except primary key
                //remove all sorts
                val primaryKeyAliasFields = sourcePipeline.bestDimCandidates.map(dim => Field(dim.publicDim.primaryKeyByAlias, None, None)).toIndexedSeq
                sourcePipeline.requestModel.reportingRequest.copy(
                  selectFields = primaryKeyAliasFields
                  , sortBy = IndexedSeq.empty
                  , includeRowCount = true
                  , forceDimensionDriven = true
                  , forceFactDriven = false
                  , paginationStartIndex = 0
                  , rowsPerPage = 1
                  , curatorJsonConfigMap = Map.empty
                )
              }

            executeRowCountRequest(
              mahaRequestContext
              , mahaService
              , mahaRequestLogBuilder
              , parallelServiceExecutor
              , parRequestLabel
              , totalRowsCountRequestTry
              , curatorConfig
              , requestModelResult)
          } else {
            // Only an explicit isFactDriven = true in the config selects the fact driven row count query.
            val requiredRowCountFlag: Boolean = curatorConfig match {
              case RowCountConfig(Some(isFactDriven)) => isFactDriven
              case _ => false
            }

            if (!requiredRowCountFlag) { // use legacy code if didn't specific fact driven query
              val curatorResult = CuratorResult(this, curatorConfig, None, requestModelResult)
              requestModelResult.model.dimCardinalityEstimate match {
                case Some(estimate) =>
                  // the estimate is reported as-is up to FACT_ONLY_LIMIT and capped beyond that
                  val count = math.min(estimate.intValue, FACT_ONLY_LIMIT)
                  mahaRequestContext.mutableState.put(RowCountCurator.name, count)
                  mahaRequestLogBuilder.logSuccess()
                  withResult(parRequestLabel, parallelServiceExecutor, curatorResult)
                case None =>
                  val message = "No row count can be estimated without dim cardinality estimate"
                  mahaRequestLogBuilder.logFailed(message, Option(400))
                  withError(curatorConfig, GeneralError.from(parRequestLabel, message))
              }
            } else { // if isFactDriven = true is specified in config, queryType = RowCountQuery
              val totalRowsCountRequestTry =
                Try {
                  //force fact driven
                  //select fields are kept as requested, query type switched to row count
                  //remove all sorts
                  sourcePipeline.requestModel.reportingRequest.copy(
                    queryType = RowCountQuery
                    , sortBy = IndexedSeq.empty
                    , forceDimensionDriven = false
                    , forceFactDriven = true
                    , paginationStartIndex = 0
                    , rowsPerPage = 1
                    , curatorJsonConfigMap = Map.empty
                  )
                }

              executeRowCountRequest(
                mahaRequestContext
                , mahaService
                , mahaRequestLogBuilder
                , parallelServiceExecutor
                , parRequestLabel
                , totalRowsCountRequestTry
                , curatorConfig
                , requestModelResult)
            }
          }
        }
      }
      catch {
        case e: Exception =>
          mahaRequestLogBuilder.logFailed(e.getMessage)
          withError(curatorConfig, GeneralError.from(parRequestLabel
            , e.getMessage, MahaServiceBadRequestException(e.getMessage, Option(e))))

      }
    }
  }

  override def isSingleton: Boolean = false

  override def requiresDefaultCurator: Boolean = true

  /**
   * Executes the derived row count request and stores the resulting total row count in the
   * request's mutable state once the request completes.
   *
   * @param rowCountRequestTry derived request; a Failure is logged and returned as an error
   * @param requestModelResult request model result of the original request; the returned curator
   *                           result carries a copy of it with the model of the derived request
   * @return Left when the derived request could not be built, otherwise the curator result wrapping
   *         the executed request
   */
  private def executeRowCountRequest(mahaRequestContext: MahaRequestContext
                                     , mahaService: MahaService
                                     , mahaRequestLogBuilder: CuratorMahaRequestLogBuilder
                                     , parallelServiceExecutor: ParallelServiceExecutor
                                     , parRequestLabel: String
                                     , rowCountRequestTry: Try[ReportingRequest]
                                     , curatorConfig: CuratorConfig
                                     , requestModelResult: RequestModelResult
                                    ): Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]] = {
    if (rowCountRequestTry.isFailure) {
      val exception = rowCountRequestTry.failed.get
      val message = "total rows request failed to generate"
      mahaRequestLogBuilder.logFailed(s"${message} - ${exception.getMessage}")
      withError(curatorConfig, GeneralError.from(parRequestLabel, message, exception))
    } else {
      val totalRowsRequest = rowCountRequestTry.get
      val parRequestResult: ParRequestResult = mahaService.executeRequest(mahaRequestContext.registryName
        , totalRowsRequest, mahaRequestContext.bucketParams, mahaRequestLogBuilder)

      // A failed query pipeline rethrows here; both call sites sit inside the try block of process,
      // which logs the failure and converts it into a curator error.
      val totalRowsRequestModel = parRequestResult.queryPipeline.get.requestModel
      if(totalRowsRequest.isDebugEnabled) {
        // Render the values with map/mkString: foreach returns Unit and would only print "()".
        val requestFields = totalRowsRequestModel.reportingRequest.selectFields.map(_.toString).mkString("\t")
        val modelColumns = totalRowsRequestModel.requestCols.map(_.toString).mkString("\t")
        val candidateColumns = totalRowsRequestModel.bestCandidates.toList
          .map(candidate => candidate.requestCols.toString()).mkString("\t")
        info(s"Unfiltered request should not generate any fact candidates!  " +
          s" : Request fields : $requestFields " +
          s" : generated Model columns and candidate names : $modelColumns " +
          s" : $candidateColumns")
      }

      val populateRowCount: ParRequest[RequestResult] = parRequestResult.prodRun.map(parRequestLabel, ParFunction.fromScala {
        requestResult =>
          val count = requestResult.queryPipelineResult.rowList.getTotalRowCount
          mahaRequestContext.mutableState.put(RowCountCurator.name, count)
          mahaRequestLogBuilder.logSuccess()
          new Right(requestResult)
      })

      val finalParRequestResult = parRequestResult.copy(prodRun = populateRowCount)
      val curatorResult = CuratorResult(this, curatorConfig, Option(finalParRequestResult), requestModelResult.copy(model = totalRowsRequestModel))
      withResult(parRequestLabel, parallelServiceExecutor, curatorResult)
    }
  }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy