// All downloads are free. Search and download functionality uses the official Maven repository.

// Source file: com.yahoo.maha.service.curators.TimeShiftCurator.scala (Maven / Gradle / Ivy)

// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.service.curators

import com.yahoo.maha.core._
import com.yahoo.maha.core.bucketing.BucketParams
import com.yahoo.maha.core.query._
import com.yahoo.maha.core.request._
import com.yahoo.maha.parrequest2.GeneralError
import com.yahoo.maha.parrequest2.future.{ParFunction, ParRequest}
import com.yahoo.maha.service.error.{MahaServiceBadRequestException, MahaServiceExecutionException}
import com.yahoo.maha.service.utils.CuratorMahaRequestLogBuilder
import com.yahoo.maha.service.{CuratorInjector, MahaRequestContext, MahaService, RequestResult}
import grizzled.slf4j.Logging
import org.json4s.{DefaultFormats, JValue}
import org.json4s.scalaz.JsonScalaz

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.Try
import scalaz.{NonEmptyList, Validation}

/**
 * Companion helpers for [[TimeShiftConfig]]: JSON parsing and safe down-casting
 * from the generic [[CuratorConfig]].
 */
object TimeShiftConfig extends Logging {
  implicit val formats: DefaultFormats.type = DefaultFormats

  /**
   * Reads the optional `sortBy` and `daysOffset` fields from the curator's JSON config.
   *
   * @param curatorJsonConfig wrapper around the raw JSON of the curator config
   * @return the parsed config, or the accumulated JSON errors of both fields
   */
  def parse(curatorJsonConfig: CuratorJsonConfig) : JsonScalaz.Result[TimeShiftConfig] = {
    val json: JValue = curatorJsonConfig.json

    val parsedSortBy: JsonScalaz.Result[Option[SortBy]] = fieldExtended[Option[SortBy]]("sortBy")(json)
    val parsedDaysOffset: JsonScalaz.Result[Option[Int]] = fieldExtended[Option[Int]]("daysOffset")(json)

    import _root_.scalaz.syntax.applicative._
    (parsedSortBy |@| parsedDaysOffset) { (sortBy, daysOffset) =>
      TimeShiftConfig(sortBy, daysOffset)
    }
  }

  /**
   * Narrows a generic curator config to a [[TimeShiftConfig]].
   *
   * @param curatorConfig config handed to the curator
   * @return `Some` when the config is a [[TimeShiftConfig]]; otherwise logs an error and
   *         returns `None`
   */
  def from(curatorConfig: CuratorConfig): Option[TimeShiftConfig] = curatorConfig match {
    // a type pattern never matches null, so Some is equivalent to Option here
    case timeShiftConfig: TimeShiftConfig =>
      Some(timeShiftConfig)
    case other =>
      error(s"Cannot convert ${other.getClass.getSimpleName} to TimeShiftConfig")
      None
  }
}

/**
 * Configuration of the time shift curator.
 *
 * @param sortBy     optional sort instruction; its field and order are applied to that field's
 *                   " Pct Change" column when the field is one of the requested columns
 * @param daysOffset optional number of extra days by which the previous window is moved back
 */
case class TimeShiftConfig(
  sortBy: Option[SortBy],
  daysOffset: Option[Int]
) extends CuratorConfig

/** Constants shared by the time shift curator: its name, column suffixes and sort orderings. */
object TimeShiftCurator {
  /** Name under which the curator is registered. */
  val name: String = "timeshift"
  /** Suffix appended to a fact alias for the previous-window value column. */
  val PREV_STRING: String = " Prev"
  /** Suffix appended to a fact alias for the percentage-change column. */
  val PCT_CHANGE_STRING: String = " Pct Change"
  /** Orders doubles from largest to smallest. */
  val descOrdering: Ordering[Double] = Ordering.fromLessThan[Double](_ > _)
  /** Orders doubles from smallest to largest. */
  val ascOrdering: Ordering[Double] = Ordering.fromLessThan[Double](_ < _)
}

/**
 * Curator that compares the requested window of a report against a previous window.
 *
 * It executes the request as given, then executes it again with the day filter moved back
 * (see `getRequestModelForPreviousWindow`) and restricted to the dimension values found in the
 * first result. The final row list carries, for every fact column alias, an extra
 * `<alias> Prev` column and an extra `<alias> Pct Change` column, and is sorted by one of the
 * percentage-change columns.
 *
 * @param requestModelValidator validator applied to the request model of the requested window
 */
class TimeShiftCurator (override val requestModelValidator: CuratorRequestModelValidator = NoopCuratorRequestModelValidator) extends Curator with Logging {

  override val name: String = TimeShiftCurator.name
  override val level: Int = 1
  override val priority: Int = 0
  override val isSingleton: Boolean = true
  override def requiresDefaultCurator: Boolean = false

  /**
   * Parses the curator JSON config into a [[TimeShiftConfig]].
   *
   * Any parse failure is reported as a single `UncategorizedError` keyed
   * "parseTimeShiftConfigValidation" whose message embeds the underlying failure.
   */
  override def parseConfig(config: CuratorJsonConfig): Validation[NonEmptyList[JsonScalaz.Error], CuratorConfig] = {
    val timeshiftConfigTry : JsonScalaz.Result[TimeShiftConfig] = TimeShiftConfig.parse(config)
    Validation
      .fromTryCatchNonFatal{
        require(timeshiftConfigTry.isSuccess, "Must succeed in creating a timeshiftConfig " + timeshiftConfigTry)
        timeshiftConfigTry.toOption.get}
      .leftMap[JsonScalaz.Error](t => JsonScalaz.UncategorizedError("parseTimeShiftConfigValidation", t.getMessage, List.empty)).toValidationNel
  }

  /**
   * Builds the request model for the previous window.
   *
   * For a `BetweenFilter` day filter `[from, to]` spanning `n = getDaysBetween(from, to)` days the
   * previous window is `[from - n - offset, from - offset]` with `offset = 1 + daysOffset`
   * (`daysOffset` defaults to 0). Any other day filter is left unchanged. One extra filter per
   * dimension restricts the previous window to the values seen in the requested window.
   *
   * @param dimensionAndItsValues dimension alias paired with the values found in the requested window
   * @param config                optional time shift config supplying `daysOffset`
   * @return the result of `mahaService.generateRequestModel` for the shifted request
   */
  private[this] def getRequestModelForPreviousWindow(registryName: String,
                                                     bucketParams: BucketParams,
                                                     reportingRequest: ReportingRequest,
                                                     mahaService: MahaService,
                                                     mahaRequestLogBuilder: CuratorMahaRequestLogBuilder,
                                                     dimensionAndItsValues: List[(String, Set[String])],
                                                     config: Option[TimeShiftConfig]
                                                    ) : Try[RequestModelResult] = {

    val updatedReportingRequest: ReportingRequest = reportingRequest.dayFilter match {
      case BetweenFilter(field, from, to) =>
        val fromDateTime = DailyGrain.fromFormattedString(from)
        val betweenDays = DailyGrain.getDaysBetween(from , to)
        val offset = 1 + config.flatMap(_.daysOffset).getOrElse(0)

        val fromForPreviousWindow: String = DailyGrain.toFormattedString(fromDateTime.minusDays(betweenDays).minusDays(offset))
        val toForPreviousWindow: String = DailyGrain.toFormattedString(fromDateTime.minusDays(offset))
        reportingRequest.copy(dayFilter = BetweenFilter(field, fromForPreviousWindow, toForPreviousWindow))
      case _ => reportingRequest
    }

    // A single value becomes an equality filter, several values an IN filter; a dimension
    // without any value contributes no filter at all.
    val dimensionFilters: IndexedSeq[Filter] = dimensionAndItsValues.collect {
      case (dimension, values) if values.size == 1 => EqualityFilter(dimension, values.head)
      case (dimension, values) if values.nonEmpty => InFilter(dimension, values.toList)
    }.toIndexedSeq

    val newFilters: IndexedSeq[Filter] = updatedReportingRequest.filterExpressions ++ dimensionFilters
    val filterUpdatedReportingRequest: ReportingRequest = updatedReportingRequest.copy(filterExpressions = newFilters)

    if(reportingRequest.isDebugEnabled) {
      info(s"previous period day filter : ${filterUpdatedReportingRequest.dayFilter}")
      info(s"previous period filter expressions : ${filterUpdatedReportingRequest.filterExpressions}")
    }
    mahaService.generateRequestModel(registryName, filterUpdatedReportingRequest, bucketParams)
  }

  /**
   * Generates and executes the request for the requested window and chains the previous-window
   * comparison onto its result.
   *
   * @return a curator error when the request model cannot be built, validated or submitted;
   *         otherwise the curator result whose production run yields the derived row list
   */
  override def process(resultMap: Map[String, Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]]]
                       , mahaRequestContext: MahaRequestContext
                       , mahaService: MahaService
                       , mahaRequestLogBuilder: CuratorMahaRequestLogBuilder
                       , curatorConfig: CuratorConfig
                       , curatorInjector: CuratorInjector
                      ) : Either[CuratorError, IndexedSeq[ParRequest[CuratorResult]]] = {

    val parallelServiceExecutor = mahaService.getParallelServiceExecutor(mahaRequestContext)
    val parRequestLabel = "processTimeshiftCurator"

    val defaultWindowRequestModelResultTry: Try[RequestModelResult] = mahaService
      .generateRequestModel(mahaRequestContext.registryName
        , mahaRequestContext.reportingRequest, mahaRequestContext.bucketParams)
    if(defaultWindowRequestModelResultTry.isFailure) {
      val message = defaultWindowRequestModelResultTry.failed.get.getMessage
      mahaRequestLogBuilder.logFailed(message)
      withError(curatorConfig, GeneralError.from(parRequestLabel
        , message, MahaServiceBadRequestException(message, defaultWindowRequestModelResultTry.failed.toOption)))
    } else {
      try {
        val defaultWindowRequestModelResult = defaultWindowRequestModelResultTry.get
        requestModelValidator.validate(mahaRequestContext, defaultWindowRequestModelResult)

        val parRequestResult = mahaService.executeRequestModelResult(mahaRequestContext.registryName
          , defaultWindowRequestModelResult, mahaRequestLogBuilder)

        val defaultWindowRequestModel = defaultWindowRequestModelResult.model
        // The function below runs when the requested-window result is available, so it must
        // produce its own Either and never `return` out of `process`.
        val finalRequestResult: ParRequest[RequestResult] = parRequestResult.prodRun.map(
          "previousWindowTimeshiftCurator"
          , ParFunction.fromScala {
            defaultWindowRequestResult =>
              computeTimeShiftResult(curatorConfig
                , parRequestLabel
                , mahaRequestContext
                , mahaService
                , mahaRequestLogBuilder
                , defaultWindowRequestModel
                , defaultWindowRequestResult)
          })
        withResult(
          parRequestLabel
          , parallelServiceExecutor
          , CuratorResult(this, curatorConfig, Option(parRequestResult.copy(prodRun = finalRequestResult)), defaultWindowRequestModelResult))
      }
      catch {
        case e: Exception =>
          mahaRequestLogBuilder.logFailed(e.getMessage)
          withError(curatorConfig, GeneralError.from(parRequestLabel
            , e.getMessage, MahaServiceBadRequestException(e.getMessage, Option(e))))

      }
    }
  }

  /**
   * Runs the previous-window request for an already computed requested-window result and merges
   * both into a derived row list.
   *
   * Every failure (unsupported row list type, previous-window model generation, previous-window
   * execution) is logged on `mahaRequestLogBuilder` and returned as a Left.
   *
   * @param defaultWindowRequestModel  request model of the requested window
   * @param defaultWindowRequestResult result of executing that model
   * @return the requested-window result with its row list replaced by the derived one, or an error
   */
  private[this] def computeTimeShiftResult(curatorConfig: CuratorConfig
                                           , parRequestLabel: String
                                           , mahaRequestContext: MahaRequestContext
                                           , mahaService: MahaService
                                           , mahaRequestLogBuilder: CuratorMahaRequestLogBuilder
                                           , defaultWindowRequestModel: RequestModel
                                           , defaultWindowRequestResult: RequestResult
                                          ): Either[GeneralError, RequestResult] = {
    defaultWindowRequestResult.queryPipelineResult.rowList match {
      case defaultWindowRowList: InMemRowList =>
        // Requested fields that are dimension columns of the public fact; they identify a row.
        // bestCandidates.get throws when the model has no best candidates.
        val dimensionKeySet: Set[String] = defaultWindowRequestModel.bestCandidates.get.publicFact.dimCols.map(_.alias)
          .intersect(defaultWindowRequestModel.reportingRequest.selectFields.map(_.field).toSet)

        val timeshiftConfig = TimeShiftConfig.from(curatorConfig)
        val previousWindowRequestModelResultTry: Try[RequestModelResult] =
          getRequestModelForPreviousWindow(mahaRequestContext.registryName,
            mahaRequestContext.bucketParams,
            mahaRequestContext.reportingRequest,
            mahaService,
            mahaRequestLogBuilder,
            collectDimensionValues(defaultWindowRowList, dimensionKeySet),
            timeshiftConfig
          )

        if (previousWindowRequestModelResultTry.isFailure) {
          val failure = previousWindowRequestModelResultTry.failed
          val message = failure.get.getMessage
          mahaRequestLogBuilder.logFailed(message)
          // keep the original failure as the cause, as done for the requested window
          withRequestResultError(curatorConfig
            , GeneralError.from(parRequestLabel, message, MahaServiceBadRequestException(message, failure.toOption)))
        } else {
          val previousWindowRequestResultEither = mahaService
            .processRequestModel(mahaRequestContext.registryName, previousWindowRequestModelResultTry.get.model, mahaRequestLogBuilder)
          previousWindowRequestResultEither match {
            case Right(previousWindowRequestResult) =>
              previousWindowRequestResult.queryPipelineResult.rowList match {
                case previousWindowRowList: InMemRowList =>
                  val derivedRowList: DerivedRowList = createDerivedRowList(
                    defaultWindowRequestResult.queryPipelineResult
                    , defaultWindowRequestModel
                    , defaultWindowRowList
                    , previousWindowRowList
                    , dimensionKeySet, timeshiftConfig)

                  mahaRequestLogBuilder.logSuccess()
                  Right(RequestResult(defaultWindowRequestResult.queryPipelineResult.copy(rowList = derivedRowList)))
                case rl =>
                  unsupportedRowListError(curatorConfig, "previousWindowRowList", rl, mahaRequestLogBuilder)
              }
            case _ =>
              processError(curatorConfig, parRequestLabel, previousWindowRequestResultEither, mahaRequestLogBuilder)
          }
        }
      case rl =>
        unsupportedRowListError(curatorConfig, "defaultWindowRowList", rl, mahaRequestLogBuilder)
    }
  }

  /**
   * Collects, per dimension alias, the distinct non-null values (as strings) found in the rows.
   * A dimension that has no non-null value in any row is absent from the result.
   */
  private[this] def collectDimensionValues(rowList: InMemRowList
                                           , dimensionKeySet: Set[String]): List[(String, Set[String])] = {
    val valuesByDimension: mutable.Map[String, mutable.Set[String]] = new mutable.HashMap[String, mutable.Set[String]]()
    rowList.foreach { row =>
      dimensionKeySet.foreach { dimension =>
        // a null dimension value cannot be turned into a filter value, so it is skipped
        Option(row.getValue(dimension)).foreach { value =>
          valuesByDimension.getOrElseUpdate(dimension, new mutable.HashSet[String]()) += value.toString
        }
      }
    }
    valuesByDimension.map { case (dimension, values) => (dimension, values.toSet) }.toList
  }

  /** Logs and returns the error for a row list that is not an `InMemRowList` (or is null). */
  private[this] def unsupportedRowListError(curatorConfig: CuratorConfig
                                            , stage: String
                                            , rowList: AnyRef
                                            , mahaRequestLogBuilder: CuratorMahaRequestLogBuilder): Either[GeneralError, RequestResult] = {
    val message = s"Unsupported row list ${Option(rowList).map(_.getClass.getSimpleName)}"
    mahaRequestLogBuilder.logFailed(message)
    withRequestResultError(curatorConfig, GeneralError.from(stage, message))
  }

  /**
   * Converts a failed request result into a curator error. Must only be called with a Left.
   * The message of the wrapped throwable is preferred over the error's own message.
   */
  private[this] def processError(curatorConfig: CuratorConfig
                                 , parRequestLabel: String, resultEither: Either[GeneralError, RequestResult]
                                 , mahaRequestLogBuilder: CuratorMahaRequestLogBuilder): Either[CuratorError, RequestResult] = {
    val ge = resultEither.left.get
    val message = ge.throwableOption.map(_.getMessage).getOrElse(ge.message)
    mahaRequestLogBuilder.logFailed(message)
    withRequestResultError(curatorConfig, GeneralError.from(parRequestLabel
      , message, MahaServiceExecutionException(message, ge.throwableOption)))
  }

  /**
   * Percentage change from `prevValue` to `currentValue`, rounded half-up to two decimals.
   *
   * No difference yields 0; a difference against a previous value of 0 yields 100.
   *
   * @return `None` when the result is NaN or infinite, because `BigDecimal` cannot represent it
   */
  private[this] def pctChange(prevValue: Double, currentValue: Double): Option[Double] = {
    val diff = currentValue - prevValue
    val pct: Double =
      if (diff == 0) 0D
      else if (prevValue == 0) 100D
      else (diff / prevValue) * 100
    if (java.lang.Double.isNaN(pct) || java.lang.Double.isInfinite(pct)) {
      None
    } else {
      Some(BigDecimal(pct).setScale(2, BigDecimal.RoundingMode.HALF_UP).toDouble)
    }
  }

  /**
   * Merges the requested-window rows with the previous-window rows.
   *
   * Rows are joined on the string form of their dimension values. Each output row holds the
   * requested columns plus, per fact alias, the previous value (0 when there is no matching
   * previous row) and the percentage change (left unset when either value is not numeric).
   * Rows are sorted by the configured sort field's percentage-change column when that field is
   * a requested column, otherwise by the first percentage-change column in descending order.
   *
   * @param defaultWindowResult       pipeline result of the requested window (supplies the driving query)
   * @param defaultWindowRequestModel request model of the requested window
   * @param defaultWindowRowList      rows of the requested window
   * @param previousWindowRowList     rows of the previous window
   * @param dimensionKeySet           dimension aliases that identify a row
   * @param timeshiftConfig           optional config supplying the sort instruction
   */
  private[this] def createDerivedRowList(defaultWindowResult: QueryPipelineResult
                                         , defaultWindowRequestModel: RequestModel
                                         , defaultWindowRowList: InMemRowList
                                         , previousWindowRowList: InMemRowList
                                         , dimensionKeySet: Set[String]
                                         , timeshiftConfig: Option[TimeShiftConfig]) : DerivedRowList = {

    val injectableColumns: ArrayBuffer[ColumnInfo] = new collection.mutable.ArrayBuffer[ColumnInfo]()
    // Injected alias -> fact alias it derives from. Looking columns up here, instead of searching
    // for the suffix text, keeps requested columns whose own alias contains a suffix intact.
    val prevAliasToOriginal = new mutable.HashMap[String, String]()
    val pctChangeAliasToOriginal = new mutable.HashMap[String, String]()
    defaultWindowRequestModel.bestCandidates.get.factColAliases
      .foreach { colAlias =>
        val prevAlias = colAlias + TimeShiftCurator.PREV_STRING
        val pctChangeAlias = colAlias + TimeShiftCurator.PCT_CHANGE_STRING
        injectableColumns += FactColumnInfo(prevAlias)
        injectableColumns += FactColumnInfo(pctChangeAlias)
        prevAliasToOriginal.put(prevAlias, colAlias)
        pctChangeAliasToOriginal.put(pctChangeAlias, colAlias)
      }

    val columns: IndexedSeq[ColumnInfo] = defaultWindowRequestModel.requestCols ++ injectableColumns
    val aliasMap : Map[String, Int] = columns.map(_.alias).zipWithIndex.toMap

    // One key element per dimension, in a fixed order, so values of different dimensions can
    // neither run into each other nor collapse.
    val dimensionKeys: List[String] = dimensionKeySet.toList
    def primaryKeyOf(row: Row): List[String] = dimensionKeys.map(alias => s"${row.getValue(alias)}")

    val primaryKeyToRowMap = new collection.mutable.HashMap[List[String], Row]
    previousWindowRowList.foreach(timeShiftRow => {
      primaryKeyToRowMap.put(primaryKeyOf(timeShiftRow), timeShiftRow)
    })

    val unsortedRows: ArrayBuffer[Row] = new ArrayBuffer[Row](defaultWindowRowList.size)

    defaultWindowRowList.foreach(defaultRow => {
      val timeShiftRowOption: Option[Row] = primaryKeyToRowMap.get(primaryKeyOf(defaultRow))
      // previous-window value of a fact alias, 0 when the row has no previous counterpart
      def previousValueOf(originalAlias: String): Any =
        timeShiftRowOption.map(_.getValue(originalAlias)).getOrElse(0)

      val row: Row = new Row(aliasMap, ArrayBuffer.fill[Any](aliasMap.size)(null))
      aliasMap.foreach {
        case (alias, pos) =>
          if (prevAliasToOriginal.contains(alias)) {
            row.addValue(pos, previousValueOf(prevAliasToOriginal(alias)))
          } else if (pctChangeAliasToOriginal.contains(alias)) {
            val originalAlias = pctChangeAliasToOriginal(alias)
            (previousValueOf(originalAlias), defaultRow.getValue(originalAlias)) match {
              case (prevValue: Number, currentValue: Number) =>
                pctChange(prevValue.doubleValue(), currentValue.doubleValue()).foreach { pct =>
                  row.addValue(pos, pct)
                }
              case _ => // non-numeric values: the percentage change stays unset
            }
          } else {
            row.addValue(pos, defaultRow.getValue(alias))
          }
      }
      unsortedRows+=row

    })

    // Default: first percentage-change column, largest change first.
    val defaultSortAlias: Option[String] =
      injectableColumns.find(_.alias.endsWith(TimeShiftCurator.PCT_CHANGE_STRING)).map(_.alias)
    val (sortByAliasOption, ordering) = timeshiftConfig.flatMap(_.sortBy) match {
      case Some(sortBy) if defaultWindowRequestModel.requestColsSet(sortBy.field) =>
        val requestedOrdering = sortBy.order match {
          case ASC => TimeShiftCurator.ascOrdering
          case _ => TimeShiftCurator.descOrdering
        }
        (Option(sortBy.field + TimeShiftCurator.PCT_CHANGE_STRING), requestedOrdering)
      case _ =>
        (defaultSortAlias, TimeShiftCurator.descOrdering)
    }

    val sortedList = sortByAliasOption.flatMap(alias => aliasMap.get(alias)) match {
      case Some(pos) =>
        unsortedRows.sortBy {
          row =>
            // unset or non-double values sort as 0
            row.getValue(pos) match {
              case value: Double => value
              case _ => 0D
            }
        }(ordering)
      case None =>
        unsortedRows
    }
    new DerivedRowList(columns
      , sortedList = sortedList
      , drivingQuery = defaultWindowResult.queryChain.drivingQuery
    )
  }

}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy