All downloads are free. The search and download functionality uses the official Maven repository.

org.scalameter.reporting.RegressionReporter.scala Maven / Gradle / Ivy

The newest version!
package org.scalameter
package reporting



import java.util.Date
import scala.collection._
import scala.Numeric.Implicits._
import org.scalameter.utils.Tree
import org.scalameter.utils.Statistics._



/** Reporter that runs a regression `test` on every curve of a result tree, logs the
 *  outcome, and stores the tested curve in the persisted history.
 *
 *  @param test       tester applied to each curve together with its historic curves
 *  @param historian  policy used to merge the tested curve into the existing history
 *  @tparam T         numeric type of the measured values
 */
case class RegressionReporter[T: Numeric](
  test: RegressionReporter.Tester,
  historian: RegressionReporter.Historian
) extends Reporter[T] {

  /** Histories seen so far, keyed by curve context, so each one is loaded only once. */
  private val historyCache = mutable.Map[Context, History[T]]()

  /** Returns the history for `ctx`, loading it through `persistor` on first access
   *  and serving it from the cache afterwards.
   */
  def loadHistory(ctx: Context, persistor: Persistor): History[T] =
    historyCache.getOrElseUpdate(ctx, persistor.load[T](ctx))

  // do nothing - data are persisted at the end
  def report(curvedata: CurveData[T], persistor: Persistor): Unit = {
    log(
      s"Finished test set for ${curvedata.context.scope}, curve ${curvedata.context.curve}")
  }

  /** Tests every curve in `results` against its history, persists the updated
   *  histories and emits one success/failure event per non-empty scope.
   *
   *  @param results    tree of curves produced by the current run
   *  @param persistor  used to load the previous history and to save the new one
   *  @return `true` if no scope contained a failing curve
   */
  def report(results: Tree[CurveData[T]], persistor: Persistor): Boolean = {
    log.report("")
    log.report(s"${ansi.green}:::Summary of regression test results - $test:::${ansi.reset}")

    // A single timestamp is shared by all history entries written during this report.
    val currentDate = new Date
    val oks = for {
      (context, curves) <- results.scopes
      if curves.nonEmpty
    } yield {
      log.report(s"${ansi.green}Test group: ${context.scope}${ansi.reset}")

      val testedcurves = for (curvedata <- curves) yield {
        val history = loadHistory(curvedata.context, persistor)
        // Without any history the curve is compared against itself.
        val corresponding =
          if (history.curves.nonEmpty) history.curves else Seq(curvedata)

        val testedcurve = test(context, curvedata, corresponding)

        val newhistory =
          historian.bookkeep(curvedata.context, history, testedcurve, currentDate)
        persistor.save(curvedata.context, newhistory)
        // Keep the cache in sync with what was just persisted; otherwise a later
        // report on this instance would start from the stale, pre-save history and
        // overwrite the entry written above.
        historyCache(curvedata.context) = newhistory

        testedcurve
      }

      log.report("")

      val allpassed = testedcurves.forall(_.success)
      if (allpassed)
        events.emit(Event(context.scope, "Test succeeded", Events.Success, null))
      else events.emit(Event(context.scope, "Test failed", Events.Failure, null))

      allpassed
    }

    val failure = oks.count(_ == false)
    val success = oks.count(_ == true)
    val color = if (failure == 0) ansi.green else ansi.red
    log.report(s"${color}Summary: $success tests passed, $failure tests failed.${ansi.reset}")

    failure == 0
  }

}


object RegressionReporter {

  import Key._

  /** Represents a policy for adding the newest result to the history of results.
   *
   *  Implementations decide which of the older entries are kept when a new one
   *  is recorded.
   */
  trait Historian {
    /** Given an old history and the latest curve and its date, returns a new history,
     *  possibly discarding some of the entries.
     *
     *  @param ctx     context stored alongside the new entry
     *  @param h       history recorded so far
     *  @param newest  latest curve to be added to the history
     *  @param d       date stored alongside the new entry
     *  @return the history that should replace `h`
     */
    def bookkeep[T](ctx: Context, h: History[T], newest: CurveData[T], d: Date):
      History[T]
  }

  object Historian {

    /** Historian that never discards anything: the new entry is appended to the
     *  full list of existing results.
     */
    case class Complete() extends Historian {
      def bookkeep[T](ctx: Context, h: History[T], newest: CurveData[T], d: Date) = {
        val entry: History.Entry[T] = (d, ctx, newest)
        val extended = h.results :+ entry
        History(extended)
      }
    }

    /** Historian that keeps a sliding window: after appending the new entry only
     *  the trailing `size` results are retained.
     */
    case class Window(size: Int) extends Historian {
      def bookkeep[T](ctx: Context, h: History[T], newest: CurveData[T], d: Date) = {
        val entry: History.Entry[T] = (d, ctx, newest)
        val windowed = (h.results :+ entry).takeRight(size)
        h.copy(results = windowed)
      }
    }

    /** Implements a checkpointing strategy such that the number of preserved results
     *  decreases exponentially with the age of the result.
     *
     *  Every retained entry carries a `Long` index that grows by one each time a new
     *  result is pushed. Indices are grouped into buckets `(0, 1]`, `(1, 3]`, `(3, 7]`,
     *  `(7, 15]`, ... and only one entry per bucket survives a push. The indices are
     *  stored in the history info map under `reports.regression.timeIndices`.
     */
    case class ExponentialBackoff() extends Historian {

      /** Prunes `series` according to `indices` and appends `newest`.
       *
       *  @param series   existing entries; the historians in this file append new
       *                  entries at the end, so the last element is the most recent
       *  @param indices  indices paired positionally with `series.reverse`
       *  @param newest   entry to append after pruning
       *  @return a history whose info map contains only the updated time indices
       */
      def push[T](series: Seq[History.Entry[T]], indices: Seq[Long],
        newest: History.Entry[T]): History[T] = {
        // Pair entries (reversed, i.e. last-appended first) with their indices;
        // `zip` silently drops whatever is left over on the longer side.
        val entries = series.reverse zip indices
        // Lazy sequence of bucket boundaries: 0, 1, 3, 7, 15, ...
        val sizes = Stream.from(0).map(1L << _).scanLeft(0L)(_ + _)
        // Consecutive boundaries form the half-open ranges (from, to].
        val buckets = sizes zip sizes.tail
        val bucketed = buckets map {
          case (from, to) => entries filter {
            case (_, idx) => from < idx && idx <= to
          }
        }
        // Stop at the first empty bucket (this also terminates the infinite stream),
        // keep only the last element of each bucket and bump its index by one.
        // Entries that sit in buckets beyond the first empty one are dropped.
        val pruned = bucketed takeWhile { _.nonEmpty } map { elems =>
          val (last, lastidx) = elems.last
          (last, lastidx + 1)
        }
        val (newentries, newindices) = pruned.unzip

        // Restore the original ordering before appending; the new entry gets index 1,
        // placed first so that indices keep lining up with the reversed series.
        History(
          newentries.toBuffer.reverse :+ newest,
          immutable.Map(reports.regression.timeIndices -> (1L +: newindices.toBuffer)))
      }

      /** Pushes `newest` onto `h`, reading the time indices from the info of `h`.
       *
       *  The second argument of `h.info` is 1, 2, 4, ... (one power of two per
       *  existing result) -- presumably the fallback used when no indices have been
       *  stored yet; confirm against `History.info`.
       */
      def push[T](h: History[T], newest: History.Entry[T]): History[T] = {
        log.verbose("Pushing to history with info: " + h.infomap)

        val indices = h.info[Seq[Long]](
          reports.regression.timeIndices,
          (0 until h.results.length) map { 1L << _ })
        val newhistory = push(h.results, indices, newest)

        log.verbose("New history info: " + newhistory.infomap)

        newhistory
      }

      /** Wraps the curve into a history entry `(d, ctx, newest)` and pushes it. */
      def bookkeep[T](ctx: Context, h: History[T], newest: CurveData[T], d: Date) =
        push(h, (d, ctx, newest))
    }

  }

  /** Performance regression testing mechanism.
   *
   *  A tester inspects the latest curve in the light of earlier curves and marks
   *  the measurements it considers regressions as failed.
   */
  trait Tester {
    /** Given a test performed in a specific `context`, the latest curve (set of
     *  measurements) `curvedata` and previous curves (sets of measurements) for this
     *  test `corresponding`, yields a new version of the latest curve in which the
     *  measurements that are considered to fail the test have their `success` field
     *  set to `false`.
     *
     *  @param context        context of the test being evaluated
     *  @param curvedata      latest curve
     *  @param corresponding  previous curves for the same test
     *  @return the latest curve, with failing measurements marked
     */
    def apply[T: Numeric](
      context: Context, curvedata: CurveData[T], corresponding: Seq[CurveData[T]]
    ): CurveData[T]

    /** Returns a confidence interval for a given set of observations.
     *
     *  This default implementation always fails through `sys.error`; testers that
     *  work with confidence intervals are expected to override it.
     *
     *  @param ctx  context the observations were taken in
     *  @param alt  observations
     *  @return the lower and upper bound of the interval
     */
    def confidenceInterval[T: Numeric](ctx: Context, alt: Seq[T]): (Double, Double) =
      sys.error("Confidence intervals can only be computed by testers which use them.")
  }

  object Tester {

    /** Tester that never rejects: it only logs, for each measurement, the mean and
     *  a confidence interval, and hands the curve back untouched. The historic
     *  curves passed in are not consulted.
     */
    case class Accepter() extends Tester {
      /** Formats an interval as `<low units, high units>` with two decimals. */
      def cistr(ci: (Double, Double), units: String) = {
        val (low, high) = ci
        f"<$low%.2f $units, $high%.2f $units>"
      }

      def apply[T: Numeric](context: Context, curvedata: CurveData[T],
        corresponding: Seq[CurveData[T]]): CurveData[T] = {
        log.report(
          s"${ansi.green}- ${context.scope}.${curvedata.context.curve} measurements:${ansi.reset}")

        curvedata.measurements foreach { m =>
          val tint = ansi.green
          val verdict = "passed"
          val samples = m.complete
          val average = samples.sum.toDouble / samples.size
          val averageText = f"$average%.2f ${m.units}"
          val intervalText = cistr(confidenceInterval(context, samples), m.units)
          val level = context(reports.regression.significance)
          log.report(
            s"$tint  - at ${m.params.axisData.mkString(", ")}: $verdict${ansi.reset}")
          log.report(
            s"$tint    (mean = $averageText, ci = $intervalText, significance = $level)${ansi.reset}")
        }

        curvedata
      }

      /** Confidence interval of `alt`, obtained by running a strict
       *  `ConfidenceIntervalTest` of the observations against themselves and
       *  taking its first interval.
       */
      override def confidenceInterval[T: Numeric](context: Context,
        alt: Seq[T]): (Double, Double) = {
        val level = context(reports.regression.significance)
        val observations = alt.map(_.toDouble)

        ConfidenceIntervalTest(true, observations, observations, level).ci1
      }
    }

    /** Tester that runs an ANOVA F-test over the latest and the historic observations
     *  taken at the same parameters. A measurement is marked as failed when the
     *  F-test does not pass or when the test itself throws.
     */
    case class ANOVA() extends Tester {
      def apply[T: Numeric](context: Context, curvedata: CurveData[T],
        corresponding: Seq[CurveData[T]]): CurveData[T] = {
        log(s"${ansi.green}- ${context.scope}.${curvedata.context.curve} measurements:${ansi.reset}")

        val significance = curvedata.context(reports.regression.significance)
        // Historic and latest measurements, grouped by the parameters they were taken at.
        val byParams =
          (corresponding :+ curvedata).flatMap(_.measurements).groupBy(_.params)

        // Runs the F-test for one measurement and returns it, marked failed if needed.
        def evaluate(measurement: Measurement[T]): Measurement[T] = {
          val units = measurement.units
          // Only measurements that previously succeeded take part in the comparison.
          val alternatives =
            byParams(measurement.params).filter(_.success).map(_.complete)
          try {
            val ftest = ANOVAFTest(alternatives.map(_.map(_.toDouble)), significance)
            val (color, passed) =
              if (ftest) (ansi.green, "passed") else (ansi.red, "failed")

            log.report(
              s"$color  - at ${measurement.params.axisData.mkString(", ")}, ${alternatives.size} alternatives: $passed${ansi.reset}")
            log.report(
              f"$color    (SSA: ${ftest.ssa}%.2f, SSE: ${ftest.sse}%.2f, F: ${ftest.F}%.2f, qf: ${ftest.quantile}%.2f, significance: $significance)${ansi.reset}")
            if (!ftest) {
              // On failure, dump the raw observations that went into the test.
              def dump(series: Seq[T], suffix: String) =
                log.report(
                  s"$color      ${series.map(_.toString + suffix).mkString(", ")}${ansi.reset}")
              log.report(s"$color    History:")
              alternatives.init foreach { series => dump(series, units) }
              log.report(s"$color    Latest:")
              dump(alternatives.last, units)
            }

            if (ftest.passed) measurement else measurement.failed
          } catch {
            case e: Exception =>
              log.report(s"${ansi.red}    Error in ANOVA F-test: ${e.getMessage}${ansi.reset}")
              measurement.failed
          }
        }

        curvedata.copy(measurements = curvedata.measurements.sorted.map(evaluate))
      }
    }

    /** Tester that compares the latest measurement with every successful historic
     *  measurement taken at the same parameters, using `ConfidenceIntervalTest`.
     *
     *  @param strict  passed through as the first argument of `ConfidenceIntervalTest`
     */
    case class ConfidenceIntervals(strict: Boolean = false) extends Tester {
      import scala.Numeric.Implicits._

      /** Formats an interval as `<low units, high units>` with two decimals. */
      def cistr(ci: (Double, Double), units: String) = f"<${ci._1}%.2f $units, ${ci._2}%.2f $units>"

      /** Tests `latest` against a single `previous` measurement.
       *
       *  @param previous  historic measurement to compare with
       *  @param latest    measurement under test
       *  @param sig       significance level handed to the test
       *  @return `latest`, or `latest.failed` if the test fails or throws
       */
      def single[T: Numeric](previous: Measurement[T],
        latest: Measurement[T], sig: Double): Measurement[T] = {
        try {
          val citest = ConfidenceIntervalTest(strict, previous.complete.map(_.toDouble),
            latest.complete.map(_.toDouble), sig)
          val units = latest.units
          
          if (!citest) {
            val color = ansi.red
            val ciprev = cistr(citest.ci1, units)
            val cilate = cistr(citest.ci2, units)
            val prevform = previous.complete.map(v => f"${v.toDouble}%.2f")
            val lateform = latest.complete.map(v => f"${v.toDouble}%.2f")
            log.error(
              f"$color      Failed confidence interval test: <${citest.ci._1}%.2f $units, ${citest.ci._2}%.2f $units> ${ansi.reset}\n" +
              f"$color      Previous (mean = ${citest.m1}%.2f $units, stdev = ${citest.s1}%.2f $units, ci = $ciprev): ${prevform.mkString(", ")}${ansi.reset}\n" +
              f"$color      Latest   (mean = ${citest.m2}%.2f $units, stdev = ${citest.s2}%.2f $units, ci = $cilate): ${lateform.mkString(", ")}${ansi.reset}"
            )
            latest.failed
          } else latest
        } catch {
          // A test that cannot be computed is reported and counted as a failure.
          case e: Exception =>
            log.error(s"${ansi.red}    Error in confidence interval test: ${e.getMessage}${ansi.reset}")
            latest.failed
        }
      }

      /** Tests `latest` against all successful measurements in `previouss` and logs
       *  a one-line verdict together with the confidence interval of `latest`.
       *
       *  @param context    context the significance level is read from
       *  @param previouss  historic measurements at the same parameters; may be empty,
       *                    in which case `latest` passes
       *  @param latest     measurement under test
       *  @return the first failed result, or `latest` if every comparison passed
       */
      def multiple[T: Numeric](context: Context, previouss: Seq[Measurement[T]],
        latest: Measurement[T]): Measurement[T] = {
        val sig = context(reports.regression.significance)
        val tests = for (previous <- previouss if previous.success) yield single(previous, latest, sig)
        val allpass = tests.forall(_.success)
        val color = if (allpass) ansi.green else ansi.red
        val passed = if (allpass) "passed" else "failed"
        val ci = confidenceInterval(context, latest.complete.map(_.toDouble))
        val cis = cistr(ci, latest.units)
        log.report(
          s"$color  - at ${latest.params.axisData.mkString(", ")}, ${previouss.size} alternatives: $passed${ansi.reset}")
        log.report(
          s"$color    (ci = $cis, significance = $sig)${ansi.reset}")
        tests.find(!_.success).getOrElse(latest)
      }

      /** Tests every measurement of `curvedata` against the historic measurements in
       *  `corresponding` that were taken at the same parameters. Settings are read
       *  from `curvedata.context`.
       *
       *  @return a copy of `curvedata` whose failing measurements are marked as failed
       */
      def apply[T: Numeric](context: Context, curvedata: CurveData[T],
        corresponding: Seq[CurveData[T]]): CurveData[T] = {
        log.report(
          s"${ansi.green}- ${context.scope}.${curvedata.context.curve} measurements:${ansi.reset}")

        val previousmeasurements = corresponding map (_.measurements)
        val measurementtable = previousmeasurements.flatten.groupBy(_.params)
        val newmeasurements = for {
          measurement <- curvedata.measurements
        } yield {
          // A parameter combination that is absent from the history (for example a
          // newly added axis value) has nothing to be compared against. Treat it as
          // having no alternatives instead of failing with NoSuchElementException.
          val previous: Seq[Measurement[T]] =
            measurementtable.getOrElse(measurement.params, Seq.empty[Measurement[T]])
          multiple(curvedata.context, previous, measurement)
        }

        curvedata.copy(measurements = newmeasurements)
      }

      /** Confidence interval of `alt`, obtained by running `ConfidenceIntervalTest`
       *  on the observations against themselves and taking its first interval.
       */
      override def confidenceInterval[T: Numeric](
        context: Context,
        alt: Seq[T]
      ): (Double, Double) = {
        val significance = context(reports.regression.significance)

        val citest = ConfidenceIntervalTest(strict, alt.map(_.toDouble),
          alt.map(_.toDouble), significance)
        citest.ci1
      }
    }

    /** Tester that compares the latest measurement with every successful historic
     *  measurement taken at the same parameters, using `OverlapTest` with the
     *  configured significance and noise magnitude.
     */
    case class OverlapIntervals() extends Tester {
      import scala.Numeric.Implicits._

      /** Formats an interval as `<low units, high units>` with two decimals. */
      def cistr(ci: (Double, Double), units: String) = f"<${ci._1}%.2f $units, ${ci._2}%.2f $units>"

      /** Tests `latest` against a single `previous` measurement.
       *
       *  @param previous        historic measurement to compare with
       *  @param latest          measurement under test
       *  @param sig             significance level handed to the test
       *  @param noiseMagnitude  noise magnitude handed to the test
       *  @return `latest`, or `latest.failed` if the test fails or throws
       */
      def single[T: Numeric](
        previous: Measurement[T],
        latest: Measurement[T],
        sig: Double, noiseMagnitude: Double
      ): Measurement[T] = {
        try {
          val citest = OverlapTest(previous.complete.map(_.toDouble),
            latest.complete.map(_.toDouble), sig, noiseMagnitude)
          val units = latest.units
          
          if (!citest) {
            val color = ansi.red
            val ciprev = cistr(citest.ci1, units)
            val cilate = cistr(citest.ci2, units)
            val prevform = previous.complete.map(v => f"${v.toDouble}%.2f")
            val lateform = latest.complete.map(v => f"${v.toDouble}%.2f")
            val msg = {
              f"$color      Failed overlap interval test. ${ansi.reset}\n" +
              f"$color      Previous (mean = ${citest.m1}%.2f $units, stdev = ${citest.s1}%.2f $units, ci = $ciprev): ${prevform.mkString(", ")}${ansi.reset}\n" +
              f"$color      Latest   (mean = ${citest.m2}%.2f $units, stdev = ${citest.s2}%.2f $units, ci = $cilate): ${lateform.mkString(", ")}${ansi.reset}"
            }
            log.error(msg)
            latest.failed
          } else latest
        } catch {
          // A test that cannot be computed is reported and counted as a failure.
          case e: Exception =>
            log.error(s"${ansi.red}    Error in overlap interval test: ${e.getMessage}${ansi.reset}")
            latest.failed
        }
      }

      /** Tests `latest` against all successful measurements in `previouss` and logs
       *  a one-line verdict together with the confidence interval of `latest`.
       *
       *  @param context    context the significance and noise magnitude are read from
       *  @param previouss  historic measurements at the same parameters; may be empty,
       *                    in which case `latest` passes
       *  @param latest     measurement under test
       *  @return the first failed result, or `latest` if every comparison passed
       */
      def multiple[T: Numeric](context: Context, previouss: Seq[Measurement[T]],
        latest: Measurement[T]): Measurement[T] = {
        val sig = context(reports.regression.significance)
        val noiseMagnitude = context(Key.reports.regression.noiseMagnitude)
        val tests = for (previous <- previouss if previous.success) yield single(previous, latest, sig, noiseMagnitude)
        val allpass = tests.forall(_.success)
        val color = if (allpass) ansi.green else ansi.red
        val passed = if (allpass) "passed" else "failed"
        val ci = confidenceInterval(context, latest.complete.map(_.toDouble))
        val cis = cistr(ci, latest.units)
        log.report(
          s"$color  - at ${latest.params.axisData.mkString(", ")}, ${previouss.size} alternatives: $passed${ansi.reset}")
        log.report(
          s"$color    (ci = $cis, significance = $sig)${ansi.reset}")
        tests.find(!_.success).getOrElse(latest)
      }

      /** Tests every measurement of `curvedata` against the historic measurements in
       *  `corresponding` that were taken at the same parameters. Settings are read
       *  from `context`.
       *
       *  @return a copy of `curvedata` whose failing measurements are marked as failed
       */
      def apply[T: Numeric](context: Context, curvedata: CurveData[T],
        corresponding: Seq[CurveData[T]]): CurveData[T] = {
        log.report(
          s"${ansi.green}- ${context.scope}.${curvedata.context.curve} measurements:${ansi.reset}")

        val previousmeasurements = corresponding map (_.measurements)
        val measurementtable = previousmeasurements.flatten.groupBy(_.params)
        val newmeasurements = for {
          measurement <- curvedata.measurements
        } yield {
          // A parameter combination that is absent from the history (for example a
          // newly added axis value) has nothing to be compared against. Treat it as
          // having no alternatives instead of failing with NoSuchElementException.
          val previous: Seq[Measurement[T]] =
            measurementtable.getOrElse(measurement.params, Seq.empty[Measurement[T]])
          multiple(context, previous, measurement)
        }

        curvedata.copy(measurements = newmeasurements)
      }

      /** Confidence interval of `alt`, obtained by running `OverlapTest` on the
       *  observations against themselves and taking its first interval.
       */
      override def confidenceInterval[T: Numeric](context: Context,
        alt: Seq[T]): (Double, Double) = {
        val significance = context(reports.regression.significance)
        val noisemag = context(Key.reports.regression.noiseMagnitude)

        val test = OverlapTest(
          alt.map(_.toDouble), alt.map(_.toDouble), significance, noisemag)
        test.ci1
      }
    }

  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy