All downloads are free. Search and download functionality uses the official Maven repository.

com.stripe.brushfire.Errors.scala Maven / Gradle / Ivy

package com.stripe.brushfire

import com.twitter.algebird._

/**
 * FrequencyError captures the most common pattern for scoring
 * discrete distributions:
 * - an error is computed for each (label, count) entry of the actual
 *   distribution and the per-entry errors are summed with `monoid`
 * - a zero is provided for when predictions or actuals are missing
 *
 * Implementations supply `monoid` and the per-label `error`.
 */
trait FrequencyError[L, M, E] extends Error[Map[L, M], Map[L, Double], E] {

  // Assigned once from `monoid` while this trait is being initialised. The
  // implementations in this file declare `monoid` as a `lazy val`, which keeps
  // this field from capturing a not-yet-initialised value.
  val semigroup = monoid

  /** Monoid used both to combine per-label errors and to supply the zero error. */
  def monoid: Monoid[E]

  /**
   * Builds the error for one (actual, predicted) pair of distributions.
   *
   * @param actual    observed labels with their counts
   * @param predicted predicted probability for each label
   * @return `monoid.zero` when `predicted` is empty; otherwise the monoid sum
   *         of `error` evaluated for every entry of `actual`
   */
  def create(actual: Map[L, M], predicted: Map[L, Double]) =
    if (predicted.nonEmpty) {
      val perLabelErrors = actual.map { case (lbl, cnt) => error(lbl, cnt, predicted) }
      monoid.sum(perLabelErrors)
    } else {
      monoid.zero
    }

  /**
   * Error contribution of a single actual label.
   *
   * @param label         the actual label
   * @param count         how often `label` was observed
   * @param probabilities the full predicted distribution
   */
  def error(label: L, count: M, probabilities: Map[L, Double]): E
}

/**
 * Squared-difference (Brier-style) error between the observed label and the
 * predicted distribution, accumulated as an `AveragedValue`.
 */
case class BrierScoreError[L, M](implicit num: Numeric[M])
    extends FrequencyError[L, M, AveragedValue] {
  lazy val monoid = AveragedValue.group

  /**
   * Scores one observed label against the predicted probabilities.
   *
   * @param label         the actual label, treated as probability 1.0
   * @param count         number of observations of `label`; converted with
   *                      `num.toLong` and used as the first argument of the
   *                      resulting `AveragedValue`
   * @param probabilities predicted probability per label
   * @return an `AveragedValue` built from the count and the sum of squared
   *         differences divided by the number of entries in the difference map
   */
  def error(label: L, count: M, probabilities: Map[L, Double]): AveragedValue = {
    // The observed outcome as a distribution: all mass on the actual label.
    val observed: Map[L, Double] = Map(label -> 1.0)
    val residuals = Group.minus(observed, probabilities)
    val squaredTotal = residuals.values.map { d => math.pow(d, 2) }.sum
    // Guard against dividing by zero when the difference map has no entries.
    // NOTE(review): the divisor is the size of the difference map; if the map
    // group drops zero-valued entries this can be smaller than the number of
    // labels -- confirm that is intended.
    val divisor = math.max(residuals.size, 1L)
    AveragedValue(num.toLong(count), squaredTotal / divisor)
  }
}

/**
 * Error for boolean labels that bins observations by the predicted
 * probability of `true`, expressed as an integer percentage.
 *
 * Each bin maps to a pair of counts: the first element accumulates counts of
 * observations whose actual label was `true`, the second those whose actual
 * label was `false`.
 */
case class BinnedBinaryError[M: Monoid]()
    extends FrequencyError[Boolean, M, Map[Int, (M, M)]] {
  lazy val monoid = implicitly[Monoid[Map[Int, (M, M)]]]

  // Truncates (does not round) a probability to an integer percentage.
  private def percentage(p: Double) = (p * 100).toInt

  /**
   * Places `count` in the bin for the predicted probability of `true`.
   *
   * @param label         the actual label
   * @param count         number of observations with that label
   * @param probabilities predicted distribution; a missing `true` entry is
   *                      treated as probability 0.0
   * @return a single-entry map from bin to (true count, false count)
   */
  def error(label: Boolean, count: M, probabilities: Map[Boolean, Double]) = {
    val tuple = if (label) (count, Monoid.zero[M]) else (Monoid.zero[M], count)
    Map(percentage(probabilities.getOrElse(true, 0.0)) -> tuple)
  }

  /**
   * Builds one `ConfusionMatrix` per bin, treating the bin as the decision
   * threshold (bins at or above the threshold count as predicted `true`).
   *
   * The four arguments are, in order: true-label counts at or above the
   * threshold, false-label counts below it, false-label counts at or above it,
   * and true-label counts below it (presumably true positives, true negatives,
   * false positives and false negatives -- confirm against `ConfusionMatrix`).
   *
   * @param err binned counts as produced by `error` / `create`
   * @return (threshold, matrix) pairs in ascending threshold order
   */
  def thresholds(err: Map[Int, (M,M)])(implicit num: Numeric[M]): List[(Int, ConfusionMatrix)] =
    err.keys.toList.sorted.map{threshold =>
      threshold -> ConfusionMatrix(
        err.filter{_._1 >= threshold}.map{x => num.toDouble(x._2._1)}.sum,
        err.filter{_._1 < threshold}.map{x => num.toDouble(x._2._2)}.sum,
        err.filter{_._1 >= threshold}.map{x => num.toDouble(x._2._2)}.sum,
        err.filter{_._1 < threshold}.map{x => num.toDouble(x._2._1)}.sum)
    }

  /**
   * Area under the curve traced by (falsePositiveRate, truePositiveRate)
   * across the thresholds, computed with the trapezoid rule over consecutive
   * thresholds taken in descending order.
   *
   * @param err binned counts as produced by `error` / `create`
   * @return the summed trapezoid areas; 0.0 when there are fewer than two
   *         thresholds, since no trapezoid can be formed
   */
  def auc(err: Map[Int, (M, M)])(implicit num: Numeric[M]) =
    thresholds(err).map{_._2}.reverse.sliding(2,1).collect{
      // `sliding` emits one short window when there is only a single
      // threshold; matching on exactly two elements skips it instead of
      // failing on a missing second element.
      case Seq(cm1, cm2) =>
        (cm2.falsePositiveRate - cm1.falsePositiveRate) *
        (cm1.truePositiveRate + cm2.truePositiveRate)
    }.sum / 2.0
}

/**
 * Accuracy-style error: the result is a pair whose first element accumulates
 * counts where the most probable predicted label matched the actual label,
 * and whose second element accumulates counts where it did not.
 */
case class AccuracyError[L, M](implicit m: Monoid[M])
    extends FrequencyError[L, M, (M, M)] {

  lazy val monoid = implicitly[Monoid[(M, M)]]

  /**
   * Compares the actual label with the highest-probability predicted label.
   *
   * @param label         the actual label
   * @param count         number of observations with that label
   * @param probabilities predicted distribution; must be non-empty because
   *                      `maxBy` fails on an empty map (`create` only calls
   *                      this when predictions are present)
   * @return `(count, zero)` when the prediction matches, `(zero, count)` otherwise
   */
  def error(label: L, count: M, probabilities: Map[L, Double]) = {
    val (predictedLabel, _) = probabilities.maxBy { case (_, probability) => probability }
    if (predictedLabel != label)
      (m.zero, count)
    else
      (count, m.zero)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy