
io.citrine.lolo.stats.StatsUtils.scala Maven / Gradle / Ivy
package io.citrine.lolo.stats
import breeze.stats.distributions.{RandBasis, ThreadLocalRandomGenerator}
import io.citrine.random.Random
import org.apache.commons.math3.random.MersenneTwister
object StatsUtils {

  /**
    * Compute the range of the data.
    *
    * @param X to compute the range of
    * @return the largest value minus the smallest value, which is zero if there are no values
    */
  def range(X: Seq[Double]): Double = {
    if (X.nonEmpty) {
      X.max - X.min
    } else {
      0.0
    }
  }

  /**
    * Compute the mean of a (weighted) vector, X.
    *
    * @param X       values to average
    * @param weights optional weight for each value of X; every value gets weight 1.0 if omitted
    * @return the weighted sum of X divided by the total weight
    * @throws IllegalArgumentException if weights are given but their count differs from the number of values,
    *                                  or if the total weight is not positive (which is always the case for an empty X)
    */
  def mean(X: Seq[Double], weights: Option[Seq[Double]] = None): Double = {
    val actualWeights = resolveWeights(X.length, weights)
    val totalWeight = actualWeights.sum
    require(totalWeight > 0.0, s"total weight must be positive, instead got $totalWeight")
    X.zip(actualWeights).map { case (x, w) => x * w }.sum / totalWeight
  }

  /**
    * Compute the variance of a (weighted) vector, X, with dof degrees of freedom.
    *
    * The weighted sum of squared deviations from the weighted mean is divided by `totalWeight - dof`,
    * so `dof = 0` divides by the total weight itself.
    *
    * @param X       values to compute the variance of
    * @param weights optional weight for each value of X; every value gets weight 1.0 if omitted
    * @param dof     amount subtracted from the total weight in the denominator
    * @return the (weighted) variance of X
    * @throws IllegalArgumentException if weights are given but their count differs from the number of values,
    *                                  if dof is negative, or if the total weight does not exceed dof
    */
  def variance(X: Seq[Double], weights: Option[Seq[Double]] = None, dof: Int = 0): Double = {
    val actualWeights = resolveWeights(X.length, weights)
    val totalWeight = actualWeights.sum
    require(dof >= 0, s"degrees of freedom must be non-negative, instead got $dof")
    require(
      totalWeight > dof,
      s"Cannot compute variance on a sequence of weight $totalWeight with $dof degrees of freedom."
    )
    val mu = mean(X, Some(actualWeights))
    X.zip(actualWeights).map { case (x, w) => math.pow(x - mu, 2.0) * w }.sum / (totalWeight - dof)
  }

  /**
    * Compute the (weighted) covariance between two vectors, X and Y, of the same length.
    *
    * @param X       first sequence of values
    * @param Y       second sequence of values, the same length as X
    * @param weights optional weight for each (x, y) pair; every pair gets weight 1.0 if omitted
    * @return the weighted sum of products of deviations from the weighted means, divided by the total weight
    * @throws IllegalArgumentException if X and Y differ in length, if weights are given but their count differs
    *                                  from that length, or if the total weight is not positive
    */
  def covariance(X: Seq[Double], Y: Seq[Double], weights: Option[Seq[Double]] = None): Double = {
    require(
      X.length == Y.length,
      s"Cannot compute covariance between sequences of different lengths (lengths are ${X.length} and ${Y.length})."
    )
    val actualWeights = resolveWeights(X.length, weights)
    val totalWeight = actualWeights.sum
    // The calls to mean also enforce that the total weight is positive before it is used as a divisor.
    val muX = mean(X, Some(actualWeights))
    val muY = mean(Y, Some(actualWeights))
    X.lazyZip(Y).lazyZip(actualWeights).map { case (x, y, w) => (x - muX) * (y - muY) * w }.sum / totalWeight
  }

  /**
    * Compute the (weighted) correlation coefficient between two vectors, X and Y, of the same length.
    *
    * @param X       first sequence of values
    * @param Y       second sequence of values, the same length as X
    * @param weights optional weight for each (x, y) pair; every pair gets weight 1.0 if omitted
    * @return the covariance divided by the square root of the product of the variances (all with dof = 0),
    *         or 0.0 if either variance is exactly zero
    * @throws IllegalArgumentException if X and Y differ in length, if weights are given but their count differs
    *                                  from that length, or if the total weight is not positive
    */
  def correlation(X: Seq[Double], Y: Seq[Double], weights: Option[Seq[Double]] = None): Double = {
    // Validate up front so that mismatched inputs fail the same way whether or not a variance is zero.
    require(
      X.length == Y.length,
      s"Cannot compute correlation between sequences of different lengths (lengths are ${X.length} and ${Y.length})."
    )
    val actualWeights = resolveWeights(X.length, weights)
    val sigma2X = variance(X, Some(actualWeights), dof = 0)
    val sigma2Y = variance(Y, Some(actualWeights), dof = 0)
    if (sigma2X == 0 || sigma2Y == 0) {
      // A sequence without any spread would make the denominator zero; report no correlation instead.
      0.0
    } else {
      covariance(X, Y, Some(actualWeights)) / math.sqrt(sigma2X * sigma2Y)
    }
  }

  /**
    * Build a breeze [[RandBasis]] backed by a Mersenne Twister whose seed is taken from `rng.nextLong()`.
    *
    * NOTE(review): whether the seed expression is evaluated once or once per thread depends on how
    * ThreadLocalRandomGenerator treats its constructor argument, which is not visible here; confirm.
    *
    * @param rng source of the seed
    * @return a RandBasis wrapping a ThreadLocalRandomGenerator around the seeded MersenneTwister
    */
  def breezeRandBasis(rng: Random): RandBasis =
    new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(rng.nextLong())))

  /**
    * Resolve optional weights into one weight per value.
    *
    * Zipping values with a weight sequence of a different length would silently drop the unmatched
    * entries while the total weight still counted every weight, so the mismatch is rejected here.
    *
    * @param n       number of values the weights must line up with
    * @param weights caller-supplied weights, if any
    * @return the supplied weights, or n weights of 1.0 if none were supplied
    * @throws IllegalArgumentException if weights are supplied but there are not exactly n of them
    */
  private def resolveWeights(n: Int, weights: Option[Seq[Double]]): Seq[Double] = {
    weights match {
      case Some(w) =>
        require(w.length == n, s"Number of weights (${w.length}) must match the number of values ($n).")
        w
      case None => Seq.fill(n)(1.0)
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy