All Downloads are FREE. Search and download functionalities are using the official Maven repository.

mgo.tools.Stats.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2018-02-22 Guillaume Chérel
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see .
 */

package mgo.tools

import org.apache.commons.math3.distribution.EnumeratedIntegerDistribution
import org.apache.commons.math3.distribution.MultivariateNormalDistribution
import org.apache.commons.math3.distribution.NormalDistribution
import org.apache.commons.math3.linear.LUDecomposition
import org.apache.commons.math3.linear.MatrixUtils
import org.apache.commons.math3.linear.RealMatrix
import org.apache.commons.math3.linear.RealVector
import org.apache.commons.math3.random.RandomGenerator
import org.apache.commons.math3.stat.correlation.Covariance
import scala.math._
import scala.util.Random
import scala.collection.Searching.search

object stats {

  def weightedCovariance(data: RealMatrix, weights: Array[Double]): RealMatrix = {
    val n = data.getRowDimension()
    val weightsSum = weights.sum
    val weightsSumSquared = pow(weightsSum, 2)
    val weightsSquaredSum = weights.map { pow(_, 2) }.sum
    val dataMean = MatrixUtils.createRealVector(
      data.transpose().operate(weights)).mapDivide(weightsSum)
    val dataCenteredWeighted = (0 to n - 1).map { i =>
      data.getRowVector(i).subtract(dataMean).mapMultiply(sqrt(weights(i)))
        .toArray
    }.toArray
    val cov = new Covariance(dataCenteredWeighted, false)
      .getCovarianceMatrix()
      .scalarMultiply(n.toDouble / weightsSum)
      // Compute the unbiased weighted variance
      .scalarMultiply(weightsSumSquared /
        (weightsSumSquared - weightsSquaredSum))
    cov
  }

  def weightedSample[T](n: Int, data: Vector[T], weights: Vector[Double])(implicit rng: Random): Vector[T] = {
    val cumul = weights.drop(1).scanLeft(weights(0)) {
      case (acc, x) => acc + x
    }
    val randoms = Vector.fill(n)(() => rng.nextDouble()).map(_())
    randoms.map { r => data(cumul.search(r).insertionPoint) }
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy