// jasima.core.statistics.SummaryStat (Maven / Gradle / Ivy)
/*******************************************************************************
* This file is part of jasima, v1.3, the Java simulator for manufacturing and
* logistics.
*
* Copyright (c) 2015 jasima solutions UG
* Copyright (c) 2010-2015 Torsten Hildebrandt and jasima contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*******************************************************************************/
package jasima.core.statistics;
import java.io.Serializable;
import org.apache.commons.math3.distribution.TDistribution;
/**
* Class to collect the most important statistics without having to store all
* values encountered. It can return mean, standard deviation, variance, min,
* max etc. in O(1) time. Values are passed by calling the
* {@link #value(double)} method. Values can be weighted, just call
* {@link #value(double, double)} instead.
*
* In other simulation packages this is sometimes called "tally".
*
* This implementation is based on: D. H. D. West (1979). Communications of the
* ACM, 22, 9, 532-535: Updating Mean and Variance Estimates: An Improved Method
*
* @author Torsten Hildebrandt
* @version
* "$Id: SummaryStat.java 753 2015-07-27 15:29:49Z [email protected] $"
*/
public class SummaryStat implements Serializable, Cloneable {

    private static final long serialVersionUID = 817115058373461360L;

    /**
     * Error probability used by the parameterless confidence interval methods
     * (corresponds to a confidence level of 0.95).
     */
    protected static final double DEF_ERROR_PROB = 0.05;

    /** Optional descriptive name, may be {@code null}. */
    private String name;
    /**
     * Running (weighted) mean and running weighted sum of squared deviations
     * from that mean. {@code varEst} has to be divided by a weight sum to
     * obtain a variance, see {@link #variance()}.
     */
    private double meanEst, varEst;
    /** Sum of the weights of all values seen so far. */
    private double weightSum;
    /** Number of calls to {@link #value(double, double)}. */
    private int numObs;
    private double max;
    private double min;
    /** Value and weight of the most recent observation. */
    protected double lastValue, lastWeight;

    /**
     * Creates an empty, unnamed {@code SummaryStat}.
     */
    public SummaryStat() {
        this((String) null);
    }

    /**
     * Creates an empty {@code SummaryStat} with the given name.
     *
     * @param name
     *            A descriptive name, may be {@code null}.
     */
    public SummaryStat(String name) {
        super();
        clear();
        setName(name);
    }

    /**
     * Create a new SummaryStat-object initialized with the values of "vs". Copy
     * constructor.
     *
     * @param vs
     *            The object to copy name and all statistics from. Must not be
     *            {@code null}.
     */
    public SummaryStat(SummaryStat vs) {
        this(vs.name);
        meanEst = vs.meanEst;
        varEst = vs.varEst;
        weightSum = vs.weightSum;
        numObs = vs.numObs;
        max = vs.max;
        min = vs.min;
        lastValue = vs.lastValue;
        lastWeight = vs.lastWeight;
    }

    /**
     * Resets this object to the state it had before any value was added. The
     * name is not changed.
     */
    public void clear() {
        meanEst = 0.0;
        varEst = 0.0d;
        numObs = 0;
        weightSum = 0.0d;
        // neutral elements, so the first value always replaces them
        min = Double.POSITIVE_INFINITY;
        max = Double.NEGATIVE_INFINITY;
        lastValue = Double.NaN;
        lastWeight = Double.NaN;
    }

    /**
     * Convenience method to add all values given as arguments with a weight of
     * 1.
     *
     * @param vs
     *            The values to add.
     * @return {@code this}, to allow easy chaining of calls.
     */
    public SummaryStat values(double... vs) {
        for (double v : vs) {
            value(v);
        }
        return this;
    }

    /**
     * Adds the given value with a weight of 1.
     *
     * @param v
     *            The value to add.
     * @return {@code this}, to allow easy chaining of calls.
     */
    public SummaryStat value(double v) {
        return value(v, 1.0d);
    }

    /**
     * Adds a value with a given weight.
     *
     * @param v
     *            The value to add.
     * @param weight
     *            The weight to give to this value. Must not be negative (a
     *            weight of 0 is accepted).
     * @return {@code this}, to allow easy chaining of calls.
     * @throws IllegalArgumentException
     *             If weight was negative or NaN.
     */
    public SummaryStat value(double v, double weight)
            throws IllegalArgumentException {
        // negated comparison on purpose: it also rejects NaN
        if (!(weight >= 0.0d)) {
            throw new IllegalArgumentException("Weight can't be negative. "
                    + weight);
        }

        lastValue = v;
        lastWeight = weight;
        numObs++;

        if (v < min) {
            min = v;
        }
        if (v > max) {
            max = v;
        }

        // incremental update of mean and sum of squared deviations
        double oldSum = weightSum;
        weightSum += weight;
        double q = v - meanEst;
        // guard against 0/0 as long as only zero-weight values were seen
        double r = weightSum == 0.0 ? 0.0 : q * weight / weightSum;
        meanEst += r;
        varEst += r * oldSum * q;

        return this;
    }

    /**
     * Returns the mean of all values given to {@link #value(double)}. If
     * weights were used, each value contributes according to its weight.
     *
     * @return The mean of all values seen so far, or NaN if no values were
     *         added yet.
     */
    public double mean() {
        if (numObs < 1) {
            return Double.NaN;
        }
        return meanEst;
    }

    /**
     * The standard deviation of all values.
     *
     * @return The standard deviation of all values given to
     *         {@link #value(double)}, i.e., the square root of
     *         {@link #variance()}.
     * @throws IllegalStateException
     *             Under the same conditions as {@link #variance()}.
     */
    public double stdDev() {
        return Math.sqrt(variance());
    }

    /**
     * Returns the sample variance of the values.
     *
     * @return The (sample) variance of all values given to
     *         {@link #value(double)}. Returns NaN, if no values were added
     *         yet, and 0 if exactly one value was added.
     * @throws IllegalStateException
     *             If more than one value was added but the weight sum is not
     *             larger than 1.
     */
    public double variance() {
        if (numObs < 1) {
            return Double.NaN;
        }
        if (numObs == 1) {
            return 0.0;
        }
        if (weightSum <= 1.0) {
            throw new IllegalStateException("weight sum is <=1.0: " + weightSum);
        }
        return varEst / (weightSum - 1.0);
    }

    /**
     * Returns the population variance of the values.
     *
     * @return The population variance of all values given to
     *         {@link #value(double)}. Returns NaN, if no values were added
     *         yet, and 0 if exactly one value was added.
     */
    public double variancePopulation() {
        if (numObs < 1) {
            return Double.NaN;
        }
        if (numObs == 1) {
            return 0.0;
        }
        return varEst / weightSum;
    }

    /**
     * Returns the coefficient of variation ({@link #stdDev()} divided by
     * {@link #mean()}).
     *
     * @return The coefficient of variation.
     */
    public double varCoeff() {
        return stdDev() / mean();
    }

    /**
     * Returns the sum of all {@link #value(double)}s (taking into account
     * potential weights if {@link #value(double, double)} is used).
     *
     * @return The sum of all values, or NaN if no values were added yet.
     */
    public double sum() {
        if (numObs < 1) {
            return Double.NaN;
        }
        return meanEst * weightSum;
    }

    /**
     * Returns the sum of all weights. If only {@link #value(double)} is used,
     * then the value returned is identical to the value returned by
     * {@link #numObs()}.
     *
     * @return The weight sum, or NaN if no values were added yet.
     */
    public double weightSum() {
        if (numObs == 0) {
            return Double.NaN;
        }
        return weightSum;
    }

    /**
     * Returns the number of times, {@link #value(double)} or
     * {@link #value(double, double)} were called.
     *
     * @return The number of calls to {@link #value(double)} or
     *         {@link #value(double, double)}.
     */
    public int numObs() {
        return numObs;
    }

    /**
     * Returns the minimum value seen so far.
     *
     * @return The minimum value seen so far, or NaN, if no values were given so
     *         far.
     */
    public double min() {
        if (numObs < 1) {
            return Double.NaN;
        }
        return min;
    }

    /**
     * Returns the maximum value seen so far.
     *
     * @return The maximum value seen so far, or NaN, if no values were given so
     *         far.
     */
    public double max() {
        if (numObs < 1) {
            return Double.NaN;
        }
        return max;
    }

    /**
     * Combines the data in {@code other} with this SummaryStat-Object. The
     * combined object behaves as if it had also seen the data of "other". If
     * {@code other} has not seen any values, this object is left unchanged.
     *
     * @param other
     *            The {@link SummaryStat} to combine with this object. Must not
     *            be {@code null}.
     * @return Returns {@code this} to allow easy chaining of calls.
     */
    public SummaryStat combine(SummaryStat other) {
        // nothing to merge; in particular keep our own lastValue/lastWeight
        // instead of overwriting them with the NaN markers of an empty object
        if (other.numObs == 0) {
            return this;
        }

        if (numObs == 0) {
            // we are empty: take over the other object's state as is, this
            // avoids rounding errors of the general formula below
            meanEst = other.meanEst;
            varEst = other.varEst;
            weightSum = other.weightSum;
            numObs = other.numObs;
            max = other.max;
            min = other.min;
        } else {
            double ws = weightSum + other.weightSum;
            // with a total weight of 0 (only zero-weight values on both sides)
            // the formulas below would compute 0/0 and poison meanEst/varEst
            // with NaN; mean and squared deviations are unaffected in that case
            if (ws > 0.0) {
                double delta = other.meanEst - meanEst;
                meanEst = (meanEst * weightSum + other.meanEst
                        * other.weightSum)
                        / ws;
                varEst = varEst + other.varEst + delta * delta * weightSum
                        * other.weightSum / ws;
            }
            weightSum = ws;
            numObs += other.numObs;
            if (other.max > max) {
                max = other.max;
            }
            if (other.min < min) {
                min = other.min;
            }
        }

        // the other object's values are treated as being added after ours
        lastValue = other.lastValue;
        lastWeight = other.lastWeight;

        return this;
    }

    /**
     * Clones this object. We can use the standard functionality here, as there
     * are only primitive fields (and the immutable name).
     *
     * @return A clone of this {@link SummaryStat}.
     * @throws CloneNotSupportedException
     *             Declared for compatibility with {@link Object#clone()}.
     */
    @Override
    public SummaryStat clone() throws CloneNotSupportedException {
        return (SummaryStat) super.clone();
    }

    /**
     * @return lower value of a confidence interval with a 0.95-confidence level
     */
    public double confidenceIntervalLower() {
        return confidenceIntervalLower(DEF_ERROR_PROB);
    }

    /**
     * @return upper value of a confidence interval with a 0.95-confidence level
     */
    public double confidenceIntervalUpper() {
        return confidenceIntervalUpper(DEF_ERROR_PROB);
    }

    /**
     * @param errorProb
     *            The error probability, i.e., 1 minus the confidence level.
     * @return lower value of a confidence interval for the given error
     *         probability: {@link #mean()} minus
     *         {@link #confIntRangeSingle(double)}.
     */
    public double confidenceIntervalLower(double errorProb) {
        return mean() - confIntRangeSingle(errorProb);
    }

    /**
     * @param errorProb
     *            The error probability, i.e., 1 minus the confidence level.
     * @return upper value of a confidence interval for the given error
     *         probability: {@link #mean()} plus
     *         {@link #confIntRangeSingle(double)}.
     */
    public double confidenceIntervalUpper(double errorProb) {
        return mean() + confIntRangeSingle(errorProb);
    }

    // TODO: confidence interval calculation should be factored out
    /**
     * Returns half the width of the confidence interval around
     * {@link #mean()}, based on a t-distribution with {@code weightSum()-1}
     * degrees of freedom.
     *
     * @param errorProb
     *            The error probability, i.e., 1 minus the confidence level.
     * @return Half the width of the confidence interval, or NaN if fewer than
     *         three values were added.
     */
    public double confIntRangeSingle(double errorProb) {
        if (numObs <= 2) {
            return Double.NaN;
        }

        double deg = weightSum() - 1.0d;
        TDistribution dist = new TDistribution(deg);
        // two-sided interval: half of the error probability on each side
        return Math.abs(dist.inverseCumulativeProbability(errorProb * 0.5d))
                * Math.sqrt(variance() / weightSum());
    }

    /**
     * Returns the last value passed to {@link #value(double)} or
     * {@link #value(double, double)}.
     *
     * @return The last value, or NaN if {@code numObs==0}.
     */
    public double lastValue() {
        if (numObs == 0) {
            return Double.NaN;
        }
        return lastValue;
    }

    /**
     * Returns the weight of the last value passed to {@link #value(double)} or
     * {@link #value(double, double)}.
     *
     * @return The last value's weight, or NaN if {@code numObs==0}.
     */
    public double lastWeight() {
        if (numObs == 0) {
            return Double.NaN;
        }
        return lastWeight;
    }

    /**
     * Sets a descriptive name for this object.
     *
     * @param name
     *            A name for this {@code SummaryStat}.
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * Returns the name of this object.
     *
     * @return The name for this {@code SummaryStat}.
     */
    public String getName() {
        return name;
    }

    // ************* static utility methods *************

    /**
     * This method creates a new {@code SummaryStat} object and passes all
     * values to it.
     *
     * @param values
     *            The values to use.
     * @return A {@code SummaryStat} summarizing the values.
     */
    public static SummaryStat summarize(double... values) {
        return new SummaryStat().values(values);
    }

    /**
     * This method creates a new {@code SummaryStat} object and passes all
     * values to it.
     *
     * @param values
     *            The values to use.
     * @return A {@code SummaryStat} summarizing the values.
     */
    public static SummaryStat summarize(int... values) {
        SummaryStat res = new SummaryStat();
        for (int v : values) {
            res.value(v);
        }
        return res;
    }

    /**
     * Creates a new {@code SummaryStat} object that behaves as if all values
     * seen by {@code stats1} and {@code stats2} would have been passed to it.
     * Neither argument is modified.
     *
     * @param stats1
     *            {@code SummaryStat} summarizing first set of values.
     * @param stats2
     *            {@code SummaryStat} summarizing second set of values.
     * @return New {@code SummaryStat} object summarizing the union of first and
     *         second value set.
     */
    public static SummaryStat combine(SummaryStat stats1, SummaryStat stats2) {
        return new SummaryStat(stats1).combine(stats2);
    }
}