com.google.common.math.StatsAccumulator Maven / Gradle / Ivy
/*
* Copyright (C) 2012 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.common.math;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.math.DoubleUtils.ensureNonNegative;
import static com.google.common.primitives.Doubles.isFinite;
import static java.lang.Double.NaN;
import static java.lang.Double.isNaN;
import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import java.util.Iterator;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;
import java.util.stream.LongStream;
/**
* A mutable object which accumulates double values and tracks some basic statistics over all the
* values added so far. The values may be added singly or in groups. This class is not thread safe.
*
* @author Pete Gillin
* @author Kevin Bourrillion
* @since 20.0
*/
@Beta
@GwtIncompatible
@ElementTypesAreNonnullByDefault
public final class StatsAccumulator {
// These fields must satisfy the requirements of Stats' constructor as well as those of the stat
// methods of this class.
private long count = 0;
private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
private double sumOfSquaresOfDeltas = 0.0;
private double min = NaN; // any value will do
private double max = NaN; // any value will do
/** Adds the given value to the dataset. */
public void add(double value) {
if (count == 0) {
count = 1;
mean = value;
min = value;
max = value;
if (!isFinite(value)) {
sumOfSquaresOfDeltas = NaN;
}
} else {
count++;
if (isFinite(value) && isFinite(mean)) {
// Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
double delta = value - mean;
mean += delta / count;
sumOfSquaresOfDeltas += delta * (value - mean);
} else {
mean = calculateNewMeanNonFinite(mean, value);
sumOfSquaresOfDeltas = NaN;
}
min = Math.min(min, value);
max = Math.max(max, value);
}
}
/**
* Adds the given values to the dataset.
*
* @param values a series of values, which will be converted to {@code double} values (this may
* cause loss of precision)
*/
public void addAll(Iterable extends Number> values) {
for (Number value : values) {
add(value.doubleValue());
}
}
/**
* Adds the given values to the dataset.
*
* @param values a series of values, which will be converted to {@code double} values (this may
* cause loss of precision)
*/
public void addAll(Iterator extends Number> values) {
while (values.hasNext()) {
add(values.next().doubleValue());
}
}
/**
* Adds the given values to the dataset.
*
* @param values a series of values
*/
public void addAll(double... values) {
for (double value : values) {
add(value);
}
}
/**
* Adds the given values to the dataset.
*
* @param values a series of values
*/
public void addAll(int... values) {
for (int value : values) {
add(value);
}
}
/**
* Adds the given values to the dataset.
*
* @param values a series of values, which will be converted to {@code double} values (this may
* cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
*/
public void addAll(long... values) {
for (long value : values) {
add(value);
}
}
/**
* Adds the given values to the dataset. The stream will be completely consumed by this method.
*
* @param values a series of values
* @since 28.2
*/
public void addAll(DoubleStream values) {
addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
}
/**
* Adds the given values to the dataset. The stream will be completely consumed by this method.
*
* @param values a series of values
* @since 28.2
*/
public void addAll(IntStream values) {
addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
}
/**
* Adds the given values to the dataset. The stream will be completely consumed by this method.
*
* @param values a series of values, which will be converted to {@code double} values (this may
* cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
* @since 28.2
*/
public void addAll(LongStream values) {
addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
}
/**
* Adds the given statistics to the dataset, as if the individual values used to compute the
* statistics had been added directly.
*/
public void addAll(Stats values) {
if (values.count() == 0) {
return;
}
merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
}
/**
* Adds the given statistics to the dataset, as if the individual values used to compute the
* statistics had been added directly.
*
* @since 28.2
*/
public void addAll(StatsAccumulator values) {
if (values.count() == 0) {
return;
}
merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
}
private void merge(
long otherCount,
double otherMean,
double otherSumOfSquaresOfDeltas,
double otherMin,
double otherMax) {
if (count == 0) {
count = otherCount;
mean = otherMean;
sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
min = otherMin;
max = otherMax;
} else {
count += otherCount;
if (isFinite(mean) && isFinite(otherMean)) {
// This is a generalized version of the calculation in add(double) above.
double delta = otherMean - mean;
mean += delta * otherCount / count;
sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount;
} else {
mean = calculateNewMeanNonFinite(mean, otherMean);
sumOfSquaresOfDeltas = NaN;
}
min = Math.min(min, otherMin);
max = Math.max(max, otherMax);
}
}
/** Returns an immutable snapshot of the current statistics. */
public Stats snapshot() {
return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
}
/** Returns the number of values. */
public long count() {
return count;
}
/**
* Returns the arithmetic mean of the
* values. The count must be non-zero.
*
* If these values are a sample drawn from a population, this is also an unbiased estimator of
* the arithmetic mean of the population.
*
*
Non-finite values
*
* If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
* contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
* result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
* only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
* If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
* Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
*
* @throws IllegalStateException if the dataset is empty
*/
public double mean() {
checkState(count != 0);
return mean;
}
/**
* Returns the sum of the values.
*
*
Non-finite values
*
* If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
* contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
* result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
* only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
* If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
* Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
*/
public final double sum() {
return mean * count;
}
/**
* Returns the population
* variance of the values. The count must be non-zero.
*
*
This is guaranteed to return zero if the dataset contains only exactly one finite value. It
* is not guaranteed to return zero when the dataset consists of the same value multiple times,
* due to numerical errors. However, it is guaranteed never to return a negative result.
*
*
Non-finite values
*
* If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
* Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
*
* @throws IllegalStateException if the dataset is empty
*/
public final double populationVariance() {
checkState(count != 0);
if (isNaN(sumOfSquaresOfDeltas)) {
return NaN;
}
if (count == 1) {
return 0.0;
}
return ensureNonNegative(sumOfSquaresOfDeltas) / count;
}
/**
* Returns the
* population standard deviation of the values. The count must be non-zero.
*
*
This is guaranteed to return zero if the dataset contains only exactly one finite value. It
* is not guaranteed to return zero when the dataset consists of the same value multiple times,
* due to numerical errors. However, it is guaranteed never to return a negative result.
*
*
Non-finite values
*
* If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
* Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
*
* @throws IllegalStateException if the dataset is empty
*/
public final double populationStandardDeviation() {
return Math.sqrt(populationVariance());
}
/**
* Returns the unbiased sample
* variance of the values. If this dataset is a sample drawn from a population, this is an
* unbiased estimator of the population variance of the population. The count must be greater than
* one.
*
*
This is not guaranteed to return zero when the dataset consists of the same value multiple
* times, due to numerical errors. However, it is guaranteed never to return a negative result.
*
*
Non-finite values
*
* If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
* Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
*
* @throws IllegalStateException if the dataset is empty or contains a single value
*/
public final double sampleVariance() {
checkState(count > 1);
if (isNaN(sumOfSquaresOfDeltas)) {
return NaN;
}
return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
}
/**
* Returns the
* corrected sample standard deviation of the values. If this dataset is a sample drawn from a
* population, this is an estimator of the population standard deviation of the population which
* is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
* the distribution). The count must be greater than one.
*
*
This is not guaranteed to return zero when the dataset consists of the same value multiple
* times, due to numerical errors. However, it is guaranteed never to return a negative result.
*
*
Non-finite values
*
* If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
* Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
*
* @throws IllegalStateException if the dataset is empty or contains a single value
*/
public final double sampleStandardDeviation() {
return Math.sqrt(sampleVariance());
}
/**
* Returns the lowest value in the dataset. The count must be non-zero.
*
*
Non-finite values
*
* If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
* contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
* Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
* only then the result is the lowest finite value. If it contains {@link
* Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
*
* @throws IllegalStateException if the dataset is empty
*/
public double min() {
checkState(count != 0);
return min;
}
/**
* Returns the highest value in the dataset. The count must be non-zero.
*
*
Non-finite values
*
* If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
* contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
* Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
* only then the result is the highest finite value. If it contains {@link
* Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
*
* @throws IllegalStateException if the dataset is empty
*/
public double max() {
checkState(count != 0);
return max;
}
double sumOfSquaresOfDeltas() {
return sumOfSquaresOfDeltas;
}
/**
* Calculates the new value for the accumulated mean when a value is added, in the case where at
* least one of the previous mean and the value is non-finite.
*/
static double calculateNewMeanNonFinite(double previousMean, double value) {
/*
* Desired behaviour is to match the results of applying the naive mean formula. In particular,
* the update formula can subtract infinities in cases where the naive formula would add them.
*
* Consequently:
* 1. If the previous mean is finite and the new value is non-finite then the new mean is that
* value (whether it is NaN or infinity).
* 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged
* (whether it is NaN or infinity).
* 3. If both the previous mean and the new value are non-finite and...
* 3a. ...either or both is NaN (so mean != value) then the new mean is NaN.
* 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged.
* 3c. ...they are different infinities (so mean != value) then the new mean is NaN.
*/
if (isFinite(previousMean)) {
// This is case 1.
return value;
} else if (isFinite(value) || previousMean == value) {
// This is case 2. or 3b.
return previousMean;
} else {
// This is case 3a. or 3c.
return NaN;
}
}
}