All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.common.math.StatsAccumulator Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.math;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.math.DoubleUtils.ensureNonNegative;
import static com.google.common.primitives.Doubles.isFinite;
import static java.lang.Double.NaN;
import static java.lang.Double.isNaN;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import java.util.Iterator;

/**
 * A mutable object which accumulates double values and tracks some basic statistics over all the
 * values added so far. The values may be added singly or in groups. This class is not thread safe.
 *
 * @author Pete Gillin
 * @author Kevin Bourrillion
 * @since 20.0
 */
@Beta
@GwtIncompatible
public final class StatsAccumulator {

  // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
  // methods of this class.
  private long count = 0;
  private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
  private double sumOfSquaresOfDeltas = 0.0;
  private double min = NaN; // any value will do
  private double max = NaN; // any value will do

  /**
   * Adds the given value to the dataset.
   */
  public void add(double value) {
    if (count == 0) {
      count = 1;
      mean = value;
      min = value;
      max = value;
      if (!isFinite(value)) {
        sumOfSquaresOfDeltas = NaN;
      }
    } else {
      count++;
      if (isFinite(value) && isFinite(mean)) {
        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
        double delta = value - mean;
        mean += delta / count;
        sumOfSquaresOfDeltas += delta * (value - mean);
      } else {
        mean = calculateNewMeanNonFinite(mean, value);
        sumOfSquaresOfDeltas = NaN;
      }
      min = Math.min(min, value);
      max = Math.max(max, value);
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision)
   */
  public void addAll(Iterable values) {
    for (Number value : values) {
      add(value.doubleValue());
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision)
   */
  public void addAll(Iterator values) {
    while (values.hasNext()) {
      add(values.next().doubleValue());
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values
   */
  public void addAll(double... values) {
    for (double value : values) {
      add(value);
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values
   */
  public void addAll(int... values) {
    for (int value : values) {
      add(value);
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
   */
  public void addAll(long... values) {
    for (long value : values) {
      add(value);
    }
  }

  /**
   * Adds the given statistics to the dataset, as if the individual values used to compute the
   * statistics had been added directly.
   */
  public void addAll(Stats values) {
    if (values.count() == 0) {
      return;
    }

    if (count == 0) {
      count = values.count();
      mean = values.mean();
      sumOfSquaresOfDeltas = values.sumOfSquaresOfDeltas();
      min = values.min();
      max = values.max();
    } else {
      count += values.count();
      if (isFinite(mean) && isFinite(values.mean())) {
        // This is a generalized version of the calculation in add(double) above.
        double delta = values.mean() - mean;
        mean += delta * values.count() / count;
        sumOfSquaresOfDeltas +=
            values.sumOfSquaresOfDeltas() + delta * (values.mean() - mean) * values.count();
      } else {
        mean = calculateNewMeanNonFinite(mean, values.mean());
        sumOfSquaresOfDeltas = NaN;
      }
      min = Math.min(min, values.min());
      max = Math.max(max, values.max());
    }
  }

  /**
   * Returns an immutable snapshot of the current statistics.
   */
  public Stats snapshot() {
    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
  }

  /**
   * Returns the number of values.
   */
  public long count() {
    return count;
  }

  /**
   * Returns the arithmetic mean of the
   * values. The count must be non-zero.
   *
   * 

If these values are a sample drawn from a population, this is also an unbiased estimator of * the arithmetic mean of the population. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. * * @throws IllegalStateException if the dataset is empty */ public double mean() { checkState(count != 0); return mean; } /** * Returns the sum of the values. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. */ public final double sum() { return mean * count; } /** * Returns the population * variance of the values. The count must be non-zero. * *

This is guaranteed to return zero if the dataset contains only exactly one finite value. * It is not guaranteed to return zero when the dataset consists of the same value multiple times, * due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty */ public final double populationVariance() { checkState(count != 0); if (isNaN(sumOfSquaresOfDeltas)) { return NaN; } if (count == 1) { return 0.0; } return ensureNonNegative(sumOfSquaresOfDeltas) / count; } /** * Returns the * * population standard deviation of the values. The count must be non-zero. * *

This is guaranteed to return zero if the dataset contains only exactly one finite value. * It is not guaranteed to return zero when the dataset consists of the same value multiple times, * due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty */ public final double populationStandardDeviation() { return Math.sqrt(populationVariance()); } /** * Returns the unbaised sample * variance of the values. If this dataset is a sample drawn from a population, this is an * unbiased estimator of the population variance of the population. The count must be greater than * one. * *

This is not guaranteed to return zero when the dataset consists of the same value multiple * times, due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty or contains a single value */ public final double sampleVariance() { checkState(count > 1); if (isNaN(sumOfSquaresOfDeltas)) { return NaN; } return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); } /** * Returns the * * corrected sample standard deviation of the values. If this dataset is a sample drawn from a * population, this is an estimator of the population standard deviation of the population which * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on * the distribution). The count must be greater than one. * *

This is not guaranteed to return zero when the dataset consists of the same value multiple * times, due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty or contains a single value */ public final double sampleStandardDeviation() { return Math.sqrt(sampleVariance()); } /** * Returns the lowest value in the dataset. The count must be non-zero. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is * {@link Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite * values only then the result is the lowest finite value. If it contains * {@link Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. * * @throws IllegalStateException if the dataset is empty */ public double min() { checkState(count != 0); return min; } /** * Returns the highest value in the dataset. The count must be non-zero. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is * {@link Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite * values only then the result is the highest finite value. If it contains * {@link Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. * * @throws IllegalStateException if the dataset is empty */ public double max() { checkState(count != 0); return max; } double sumOfSquaresOfDeltas() { return sumOfSquaresOfDeltas; } /** * Calculates the new value for the accumulated mean when a value is added, in the case where at * least one of the previous mean and the value is non-finite. */ static double calculateNewMeanNonFinite(double previousMean, double value) { /* * Desired behaviour is to match the results of applying the naive mean formula. In particular, * the update formula can subtract infinities in cases where the naive formula would add them. * * Consequently: * 1. If the previous mean is finite and the new value is non-finite then the new mean is that * value (whether it is NaN or infinity). * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged * (whether it is NaN or infinity). * 3. If both the previous mean and the new value are non-finite and... * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. */ if (isFinite(previousMean)) { // This is case 1. return value; } else if (isFinite(value) || previousMean == value) { // This is case 2. or 3b. return previousMean; } else { // This is case 3a. or 3c. return NaN; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy