All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.common.math.StatsAccumulator Maven / Gradle / Ivy

/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.math;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.math.DoubleUtils.ensureNonNegative;
import static com.google.common.primitives.Doubles.isFinite;
import static java.lang.Double.NaN;
import static java.lang.Double.isNaN;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import java.util.Iterator;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;
import java.util.stream.LongStream;

/**
 * A mutable object which accumulates double values and tracks some basic statistics over all the
 * values added so far. The values may be added singly or in groups. This class is not thread safe.
 *
 * @author Pete Gillin
 * @author Kevin Bourrillion
 * @since 20.0
 */
@Beta
@GwtIncompatible
public final class StatsAccumulator {

  // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
  // methods of this class.
  private long count = 0;
  private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
  private double sumOfSquaresOfDeltas = 0.0;
  private double min = NaN; // any value will do
  private double max = NaN; // any value will do

  /** Adds the given value to the dataset. */
  public void add(double value) {
    if (count == 0) {
      count = 1;
      mean = value;
      min = value;
      max = value;
      if (!isFinite(value)) {
        sumOfSquaresOfDeltas = NaN;
      }
    } else {
      count++;
      if (isFinite(value) && isFinite(mean)) {
        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
        double delta = value - mean;
        mean += delta / count;
        sumOfSquaresOfDeltas += delta * (value - mean);
      } else {
        mean = calculateNewMeanNonFinite(mean, value);
        sumOfSquaresOfDeltas = NaN;
      }
      min = Math.min(min, value);
      max = Math.max(max, value);
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision)
   */
  public void addAll(Iterable values) {
    for (Number value : values) {
      add(value.doubleValue());
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision)
   */
  public void addAll(Iterator values) {
    while (values.hasNext()) {
      add(values.next().doubleValue());
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values
   */
  public void addAll(double... values) {
    for (double value : values) {
      add(value);
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values
   */
  public void addAll(int... values) {
    for (int value : values) {
      add(value);
    }
  }

  /**
   * Adds the given values to the dataset.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
   */
  public void addAll(long... values) {
    for (long value : values) {
      add(value);
    }
  }

  /**
   * Adds the given values to the dataset. The stream will be completely consumed by this method.
   *
   * @param values a series of values
   * @since 28.2
   */
  public void addAll(DoubleStream values) {
    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
  }

  /**
   * Adds the given values to the dataset. The stream will be completely consumed by this method.
   *
   * @param values a series of values
   * @since 28.2
   */
  public void addAll(IntStream values) {
    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
  }

  /**
   * Adds the given values to the dataset. The stream will be completely consumed by this method.
   *
   * @param values a series of values, which will be converted to {@code double} values (this may
   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
   * @since 28.2
   */
  public void addAll(LongStream values) {
    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
  }

  /**
   * Adds the given statistics to the dataset, as if the individual values used to compute the
   * statistics had been added directly.
   */
  public void addAll(Stats values) {
    if (values.count() == 0) {
      return;
    }
    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
  }

  /**
   * Adds the given statistics to the dataset, as if the individual values used to compute the
   * statistics had been added directly.
   *
   * @since 28.2
   */
  public void addAll(StatsAccumulator values) {
    if (values.count() == 0) {
      return;
    }
    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
  }

  private void merge(
      long otherCount,
      double otherMean,
      double otherSumOfSquaresOfDeltas,
      double otherMin,
      double otherMax) {
    if (count == 0) {
      count = otherCount;
      mean = otherMean;
      sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
      min = otherMin;
      max = otherMax;
    } else {
      count += otherCount;
      if (isFinite(mean) && isFinite(otherMean)) {
        // This is a generalized version of the calculation in add(double) above.
        double delta = otherMean - mean;
        mean += delta * otherCount / count;
        sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount;
      } else {
        mean = calculateNewMeanNonFinite(mean, otherMean);
        sumOfSquaresOfDeltas = NaN;
      }
      min = Math.min(min, otherMin);
      max = Math.max(max, otherMax);
    }
  }

  /** Returns an immutable snapshot of the current statistics. */
  public Stats snapshot() {
    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
  }

  /** Returns the number of values. */
  public long count() {
    return count;
  }

  /**
   * Returns the arithmetic mean of the
   * values. The count must be non-zero.
   *
   * 

If these values are a sample drawn from a population, this is also an unbiased estimator of * the arithmetic mean of the population. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. * * @throws IllegalStateException if the dataset is empty */ public double mean() { checkState(count != 0); return mean; } /** * Returns the sum of the values. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. */ public final double sum() { return mean * count; } /** * Returns the population * variance of the values. The count must be non-zero. * *

This is guaranteed to return zero if the dataset contains only exactly one finite value. It * is not guaranteed to return zero when the dataset consists of the same value multiple times, * due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty */ public final double populationVariance() { checkState(count != 0); if (isNaN(sumOfSquaresOfDeltas)) { return NaN; } if (count == 1) { return 0.0; } return ensureNonNegative(sumOfSquaresOfDeltas) / count; } /** * Returns the * population standard deviation of the values. The count must be non-zero. * *

This is guaranteed to return zero if the dataset contains only exactly one finite value. It * is not guaranteed to return zero when the dataset consists of the same value multiple times, * due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty */ public final double populationStandardDeviation() { return Math.sqrt(populationVariance()); } /** * Returns the unbiased sample * variance of the values. If this dataset is a sample drawn from a population, this is an * unbiased estimator of the population variance of the population. The count must be greater than * one. * *

This is not guaranteed to return zero when the dataset consists of the same value multiple * times, due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty or contains a single value */ public final double sampleVariance() { checkState(count > 1); if (isNaN(sumOfSquaresOfDeltas)) { return NaN; } return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); } /** * Returns the * corrected sample standard deviation of the values. If this dataset is a sample drawn from a * population, this is an estimator of the population standard deviation of the population which * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on * the distribution). The count must be greater than one. * *

This is not guaranteed to return zero when the dataset consists of the same value multiple * times, due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty or contains a single value */ public final double sampleStandardDeviation() { return Math.sqrt(sampleVariance()); } /** * Returns the lowest value in the dataset. The count must be non-zero. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values * only then the result is the lowest finite value. If it contains {@link * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. * * @throws IllegalStateException if the dataset is empty */ public double min() { checkState(count != 0); return min; } /** * Returns the highest value in the dataset. The count must be non-zero. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values * only then the result is the highest finite value. If it contains {@link * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. * * @throws IllegalStateException if the dataset is empty */ public double max() { checkState(count != 0); return max; } double sumOfSquaresOfDeltas() { return sumOfSquaresOfDeltas; } /** * Calculates the new value for the accumulated mean when a value is added, in the case where at * least one of the previous mean and the value is non-finite. */ static double calculateNewMeanNonFinite(double previousMean, double value) { /* * Desired behaviour is to match the results of applying the naive mean formula. In particular, * the update formula can subtract infinities in cases where the naive formula would add them. * * Consequently: * 1. If the previous mean is finite and the new value is non-finite then the new mean is that * value (whether it is NaN or infinity). * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged * (whether it is NaN or infinity). * 3. If both the previous mean and the new value are non-finite and... * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. */ if (isFinite(previousMean)) { // This is case 1. return value; } else if (isFinite(value) || previousMean == value) { // This is case 2. or 3b. return previousMean; } else { // This is case 3a. or 3c. return NaN; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy