All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.common.math.Stats Maven / Gradle / Ivy

Go to download

This artifact provides a single jar that contains all classes required to use remote EJB and JMS, including all dependencies. It is intended for use by those not using maven, maven users should just import the EJB and JMS BOM's instead (shaded JAR's cause lots of problems with maven, as it is very easy to inadvertently end up with different versions on classes on the class path).

There is a newer version: 34.0.0.Final
Show newest version
/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.math;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.math.DoubleUtils.ensureNonNegative;
import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite;
import static com.google.common.primitives.Doubles.isFinite;
import static java.lang.Double.NaN;
import static java.lang.Double.doubleToLongBits;
import static java.lang.Double.isNaN;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.MoreObjects;
import com.google.common.base.Objects;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Iterator;
import java.util.stream.Collector;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;
import java.util.stream.LongStream;
import javax.annotation.CheckForNull;

/**
 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several
 * forms of variance -- that were computed from a single set of zero or more floating-point values.
 *
 * 

There are two ways to obtain a {@code Stats} instance: * *

    *
  • If all the values you want to summarize are already known, use the appropriate {@code * Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of * {@code Number}, and primitive varargs are supported. *
  • Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance, * feed values to it as you get them, then call {@link StatsAccumulator#snapshot}. *
* *

Static convenience methods called {@code meanOf} are also provided for users who wish to * calculate only the mean. * *

Java 8 users: If you are not using any of the variance statistics, you may wish to use * built-in JDK libraries instead of this class. * * @author Pete Gillin * @author Kevin Bourrillion * @since 20.0 */ @Beta @GwtIncompatible @ElementTypesAreNonnullByDefault public final class Stats implements Serializable { private final long count; private final double mean; private final double sumOfSquaresOfDeltas; private final double min; private final double max; /** * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}. * *

To ensure that the created instance obeys its contract, the parameters should satisfy the * following constraints. This is the callers responsibility and is not enforced here. * *

    *
  • If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to * get multiplied by 0 to calculate the sum), and the other parameters may have any values * (they will not be used). *
  • If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or {@link * Double#NaN}. *
*/ Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) { this.count = count; this.mean = mean; this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas; this.min = min; this.max = max; } /** * Returns statistics over a dataset containing the given values. * * @param values a series of values, which will be converted to {@code double} values (this may * cause loss of precision) */ public static Stats of(Iterable values) { StatsAccumulator accumulator = new StatsAccumulator(); accumulator.addAll(values); return accumulator.snapshot(); } /** * Returns statistics over a dataset containing the given values. The iterator will be completely * consumed by this method. * * @param values a series of values, which will be converted to {@code double} values (this may * cause loss of precision) */ public static Stats of(Iterator values) { StatsAccumulator accumulator = new StatsAccumulator(); accumulator.addAll(values); return accumulator.snapshot(); } /** * Returns statistics over a dataset containing the given values. * * @param values a series of values */ public static Stats of(double... values) { StatsAccumulator acummulator = new StatsAccumulator(); acummulator.addAll(values); return acummulator.snapshot(); } /** * Returns statistics over a dataset containing the given values. * * @param values a series of values */ public static Stats of(int... values) { StatsAccumulator acummulator = new StatsAccumulator(); acummulator.addAll(values); return acummulator.snapshot(); } /** * Returns statistics over a dataset containing the given values. * * @param values a series of values, which will be converted to {@code double} values (this may * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) */ public static Stats of(long... values) { StatsAccumulator acummulator = new StatsAccumulator(); acummulator.addAll(values); return acummulator.snapshot(); } /** * Returns statistics over a dataset containing the given values. The stream will be completely * consumed by this method. * *

If you have a {@code Stream} rather than a {@code DoubleStream}, you should collect * the values using {@link #toStats()} instead. * * @param values a series of values * @since 28.2 */ public static Stats of(DoubleStream values) { return values .collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll) .snapshot(); } /** * Returns statistics over a dataset containing the given values. The stream will be completely * consumed by this method. * *

If you have a {@code Stream} rather than an {@code IntStream}, you should collect * the values using {@link #toStats()} instead. * * @param values a series of values * @since 28.2 */ public static Stats of(IntStream values) { return values .collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll) .snapshot(); } /** * Returns statistics over a dataset containing the given values. The stream will be completely * consumed by this method. * *

If you have a {@code Stream} rather than a {@code LongStream}, you should collect the * values using {@link #toStats()} instead. * * @param values a series of values, which will be converted to {@code double} values (this may * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) * @since 28.2 */ public static Stats of(LongStream values) { return values .collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll) .snapshot(); } /** * Returns a {@link Collector} which accumulates statistics from a {@link java.util.stream.Stream} * of any type of boxed {@link Number} into a {@link Stats}. Use by calling {@code * boxedNumericStream.collect(toStats())}. The numbers will be converted to {@code double} values * (which may cause loss of precision). * *

If you have any of the primitive streams {@code DoubleStream}, {@code IntStream}, or {@code * LongStream}, you should use the factory method {@link #of} instead. * * @since 28.2 */ public static Collector toStats() { return Collector.of( StatsAccumulator::new, (a, x) -> a.add(x.doubleValue()), (l, r) -> { l.addAll(r); return l; }, StatsAccumulator::snapshot, Collector.Characteristics.UNORDERED); } /** Returns the number of values. */ public long count() { return count; } /** * Returns the arithmetic mean of the * values. The count must be non-zero. * *

If these values are a sample drawn from a population, this is also an unbiased estimator of * the arithmetic mean of the population. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. * *

If you only want to calculate the mean, use {@link #meanOf} instead of creating a {@link * Stats} instance. * * @throws IllegalStateException if the dataset is empty */ public double mean() { checkState(count != 0); return mean; } /** * Returns the sum of the values. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. */ public double sum() { return mean * count; } /** * Returns the population * variance of the values. The count must be non-zero. * *

This is guaranteed to return zero if the dataset contains only exactly one finite value. It * is not guaranteed to return zero when the dataset consists of the same value multiple times, * due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty */ public double populationVariance() { checkState(count > 0); if (isNaN(sumOfSquaresOfDeltas)) { return NaN; } if (count == 1) { return 0.0; } return ensureNonNegative(sumOfSquaresOfDeltas) / count(); } /** * Returns the * population standard deviation of the values. The count must be non-zero. * *

This is guaranteed to return zero if the dataset contains only exactly one finite value. It * is not guaranteed to return zero when the dataset consists of the same value multiple times, * due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty */ public double populationStandardDeviation() { return Math.sqrt(populationVariance()); } /** * Returns the unbiased sample * variance of the values. If this dataset is a sample drawn from a population, this is an * unbiased estimator of the population variance of the population. The count must be greater than * one. * *

This is not guaranteed to return zero when the dataset consists of the same value multiple * times, due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty or contains a single value */ public double sampleVariance() { checkState(count > 1); if (isNaN(sumOfSquaresOfDeltas)) { return NaN; } return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); } /** * Returns the * corrected sample standard deviation of the values. If this dataset is a sample drawn from a * population, this is an estimator of the population standard deviation of the population which * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on * the distribution). The count must be greater than one. * *

This is not guaranteed to return zero when the dataset consists of the same value multiple * times, due to numerical errors. However, it is guaranteed never to return a negative result. * *

Non-finite values

* *

If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. * * @throws IllegalStateException if the dataset is empty or contains a single value */ public double sampleStandardDeviation() { return Math.sqrt(sampleVariance()); } /** * Returns the lowest value in the dataset. The count must be non-zero. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values * only then the result is the lowest finite value. If it contains {@link * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. * * @throws IllegalStateException if the dataset is empty */ public double min() { checkState(count != 0); return min; } /** * Returns the highest value in the dataset. The count must be non-zero. * *

Non-finite values

* *

If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values * only then the result is the highest finite value. If it contains {@link * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. * * @throws IllegalStateException if the dataset is empty */ public double max() { checkState(count != 0); return max; } /** * {@inheritDoc} * *

Note: This tests exact equality of the calculated statistics, including the floating * point values. Two instances are guaranteed to be considered equal if one is copied from the * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any * values in between the two calls, or if one is obtained from the other after round-tripping * through java serialization. However, floating point rounding errors mean that it may be false * for some instances where the statistics are mathematically equal, including instances * constructed from the same values in a different order... or (in the general case) even in the * same order. (It is guaranteed to return true for instances constructed from the same values in * the same order if {@code strictfp} is in effect, or if the system architecture guarantees * {@code strictfp}-like semantics.) */ @Override public boolean equals(@CheckForNull Object obj) { if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } Stats other = (Stats) obj; return count == other.count && doubleToLongBits(mean) == doubleToLongBits(other.mean) && doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas) && doubleToLongBits(min) == doubleToLongBits(other.min) && doubleToLongBits(max) == doubleToLongBits(other.max); } /** * {@inheritDoc} * *

Note: This hash code is consistent with exact equality of the calculated statistics, * including the floating point values. See the note on {@link #equals} for details. */ @Override public int hashCode() { return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max); } @Override public String toString() { if (count() > 0) { return MoreObjects.toStringHelper(this) .add("count", count) .add("mean", mean) .add("populationStandardDeviation", populationStandardDeviation()) .add("min", min) .add("max", max) .toString(); } else { return MoreObjects.toStringHelper(this).add("count", count).toString(); } } double sumOfSquaresOfDeltas() { return sumOfSquaresOfDeltas; } /** * Returns the arithmetic mean of the * values. The count must be non-zero. * *

The definition of the mean is the same as {@link Stats#mean}. * * @param values a series of values, which will be converted to {@code double} values (this may * cause loss of precision) * @throws IllegalArgumentException if the dataset is empty */ public static double meanOf(Iterable values) { return meanOf(values.iterator()); } /** * Returns the arithmetic mean of the * values. The count must be non-zero. * *

The definition of the mean is the same as {@link Stats#mean}. * * @param values a series of values, which will be converted to {@code double} values (this may * cause loss of precision) * @throws IllegalArgumentException if the dataset is empty */ public static double meanOf(Iterator values) { checkArgument(values.hasNext()); long count = 1; double mean = values.next().doubleValue(); while (values.hasNext()) { double value = values.next().doubleValue(); count++; if (isFinite(value) && isFinite(mean)) { // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) mean += (value - mean) / count; } else { mean = calculateNewMeanNonFinite(mean, value); } } return mean; } /** * Returns the arithmetic mean of the * values. The count must be non-zero. * *

The definition of the mean is the same as {@link Stats#mean}. * * @param values a series of values * @throws IllegalArgumentException if the dataset is empty */ public static double meanOf(double... values) { checkArgument(values.length > 0); double mean = values[0]; for (int index = 1; index < values.length; index++) { double value = values[index]; if (isFinite(value) && isFinite(mean)) { // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) mean += (value - mean) / (index + 1); } else { mean = calculateNewMeanNonFinite(mean, value); } } return mean; } /** * Returns the arithmetic mean of the * values. The count must be non-zero. * *

The definition of the mean is the same as {@link Stats#mean}. * * @param values a series of values * @throws IllegalArgumentException if the dataset is empty */ public static double meanOf(int... values) { checkArgument(values.length > 0); double mean = values[0]; for (int index = 1; index < values.length; index++) { double value = values[index]; if (isFinite(value) && isFinite(mean)) { // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) mean += (value - mean) / (index + 1); } else { mean = calculateNewMeanNonFinite(mean, value); } } return mean; } /** * Returns the arithmetic mean of the * values. The count must be non-zero. * *

The definition of the mean is the same as {@link Stats#mean}. * * @param values a series of values, which will be converted to {@code double} values (this may * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) * @throws IllegalArgumentException if the dataset is empty */ public static double meanOf(long... values) { checkArgument(values.length > 0); double mean = values[0]; for (int index = 1; index < values.length; index++) { double value = values[index]; if (isFinite(value) && isFinite(mean)) { // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) mean += (value - mean) / (index + 1); } else { mean = calculateNewMeanNonFinite(mean, value); } } return mean; } // Serialization helpers /** The size of byte array representation in bytes. */ static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE; /** * Gets a byte array representation of this instance. * *

Note: No guarantees are made regarding stability of the representation between * versions. */ public byte[] toByteArray() { ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN); writeTo(buff); return buff.array(); } /** * Writes to the given {@link ByteBuffer} a byte representation of this instance. * *

Note: No guarantees are made regarding stability of the representation between * versions. * * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as * {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance * is written. In the process increases the position of {@link ByteBuffer} by BYTES. */ void writeTo(ByteBuffer buffer) { checkNotNull(buffer); checkArgument( buffer.remaining() >= BYTES, "Expected at least Stats.BYTES = %s remaining , got %s", BYTES, buffer.remaining()); buffer .putLong(count) .putDouble(mean) .putDouble(sumOfSquaresOfDeltas) .putDouble(min) .putDouble(max); } /** * Creates a Stats instance from the given byte representation which was obtained by {@link * #toByteArray}. * *

Note: No guarantees are made regarding stability of the representation between * versions. */ public static Stats fromByteArray(byte[] byteArray) { checkNotNull(byteArray); checkArgument( byteArray.length == BYTES, "Expected Stats.BYTES = %s remaining , got %s", BYTES, byteArray.length); return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN)); } /** * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}. * *

Note: No guarantees are made regarding stability of the representation between * versions. * * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as * {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this * instance is read. In the process increases the position of {@link ByteBuffer} by BYTES. */ static Stats readFrom(ByteBuffer buffer) { checkNotNull(buffer); checkArgument( buffer.remaining() >= BYTES, "Expected at least Stats.BYTES = %s remaining , got %s", BYTES, buffer.remaining()); return new Stats( buffer.getLong(), buffer.getDouble(), buffer.getDouble(), buffer.getDouble(), buffer.getDouble()); } private static final long serialVersionUID = 0; }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy