org.apache.commons.statistics.descriptive.FirstMoment

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.statistics.descriptive;

import java.util.function.DoubleConsumer;

/**
 * Computes the first moment (arithmetic mean) using the definitional formula:
 *
 * <pre>mean = sum(x_i) / n</pre>
 *
 * <p>To limit numeric errors, the value of the statistic is computed using the
 * following recursive updating algorithm:
 *
 * <ol>
 * <li>Initialize {@code m = } the first value</li>
 * <li>For each additional value, update using<br>
 *     {@code m = m + (new value - m) / (number of observations)}</li>
 * </ol>
 *
 * <p>Returns {@code NaN} if the dataset is empty. Note that
 * {@code NaN} may also be returned if the input includes {@code NaN} and/or infinite
 * values of opposite sign.
 *
 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
 * This implementation does not check for overflow of the count.
 *
 * <p>Note that this implementation is not synchronized. If
 * multiple threads access an instance of this class concurrently, and at least
 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
 *
 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
 * and {@link StatisticAccumulator#combine(StatisticResult) combine}
 * as {@code accumulator} and {@code combiner} functions of
 * {@link java.util.stream.Collector Collector} on a parallel stream,
 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
 * provides the necessary partitioning, isolation, and merging of results for
 * safe and efficient parallel execution.
 *
 * <p>References:
 *
 * <ul>
 *   <li>Chan, Golub, Levesque (1983)
 *       Algorithms for Computing the Sample Variance.
 *       American Statistician, vol. 37, no. 3, pp. 242-247.
 *   <li>Ling (1974)
 *       Comparison of Several Algorithms for Computing Sample Means and Variances.
 *       Journal of the American Statistical Association, Vol. 69, No. 348, pp. 859-866.
 * </ul>
 *
 * @since 1.1
 */
class FirstMoment implements DoubleConsumer {
    /** The downscale constant. Used to avoid overflow for all finite input. */
    private static final double DOWNSCALE = 0.5;
    /** The rescale constant. */
    private static final double RESCALE = 2;

    /** Count of values that have been added. */
    protected long n;

    /**
     * Half the deviation of most recently added value from the previous first moment.
     * Retained to prevent repeated computation in higher order moments.
     *
     * <p>Note: This is (x - m1) / 2. It is computed as a half value to prevent overflow
     * when computing for any finite value x and m.
     *
     * <p>This value is not used in the {@link #combine(FirstMoment)} method.
     */
    protected double dev;

    /**
     * Half the deviation of most recently added value from the previous first moment,
     * normalized by current sample size. Retained to prevent repeated
     * computation in higher order moments.
     *
     * <p>Note: This is (x - m1) / 2n. It is computed as a half value to prevent overflow
     * when computing for any finite value x and m.
     *
     * <p>Note: This value is not used in the {@link #combine(FirstMoment)} method.
     */
    protected double nDev;

    /** First moment of values that have been added.
     * This is stored as a half value to prevent overflow for any finite input.
     * Benchmarks show this has negligible performance impact. */
    private double m1;

    /**
     * Running sum of values seen so far.
     * This is not used in the computation of mean. Used as a return value for first moment when
     * it is non-finite.
     */
    private double nonFiniteValue;

    /**
     * Create an instance.
     */
    FirstMoment() {
        // No-op
    }

    /**
     * Copy constructor.
     *
     * @param source Source to copy.
     */
    FirstMoment(FirstMoment source) {
        m1 = source.m1;
        n = source.n;
        nonFiniteValue = source.nonFiniteValue;
    }

    /**
     * Create an instance with the given first moment.
     *
     * <p>This constructor is used when creating the moment from a finite sum of values.
     *
     * @param m1 First moment.
     * @param n Count of values.
     */
    FirstMoment(double m1, long n) {
        this.m1 = m1 * DOWNSCALE;
        this.n = n;
    }

    /**
     * Returns an instance populated using the input {@code values}.
     *
     * <p>Note: {@code FirstMoment} computed using {@link #accept} may be different from
     * this instance.
     *
     * @param values Values.
     * @return {@code FirstMoment} instance.
     */
    static FirstMoment of(double... values) {
        if (values.length == 0) {
            return new FirstMoment();
        }
        // In the typical use-case a sum of values will not overflow and
        // is faster than the rolling algorithm
        return create(org.apache.commons.numbers.core.Sum.of(values), values);
    }

    /**
     * Creates the first moment.
     *
     * <p>Uses the provided {@code sum} if finite; otherwise reverts to using the rolling moment
     * to protect from overflow and adds a second pass correction term.
     *
     * <p>This method is used by {@link DoubleStatistics} using a sum that can be reused
     * for the {@link Sum} statistic.
     *
     * @param sum Sum of the values.
     * @param values Values.
     * @return {@code FirstMoment} instance.
     */
    static FirstMoment create(org.apache.commons.numbers.core.Sum sum, double[] values) {
        // Protect against empty values
        if (values.length == 0) {
            return new FirstMoment();
        }
        final double s = sum.getAsDouble();
        if (Double.isFinite(s)) {
            return new FirstMoment(s / values.length, values.length);
        }
        // "Corrected two-pass algorithm"
        // First pass
        final FirstMoment m1 = create(values);
        final double xbar = m1.getFirstMoment();
        if (!Double.isFinite(xbar)) {
            return m1;
        }
        // Second pass
        double correction = 0;
        for (final double x : values) {
            correction += x - xbar;
        }
        // Note: Correction may be infinite
        if (Double.isFinite(correction)) {
            // Down scale the correction to the half representation
            m1.m1 += DOWNSCALE * correction / values.length;
        }
        return m1;
    }

    /**
     * Creates the first moment using a rolling algorithm.
     *
     * <p>This duplicates the algorithm in the {@link #accept(double)} method
     * with optimisations due to the processing of an entire array:
     *
     * <ul>
     *   <li>Avoid updating (unused) class level working variables.
     *   <li>Only computing the non-finite value if required.
     * </ul>
     *
     * @param values Values.
     * @return the first moment
     */
    private static FirstMoment create(double[] values) {
        double m1 = 0;
        int n = 0;
        for (final double x : values) {
            // Downscale to avoid overflow for all finite input
            m1 += (x * DOWNSCALE - m1) / ++n;
        }
        final FirstMoment m = new FirstMoment();
        m.n = n;
        // Note: m1 is already downscaled here
        m.m1 = m1;
        // The non-finite value is only relevant if the data contains inf/nan
        if (!Double.isFinite(m1 * RESCALE)) {
            m.nonFiniteValue = computeNonFiniteValue(values);
        }
        return m;
    }

    /**
     * Compute the result in the event of non-finite values.
     *
     * @param values Values.
     * @return the non-finite result
     */
    private static double computeNonFiniteValue(double[] values) {
        double sum = 0;
        for (final double x : values) {
            // Scaling down values prevents overflow of finites.
            sum += x * Double.MIN_NORMAL;
        }
        return sum;
    }

    /**
     * Updates the state of the statistic to reflect the addition of {@code value}.
     *
     * @param value Value.
     */
    @Override
    public void accept(double value) {
        // "Updating one-pass algorithm"
        // See: Chan et al (1983) Equation 1.3a
        // m_{i+1} = m_i + (x - m_i) / (i + 1)
        // This is modified with scaling to avoid overflow for all finite input.
        // Scaling the input down by a factor of two ensures that the scaling is lossless.
        // Sub-classes must alter their scaling factors when using the computed deviations.

        // Note: Maintain the correct non-finite result.
        // Scaling down values prevents overflow of finites.
        nonFiniteValue += value * Double.MIN_NORMAL;
        // Scale down the input
        dev = value * DOWNSCALE - m1;
        nDev = dev / ++n;
        m1 += nDev;
    }

    /**
     * Gets the first moment of all input values.
     *
     * <p>When no values have been added, the result is {@code NaN}.
     *
     * @return {@code First moment} of all values, if it is finite;
     *         {@code +/-Infinity}, if infinities of the same sign have been encountered;
     *         {@code NaN} otherwise.
     */
    double getFirstMoment() {
        // Scale back to the original magnitude
        final double m = m1 * RESCALE;
        if (Double.isFinite(m)) {
            return n == 0 ? Double.NaN : m;
        }
        // A non-finite value must have been encountered,
        // return nonFiniteValue which represents m1.
        return nonFiniteValue;
    }

    /**
     * Combines the state of another {@code FirstMoment} into this one.
     *
     * @param other Another {@code FirstMoment} to be combined.
     * @return {@code this} instance after combining {@code other}.
     */
    FirstMoment combine(FirstMoment other) {
        nonFiniteValue += other.nonFiniteValue;
        final double mu1 = this.m1;
        final double mu2 = other.m1;
        final long n1 = n;
        final long n2 = other.n;
        n = n1 + n2;
        // Adjust the mean with the weighted difference:
        // m1 = m1 + (m2 - m1) * n2 / (n1 + n2)
        // The half-representation ensures the difference of means is at most MAX_VALUE
        // so the combine can avoid scaling.
        if (n1 == n2) {
            // Optimisation for equal sizes: m1 = (m1 + m2) / 2
            m1 = (mu1 + mu2) * 0.5;
        } else {
            m1 = combine(mu1, mu2, n1, n2);
        }
        return this;
    }

    /**
     * Combine the moments. This method is used to enforce symmetry. It assumes that
     * the two sizes are not identical, and at least one size is non-zero.
     *
     * @param m1 Moment 1.
     * @param m2 Moment 2.
     * @param n1 Size of sample 1.
     * @param n2 Size of sample 2.
     * @return the combined first moment
     */
    private static double combine(double m1, double m2, long n1, long n2) {
        // Note: If either size is zero the weighted difference is zero and
        // the other moment is unchanged.
        return n2 < n1 ?
            m1 + (m2 - m1) * ((double) n2 / (n1 + n2)) :
            m2 + (m1 - m2) * ((double) n1 / (n1 + n2));
    }

    /**
     * Gets the difference of the first moment between {@code this} moment and the
     * {@code other} moment. This is provided for sub-classes.
     *
     * @param other Other moment.
     * @return the difference
     */
    double getFirstMomentDifference(FirstMoment other) {
        // Scale back to the original magnitude
        return (m1 - other.m1) * RESCALE;
    }

    /**
     * Gets half the difference of the first moment between {@code this} moment and
     * the {@code other} moment. This is provided for sub-classes.
     *
     * @param other Other moment.
     * @return the difference
     */
    double getFirstMomentHalfDifference(FirstMoment other) {
        return m1 - other.m1;
    }
}
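
// --- Illustrative sketch (not part of the Commons Statistics source) ---
// A minimal, self-contained demonstration of the two formulas documented above:
// the one-pass update m = m + (x - m) / n and the weighted combine of two means.
// The class and method names below (FirstMomentExample, rollingMean, combineMeans)
// are hypothetical, and the sketch deliberately omits the half-value scaling and
// non-finite bookkeeping that FirstMoment uses to guard against overflow.
class FirstMomentExample {
    /** Rolling mean, Chan et al (1983) Equation 1.3a: m_{i+1} = m_i + (x - m_i) / (i + 1). */
    static double rollingMean(double[] values) {
        double m = 0;
        long n = 0;
        for (double x : values) {
            m += (x - m) / ++n;
        }
        return n == 0 ? Double.NaN : m;
    }

    /** Combine two means by weighting the difference: m1 + (m2 - m1) * n2 / (n1 + n2). */
    static double combineMeans(double m1, long n1, double m2, long n2) {
        return m1 + (m2 - m1) * ((double) n2 / (n1 + n2));
    }

    public static void main(String[] args) {
        double[] a = {1, 2, 3, 4};
        double[] b = {5, 6};
        // Prints 2.5 (mean of 1..4), then 3.5 (mean of 1..6 obtained by combining the two parts)
        System.out.println(rollingMean(a));
        System.out.println(combineMeans(rollingMean(a), a.length, rollingMean(b), b.length));
    }
}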




