All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.djutils.stats.summarizers.Tally Maven / Gradle / Ivy

The newest version!
package org.djutils.stats.summarizers;

import org.djutils.exceptions.Throw;
import org.djutils.stats.ConfidenceInterval;
import org.djutils.stats.DistNormalTable;
import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator;
import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator;

/**
 * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values.
 * 

* Copyright (c) 2002-2024 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See * for project information https://simulation.tudelft.nl. The DSOL * project is distributed under a three-clause BSD-style license, which can be found at * * https://simulation.tudelft.nl/dsol/3.0/license.html.
* @author Alexander Verbraeck * @author Peter Jacobs * @author Peter Knoppers */ public class Tally implements TallyStatistic { /** */ private static final long serialVersionUID = 20200228L; /** The sum of this tally. */ private double sum; /** The mean of this tally. */ private double m1; /** The summation for the second moment (variance). */ private double m2; /** The summation for the third moment (skewness). */ private double m3; /** The summation for the fourth moment (kurtosis). */ private double m4; /** The minimum observed value of this tally. */ private double min; /** The maximum observed value of this tally. */ private double max; /** The number of measurements of this tally. */ private long n; /** The description of this tally. */ private final String description; /** The quantile accumulator. */ private final QuantileAccumulator quantileAccumulator; /** the synchronized lock. */ @SuppressWarnings("checkstyle:visibilitymodifier") protected Object semaphore = new Object(); /** * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles. * @param description String; the description of this tally */ public Tally(final String description) { this(description, new NoStorageAccumulator()); } /** * Constructs a new Tally. * @param description String; the description of this tally * @param quantileAccumulator QuantileAccumulator; the input series accumulator that can approximate or compute quantiles. */ public Tally(final String description, final QuantileAccumulator quantileAccumulator) { Throw.whenNull(description, "description cannot be null"); Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null"); this.description = description; this.quantileAccumulator = quantileAccumulator; initialize(); } /** {@inheritDoc} */ @Override public void initialize() { synchronized (this.semaphore) { this.min = Double.NaN; this.max = Double.NaN; this.n = 0; this.sum = 0.0; this.m1 = 0.0; this.m2 = 0.0; this.m3 = 0; this.m4 = 0; this.quantileAccumulator.initialize(); } } /** * Ingest an array of values. * @param values double...; the values to register */ public void register(final double... values) { for (double value : values) { register(value); } } /** * Process one observed value. * @param value double; the value to process * @return double; the value */ public double register(final double value) { Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN"); synchronized (this.semaphore) { if (this.n == 0) { this.min = Double.MAX_VALUE; this.max = -Double.MAX_VALUE; } this.n++; double delta = value - this.m1; double oldm2 = this.m2; double oldm3 = this.m3; // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf this.m1 += delta / this.n; // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf this.m2 += delta * (value - this.m1); // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n; // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1) * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n; this.sum += value; if (value < this.min) { this.min = value; } if (value > this.max) { this.max = value; } this.quantileAccumulator.register(value); } return value; } /** {@inheritDoc} */ @Override public String getDescription() { return this.description; } /** {@inheritDoc} */ @Override public double getMax() { return this.max; } /** {@inheritDoc} */ @Override public double getMin() { return this.min; } /** {@inheritDoc} */ @Override public long getN() { return this.n; } /** * Return the sum of the values of the observations. * @return double; the sum of the values of the observations */ public double getSum() { return this.sum; } /** * Returns the sample mean of all observations since the initialization. * @return double; the sample mean */ public double getSampleMean() { if (this.n > 0) { return this.m1; } return Double.NaN; } /** * Returns the population mean of all observations since the initialization. * @return double; the population mean */ public double getPopulationMean() { return getSampleMean(); } /** * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample * standard deviation is defined as the square root of the sample variance. * @return double; the sample standard deviation */ public double getSampleStDev() { synchronized (this.semaphore) { if (this.n > 1) { return Math.sqrt(getSampleVariance()); } return Double.NaN; } } /** * Returns the current (biased) population standard deviation of all observations since the initialization. The population * standard deviation is defined as the square root of the population variance. * @return double; the population standard deviation */ public double getPopulationStDev() { synchronized (this.semaphore) { return Math.sqrt(getPopulationVariance()); } } /** * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the * sample variance in relation to the population variance is undisputed. The formula is:
*   S2 = (1 / (n - 1)) * [ Σx2 - (Σx)2 / n ]
* which can be calculated on the basis of the calculated population variance σ2 as follows:
*   S2 = σ2 * n / (n - 1)
* @return double; the current sample variance of this tally */ public double getSampleVariance() { synchronized (this.semaphore) { if (this.n > 1) { return this.m2 / (this.n - 1); } return Double.NaN; } } /** * Returns the current (biased) population variance of all observations since the initialization. The population variance is * defined as:
* σ2 = (1 / n) * [ Σx2 - (Σx)2 / n ] * @return double; the current population variance of this tally */ public double getPopulationVariance() { synchronized (this.semaphore) { if (this.n > 0) { return this.m2 / this.n; } return Double.NaN; } } /** * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:
*   Skewunbiased = Skewbiased [ ( n - 1) / n ] 3/2
* whereas SAS, SPSS and Excel calculate it as:
*   Skewunbiased = Skewbiased √[ n ( n - 1)] / (n - 2)
* Here we follow the last mentioned formula. All formulas converge to the same value with larger n. * @return double; the sample skewness of the registered data */ public double getSampleSkewness() { if (this.n > 2) { return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2); } return Double.NaN; } /** * Return the (biased) population skewness of the registered data. The population skewness is defined as:
*   Skewbiased = [ Σ ( x - μ ) 3 ] / [ n . S3 ]
* where S2 is the sample variance. So the denominator is equal to [ n . * sample_var3/2 ] . * @return double; the skewness of the registered data */ public double getPopulationSkewness() { if (this.n > 1) { return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5); } return Double.NaN; } /** * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the * following formula:
*   Kurtunbiased = [ Σ ( x - μ ) 4 ] / [ ( n - 1 ) . S4 ]
* where S2 is the sample variance. So the denominator is equal to [ ( n - 1 ) . * sample_var2 ] . * @return double; the sample kurtosis of the registered data */ public double getSampleKurtosis() { if (this.n > 3) { double sVar = getSampleVariance(); return this.m4 / (this.n - 1) / sVar / sVar; } return Double.NaN; } /** * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:
*   Kurtbiased = [ Σ ( x - μ ) 4 ] / [ n . σ4 ]
* where σ2 is the population variance. So the denominator is equal to [ n . * pop_var2 ] . * @return double; the population kurtosis of the registered data */ public double getPopulationKurtosis() { if (this.n > 2) { return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n); } return Double.NaN; } /** * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we * use:
*   ExcessKurtunbiased = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) * * ExcessKurtbiased + 6]
* This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel. * @return double; the sample excess kurtosis of the registered data */ public double getSampleExcessKurtosis() { if (this.n > 3) { double g2 = getPopulationExcessKurtosis(); return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0); } return Double.NaN; } /** * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution. * @return double; the population excess kurtosis of the registered data */ public double getPopulationExcessKurtosis() { if (this.n > 2) { // convert kurtosis to excess kurtosis, shift by -3 return getPopulationKurtosis() - 3.0; } return Double.NaN; } /** * Compute the quantile for the given probability. * @param probability double; the probability for which the quantile is to be computed. The value should be between 0 and 1, * inclusive. * @return double; the quantile for the probability * @throws IllegalArgumentException when the probability is less than 0 or larger than 1 */ public double getQuantile(final double probability) { return this.quantileAccumulator.getQuantile(this, probability); } /** * Get, or estimate fraction of registered values between -infinity up to and including a given quantile. * @param quantile double; the given quantile * @return double; the estimated or observed fraction of registered values between -infinity up to and including the given * quantile. When this TallyInterface has registered zero values; this method shall return NaN. * @throws IllegalArgumentException when quantile is NaN */ public double getCumulativeProbability(final double quantile) { return this.quantileAccumulator.getCumulativeProbability(this, quantile); } /** * returns the confidence interval on either side of the mean. * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level. * @return double[]; the confidence interval of this tally * @throws IllegalArgumentException when alpha is less than 0 or larger than 1 */ public double[] getConfidenceInterval(final double alpha) { return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE); } /** * returns the confidence interval based of the mean. * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level. * @param side ConfidenceInterval; the side of the confidence interval with respect to the mean * @return double[]; the confidence interval of this tally * @throws IllegalArgumentException when alpha is less than 0 or larger than 1 * @throws NullPointerException when side is null */ public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side) { Throw.whenNull(side, "type of confidence level cannot be null"); Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class, "confidenceLevel should be between 0 and 1 (inclusive)"); synchronized (this.semaphore) { double sampleMean = getSampleMean(); if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN()) { return null; // TODO throw something } double level = 1 - alpha; if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE)) { level = 1 - alpha / 2.0; } double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level); double confidence = z * Math.sqrt(this.getSampleVariance() / this.n); double[] result = {sampleMean - confidence, sampleMean + confidence}; if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE)) { result[1] = sampleMean; } if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE)) { result[0] = sampleMean; } result[0] = Math.max(result[0], this.min); result[1] = Math.min(result[1], this.max); return result; } } /** {@inheritDoc} */ @Override public String toString() { return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description + ", quantileAccumulator=" + this.quantileAccumulator + "]"; } /** * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics. * @return String; header for the textual table. */ public static String reportHeader() { return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name", "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113); } /** {@inheritDoc} */ @Override public String reportLine() { return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(), formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10), formatFixed(getMax(), 10)); } /** * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics. * @return String; footer for the textual table */ public static String reportFooter() { return "-".repeat(113); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy