// org.hipparchus.stat.descriptive.DescriptiveStatistics Maven / Gradle / Ivy
// Show all versions of hipparchus-stat Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This is not the original file distributed by the Apache Software Foundation
* It has been modified by the Hipparchus project
*/
package org.hipparchus.stat.descriptive;
import java.io.Serializable;
import java.util.Arrays;
import java.util.function.DoubleConsumer;
import org.hipparchus.exception.LocalizedCoreFormats;
import org.hipparchus.exception.MathIllegalArgumentException;
import org.hipparchus.exception.MathIllegalStateException;
import org.hipparchus.stat.descriptive.moment.GeometricMean;
import org.hipparchus.stat.descriptive.moment.Kurtosis;
import org.hipparchus.stat.descriptive.moment.Mean;
import org.hipparchus.stat.descriptive.moment.Skewness;
import org.hipparchus.stat.descriptive.moment.Variance;
import org.hipparchus.stat.descriptive.rank.Max;
import org.hipparchus.stat.descriptive.rank.Min;
import org.hipparchus.stat.descriptive.rank.Percentile;
import org.hipparchus.stat.descriptive.summary.Sum;
import org.hipparchus.stat.descriptive.summary.SumOfSquares;
import org.hipparchus.util.FastMath;
import org.hipparchus.util.MathUtils;
import org.hipparchus.util.ResizableDoubleArray;
/**
 * Maintains a dataset of values of a single variable and computes descriptive
 * statistics based on stored data.
 * <p>
 * The {@link #getWindowSize() windowSize} property sets a limit on the number
 * of values that can be stored in the dataset. The default value,
 * {@code INFINITE_WINDOW}, puts no limit on the size of the dataset. This value
 * should be used with caution, as the backing store will grow without bound in
 * this case.
 * <p>
 * For very large datasets, {@link StreamingStatistics}, which does not store
 * the dataset, should be used instead of this class. If {@code windowSize}
 * is not {@code INFINITE_WINDOW} and more values are added than can be stored
 * in the dataset, new values are added in a "rolling" manner, with new values
 * replacing the "oldest" values in the dataset.
 * <p>
 * Every statistic is recomputed from the stored values on each call, by handing
 * the corresponding {@link UnivariateStatistic} to {@link #apply(UnivariateStatistic)}.
 * <p>
 * Note: this class is not threadsafe.
 */
public class DescriptiveStatistics
    implements StatisticalSummary, DoubleConsumer, Serializable {

    /**
     * Represents an infinite window size. When the {@link #getWindowSize()}
     * returns this value, there is no limit to the number of data values
     * that can be stored in the dataset.
     */
    protected static final int INFINITE_WINDOW = -1;

    /** Serialization UID. */
    private static final long serialVersionUID = 20160411L;

    /** The statistic used to calculate the population variance - fixed. */
    private static final UnivariateStatistic POPULATION_VARIANCE = new Variance(false);

    /** Maximum statistic implementation. */
    private final UnivariateStatistic maxImpl;

    /** Minimum statistic implementation. */
    private final UnivariateStatistic minImpl;

    /** Sum statistic implementation. */
    private final UnivariateStatistic sumImpl;

    /** Sum of squares statistic implementation. */
    private final UnivariateStatistic sumOfSquaresImpl;

    /** Mean statistic implementation. */
    private final UnivariateStatistic meanImpl;

    /** Variance statistic implementation. */
    private final UnivariateStatistic varianceImpl;

    /** Geometric mean statistic implementation. */
    private final UnivariateStatistic geometricMeanImpl;

    /** Kurtosis statistic implementation. */
    private final UnivariateStatistic kurtosisImpl;

    /** Skewness statistic implementation. */
    private final UnivariateStatistic skewnessImpl;

    /** Percentile statistic implementation. */
    private final Percentile percentileImpl;

    /** Holds the window size ({@link #INFINITE_WINDOW} when unbounded). */
    private int windowSize;

    /** Stored data values. */
    private final ResizableDoubleArray eDA;

    /**
     * Construct a DescriptiveStatistics instance with an infinite window.
     */
    public DescriptiveStatistics() {
        this(INFINITE_WINDOW);
    }

    /**
     * Construct a DescriptiveStatistics instance with the specified window.
     *
     * @param size the window size.
     * @throws MathIllegalArgumentException if window size is less than 1 but
     * not equal to {@link #INFINITE_WINDOW}
     */
    public DescriptiveStatistics(int size) throws MathIllegalArgumentException {
        this(size, false, null);
    }

    /**
     * Construct a DescriptiveStatistics instance with an infinite window
     * and the initial data values in double[] initialDoubleArray.
     *
     * @param initialDoubleArray the initial double[].
     * @throws org.hipparchus.exception.NullArgumentException if the input array is null
     */
    public DescriptiveStatistics(double[] initialDoubleArray) {
        this(INFINITE_WINDOW, true, initialDoubleArray);
    }

    /**
     * Copy constructor.
     * <p>
     * Construct a new DescriptiveStatistics instance that
     * is a copy of original. The stored data and every statistic
     * implementation are copied via their own {@code copy()} methods.
     *
     * @param original DescriptiveStatistics instance to copy
     * @throws org.hipparchus.exception.NullArgumentException if original is null
     */
    protected DescriptiveStatistics(DescriptiveStatistics original) {
        MathUtils.checkNotNull(original);

        // Copy data and window size
        this.windowSize = original.windowSize;
        this.eDA        = original.eDA.copy();

        // Copy implementations
        this.maxImpl           = original.maxImpl.copy();
        this.minImpl           = original.minImpl.copy();
        this.meanImpl          = original.meanImpl.copy();
        this.sumImpl           = original.sumImpl.copy();
        this.sumOfSquaresImpl  = original.sumOfSquaresImpl.copy();
        this.varianceImpl      = original.varianceImpl.copy();
        this.geometricMeanImpl = original.geometricMeanImpl.copy();
        this.kurtosisImpl      = original.kurtosisImpl.copy();
        this.skewnessImpl      = original.skewnessImpl.copy();
        this.percentileImpl    = original.percentileImpl.copy();
    }

    /**
     * Construct a DescriptiveStatistics instance with the specified window.
     * <p>
     * If initial values are provided together with a finite window that is
     * smaller than the number of initial values, only the last
     * {@code windowSize} values are retained, exactly as
     * {@link #setWindowSize(int)} would do.
     *
     * @param windowSize the window size
     * @param hasInitialValues if initial values have been provided
     * @param initialValues the initial values
     * @throws org.hipparchus.exception.NullArgumentException if initialValues is null
     * @throws MathIllegalArgumentException if window size is less than 1 but
     * not equal to {@link #INFINITE_WINDOW}
     */
    DescriptiveStatistics(int windowSize, boolean hasInitialValues, double[] initialValues) {
        if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
            throw new MathIllegalArgumentException(
                    LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
        }

        if (hasInitialValues) {
            MathUtils.checkNotNull(initialValues, LocalizedCoreFormats.INPUT_ARRAY);
        }

        this.windowSize = windowSize;
        // An unbounded window starts with a capacity of 100; a bounded one
        // is sized to the window.
        int initialCapacity = this.windowSize < 0 ? 100 : this.windowSize;
        this.eDA = hasInitialValues ?
                   new ResizableDoubleArray(initialValues) :
                   new ResizableDoubleArray(initialCapacity);

        // Enforce the "stored count never exceeds the window" invariant up front.
        // Without this, an oversized initial array combined with a finite window
        // would leave the dataset larger than the window.
        if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
            eDA.discardFrontElements(eDA.getNumElements() - windowSize);
        }

        maxImpl           = new Max();
        minImpl           = new Min();
        sumImpl           = new Sum();
        sumOfSquaresImpl  = new SumOfSquares();
        meanImpl          = new Mean();
        varianceImpl      = new Variance();
        geometricMeanImpl = new GeometricMean();
        kurtosisImpl      = new Kurtosis();
        skewnessImpl      = new Skewness();
        percentileImpl    = new Percentile();
    }

    /**
     * Returns a copy of this DescriptiveStatistics instance with the same internal state.
     *
     * @return a copy of this
     */
    public DescriptiveStatistics copy() {
        return new DescriptiveStatistics(this);
    }

    /**
     * Adds the value to the dataset. If the dataset is at the maximum size
     * (i.e., the number of stored elements has reached the currently configured
     * windowSize), the first (oldest) element in the dataset is discarded
     * to make room for the new value.
     *
     * @param v the value to be added
     */
    public void addValue(double v) {
        if (windowSize != INFINITE_WINDOW && getN() >= windowSize) {
            // Window is full: drop the oldest value to make room. Using ">="
            // rather than "==" guarantees the new value is never silently lost.
            eDA.addElementRolling(v);
        } else {
            eDA.addElement(v);
        }
    }

    /** {@inheritDoc} */
    @Override
    public void accept(double v) {
        addValue(v);
    }

    /**
     * Resets all statistics and storage.
     * <p>
     * The window size is left unchanged.
     */
    public void clear() {
        eDA.clear();
    }

    /**
     * Removes the most recent value from the dataset.
     *
     * @throws MathIllegalStateException if there are no elements stored
     */
    public void removeMostRecentValue() throws MathIllegalStateException {
        try {
            eDA.discardMostRecentElements(1);
        } catch (MathIllegalArgumentException ex) {
            // The backing array reports the failure as an argument error;
            // for callers of this method it is a state error (empty dataset).
            throw new MathIllegalStateException(ex, LocalizedCoreFormats.NO_DATA);
        }
    }

    /**
     * Replaces the most recently stored value with the given value.
     * There must be at least one element stored to call this method.
     *
     * @param v the value to replace the most recent stored value
     * @return replaced value
     * @throws MathIllegalStateException if there are no elements stored
     */
    public double replaceMostRecentValue(double v) throws MathIllegalStateException {
        return eDA.substituteMostRecentElement(v);
    }

    /**
     * Apply the given statistic to the data associated with this set of statistics.
     *
     * @param stat the statistic to apply
     * @return the computed value of the statistic.
     */
    public double apply(UnivariateStatistic stat) {
        // No try-catch or advertised exception here because arguments
        // are guaranteed valid.
        return eDA.compute(stat);
    }

    /** {@inheritDoc} */
    @Override
    public double getMean() {
        return apply(meanImpl);
    }

    /**
     * Returns the geometric mean of the available values.
     * <p>
     * See {@link GeometricMean} for details on the computing algorithm.
     *
     * @return The geometricMean, Double.NaN if no values have been added,
     * or if any negative values have been added.
     * @see GeometricMean
     */
    public double getGeometricMean() {
        return apply(geometricMeanImpl);
    }

    /**
     * Returns the standard deviation of the available values.
     *
     * @return The standard deviation, Double.NaN if no values have been added
     * or 0.0 for a single value set.
     */
    @Override
    public double getStandardDeviation() {
        double stdDev = Double.NaN;
        if (getN() > 0) {
            if (getN() > 1) {
                stdDev = FastMath.sqrt(getVariance());
            } else {
                stdDev = 0.0;
            }
        }
        return stdDev;
    }

    /**
     * Returns the quadratic mean of the available values, i.e. the square
     * root of the sum of squares divided by the number of values.
     *
     * @return The quadratic mean or {@code Double.NaN} if no values
     * have been added.
     * @see "Root mean square"
     */
    public double getQuadraticMean() {
        final long n = getN();
        return n > 0 ? FastMath.sqrt(getSumOfSquares() / n) : Double.NaN;
    }

    /** {@inheritDoc} */
    @Override
    public double getVariance() {
        return apply(varianceImpl);
    }

    /**
     * Returns the population variance of the available values.
     *
     * @return The population variance, Double.NaN if no values have been added,
     * or 0.0 for a single value set.
     * @see Variance
     */
    public double getPopulationVariance() {
        return apply(POPULATION_VARIANCE);
    }

    /**
     * Returns the skewness of the available values. Skewness is a
     * measure of the asymmetry of a given distribution.
     *
     * @return The skewness, Double.NaN if less than 3 values have been added.
     */
    public double getSkewness() {
        return apply(skewnessImpl);
    }

    /**
     * Returns the Kurtosis of the available values. Kurtosis is a
     * measure of the "peakedness" of a distribution.
     *
     * @return The kurtosis, Double.NaN if less than 4 values have been added.
     */
    public double getKurtosis() {
        return apply(kurtosisImpl);
    }

    /** {@inheritDoc} */
    @Override
    public double getMax() {
        return apply(maxImpl);
    }

    /** {@inheritDoc} */
    @Override
    public double getMin() {
        return apply(minImpl);
    }

    /** {@inheritDoc} */
    @Override
    public double getSum() {
        return apply(sumImpl);
    }

    /**
     * Returns the sum of the squares of the available values.
     *
     * @return The sum of the squares or Double.NaN if no
     * values have been added.
     */
    public double getSumOfSquares() {
        return apply(sumOfSquaresImpl);
    }

    /**
     * Returns an estimate for the pth percentile of the stored values.
     * <p>
     * The estimate is computed by this instance's {@link Percentile}
     * statistic; see that class for details on the estimation procedure.
     * <p>
     * Preconditions:
     * <ul>
     * <li>{@code 0 < p <= 100} (otherwise a
     * {@code MathIllegalArgumentException} is thrown)</li>
     * <li>at least one value must be stored (returns {@code Double.NaN}
     * otherwise)</li>
     * </ul>
     *
     * @param p the requested percentile (scaled from 0 - 100)
     * @return An estimate for the pth percentile of the stored data
     * @throws MathIllegalArgumentException if p is not a valid quantile
     */
    public double getPercentile(final double p)
        throws MathIllegalArgumentException {
        // The quantile is stored on the shared Percentile instance before it
        // is evaluated, so this call mutates internal state.
        percentileImpl.setQuantile(p);
        return apply(percentileImpl);
    }

    /** {@inheritDoc} */
    @Override
    public long getN() {
        return eDA.getNumElements();
    }

    /**
     * Returns the maximum number of values that can be stored in the
     * dataset, or INFINITE_WINDOW (-1) if there is no limit.
     *
     * @return The current window size or -1 if it is infinite.
     */
    public int getWindowSize() {
        return windowSize;
    }

    /**
     * WindowSize controls the number of values that contribute to the
     * reported statistics. For example, if windowSize is set to 3 and the
     * values {1,2,3,4,5} have been added <strong>in that order</strong> then
     * the <i>available values</i> are {3,4,5} and all reported statistics will
     * be based on these values. If {@code windowSize} is decreased as a result
     * of this call and there are more than the new value of elements in the
     * current dataset, values from the front of the array are discarded to
     * reduce the dataset to {@code windowSize} elements.
     *
     * @param windowSize sets the size of the window.
     * @throws MathIllegalArgumentException if window size is less than 1 but
     * not equal to {@link #INFINITE_WINDOW}
     */
    public void setWindowSize(int windowSize)
        throws MathIllegalArgumentException {

        if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
            throw new MathIllegalArgumentException(
                    LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
        }

        this.windowSize = windowSize;

        // We need to check to see if we need to discard elements
        // from the front of the array. If the windowSize is less than
        // the current number of elements.
        if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
            eDA.discardFrontElements(eDA.getNumElements() - windowSize);
        }
    }

    /**
     * Returns the current set of values in an array of double primitives.
     * The order of addition is preserved. The returned array is a fresh
     * copy of the underlying data -- i.e., it is not a reference to the
     * stored data.
     *
     * @return the current set of numbers in the order in which they
     * were added to this set
     */
    public double[] getValues() {
        return eDA.getElements();
    }

    /**
     * Returns the current set of values in an array of double primitives,
     * sorted in ascending order. The returned array is a fresh
     * copy of the underlying data -- i.e., it is not a reference to the
     * stored data.
     *
     * @return returns the current set of
     * numbers sorted in ascending order
     */
    public double[] getSortedValues() {
        double[] sort = getValues();
        Arrays.sort(sort);
        return sort;
    }

    /**
     * Returns the element at the specified index.
     * <p>
     * The index is passed unchanged to the backing array, which is
     * responsible for any bounds checking.
     *
     * @param index The Index of the element
     * @return return the element at the specified index
     */
    public double getElement(int index) {
        return eDA.getElement(index);
    }

    /**
     * Generates a text report displaying univariate statistics from values
     * that have been added. Each statistic is displayed on a separate line.
     *
     * @return String with line feeds displaying statistics
     */
    @Override
    public String toString() {
        final StringBuilder outBuffer = new StringBuilder(100);
        final String endl = "\n";
        outBuffer.append("DescriptiveStatistics:").append(endl).
                  append("n: ").append(getN()).append(endl).
                  append("min: ").append(getMin()).append(endl).
                  append("max: ").append(getMax()).append(endl).
                  append("mean: ").append(getMean()).append(endl).
                  append("std dev: ").append(getStandardDeviation()).append(endl);
        try {
            // No catch for MIAE because actual parameter is valid below
            outBuffer.append("median: ").append(getPercentile(50)).append(endl);
        } catch (MathIllegalStateException ex) {
            outBuffer.append("median: unavailable").append(endl);
        }
        outBuffer.append("skewness: ").append(getSkewness()).append(endl).
                  append("kurtosis: ").append(getKurtosis()).append(endl);
        return outBuffer.toString();
    }
}