// org.apache.openejb.math.stat.descriptive.DescriptiveStatistics Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.openejb.math.stat.descriptive;
import org.apache.openejb.math.MathRuntimeException;
import org.apache.openejb.math.stat.descriptive.moment.GeometricMean;
import org.apache.openejb.math.stat.descriptive.moment.Kurtosis;
import org.apache.openejb.math.stat.descriptive.moment.Mean;
import org.apache.openejb.math.stat.descriptive.moment.Skewness;
import org.apache.openejb.math.stat.descriptive.moment.Variance;
import org.apache.openejb.math.stat.descriptive.rank.Max;
import org.apache.openejb.math.stat.descriptive.rank.Min;
import org.apache.openejb.math.stat.descriptive.rank.Percentile;
import org.apache.openejb.math.stat.descriptive.summary.Sum;
import org.apache.openejb.math.stat.descriptive.summary.SumOfSquares;
import org.apache.openejb.math.util.ResizableDoubleArray;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
/**
* Maintains a dataset of values of a single variable and computes descriptive
* statistics based on stored data. The {@link #getWindowSize() windowSize}
* property sets a limit on the number of values that can be stored in the
* dataset. The default value, INFINITE_WINDOW, puts no limit on the size of
* the dataset. This value should be used with caution, as the backing store
* will grow without bound in this case. For very large datasets,
* {@link org.apache.commons.math.stat.descriptive.SummaryStatistics}, which does not store the dataset, should be used
* instead of this class. If {@code windowSize} is not {@code INFINITE_WINDOW} and
* more values are added than can be stored in the dataset, new values are
* added in a "rolling" manner, with new values replacing the "oldest" values
* in the dataset.
*
* Note: this class is not threadsafe. Use
* {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
* threads is required.
*
* @version $Revision: 885278 $ $Date: 2009-11-29 13:47:51 -0800 (Sun, 29 Nov 2009) $
*/
public class DescriptiveStatistics implements StatisticalSummary, Serializable {
/**
 * Represents an infinite window size. When {@link #getWindowSize()}
 * returns this value, there is no limit to the number of data values
 * that can be stored in the dataset.
 */
public static final int INFINITE_WINDOW = -1;
/**
 * Serialization UID.
 */
private static final long serialVersionUID = 1233067267405273064L;
/**
 * Name of the {@code setQuantile} method that is looked up reflectively on
 * percentile implementations.
 */
private static final String SET_QUANTILE_METHOD_NAME = "setQuantile";
/**
 * Message pattern used when a percentile implementation lacks the required
 * method: {0} is the implementation class name, {1} the method name.
 */
private static final String UNSUPPORTED_METHOD_MESSAGE =
"percentile implementation {0} does not support {1}";
/**
 * Message pattern used when the {@code setQuantile} method cannot be accessed:
 * {0} is the method name, {1} the implementation class name.
 */
private static final String ILLEGAL_ACCESS_MESSAGE =
"cannot access {0} method in percentile implementation {1}";
/**
 * Holds the window size; defaults to {@link #INFINITE_WINDOW}.
 */
protected int windowSize = INFINITE_WINDOW;
/**
 * Stored data values.
 */
protected ResizableDoubleArray eDA = new ResizableDoubleArray();
/**
 * Mean statistic implementation - can be reset by setter.
 */
private UnivariateStatistic meanImpl = new Mean();
/**
 * Geometric mean statistic implementation - can be reset by setter.
 */
private UnivariateStatistic geometricMeanImpl = new GeometricMean();
/**
 * Kurtosis statistic implementation - can be reset by setter.
 */
private UnivariateStatistic kurtosisImpl = new Kurtosis();
/**
 * Maximum statistic implementation - can be reset by setter.
 */
private UnivariateStatistic maxImpl = new Max();
/**
 * Minimum statistic implementation - can be reset by setter.
 */
private UnivariateStatistic minImpl = new Min();
/**
 * Percentile statistic implementation - can be reset by setter.
 * Its quantile is reassigned on every call to {@link #getPercentile(double)}.
 */
private UnivariateStatistic percentileImpl = new Percentile();
/**
 * Skewness statistic implementation - can be reset by setter.
 */
private UnivariateStatistic skewnessImpl = new Skewness();
/**
 * Variance statistic implementation - can be reset by setter.
 */
private UnivariateStatistic varianceImpl = new Variance();
/**
 * Sum of squares statistic implementation - can be reset by setter.
 */
private UnivariateStatistic sumsqImpl = new SumOfSquares();
/**
 * Sum statistic implementation - can be reset by setter.
 */
private UnivariateStatistic sumImpl = new Sum();
/**
 * Creates an empty instance. The window size keeps its field default,
 * {@link #INFINITE_WINDOW}, so no limit is placed on the number of stored values.
 */
public DescriptiveStatistics() {
    super();
}
/**
 * Creates an empty instance limited to the given window size.
 * The argument is validated and applied by {@link #setWindowSize(int)}.
 *
 * @param window the window size; must be positive or {@link #INFINITE_WINDOW}
 */
public DescriptiveStatistics(final int window) {
    super();
    this.setWindowSize(window);
}
/**
 * Copy constructor. Initializes the new instance from {@code original}
 * by delegating to {@link #copy(DescriptiveStatistics, DescriptiveStatistics)}.
 *
 * @param original DescriptiveStatistics instance to copy
 */
public DescriptiveStatistics(final DescriptiveStatistics original) {
    super();
    DescriptiveStatistics.copy(original, this);
}
/**
 * Adds the value to the dataset.
 * <ul>
 * <li>With an infinite window the value is simply appended.</li>
 * <li>With a finite window that is not yet full the value is appended.</li>
 * <li>With a finite window that is exactly full the value is added in a
 * rolling manner, replacing the first (oldest) stored element.</li>
 * </ul>
 * If more values are stored than the finite window allows, the call does nothing.
 *
 * @param v the value to be added
 */
public void addValue(final double v) {
    if (windowSize == INFINITE_WINDOW) {
        eDA.addElement(v);
        return;
    }
    final long stored = getN();
    if (stored == windowSize) {
        // Window is full: push out the oldest element.
        eDA.addElementRolling(v);
    } else if (stored < windowSize) {
        eDA.addElement(v);
    }
}
/**
 * Removes the most recent value from the dataset by discarding the
 * last element of the backing array.
 */
public void removeMostRecentValue() {
    this.eDA.discardMostRecentElements(1);
}
/**
 * Replaces the most recently stored value with the given value.
 * At least one element must already be stored when this method is called.
 *
 * @param v the value that takes the place of the most recent stored value
 * @return the value that was replaced
 */
public double replaceMostRecentValue(final double v) {
    final double previous = eDA.substituteMostRecentElement(v);
    return previous;
}
/**
 * Returns the arithmetic mean of the available values, computed by applying
 * the currently configured mean implementation to the stored data.
 *
 * @return the mean; expected to be {@code Double.NaN} if no values have been
 * added (this is determined by the configured implementation)
 */
public double getMean() {
    return this.apply(this.meanImpl);
}
/**
 * Returns the geometric mean of the available values, computed by applying
 * the currently configured geometric mean implementation to the stored data.
 *
 * @return the geometric mean; expected to be {@code Double.NaN} if no values
 * have been added or if the product of the available values is less than or
 * equal to 0 (this is determined by the configured implementation)
 */
public double getGeometricMean() {
    return this.apply(this.geometricMeanImpl);
}
/**
 * Returns the variance of the available values, computed by applying the
 * currently configured variance implementation to the stored data.
 *
 * @return the variance; expected to be {@code Double.NaN} if no values have
 * been added and 0.0 for a single-value set (this is determined by the
 * configured implementation)
 */
public double getVariance() {
    return this.apply(this.varianceImpl);
}
/**
 * Returns the standard deviation of the available values, i.e. the square
 * root of {@link #getVariance()} when more than one value is stored.
 *
 * @return the standard deviation, {@code Double.NaN} if no values have been
 * added, or 0.0 for a single-value set
 */
public double getStandardDeviation() {
    final long count = getN();
    if (count <= 0) {
        return Double.NaN;
    }
    if (count == 1) {
        return 0.0;
    }
    return Math.sqrt(getVariance());
}
/**
 * Returns the skewness of the available values. Skewness is a measure of
 * the asymmetry of a given distribution. The value is computed by applying
 * the currently configured skewness implementation to the stored data.
 *
 * @return the skewness; expected to be {@code Double.NaN} if no values have
 * been added and 0.0 for a set of two or fewer values (this is determined by
 * the configured implementation)
 */
public double getSkewness() {
    return this.apply(this.skewnessImpl);
}
/**
 * Returns the kurtosis of the available values. Kurtosis is a measure of
 * the "peakedness" of a distribution. The value is computed by applying the
 * currently configured kurtosis implementation to the stored data.
 *
 * @return the kurtosis; expected to be {@code Double.NaN} if no values have
 * been added and 0.0 for a set of three or fewer values (this is determined
 * by the configured implementation)
 */
public double getKurtosis() {
    return this.apply(this.kurtosisImpl);
}
/**
 * Returns the maximum of the available values, computed by applying the
 * currently configured maximum implementation to the stored data.
 *
 * @return the max; expected to be {@code Double.NaN} if no values have been
 * added (this is determined by the configured implementation)
 */
public double getMax() {
    return this.apply(this.maxImpl);
}
/**
 * Returns the minimum of the available values, computed by applying the
 * currently configured minimum implementation to the stored data.
 *
 * @return the min; expected to be {@code Double.NaN} if no values have been
 * added (this is determined by the configured implementation)
 */
public double getMin() {
    return this.apply(this.minImpl);
}
/**
 * Returns the number of available values, i.e. the number of elements
 * currently held in the backing array.
 *
 * @return the number of available values
 */
public long getN() {
    return this.eDA.getNumElements();
}
/**
 * Returns the sum of the available values, computed by applying the
 * currently configured sum implementation to the stored data.
 *
 * @return the sum; expected to be {@code Double.NaN} if no values have been
 * added (this is determined by the configured implementation)
 */
public double getSum() {
    return this.apply(this.sumImpl);
}
/**
 * Returns the sum of the squares of the available values, computed by
 * applying the currently configured sum-of-squares implementation to the
 * stored data.
 *
 * @return the sum of the squares; expected to be {@code Double.NaN} if no
 * values have been added (this is determined by the configured implementation)
 */
public double getSumsq() {
    return this.apply(this.sumsqImpl);
}
/**
 * Removes all stored values by clearing the backing array.
 * The window size and the configured statistic implementations are not touched.
 */
public void clear() {
    this.eDA.clear();
}
/**
 * Returns the maximum number of values that can be stored in the
 * dataset, or {@link #INFINITE_WINDOW} (-1) if there is no limit.
 *
 * @return the current window size, or -1 if it is infinite
 */
public int getWindowSize() {
    return this.windowSize;
}
/**
 * Sets the window size, which controls the number of values that contribute
 * to the reported statistics. For example, if the window size is 3 and the
 * values {1,2,3,4,5} have been added in that order, then the available
 * values are {3,4,5} and all reported statistics are based on these values.
 *
 * If the new window is finite and smaller than the number of values
 * currently stored, the surplus values are discarded from the front
 * (oldest end) of the dataset.
 *
 * @param windowSize the new window size; must be positive or
 * {@link #INFINITE_WINDOW}, otherwise an exception built by
 * {@code MathRuntimeException.createIllegalArgumentException} is thrown
 */
public void setWindowSize(final int windowSize) {
    final boolean unbounded = windowSize == INFINITE_WINDOW;
    if (!unbounded && windowSize < 1) {
        throw MathRuntimeException.createIllegalArgumentException(
                "window size must be positive ({0})", windowSize);
    }
    this.windowSize = windowSize;
    // Shrinking below the current element count drops the oldest entries.
    if (!unbounded && eDA.getNumElements() > windowSize) {
        eDA.discardFrontElements(eDA.getNumElements() - windowSize);
    }
}
/**
 * Returns the current set of values in an array of double primitives,
 * as supplied by the backing array's {@code getElements()}. The order of
 * addition is preserved. The returned array is intended to be a fresh copy
 * of the underlying data rather than a reference to the stored data
 * ({@link #getSortedValues()} sorts it in place and relies on this).
 *
 * @return the current set of numbers in the order in which they
 * were added to this set
 */
public double[] getValues() {
    final double[] snapshot = eDA.getElements();
    return snapshot;
}
/**
 * Returns the current set of values in an array of double primitives,
 * sorted in ascending order. The array obtained from {@link #getValues()}
 * is sorted in place and returned.
 *
 * @return the current set of numbers sorted in ascending order
 */
public double[] getSortedValues() {
    final double[] ascending = getValues();
    Arrays.sort(ascending);
    return ascending;
}
/**
 * Returns the element at the specified index of the stored data.
 *
 * @param index the index of the element
 * @return the element at the specified index
 */
public double getElement(final int index) {
    final double value = eDA.getElement(index);
    return value;
}
/**
 * Returns an estimate for the pth percentile of the stored values.
 *
 * The quantile {@code p} is first pushed into the configured percentile
 * implementation and the implementation is then applied to the stored data.
 * When the implementation is a {@link Percentile}, {@code setQuantile} is
 * called directly; otherwise it is invoked reflectively.
 *
 * Preconditions:
 * <ul>
 * <li>{@code 0 < p <= 100}; values outside this range are expected to be
 * rejected with an {@code IllegalArgumentException} by the percentile
 * implementation's {@code setQuantile}</li>
 * <li>at least one value must be stored; otherwise {@code Double.NaN} is
 * expected (this is determined by the percentile implementation)</li>
 * </ul>
 *
 * @param p the requested percentile (scaled from 0 - 100)
 * @return an estimate for the pth percentile of the stored data
 * @throws IllegalArgumentException if the percentile implementation has been
 * overridden and the supplied implementation has no accessible public
 * {@code setQuantile(double)} method, or if the reflective call to
 * {@code setQuantile} itself fails (the failure cause is wrapped)
 */
public double getPercentile(final double p) {
    if (percentileImpl instanceof Percentile) {
        ((Percentile) percentileImpl).setQuantile(p);
    } else {
        try {
            percentileImpl.getClass()
                    .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                    .invoke(percentileImpl, Double.valueOf(p));
        } catch (final NoSuchMethodException e1) { // Setter guard should prevent
            throw MathRuntimeException.createIllegalArgumentException(
                    UNSUPPORTED_METHOD_MESSAGE,
                    percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
        } catch (final IllegalAccessException e2) {
            throw MathRuntimeException.createIllegalArgumentException(
                    ILLEGAL_ACCESS_MESSAGE,
                    SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
        } catch (final InvocationTargetException e3) {
            // Surface the exception raised inside setQuantile, not the reflection wrapper.
            throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
        }
    }
    return apply(percentileImpl);
}
/**
 * Generates a text report displaying univariate statistics from values
 * that have been added. A heading line is followed by n, min, max, mean,
 * standard deviation, median (the 50th percentile), skewness and kurtosis,
 * each on its own line terminated by a line feed.
 *
 * @return String with line feeds displaying statistics
 */
@Override
public String toString() {
    final char newline = '\n';
    return new StringBuilder()
            .append("DescriptiveStatistics:").append(newline)
            .append("n: ").append(getN()).append(newline)
            .append("min: ").append(getMin()).append(newline)
            .append("max: ").append(getMax()).append(newline)
            .append("mean: ").append(getMean()).append(newline)
            .append("std dev: ").append(getStandardDeviation()).append(newline)
            .append("median: ").append(getPercentile(50)).append(newline)
            .append("skewness: ").append(getSkewness()).append(newline)
            .append("kurtosis: ").append(getKurtosis()).append(newline)
            .toString();
}
/**
 * Applies the given statistic to the data associated with this set of
 * statistics. The statistic is evaluated over the backing array's internal
 * values, using the array's start position and its number of elements.
 *
 * @param stat the statistic to apply
 * @return the computed value of the statistic
 */
public double apply(final UnivariateStatistic stat) {
    return stat.evaluate(
            this.eDA.getInternalValues(),
            this.eDA.start(),
            this.eDA.getNumElements());
}
// Implementation getters and setter
/**
 * Returns the statistic currently used to compute the mean.
 *
 * @return the UnivariateStatistic implementing the mean
 * @since 1.2
 */
public synchronized UnivariateStatistic getMeanImpl() {
    return this.meanImpl;
}
/**
 * Replaces the statistic used by {@link #getMean()}.
 * The reference is stored as given; no validation or copy is performed.
 *
 * @param meanImpl the UnivariateStatistic instance to use
 * for computing the mean
 * @since 1.2
 */
public synchronized void setMeanImpl(
        final UnivariateStatistic meanImpl) {
    this.meanImpl = meanImpl;
}
/**
 * Returns the statistic currently used to compute the geometric mean.
 *
 * @return the UnivariateStatistic implementing the geometric mean
 * @since 1.2
 */
public synchronized UnivariateStatistic getGeometricMeanImpl() {
    return this.geometricMeanImpl;
}
/**
 * Replaces the statistic used by {@link #getGeometricMean()}.
 * The reference is stored as given; no validation or copy is performed.
 *
 * @param geometricMeanImpl the UnivariateStatistic instance to use
 * for computing the geometric mean
 * @since 1.2
 */
public synchronized void setGeometricMeanImpl(final UnivariateStatistic geometricMeanImpl) {
    this.geometricMeanImpl = geometricMeanImpl;
}
/**
 * Returns the statistic currently used to compute the kurtosis.
 *
 * @return the UnivariateStatistic implementing the kurtosis
 * @since 1.2
 */
public synchronized UnivariateStatistic getKurtosisImpl() {
    return this.kurtosisImpl;
}
/**
 * Replaces the statistic used by {@link #getKurtosis()}.
 * The reference is stored as given; no validation or copy is performed.
 *
 * @param kurtosisImpl the UnivariateStatistic instance to use
 * for computing the kurtosis
 * @since 1.2
 */
public synchronized void setKurtosisImpl(
        final UnivariateStatistic kurtosisImpl) {
    this.kurtosisImpl = kurtosisImpl;
}
/**
 * Returns the statistic currently used to compute the maximum.
 *
 * @return the UnivariateStatistic implementing the maximum
 * @since 1.2
 */
public synchronized UnivariateStatistic getMaxImpl() {
    return this.maxImpl;
}
/**
 * Replaces the statistic used by {@link #getMax()}.
 * The reference is stored as given; no validation or copy is performed.
 *
 * @param maxImpl the UnivariateStatistic instance to use
 * for computing the maximum
 * @since 1.2
 */
public synchronized void setMaxImpl(
        final UnivariateStatistic maxImpl) {
    this.maxImpl = maxImpl;
}
/**
 * Returns the statistic currently used to compute the minimum.
 *
 * @return the UnivariateStatistic implementing the minimum
 * @since 1.2
 */
public synchronized UnivariateStatistic getMinImpl() {
    return this.minImpl;
}
/**
 * Replaces the statistic used by {@link #getMin()}.
 * The reference is stored as given; no validation or copy is performed.
 *
 * @param minImpl the UnivariateStatistic instance to use
 * for computing the minimum
 * @since 1.2
 */
public synchronized void setMinImpl(
        final UnivariateStatistic minImpl) {
    this.minImpl = minImpl;
}
/**
 * Returns the statistic currently used by {@link #getPercentile(double)}.
 *
 * @return the UnivariateStatistic implementing the percentile
 * @since 1.2
 */
public synchronized UnivariateStatistic getPercentileImpl() {
    return this.percentileImpl;
}
/**
 * Sets the implementation to be used by {@link #getPercentile(double)}.
 * The supplied {@code UnivariateStatistic} must provide a public
 * {@code setQuantile(double)} method; otherwise an
 * {@code IllegalArgumentException} is thrown and the current
 * implementation is kept.
 *
 * The check is made by reflectively invoking {@code setQuantile(50.0)} on
 * the supplied instance, so the method is actually called on the argument
 * as a side effect of this setter.
 *
 * @param percentileImpl the percentileImpl to set
 * @throws IllegalArgumentException if the supplied implementation does not
 * provide an accessible {@code setQuantile} method, or if the probe call fails
 * @since 1.2
 */
public synchronized void setPercentileImpl(final UnivariateStatistic percentileImpl) {
    final Class<?> implClass = percentileImpl.getClass();
    try {
        // Probe call: proves that setQuantile(double) exists and is callable.
        implClass.getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                .invoke(percentileImpl, Double.valueOf(50.0d));
    } catch (final NoSuchMethodException e1) {
        throw MathRuntimeException.createIllegalArgumentException(
                "percentile implementation {0} does not support setQuantile",
                implClass.getName());
    } catch (final IllegalAccessException e2) {
        throw MathRuntimeException.createIllegalArgumentException(
                ILLEGAL_ACCESS_MESSAGE,
                SET_QUANTILE_METHOD_NAME, implClass.getName());
    } catch (final InvocationTargetException e3) {
        throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
    }
    this.percentileImpl = percentileImpl;
}
/**
 * Returns the statistic currently used to compute the skewness.
 *
 * @return the UnivariateStatistic implementing the skewness
 * @since 1.2
 */
public synchronized UnivariateStatistic getSkewnessImpl() {
    return this.skewnessImpl;
}
/**
 * Replaces the statistic used by {@link #getSkewness()}.
 * The reference is stored as given; no validation or copy is performed.
 *
 * @param skewnessImpl the UnivariateStatistic instance to use
 * for computing the skewness
 * @since 1.2
 */
public synchronized void setSkewnessImpl(final UnivariateStatistic skewnessImpl) {
    this.skewnessImpl = skewnessImpl;
}
/**
 * Returns the statistic currently used to compute the variance.
 *
 * @return the UnivariateStatistic implementing the variance
 * @since 1.2
 */
public synchronized UnivariateStatistic getVarianceImpl() {
    return this.varianceImpl;
}
/**
 * Replaces the statistic used by {@link #getVariance()}.
 * The reference is stored as given; no validation or copy is performed.
 *
 * @param varianceImpl the UnivariateStatistic instance to use
 * for computing the variance
 * @since 1.2
 */
public synchronized void setVarianceImpl(final UnivariateStatistic varianceImpl) {
    this.varianceImpl = varianceImpl;
}
/**
 * Returns the statistic currently used to compute the sum of squares.
 *
 * @return the UnivariateStatistic implementing the sum of squares
 * @since 1.2
 */
public synchronized UnivariateStatistic getSumsqImpl() {
    return this.sumsqImpl;
}
/**
 * Replaces the statistic used by {@link #getSumsq()}.
 * The reference is stored as given; no validation or copy is performed.
 *
 * @param sumsqImpl the UnivariateStatistic instance to use
 * for computing the sum of squares
 * @since 1.2
 */
public synchronized void setSumsqImpl(
        final UnivariateStatistic sumsqImpl) {
    this.sumsqImpl = sumsqImpl;
}
/**
 * Returns the statistic currently used to compute the sum.
 *
 * @return the UnivariateStatistic implementing the sum
 * @since 1.2
 */
public synchronized UnivariateStatistic getSumImpl() {
    return this.sumImpl;
}
/**
 * Replaces the statistic used by {@link #getSum()}.
 * The reference is stored as given; no validation or copy is performed.
 *
 * @param sumImpl the UnivariateStatistic instance to use
 * for computing the sum
 * @since 1.2
 */
public synchronized void setSumImpl(
        final UnivariateStatistic sumImpl) {
    this.sumImpl = sumImpl;
}
/**
 * Returns a copy of this DescriptiveStatistics instance. A new instance is
 * created with the no-argument constructor and then populated from this
 * one through {@link #copy(DescriptiveStatistics, DescriptiveStatistics)}.
 *
 * @return a copy of this
 */
public DescriptiveStatistics copy() {
    final DescriptiveStatistics duplicate = new DescriptiveStatistics();
    DescriptiveStatistics.copy(this, duplicate);
    return duplicate;
}
/**
 * Copies source to dest.
 * Neither source nor dest can be null.
 *
 * The stored data, the window size and every statistic implementation are
 * duplicated through their own {@code copy()} methods, so that {@code dest}
 * does not share any statistic object with {@code source}. This matters in
 * particular for the percentile implementation, whose quantile is
 * reassigned on every call to {@link #getPercentile(double)}.
 *
 * @param source DescriptiveStatistics to copy
 * @param dest DescriptiveStatistics to copy to
 * @throws NullPointerException if either source or dest is null
 */
public static void copy(final DescriptiveStatistics source, final DescriptiveStatistics dest) {
    // Copy data and window size
    dest.eDA = source.eDA.copy();
    dest.windowSize = source.windowSize;

    // Copy implementations; none of them may be shared by reference.
    dest.maxImpl = source.maxImpl.copy();
    dest.meanImpl = source.meanImpl.copy();
    dest.minImpl = source.minImpl.copy();
    dest.sumImpl = source.sumImpl.copy();
    dest.varianceImpl = source.varianceImpl.copy();
    dest.sumsqImpl = source.sumsqImpl.copy();
    dest.geometricMeanImpl = source.geometricMeanImpl.copy();
    dest.kurtosisImpl = source.kurtosisImpl.copy();
    dest.skewnessImpl = source.skewnessImpl.copy();
    dest.percentileImpl = source.percentileImpl.copy();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy