All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.math4.stat.descriptive.AggregateSummaryStatistics Maven / Gradle / Ivy

Go to download

Statistical sampling library for use in virtdata libraries, based on apache commons math 4

There is a newer version: 5.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math4.stat.descriptive;

import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;

import org.apache.commons.math4.exception.NullArgumentException;

/**
 * 

* An aggregator for {@code SummaryStatistics} from several data sets or * data set partitions. In its simplest usage mode, the client creates an * instance via the zero-argument constructor, then uses * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics} * for each individual data set / partition. The per-set statistics objects * are used as normal, and at any time the aggregate statistics for all the * contributors can be obtained from this object. *

* Clients with specialized requirements can use alternative constructors to * control the statistics implementations and initial values used by the * contributing and the internal aggregate {@code SummaryStatistics} objects. *

* A static {@link #aggregate(Collection)} method is also included that computes * aggregate statistics directly from a Collection of SummaryStatistics instances. *

* When {@link #createContributingStatistics()} is used to create SummaryStatistics * instances to be aggregated concurrently, the created instances' * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating * instance maintained by this class. In multithreaded environments, if the functionality * provided by {@link #aggregate(Collection)} is adequate, that method should be used * to avoid unnecessary computation and synchronization delays.

* * @since 2.0 * */ public class AggregateSummaryStatistics implements StatisticalSummary, Serializable { /** Serializable version identifier */ private static final long serialVersionUID = -8207112444016386906L; /** * A SummaryStatistics serving as a prototype for creating SummaryStatistics * contributing to this aggregate */ private final SummaryStatistics statisticsPrototype; /** * The SummaryStatistics in which aggregate statistics are accumulated. */ private final SummaryStatistics statistics; /** * Initializes a new AggregateSummaryStatistics with default statistics * implementations. * */ public AggregateSummaryStatistics() { // No try-catch or throws NAE because arg is guaranteed non-null this(new SummaryStatistics()); } /** * Initializes a new AggregateSummaryStatistics with the specified statistics * object as a prototype for contributing statistics and for the internal * aggregate statistics. This provides for customized statistics implementations * to be used by contributing and aggregate statistics. * * @param prototypeStatistics a {@code SummaryStatistics} serving as a * prototype both for the internal aggregate statistics and for * contributing statistics obtained via the * {@code createContributingStatistics()} method. Being a prototype * means that other objects are initialized by copying this object's state. * If {@code null}, a new, default statistics object is used. Any statistic * values in the prototype are propagated to contributing statistics * objects and (once) into these aggregate statistics. * @throws NullArgumentException if prototypeStatistics is null * @see #createContributingStatistics() */ public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException { this(prototypeStatistics, prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics)); } /** * Initializes a new AggregateSummaryStatistics with the specified statistics * object as a prototype for contributing statistics and for the internal * aggregate statistics. This provides for different statistics implementations * to be used by contributing and aggregate statistics and for an initial * state to be supplied for the aggregate statistics. * * @param prototypeStatistics a {@code SummaryStatistics} serving as a * prototype both for the internal aggregate statistics and for * contributing statistics obtained via the * {@code createContributingStatistics()} method. Being a prototype * means that other objects are initialized by copying this object's state. * If {@code null}, a new, default statistics object is used. Any statistic * values in the prototype are propagated to contributing statistics * objects, but not into these aggregate statistics. * @param initialStatistics a {@code SummaryStatistics} to serve as the * internal aggregate statistics object. If {@code null}, a new, default * statistics object is used. * @see #createContributingStatistics() */ public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics, SummaryStatistics initialStatistics) { this.statisticsPrototype = (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics; this.statistics = (initialStatistics == null) ? new SummaryStatistics() : initialStatistics; } /** * {@inheritDoc}. This version returns the maximum over all the aggregated * data. * * @see StatisticalSummary#getMax() */ @Override public double getMax() { synchronized (statistics) { return statistics.getMax(); } } /** * {@inheritDoc}. This version returns the mean of all the aggregated data. * * @see StatisticalSummary#getMean() */ @Override public double getMean() { synchronized (statistics) { return statistics.getMean(); } } /** * {@inheritDoc}. This version returns the minimum over all the aggregated * data. * * @see StatisticalSummary#getMin() */ @Override public double getMin() { synchronized (statistics) { return statistics.getMin(); } } /** * {@inheritDoc}. This version returns a count of all the aggregated data. * * @see StatisticalSummary#getN() */ @Override public long getN() { synchronized (statistics) { return statistics.getN(); } } /** * {@inheritDoc}. This version returns the standard deviation of all the * aggregated data. * * @see StatisticalSummary#getStandardDeviation() */ @Override public double getStandardDeviation() { synchronized (statistics) { return statistics.getStandardDeviation(); } } /** * {@inheritDoc}. This version returns a sum of all the aggregated data. * * @see StatisticalSummary#getSum() */ @Override public double getSum() { synchronized (statistics) { return statistics.getSum(); } } /** * {@inheritDoc}. This version returns the variance of all the aggregated * data. * * @see StatisticalSummary#getVariance() */ @Override public double getVariance() { synchronized (statistics) { return statistics.getVariance(); } } /** * Returns the sum of the logs of all the aggregated data. * * @return the sum of logs * @see SummaryStatistics#getSumOfLogs() */ public double getSumOfLogs() { synchronized (statistics) { return statistics.getSumOfLogs(); } } /** * Returns the geometric mean of all the aggregated data. * * @return the geometric mean * @see SummaryStatistics#getGeometricMean() */ public double getGeometricMean() { synchronized (statistics) { return statistics.getGeometricMean(); } } /** * Returns the sum of the squares of all the aggregated data. * * @return The sum of squares * @see SummaryStatistics#getSumsq() */ public double getSumsq() { synchronized (statistics) { return statistics.getSumsq(); } } /** * Returns a statistic related to the Second Central Moment. Specifically, * what is returned is the sum of squared deviations from the sample mean * among the all of the aggregated data. * * @return second central moment statistic * @see SummaryStatistics#getSecondMoment() */ public double getSecondMoment() { synchronized (statistics) { return statistics.getSecondMoment(); } } /** * Return a {@link StatisticalSummaryValues} instance reporting current * aggregate statistics. * * @return Current values of aggregate statistics */ public StatisticalSummary getSummary() { synchronized (statistics) { return new StatisticalSummaryValues(getMean(), getVariance(), getN(), getMax(), getMin(), getSum()); } } /** * Creates and returns a {@code SummaryStatistics} whose data will be * aggregated with those of this {@code AggregateSummaryStatistics}. * * @return a {@code SummaryStatistics} whose data will be aggregated with * those of this {@code AggregateSummaryStatistics}. The initial state * is a copy of the configured prototype statistics. */ public SummaryStatistics createContributingStatistics() { SummaryStatistics contributingStatistics = new AggregatingSummaryStatistics(statistics); // No try - catch or advertising NAE because neither argument will ever be null SummaryStatistics.copy(statisticsPrototype, contributingStatistics); return contributingStatistics; } /** * Computes aggregate summary statistics. This method can be used to combine statistics * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned * should contain the same values that would have been obtained by computing a single * StatisticalSummary over the combined dataset. *

* Returns null if the collection is empty or null. *

* * @param statistics collection of SummaryStatistics to aggregate * @return summary statistics for the combined dataset */ public static StatisticalSummaryValues aggregate(Collection statistics) { if (statistics == null) { return null; } Iterator iterator = statistics.iterator(); if (!iterator.hasNext()) { return null; } StatisticalSummary current = iterator.next(); long n = current.getN(); double min = current.getMin(); double sum = current.getSum(); double max = current.getMax(); double var = current.getVariance(); double m2 = var * (n - 1d); double mean = current.getMean(); while (iterator.hasNext()) { current = iterator.next(); if (current.getMin() < min || Double.isNaN(min)) { min = current.getMin(); } if (current.getMax() > max || Double.isNaN(max)) { max = current.getMax(); } sum += current.getSum(); final double oldN = n; final double curN = current.getN(); n += curN; final double meanDiff = current.getMean() - mean; mean = sum / n; final double curM2 = current.getVariance() * (curN - 1d); m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n; } final double variance; if (n == 0) { variance = Double.NaN; } else if (n == 1) { variance = 0d; } else { variance = m2 / (n - 1); } return new StatisticalSummaryValues(mean, variance, n, max, min, sum); } /** * A SummaryStatistics that also forwards all values added to it to a second * {@code SummaryStatistics} for aggregation. * * @since 2.0 */ private static class AggregatingSummaryStatistics extends SummaryStatistics { /** * The serialization version of this class */ private static final long serialVersionUID = 1L; /** * An additional SummaryStatistics into which values added to these * statistics (and possibly others) are aggregated */ private final SummaryStatistics aggregateStatistics; /** * Initializes a new AggregatingSummaryStatistics with the specified * aggregate statistics object * * @param aggregateStatistics a {@code SummaryStatistics} into which * values added to this statistics object should be aggregated */ AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) { this.aggregateStatistics = aggregateStatistics; } /** * {@inheritDoc}. This version adds the provided value to the configured * aggregate after adding it to these statistics. * * @see SummaryStatistics#addValue(double) */ @Override public void addValue(double value) { super.addValue(value); synchronized (aggregateStatistics) { aggregateStatistics.addValue(value); } } /** * Returns true iff object is a * SummaryStatistics instance and all statistics have the * same values as this. * @param object the object to test equality against. * @return true if object equals this */ @Override public boolean equals(Object object) { if (object == this) { return true; } if (object instanceof AggregatingSummaryStatistics == false) { return false; } AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object; return super.equals(stat) && aggregateStatistics.equals(stat.aggregateStatistics); } /** * Returns hash code based on values of statistics * @return hash code */ @Override public int hashCode() { return 123 + super.hashCode() + aggregateStatistics.hashCode(); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy