All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jaitools.numeric.StreamingSampleStats Maven / Gradle / Ivy

Go to download

Provides a single jar containing all JAITools modules which you can use instead of including individual modules in your project. Note: It does not include the Jiffle scripting language or Jiffle image operator.

There is a newer version: 1.4.0
Show newest version
/* 
 *  Copyright (c) 2009-2011, Michael Bedward. All rights reserved. 
 *   
 *  Redistribution and use in source and binary forms, with or without modification, 
 *  are permitted provided that the following conditions are met: 
 *   
 *  - Redistributions of source code must retain the above copyright notice, this  
 *    list of conditions and the following disclaimer. 
 *   
 *  - Redistributions in binary form must reproduce the above copyright notice, this 
 *    list of conditions and the following disclaimer in the documentation and/or 
 *    other materials provided with the distribution.   
 *   
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
 *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
 *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
 *  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 
 *  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
 *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
 *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 *  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */   
package org.jaitools.numeric;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.jaitools.CollectionFactory;


/**
 * A class to calculate summary statistics for a sample of Double-valued
 * buffers that is received as a (potentially long) stream of values rather
 * than in a single batch. Any Double.NaN values in the stream will be
 * ignored.
 * 

* Two options are offered to calculate sample median. Where it is known a priori * that the data stream can be accomodated in memory, the exact median can be * requested with Statistic.MEDIAN. Where the length of the data stream is unknown, * or known to be too large to be held in memory, an approximate median can be * calculated using the 'remedian' estimator as described in: *

* PJ Rousseeuw and GW Bassett (1990) * The remedian: a robust averaging method for large data sets. * Journal of the American Statistical Society 85:97-104 *
* This is requested with Statistic.APPROX_MEDIAN. *

* Note: the 'remedian' estimator performs badly with non-stationary data, e.g. a * data stream that is monotonically increasing will result in an estimate for the * median that is too high. If possible, it is best to de-trend or randomly order * the data prior to streaming it. *

* Example of use: *


 * StreamingSampleStats strmStats = new StreamingSampleStats();
 *
 * // set the statistics that will be calculated
 * Statistic[] stats = {
 *     Statistic.MEAN,
 *     Statistic.SDEV,
 *     Statistic.RANGE,
 *     Statistic.APPROX_MEDIAN
 * };
 * strmStats.setStatistics(stats);
 *
 * // some process that generates a long stream of data
 * while (somethingBigIsRunning) {
 *     double value = ...
 *     strmStats.offer(value);
 * }
 *
 * // report the results
 * for (Statistic s : stats) {
 *     System.out.println(String.format("%s: %.4f", s, strmStats.getStatisticValue(s)));
 * }
 *
 * 
* * @author Michael Bedward * @author Daniele Romagnoli, GeoSolutions S.A.S. * @since 1.0 * @version $Id$ */ public class StreamingSampleStats { private static final Logger LOGGER = Logger.getLogger("org.jaitools.numeric"); private ProcessorFactory factory = new ProcessorFactory(); private List processors; private List> ranges; private List> noDataRanges; private final Range.Type rangesType; /** * Creates a new sampler and sets the default range type to * {@link Range.Type#EXCLUDE}. */ public StreamingSampleStats() { this(Range.Type.EXCLUDE); } /** * Creates a new sampler with specified use of {@code Ranges}. * * @param rangesType either {@link Range.Type#INCLUDE} * or {@link Range.Type#EXCLUDE} */ public StreamingSampleStats(Range.Type rangesType) { processors = CollectionFactory.list(); ranges = CollectionFactory.list(); noDataRanges = CollectionFactory.list(); this.rangesType = rangesType; } /** * Adds a statistic to those calculated by this sampler. * If the same statistic was previously set then calling this method * has no effect. * * @param stat the statistic * @see Statistic */ public void setStatistic(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { p = factory.getForStatistic(stat); if (p == null) { LOGGER.log(Level.SEVERE, "Unsupported Statistic: {0}", stat); } else { processors.add(p); // apply cached excluded ranges to the new processor for (Range range : ranges) { p.addRange(range, rangesType); } for (Range nRange : noDataRanges) { p.addNoDataRange(nRange); } } } } /** * Adds the given statistics to those that will be calculated by this sampler. * * @param stats the statistics * * @see #setStatistic(Statistic) */ public void setStatistics(Statistic[] stats) { for (Statistic stat : stats) { setStatistic(stat); } } /** * Tests whether the specified statistic is currently set. Note that * statistics can be set indirectly because of logical groupings. For * example, if {@code Statistic.MEAN} is set then {@code SDEV} and * {@code VARIANCE} will also be set as these three are calculated * together. The same is true for {@code MIN}, {@code MAX} and {@code RANGE}. * * @param stat the statistic * * @return {@code true} if the statistic has been set; {@code false} otherwise. */ public boolean isSet(Statistic stat) { return findProcessor(stat) != null; } /** * Adds a range of values to be considered as NoData and then to be excluded * from the calculation of all statistics. NoData ranges take precedence * over included / excluded data ranges. * * @param noData the range defining NoData values */ public void addNoDataRange(Range noData) { noDataRanges.add(new Range(noData)); for (Processor p : processors) { p.addNoDataRange(noData); } } /** * Adds a single value to be considered as NoData. * * @param noData the value to be treated as NoData * * @see #addNoDataRange(Range) */ public void addNoDataValue(Double noData) { if (noData != null && !noData.isNaN()) { addNoDataRange(new Range(noData)); } } /** * Adds a range of values to include in or exclude from the calculation * of all statistics. If further statistics are set after calling * this method the range will be applied to them as well. * * @param range the range to include/exclude */ public void addRange(Range range) { ranges.add(new Range(range)); for (Processor p : processors) { p.addRange(range); } } /** * Adds a range of values to include in or exclude from the calculation * of all statistics. If further statistics are set after calling * this method the range will be applied to them as well. * * @param range the range to include/exclude * @param rangesType one of {@link Range.Type#INCLUDE} or {@link Range.Type#EXCLUDE} */ public void addRange(Range range, Range.Type rangesType) { for (Processor p : processors) { p.addRange(range, rangesType); } ranges.add(new Range(range)); } /** * Gets the statistics that are currently set. * * @return the statistics */ public Set getStatistics() { Set stats = CollectionFactory.orderedSet(); for (Processor p : processors) { for (Statistic s : p.getSupported()) { stats.add(s); } } return stats; } /** * Gets the current value of a running statistic. If there have not * been enough samples provided to compute the statistic, Double.NaN * is returned. * * @param stat the statistic * @return the current value of the statistic * * @throws IllegalStateException if {@code stat} was not previously set */ public Double getStatisticValue(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalStateException( "requesting a result for a statistic that hasn't been set: " + stat); } return p.get(stat); } /** * Gets the number of sample values that have been accepted for the * specified {@code Statistic}. *

* Note that different statistics might have been set at different * times in the sampling process. * * @param stat the statistic * * @return number of samples that have been accepted * * @throws IllegalArgumentException if the statistic hasn't been set */ public long getNumAccepted(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalArgumentException( "requesting sample size for a statistic that is not set: " + stat); } return p.getNumAccepted(); } /** * Gets the number of sample values that have been offered for the * specified {@code Statistic}. This might be higher than the value * returned by {@link #getNumAccepted} due to {@code nulls}, * {@code Double.NaNs} and excluded values in the data stream. *

* Note that different statistics might have been set at different * times in the sampling process. * * @param stat the statistic * * @return number of samples that have been accepted * * @throws IllegalArgumentException if the statistic hasn't been set */ public long getNumOffered(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalArgumentException( "requesting sample size for a statistic that is not set: " + stat); } return p.getNumOffered(); } /** * Gets the number of NaN values that have been offered. * Note that different statistics might have been set at different * times in the sampling process. * * @param stat the statistic * * @return number of NaN samples offered * * @throws IllegalArgumentException if the statistic hasn't been set */ public long getNumNaN(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalArgumentException( "requesting sample size for a statistic that is not set: " + stat); } return p.getNumNaN(); } /** * Gets the number of NoData values (including NaN) that have been offered. * Note that different statistics might have been set at different * times in the sampling process. * * @param stat the statistic * * @return number of NoData samples offered * * @throws IllegalArgumentException if the statistic hasn't been set */ public long getNumNoData(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalArgumentException( "requesting sample size for a statistic that is not set: " + stat); } return p.getNumNoData(); } /** * Offers a sample value. Offered values are filtered through excluded ranges. * {@code Double.NaNs} and {@code nulls} are excluded by default. * * @param sample the sample value */ public void offer(Double sample) { for (Processor p : processors) { p.offer(sample); } } /** * Offers an array of sample values. * * @param samples the sample values */ public void offer(Double[] samples) { for (int i = 0; i < samples.length; i++) { offer(samples[i]); } } /** * Searches the list of {@code Processors} for one that supports * the given {@code Statistic}. * * @param stat the statistic * * @return the supporting {@code Processor} or null if one has not * been set for the statistic */ private Processor findProcessor(Statistic stat) { for (Processor p : processors) { if (p.getSupported().contains(stat)) { return p; } } return null; } /** * Gets the values of all statistics calculated by this sampler. * * @return calculated values */ public Map getStatisticValues() { Map results = CollectionFactory.orderedMap(); for (Statistic s : getStatistics()) { results.put(s, getStatisticValue(s)); } return results; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy