All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jaitools.numeric.StreamingSampleStats Maven / Gradle / Ivy

Go to download

Provides a single jar containing all JAI-tools modules which you can use instead of including individual modules in your project. Note: It does not include the Jiffle scripting language or Jiffle image operator.

The newest version!
/*
 * Copyright 2009-2011 Michael Bedward
 *
 * This file is part of jai-tools.
 *
 * jai-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * jai-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with jai-tools.  If not, see .
 *
 */
package jaitools.numeric;

import jaitools.CollectionFactory;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * A class to calculate summary statistics for a sample of Double-valued
 * buffers that is received as a (potentially long) stream of values rather
 * than in a single batch. Any Double.NaN values in the stream will be
 * ignored.
 * 

* Two options are offered to calculate sample median. Where it is known a priori * that the data stream can be accomodated in memory, the exact median can be * requested with Statistic.MEDIAN. Where the length of the data stream is unknown, * or known to be too large to be held in memory, an approximate median can be * calculated using the 'remedian' estimator as described in: *

* PJ Rousseeuw and GW Bassett (1990) * The remedian: a robust averaging method for large data sets. * Journal of the American Statistical Society 85:97-104 *
* This is requested with Statistic.APPROX_MEDIAN. *

* Note: the 'remedian' estimator performs badly with non-stationary data, e.g. a * data stream that is monotonically increasing will result in an estimate for the * median that is too high. If possible, it is best to de-trend or randomly order * the data prior to streaming it. *

* Example of use: *


 * StreamingSampleStats strmStats = new StreamingSampleStats();
 *
 * // set the statistics that will be calculated
 * Statistic[] stats = {
 *     Statistic.MEAN,
 *     Statistic.SDEV,
 *     Statistic.RANGE,
 *     Statistic.APPROX_MEDIAN
 * };
 * strmStats.setStatistics(stats);
 *
 * // some process that generates a long stream of data
 * while (somethingBigIsRunning) {
 *     double value = ...
 *     strmStats.offer(value);
 * }
 *
 * // report the results
 * for (Statistic s : stats) {
 *     System.out.println(String.format("%s: %.4f", s, strmStats.getStatisticValue(s)));
 * }
 *
 * 
* * @author Michael Bedward * @author Daniele Romagnoli, GeoSolutions S.A.S. * @since 1.0 * @version $Id: StreamingSampleStats.java 1504 2011-03-05 10:56:10Z michael.bedward $ */ public class StreamingSampleStats { private static final Logger LOGGER = Logger.getLogger("jaitools.numeric"); private ProcessorFactory factory = new ProcessorFactory(); private List processors; private List> ranges; private List> noDataRanges; private final Range.Type rangesType; /** * Creates a new sampler and sets the default range type to * {@link Range.Type#EXCLUDE}. */ public StreamingSampleStats() { this(Range.Type.EXCLUDE); } /** * Creates a new sampler with specified use of {@code Ranges}. * * @param rangesType either {@link Range.Type#INCLUDE} * or {@link Range.Type#EXCLUDE} */ public StreamingSampleStats(Range.Type rangesType) { processors = CollectionFactory.list(); ranges = CollectionFactory.list(); noDataRanges = CollectionFactory.list(); this.rangesType = rangesType; } /** * Adds a statistic to those calculated by this sampler. * If the same statistic was previously set then calling this method * has no effect. * * @param stat the statistic * @see Statistic */ public void setStatistic(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { p = factory.getForStatistic(stat); if (p == null) { LOGGER.log(Level.SEVERE, "Unsupported Statistic: {0}", stat); } else { processors.add(p); // apply cached excluded ranges to the new processor for (Range range : ranges) { p.addRange(range, rangesType); } for (Range nRange : noDataRanges) { p.addNoDataRange(nRange); } } } } /** * Adds the given statistics to those that will be calculated by this sampler. * * @param stats the statistics * * @see #setStatistic(Statistic) */ public void setStatistics(Statistic[] stats) { for (Statistic stat : stats) { setStatistic(stat); } } /** * Tests whether the specified statistic is currently set. Note that * statistics can be set indirectly because of logical groupings. For * example, if {@code Statistic.MEAN} is set then {@code SDEV} and * {@code VARIANCE} will also be set as these three are calculated * together. The same is true for {@code MIN}, {@code MAX} and {@code RANGE}. * * @param stat the statistic * * @return {@code true} if the statistic has been set; {@code false} otherwise. */ public boolean isSet(Statistic stat) { return findProcessor(stat) != null; } /** * Adds a range of values to be considered as NoData and then to be excluded * from the calculation of all statistics. NoData ranges take precedence * over included / excluded data ranges. * * @param noData the range defining NoData values */ public void addNoDataRange(Range noData) { noDataRanges.add(new Range(noData)); for (Processor p : processors) { p.addNoDataRange(noData); } } /** * Adds a single value to be considered as NoData. * * @param noData the value to be treated as NoData * * @see #addNoDataRange(jaitools.numeric.Range) */ public void addNoDataValue(Double noData) { if (noData != null && !noData.isNaN()) { addNoDataRange(new Range(noData)); } } /** * Adds a range of values to include in or exclude from the calculation * of all statistics. If further statistics are set after calling * this method the range will be applied to them as well. * * @param range the range to include/exclude */ public void addRange(Range range) { ranges.add(new Range(range)); for (Processor p : processors) { p.addRange(range); } } /** * Adds a range of values to include in or exclude from the calculation * of all statistics. If further statistics are set after calling * this method the range will be applied to them as well. * * @param range the range to include/exclude * @param rangesType one of {@link Range.Type#INCLUDE} or {@link Range.Type#EXCLUDE} */ public void addRange(Range range, Range.Type rangesType) { for (Processor p : processors) { p.addRange(range, rangesType); } ranges.add(new Range(range)); } /** * Gets the statistics that are currently set. * * @return the statistics */ public Set getStatistics() { Set stats = CollectionFactory.orderedSet(); for (Processor p : processors) { for (Statistic s : p.getSupported()) { stats.add(s); } } return stats; } /** * Gets the current value of a running statistic. If there have not * been enough samples provided to compute the statistic, Double.NaN * is returned. * * @param stat the statistic * @return the current value of the statistic * * @throws IllegalStateException if {@code stat} was not previously set */ public Double getStatisticValue(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalStateException( "requesting a result for a statistic that hasn't been set: " + stat); } return p.get(stat); } /** * Gets the number of sample values that have been accepted for the * specified {@code Statistic}. *

* Note that different statistics might have been set at different * times in the sampling process. * * @param stat the statistic * * @return number of samples that have been accepted * * @throws IllegalArgumentException if the statistic hasn't been set */ public long getNumAccepted(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalArgumentException( "requesting sample size for a statistic that is not set: " + stat); } return p.getNumAccepted(); } /** * Gets the number of sample values that have been offered for the * specified {@code Statistic}. This might be higher than the value * returned by {@link #getNumAccepted} due to {@code nulls}, * {@code Double.NaNs} and excluded values in the data stream. *

* Note that different statistics might have been set at different * times in the sampling process. * * @param stat the statistic * * @return number of samples that have been accepted * * @throws IllegalArgumentException if the statistic hasn't been set */ public long getNumOffered(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalArgumentException( "requesting sample size for a statistic that is not set: " + stat); } return p.getNumOffered(); } /** * Gets the number of NaN values that have been offered. * Note that different statistics might have been set at different * times in the sampling process. * * @param stat the statistic * * @return number of NaN samples offered * * @throws IllegalArgumentException if the statistic hasn't been set */ public long getNumNaN(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalArgumentException( "requesting sample size for a statistic that is not set: " + stat); } return p.getNumNaN(); } /** * Gets the number of NoData values (including NaN) that have been offered. * Note that different statistics might have been set at different * times in the sampling process. * * @param stat the statistic * * @return number of NoData samples offered * * @throws IllegalArgumentException if the statistic hasn't been set */ public long getNumNoData(Statistic stat) { Processor p = findProcessor(stat); if (p == null) { throw new IllegalArgumentException( "requesting sample size for a statistic that is not set: " + stat); } return p.getNumNoData(); } /** * Offers a sample value. Offered values are filtered through excluded ranges. * {@code Double.NaNs} and {@code nulls} are excluded by default. * * @param sample the sample value */ public void offer(Double sample) { for (Processor p : processors) { p.offer(sample); } } /** * Offers an array of sample values. * * @param samples the sample values */ public void offer(Double[] samples) { for (int i = 0; i < samples.length; i++) { offer(samples[i]); } } /** * Searches the list of {@code Processors} for one that supports * the given {@code Statistic}. * * @param stat the statistic * * @return the supporting {@code Processor} or null if one has not * been set for the statistic */ private Processor findProcessor(Statistic stat) { for (Processor p : processors) { if (p.getSupported().contains(stat)) { return p; } } return null; } /** * Gets the values of all statistics calculated by this sampler. * * @return calculated values */ public Map getStatisticValues() { Map results = CollectionFactory.orderedMap(); for (Statistic s : getStatistics()) { results.put(s, getStatisticValue(s)); } return results; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy