All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jaitools.numeric.ApproxMedianProcessor Maven / Gradle / Ivy

/* 
 *  Copyright (c) 2009-2010, Michael Bedward. All rights reserved. 
 *   
 *  Redistribution and use in source and binary forms, with or without modification, 
 *  are permitted provided that the following conditions are met: 
 *   
 *  - Redistributions of source code must retain the above copyright notice, this  
 *    list of conditions and the following disclaimer. 
 *   
 *  - Redistributions in binary form must reproduce the above copyright notice, this 
 *    list of conditions and the following disclaimer in the documentation and/or 
 *    other materials provided with the distribution.   
 *   
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
 *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
 *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
 *  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 
 *  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
 *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
 *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 *  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */   

package org.jaitools.numeric;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.jaitools.CollectionFactory;


/**
 * Processor for the approximate median {@code Statistic.APPROX_MEDIAN}.
 *
 * Calculation of an exact median is only possible by storing all sample
 * values. For very large data streams, this processor will calculate an
 * approximate median using the remedian estimator:
 * 
* PJ Rousseeuw and GW Bassett (1990) * The remedian: a robust averaging method for large data sets. * Journal of the American Statistical Society 85:97-104 *
*

* The remedian estimator performs badly with non-stationary data, e.g. a * data stream that is monotonically increasing will result in an over-estimate. * If possible (which it probably isn't), it will help to de-trend or randomly * order the data prior to streaming it. * * @see Statistic * @see StreamingSampleStats * * @author Michael Bedward * @since 1.0 * @version $Id$ */ public class ApproxMedianProcessor extends AbstractProcessor { private static final Set SUPPORTED = Collections.singleton(Statistic.APPROX_MEDIAN); // this must be an odd value private static final int BASE = 21; private static final int MEDIAN_POS = BASE / 2; private boolean needsCalculation = true; private double remedian; private static class Buffer { double[] data = new double[BASE]; int pos = 0; void add(double value) { data[pos++] = value; } boolean isFull() { return pos >= BASE; } } private List buffers; private Buffer buf0; private static class WeightedSample implements Comparable { double value; long weight; public int compareTo(WeightedSample other) { return Double.compare(value, other.value); } } /** * Default constructor. */ public ApproxMedianProcessor() { buffers = CollectionFactory.list(); buf0 = new Buffer(); buffers.add(buf0); } /** * {@inheritDoc} */ public Collection getSupported() { return SUPPORTED; } /** * {@inheritDoc} */ protected boolean update(Double sample) { if (isAccepted(sample)) { if (buf0.isFull()) { cascade(0); } buf0.add(sample); needsCalculation = true; return true; } return false; } /** * {@inheritDoc} */ public Double get(Statistic stat) { if (SUPPORTED.contains(stat)) { if (getNumAccepted() == 0) { return Double.NaN; } if (getNumAccepted() == 1) { return buf0.data[0]; } if (needsCalculation) { /* * Calculate the remedian as the weighted median of the buffer values * where the weight for each value in buffer i is BASE^i, i = 0..numBuffers-1 */ List samples = CollectionFactory.list(); long weight = 1; for (Buffer buf : buffers) { for (int i = 0; i < buf.pos; i++) { WeightedSample datum = new WeightedSample(); datum.value = buf.data[i]; datum.weight = weight; samples.add(datum); } weight = weight * BASE; } Collections.sort(samples); long nHalf = getNumAccepted() / 2; long n = 0; Iterator iter = samples.iterator(); WeightedSample datum = null; while (n < nHalf) { datum = iter.next(); n += datum.weight; } remedian = datum.value; needsCalculation = false; } return remedian; } throw new IllegalArgumentException(stat + " not supported by " + getClass().getName()); } /* * Calculate the median of the values in the full buffer at * the given level and store the result in the next * available position of the buffer at level+1, creating this * next buffer if necessary. If the next buffer is also full * it is cascaded with a recursive call. */ private void cascade(int level) { Buffer buf = buffers.get(level); Arrays.sort(buf.data); double median = buf.data[MEDIAN_POS]; Buffer nextBuf; if (level + 1 < buffers.size()) { nextBuf = buffers.get(level + 1); } else { nextBuf = new Buffer(); buffers.add(nextBuf); } if (nextBuf.isFull()) { cascade(level + 1); } buf.pos = 0; nextBuf.add(median); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy