All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jasima.core.statistics.SamplingSummaryStat Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2010-2015 Torsten Hildebrandt and jasima contributors
 *
 * This file is part of jasima, v1.2.
 *
 * jasima is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * jasima is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with jasima.  If not, see .
 *******************************************************************************/
package jasima.core.statistics;

import java.util.Arrays;
import java.util.Comparator;
import java.util.PriorityQueue;
import java.util.Random;

/**
 * This class provides efficient sampling of up to a certain maximum number of
 * values. A SummaryStat-object provides an efficient means to collect basic
 * summary statistics like mean, min, max, standard deviation. To do so it does
 * not have to store any observations. Obtaining information like median, or
 * percentile requires storing values, however. To limit the amount of data
 * which has to be stored, sampling can be used and the aforementioned
 * statistics estimated based on this sample.
 * 

* This class can sample and store up to a certain maximum number of values. * Each value passed to a call of {@link #value(double)} has the same * probability of ending up in the final sample accessible by {@link #getData()}. *

* Which values are selected is determined by a random number generator (see * {@link #setRnd(Random)}). If no such random number generator is set, * {@link Math#random()} is used. * * @author Torsten Hildebrandt , 2012-07-06 * @version "$Id: SamplingSummaryStat.java 550 2015-01-23 15:07:23Z [email protected] $" */ public class SamplingSummaryStat extends SummaryStat { private static final int DEF_NUM_SAMPLES = 100; private static final long serialVersionUID = 868112468971365852L; private int numSamples; private Random rnd; private PriorityQueue data; private double threshold; public SamplingSummaryStat() { this(null, DEF_NUM_SAMPLES, null); } public SamplingSummaryStat(String name) { this(name, DEF_NUM_SAMPLES, null); } public SamplingSummaryStat(int numSamples) { this(null, numSamples, null); } public SamplingSummaryStat(Random rnd) { this(null, DEF_NUM_SAMPLES, rnd); } public SamplingSummaryStat(int numSamples, Random rnd) { this(null, numSamples, rnd); } public SamplingSummaryStat(String name, int numSamples, Random rnd) { super(name); setNumSamples(numSamples); setRnd(rnd); } @Override public void value(double v, double weight) { throw new UnsupportedOperationException("Can't handle weights."); } @Override public void value(double v) { super.value(v, 1.0d); if (data == null) init(); double r = getRnd() == null ? Math.random() : getRnd().nextDouble(); if (r < threshold || data.size() < getNumSamples()) { if (data.size() == getNumSamples()) data.poll(); data.offer(new DatEntry(r, numObs(), v)); threshold = data.peek().rand; } } public double[] getData() { // bring data in insertion order DatEntry[] tmp = data.toArray(new DatEntry[data.size()]); Arrays.sort(tmp, new Comparator() { @Override public int compare(DatEntry o1, DatEntry o2) { if (o1.num < o2.num) return -1; else if (o1.num == o2.num) return 0; else return +1; } }); // copy values to result array double[] res = new double[tmp.length]; for (int i = 0; i < tmp.length; i++) { res[i] = tmp[i].data; } return res; } public void init() { data = new PriorityQueue(getNumSamples()); threshold = 1.0d; } public void setRnd(Random rnd) { this.rnd = rnd; } public Random getRnd() { return rnd; } public void setNumSamples(int numSamples) { if (numSamples <= 0) throw new IllegalArgumentException("numSamples must be positive. " + numSamples); this.numSamples = numSamples; } public int getNumSamples() { return numSamples; } /** * Each DatEntry stores a single decision situation plus a random value. */ protected static class DatEntry implements Comparable { public final double rand; public final int num; public final double data; public DatEntry(double r, int num, double rs) { super(); this.rand = r; this.num = num; this.data = rs; } @Override public int compareTo(DatEntry o) { return Double.compare(o.rand, rand); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy