All Downloads are FREE. Search and download functionalities are using the official Maven repository.

hep.aida.tdouble.bin.DynamicDoubleBin1D Maven / Gradle / Ivy

Go to download

Parallel Colt is a multithreaded version of Colt - a library for high performance scientific computing in Java. It contains efficient algorithms for data analysis, linear algebra, multi-dimensional arrays, Fourier transforms, statistics and histogramming.

The newest version!
package hep.aida.tdouble.bin;

import cern.colt.list.tdouble.DoubleArrayList;
import cern.colt.list.tint.IntArrayList;
import cern.jet.random.tdouble.AbstractDoubleDistribution;
import cern.jet.random.tdouble.engine.DoubleRandomEngine;
import cern.jet.stat.tdouble.DoubleDescriptive;

/**
 * 1-dimensional rebinnable bin holding double elements; Efficiently
 * computes advanced statistics of data sequences. Technically speaking, a
 * multiset (or bag) with efficient statistics operations defined upon. First
 * see the package summary and javadoc tree view to get the broad picture.
 * 

* The data filled into a DynamicBin1D is internally preserved in the * bin. As a consequence this bin can compute more than only basic statistics. * On the other hand side, if you add huge amounts of elements, you may run out * of memory (each element takes 8 bytes). If this drawbacks matter, consider to * use {@link StaticDoubleBin1D}, which overcomes them at the expense of limited * functionality. *

* This class is fully thread safe (all public methods are synchronized). Thus, * you can have one or more threads adding to the bin as well as one or more * threads reading and viewing the statistics of the bin while it is * filled. For high performance, add data in large chunks (buffers) via * method addAllOf rather than piecewise via method add. *

* If your favourite statistics measure is not directly provided by this class, * check out {@link cern.jet.stat.tdouble.DoubleDescriptive} in combination with * methods {@link #elements()} and {@link #sortedElements()}. *

* Implementation: Lazy evaluation, caching, incremental maintainance. * * @see cern.jet.stat.tdouble.DoubleDescriptive * @author [email protected] * @version 0.9, 03-Jul-99 */ public class DynamicDoubleBin1D extends QuantileDoubleBin1D { // Never ever use "this.size" as it would be intuitive! // This class abuses "this.size". "this.size" DOES NOT REFLECT the number of // elements contained in the receiver! // Instead, "this.size" reflects the number of elements incremental stats // computation has already processed. /** * */ private static final long serialVersionUID = 1L; /** * The elements contained in this bin. */ protected DoubleArrayList elements = null; /** * The elements contained in this bin, sorted ascending. */ protected DoubleArrayList sortedElements = null; /** * Preserve element order under all circumstances? */ protected boolean fixedOrder = false; // cached parameters // protected double skew = 0.0; // protected double kurtosis = 0.0; // cache states protected boolean isSorted = true; protected boolean isIncrementalStatValid = true; // protected boolean isSkewValid = true; // protected boolean isKurtosisValid = true; protected boolean isSumOfInversionsValid = true; protected boolean isSumOfLogarithmsValid = true; // protected boolean isSumOfPowersValid = true; /** * Constructs and returns an empty bin; implicitly calls * {@link #setFixedOrder(boolean) setFixedOrder(false)}. */ public DynamicDoubleBin1D() { super(); this.clear(); this.elements = new DoubleArrayList(); this.sortedElements = new DoubleArrayList(0); this.fixedOrder = false; this.hasSumOfLogarithms = true; this.hasSumOfInversions = true; } /** * Adds the specified element to the receiver. * * @param element * element to be appended. */ public synchronized void add(double element) { elements.add(element); invalidateAll(); } /** * Adds the part of the specified list between indexes from * (inclusive) and to (inclusive) to the receiver. * * @param list * the list of which elements shall be added. * @param from * the index of the first element to be added (inclusive). * @param to * the index of the last element to be added (inclusive). * @throws IndexOutOfBoundsException * if * list.size()>0 && (from<0 || from>to || to>=list.size()) * . */ public synchronized void addAllOfFromTo(DoubleArrayList list, int from, int to) { this.elements.addAllOfFromTo(list, from, to); this.invalidateAll(); } /** * Applies a function to each element and aggregates the results. Returns a * value v such that v==a(size()) where * a(i) == aggr( a(i-1), f(x(i)) ) and terminators are * a(1) == f(x(0)), a(0)==Double.NaN. *

* Example: * *

     * 	 cern.jet.math.Functions F = cern.jet.math.Functions.functions;
     * 	 bin = 0 1 2 3 
     * 
     * 	 // Sum( x[i]*x[i] ) 
     * 	 bin.aggregate(F.plus,F.square);
     * 	 --> 14
     * 
     * 
* * For further examples, see the package doc. * * @param aggr * an aggregation function taking as first argument the current * aggregation and as second argument the transformed current * element. * @param f * a function transforming the current element. * @return the aggregated measure. * @see cern.jet.math.tdouble.DoubleFunctions */ public synchronized double aggregate(cern.colt.function.tdouble.DoubleDoubleFunction aggr, cern.colt.function.tdouble.DoubleFunction f) { int s = size(); if (s == 0) return Double.NaN; double a = f.apply(elements.getQuick(s - 1)); for (int i = s - 1; --i >= 0;) { a = aggr.apply(a, f.apply(elements.getQuick(i))); } return a; } /** * Removes all elements from the receiver. The receiver will be empty after * this call returns. */ public synchronized void clear() { super.clear(); if (this.elements != null) this.elements.clear(); if (this.sortedElements != null) this.sortedElements.clear(); this.validateAll(); } /** * Resets the values of all measures. */ protected void clearAllMeasures() { super.clearAllMeasures(); // this.skew = 0.0; // this.kurtosis = 0.0; } /** * Returns a deep copy of the receiver. * * @return a deep copy of the receiver. */ public synchronized Object clone() { DynamicDoubleBin1D clone = (DynamicDoubleBin1D) super.clone(); if (this.elements != null) clone.elements = clone.elements.copy(); if (this.sortedElements != null) clone.sortedElements = clone.sortedElements.copy(); return clone; } /** * Returns the correlation of two bins, which is * corr(x,y) = covariance(x,y) / (stdDev(x)*stdDev(y)) (Pearson's * correlation coefficient). A correlation coefficient varies between -1 * (for a perfect negative relationship) to +1 (for a perfect positive * relationship). See the * math definition and another def. * * @param other * the bin to compare with. * @return the correlation. * @throws IllegalArgumentException * if size() != other.size(). */ public synchronized double correlation(DynamicDoubleBin1D other) { synchronized (other) { return covariance(other) / (standardDeviation() * other.standardDeviation()); } } /** * Returns the covariance of two bins, which is * cov(x,y) = (1/size()) * Sum((x[i]-mean(x)) * (y[i]-mean(y))). * See the * math definition. * * @param other * the bin to compare with. * @return the covariance. * @throws IllegalArgumentException * if size() != other.size(). */ public synchronized double covariance(DynamicDoubleBin1D other) { synchronized (other) { if (size() != other.size()) throw new IllegalArgumentException("both bins must have same size"); double s = 0; for (int i = size(); --i >= 0;) { s += this.elements.getQuick(i) * other.elements.getQuick(i); } double cov = (s - sum() * other.sum() / size()) / size(); return cov; } } /** * Returns a copy of the currently stored elements. Concerning the order in * which elements are returned, see {@link #setFixedOrder(boolean)}. * * @return a copy of the currently stored elements. */ public synchronized DoubleArrayList elements() { // safe since we are already synchronized. return elements_unsafe().copy(); } /** * Returns the currently stored elements; WARNING: not a copy of * them. Thus, improper usage of the returned list may not only corrupt the * receiver's internal state, but also break thread safety! Only provided * for performance and memory sensitive applications. Do not modify the * returned list unless you know exactly what you're doing. This method can * be used in a thread safe, clean and performant way by explicitly * synchronizing on the bin, as follows: * *
     * ...
     * double sinSum = 0;
     * synchronized (dynamicBin) { // lock out anybody else
     *     DoubleArrayList elements = dynamicBin.elements_unsafe();
     *     // read each element and do something with it, for example
     * 	   double[] values = elements.elements(); // zero-copy
     * 	   for (int i=dynamicBin.size(); --i >=0; ) {
     *         sinSum += Math.sin(values[i]);
     * 	   }
     * }
     * System.out.println(sinSum);
     * ...
     * 
* * Concerning the order in which elements are returned, see * {@link #setFixedOrder(boolean)}. * * @return the currently stored elements. */ protected synchronized DoubleArrayList elements_unsafe() { return this.elements; } /** * Returns whether two bins are equal. They are equal if the other object is * of the same class or a subclass of this class and both have the same * size, minimum, maximum, sum and sumOfSquares and have the same elements, * order being irrelevant (multiset equality). *

* Definition of Equality for multisets: A,B are equal <=> A is a * superset of B and B is a superset of A. (Elements must occur the same * number of times, order is irrelevant.) */ public synchronized boolean equals(Object object) { if (!(object instanceof DynamicDoubleBin1D)) return false; if (!super.equals(object)) return false; DynamicDoubleBin1D other = (DynamicDoubleBin1D) object; double[] s1 = sortedElements_unsafe().elements(); synchronized (other) { double[] s2 = other.sortedElements_unsafe().elements(); int n = size(); return includes(s1, s2, 0, n, 0, n) && includes(s2, s1, 0, n, 0, n); } } private static boolean includes(double[] array1, double[] array2, int first1, int last1, int first2, int last2) { while (first1 < last1 && first2 < last2) { if (array2[first2] < array1[first1]) return false; else if (array1[first1] < array2[first2]) ++first1; else { ++first1; ++first2; } } return first2 == last2; } /** * Computes the frequency (number of occurances, count) of each distinct * element. After this call returns both distinctElements and * frequencies have a new size (which is equal for both), which is * the number of distinct elements currently contained. *

* Distinct elements are filled into distinctElements, starting at * index 0. The frequency of each distinct element is filled into * frequencies, starting at index 0. Further, both * distinctElements and frequencies are sorted ascending * by "element" (in sync, of course). As a result, the smallest distinct * element (and its frequency) can be found at index 0, the second smallest * distinct element (and its frequency) at index 1, ..., the largest * distinct element (and its frequency) at index * distinctElements.size()-1. *

* Example:
* elements = (8,7,6,6,7) --> distinctElements = (6,7,8), frequencies = (2,2,1) * * @param distinctElements * a list to be filled with the distinct elements; can have any * size. * @param frequencies * a list to be filled with the frequencies; can have any size; * set this parameter to null to ignore it. */ public synchronized void frequencies(DoubleArrayList distinctElements, IntArrayList frequencies) { DoubleDescriptive.frequencies(sortedElements_unsafe(), distinctElements, frequencies); } /** * Returns a map holding the frequency distribution, that is, * (distintElement,frequency) pairs. The frequency (count) of an element is * its number of occurances. *

* Example:
* elements = (8,7,6,6,7) --> map.keys = (8,6,7), map.values = (1,2,2) * * @return a map holding the frequency distribution. */ private synchronized cern.colt.map.tdouble.AbstractDoubleIntMap frequencyMap() { // cern.colt.map.OpenDoubleIntHashMap.hashCollisions = 0; // fill a map that collects frequencies cern.colt.map.tdouble.AbstractDoubleIntMap map = new cern.colt.map.tdouble.OpenDoubleIntHashMap(); // cern.colt.Timer timer = new cern.colt.Timer().start(); for (int i = size(); --i >= 0;) { double element = this.elements.getQuick(i); // double element = i; // benchmark only TODO // double element = i%1000; // benchmark only TODO map.put(element, 1 + map.get(element)); } // timer.stop(); // System.out.println("filling map took = "+timer); // System.out.println("collisions="+cern.colt.map.OpenDoubleIntHashMap.hashCollisions); return map; } /** * Returns Integer.MAX_VALUE, the maximum order k for * which sums of powers are retrievable. * * @see #hasSumOfPowers(int) * @see #sumOfPowers(int) */ public int getMaxOrderForSumOfPowers() { return Integer.MAX_VALUE; } /** * Returns Integer.MIN_VALUE, the minimum order k for * which sums of powers are retrievable. * * @see #hasSumOfPowers(int) * @see #sumOfPowers(int) */ public int getMinOrderForSumOfPowers() { return Integer.MIN_VALUE; } /** * * * @param element * element to be appended. */ protected void invalidateAll() { this.isSorted = false; this.isIncrementalStatValid = false; // this.isSkewValid = false; // this.isKurtosisValid = false; this.isSumOfInversionsValid = false; this.isSumOfLogarithmsValid = false; } /** * Returns true. Returns whether a client can obtain all elements * added to the receiver. In other words, tells whether the receiver * internally preserves all added elements. If the receiver is rebinnable, * the elements can be obtained via elements() methods. * */ public synchronized boolean isRebinnable() { return true; } /** * Returns the maximum. */ public synchronized double max() { if (!isIncrementalStatValid) updateIncrementalStats(); return this.max; } /** * Returns the minimum. */ public synchronized double min() { if (!isIncrementalStatValid) updateIncrementalStats(); return this.min; } /** * Returns the moment of k-th order with value c, which is * Sum( (x[i]-c)k ) / size(). * * @param k * the order; any number - can be less than zero, zero or greater * than zero. * @param c * any number. */ public synchronized double moment(int k, double c) { // currently no caching for this parameter return DoubleDescriptive.moment(this.elements, k, c); } /** * Returns the exact phi-quantile; that is, the smallest contained * element elem for which holds that phi percent of * elements are less than elem. * * @param phi * must satisfy 0 < phi < 1. */ public synchronized double quantile(double phi) { return DoubleDescriptive.quantile(sortedElements_unsafe(), phi); } /** * Returns exactly how many percent of the elements contained in the * receiver are <= element. Does linear interpolation if the * element is not contained but lies in between two contained elements. * * @param element * the element to search for. * @return the exact percentage phi of elements * <= element (0.0 <= phi <= 1.0). */ public synchronized double quantileInverse(double element) { return DoubleDescriptive.quantileInverse(sortedElements_unsafe(), element); } /** * Returns the exact quantiles of the specified percentages. * * @param percentages * the percentages for which quantiles are to be computed. Each * percentage must be in the interval (0.0,1.0]. * percentages must be sorted ascending. * @return the exact quantiles. */ public DoubleArrayList quantiles(DoubleArrayList percentages) { return DoubleDescriptive.quantiles(sortedElements_unsafe(), percentages); } /** * Removes from the receiver all elements that are contained in the * specified list. * * @param list * the elements to be removed. * @return true if the receiver changed as a result of the * call. */ public synchronized boolean removeAllOf(DoubleArrayList list) { boolean changed = this.elements.removeAll(list); if (changed) { clearAllMeasures(); invalidateAll(); this.size = 0; if (fixedOrder) { this.sortedElements.removeAll(list); this.isSorted = true; } } return changed; } /** * Uniformly samples (chooses) n random elements with or without * replacement from the contained elements and adds them to the given * buffer. If the buffer is connected to a bin, the effect is that the * chosen elements are added to the bin connected to the buffer. Also see * {@link #buffered(int) buffered}. * * @param n * the number of elements to choose. * @param withReplacement * true samples with replacement, otherwise samples * without replacement. * @param randomGenerator * a random number generator. Set this parameter to null * to use a default random number generator seeded with the * current time. * @param buffer * the buffer to which chosen elements will be added. * @throws IllegalArgumentException * if !withReplacement && n > size(). * @see cern.jet.random.tdouble.sampling */ public synchronized void sample(int n, boolean withReplacement, DoubleRandomEngine randomGenerator, cern.colt.buffer.tdouble.DoubleBuffer buffer) { if (randomGenerator == null) randomGenerator = AbstractDoubleDistribution.makeDefaultGenerator(); buffer.clear(); if (!withReplacement) { // without if (n > size()) throw new IllegalArgumentException("n must be less than or equal to size()"); cern.jet.random.tdouble.sampling.DoubleRandomSamplingAssistant sampler = new cern.jet.random.tdouble.sampling.DoubleRandomSamplingAssistant( n, size(), randomGenerator); for (int i = n; --i >= 0;) { if (sampler.sampleNextElement()) buffer.add(this.elements.getQuick(i)); } } else { // with cern.jet.random.tdouble.DoubleUniform uniform = new cern.jet.random.tdouble.DoubleUniform(randomGenerator); int s = size(); for (int i = n; --i >= 0;) { buffer.add(this.elements.getQuick(uniform.nextIntFromTo(0, s - 1))); } buffer.flush(); } } /** * Generic bootstrap resampling. Quite optimized - Don't be afraid to try * it. Executes resamples resampling steps. In each resampling step * does the following: *

    *
  • Uniformly samples (chooses) size() random elements with * replacement from this and fills them into an auxiliary bin * b1. *
  • Uniformly samples (chooses) other.size() random elements * with replacement from other and fills them into another * auxiliary bin b2. *
  • Executes the comparison function function on both auxiliary * bins (function.apply(b1,b2)) and adds the result of the function * to an auxiliary bootstrap bin b3. *
*

* Finally returns the auxiliary bootstrap bin b3 from which the * measure of interest can be read off. *

*

* Background: *

*

* Also see a more in-depth * discussion on bootstrapping and related randomization methods. The * classical statistical test for comparing the means of two samples is the * t-test. Unfortunately, this test assumes that the two samples each * come from a normal distribution and that these distributions have the * same standard deviation. Quite often, however, data has a distribution * that is non-normal in many ways. In particular, distributions are often * unsymmetric. For such data, the t-test may produce misleading results and * should thus not be used. Sometimes asymmetric data can be transformed * into normally distributed data by taking e.g. the logarithm and the * t-test will then produce valid results, but this still requires * postulation of a certain distribution underlying the data, which is often * not warranted, because too little is known about the data composition. *

*

* Bootstrap resampling of means differences (and other differences) * is a robust replacement for the t-test and does not require assumptions * about the actual distribution of the data. The idea of bootstrapping is * quite simple: simulation. The only assumption required is that the two * samples a and b are representative for the underlying * distribution with respect to the statistic that is being tested - this * assumption is of course implicit in all statistical tests. We can now * generate lots of further samples that correspond to the two given ones, * by sampling with replacement. This process is called * resampling. A resample can (and usually will) have a different * mean than the original one and by drawing hundreds or thousands of such * resamples ar from a and * br from b we can compute the so-called * bootstrap distribution of all the differences "mean of * ar minus mean of br". That * is, a bootstrap bin filled with the differences. Now we can compute, what * fraction of these differences is, say, greater than zero. Let's assume we * have computed 1000 resamples of both a and b and found * that only 8 of the differences were greater than zero. Then * 8/1000 or 0.008 is the p-value (probability) for the * hypothesis that the mean of the distribution underlying a is * actually larger than the mean of the distribution underlying b. * From this bootstrap test, we can clearly reject the hypothesis. *

*

* Instead of using means differences, we can also use other differences, * for example, the median differences. *

*

* Instead of p-values we can also read arbitrary confidence intervals from * the bootstrap bin. For example, 90% of all bootstrap differences * are left of the value -3.5, hence a left 90% confidence * interval for the difference would be (3.5,infinity); in other * words: the difference is 3.5 or larger with probability * 0.9. *

*

* Sometimes we would like to compare not only means and medians, but also * the variability (spread) of two samples. The conventional method of doing * this is the F-test, which compares the standard deviations. It is * related to the t-test and, like the latter, assumes the two samples to * come from a normal distribution. The F-test is very sensitive to data * with deviations from normality. Instead we can again resort to more * robust bootstrap resampling and compare a measure of spread, for example * the inter-quartile range. This way we compute a bootstrap resampling * of inter-quartile range differences in order to arrive at a test for * inequality or variability. *

*

* Example: *

* *
* *
     * 	 // v1,v2 - the two samples to compare against each other
     * 	 double[] v1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9,10,  21,  22,23,24,25,26,27,28,29,30,31};
     * 	 double[] v2 = {10,11,12,13,14,15,16,17,18,19,  20,  30,31,32,33,34,35,36,37,38,39};
     * 	 hep.aida.bin.DynamicBin1D X = new hep.aida.bin.DynamicBin1D();
     * 	 hep.aida.bin.DynamicBin1D Y = new hep.aida.bin.DynamicBin1D();
     * 	 X.addAllOf(new cern.colt.list.DoubleArrayList(v1));
     * 	 Y.addAllOf(new cern.colt.list.DoubleArrayList(v2));
     * 	 cern.jet.random.engine.RandomEngine random = new cern.jet.random.engine.MersenneTwister();
     * 
     * 	 // bootstrap resampling of differences of means:
     * 	 BinBinFunction1D diff = new BinBinFunction1D() {
     * 	    public double apply(DynamicBin1D x, DynamicBin1D y) {return x.mean() - y.mean();}
     * 	 };
     * 
     * 	 // bootstrap resampling of differences of medians:
     * 	 BinBinFunction1D diff = new BinBinFunction1D() {
     * 	    public double apply(DynamicBin1D x, DynamicBin1D y) {return x.median() - y.median();}
     * 	 };
     * 
     * 	 // bootstrap resampling of differences of inter-quartile ranges:
     * 	 BinBinFunction1D diff = new BinBinFunction1D() {
     * 	    public double apply(DynamicBin1D x, DynamicBin1D y) {return (x.quantile(0.75)-x.quantile(0.25)) - (y.quantile(0.75)-y.quantile(0.25)); }
     * 	 };
     * 
     * 	 DynamicBin1D boot = X.sampleBootstrap(Y,1000,random,diff);
     * 
     * 	 cern.jet.math.Functions F = cern.jet.math.Functions.functions;
     * 	 System.out.println("p-value="+ (boot.aggregate(F.plus, F.greater(0)) / boot.size()));
     * 	 System.out.println("left 90% confidence interval = ("+boot.quantile(0.9) + ",infinity)");
     * 
     * 	 -->
     * 	 // bootstrap resampling of differences of means:
     * 	 p-value=0.0080
     * 	 left 90% confidence interval = (-3.571428571428573,infinity)
     * 
     * 	 // bootstrap resampling of differences of medians:
     * 	 p-value=0.36
     * 	 left 90% confidence interval = (5.0,infinity)
     * 
     * 	 // bootstrap resampling of differences of inter-quartile ranges:
     * 	 p-value=0.5699
     * 	 left 90% confidence interval = (5.0,infinity)
     * 
     * 
* *
* * @param other * the other bin to compare the receiver against. * @param resamples * the number of times resampling shall be done. * @param randomGenerator * a random number generator. Set this parameter to null * to use a default random number generator seeded with the * current time. * @param function * a difference function comparing two samples; takes as first * argument a sample of this and as second argument a * sample of other. * @return a bootstrap bin holding the results of function of each * resampling step. * @see cern.colt.GenericPermuting#permutation(long,int) */ public synchronized DynamicDoubleBin1D sampleBootstrap(DynamicDoubleBin1D other, int resamples, cern.jet.random.tdouble.engine.DoubleRandomEngine randomGenerator, DoubleBinBinFunction1D function) { if (randomGenerator == null) randomGenerator = AbstractDoubleDistribution.makeDefaultGenerator(); // since "resamples" can be quite large, we care about performance and // memory int maxCapacity = 1000; int s1 = size(); int s2 = other.size(); // prepare auxiliary bins and buffers DynamicDoubleBin1D sample1 = new DynamicDoubleBin1D(); cern.colt.buffer.tdouble.DoubleBuffer buffer1 = sample1.buffered(Math.min(maxCapacity, s1)); DynamicDoubleBin1D sample2 = new DynamicDoubleBin1D(); cern.colt.buffer.tdouble.DoubleBuffer buffer2 = sample2.buffered(Math.min(maxCapacity, s2)); DynamicDoubleBin1D bootstrap = new DynamicDoubleBin1D(); cern.colt.buffer.tdouble.DoubleBuffer bootBuffer = bootstrap.buffered(Math.min(maxCapacity, resamples)); // resampling steps for (int i = resamples; --i >= 0;) { sample1.clear(); sample2.clear(); this.sample(s1, true, randomGenerator, buffer1); other.sample(s2, true, randomGenerator, buffer2); bootBuffer.add(function.apply(sample1, sample2)); } bootBuffer.flush(); return bootstrap; } /** * Determines whether the receivers internally preserved elements may be * reordered or not. *
    *
  • fixedOrder==false allows the order in which elements are * returned by method elements() to be different from the order in * which elements are added. *
  • fixedOrder==true guarantees that under all circumstances the * order in which elements are returned by method elements() is * identical to the order in which elements are added. However, the latter * consumes twice as much memory if operations involving sorting are * requested. This option is usually only required if a 2-dimensional bin, * formed by two 1-dimensional bins, needs to be rebinnable. *
*

* Naturally, if fixedOrder is set to true you should not * already have added elements to the receiver; it should be empty. */ public void setFixedOrder(boolean fixedOrder) { // if (size() > 0) throw new RuntimeException("must be called before // starting to add elements."); this.fixedOrder = fixedOrder; } /** * Returns the number of elements contained in the receiver. * * @return the number of elements contained in the receiver. */ public synchronized int size() { return elements.size(); // Never ever use "this.size" as it would be intuitive! // This class abuses "this.size". "this.size" DOES NOT REFLECT the // number of elements contained in the receiver! // Instead, "this.size" reflects the number of elements incremental // stats computation has already processed. } /** * Sorts elements if not already sorted. */ protected void sort() { if (!this.isSorted) { if (this.fixedOrder) { this.sortedElements.clear(); this.sortedElements.addAllOfFromTo(this.elements, 0, this.elements.size() - 1); this.sortedElements.sort(); } else { /* * Call updateIncrementalStats() because after sorting we no * more know what elements are still to be done by * updateIncrementalStats() and would therefore later need to * rebuild incremental stats from scratch. */ updateIncrementalStats(); invalidateAll(); this.elements.sort(); this.isIncrementalStatValid = true; } this.isSorted = true; } } /** * Returns a copy of the currently stored elements, sorted ascending. * Concerning the memory required for operations involving sorting, see * {@link #setFixedOrder(boolean)}. * * @return a copy of the currently stored elements, sorted ascending. */ public synchronized DoubleArrayList sortedElements() { // safe since we are already synchronized. return sortedElements_unsafe().copy(); } /** * Returns the currently stored elements, sorted ascending; WARNING: * not a copy of them; Thus, improper usage of the returned list may not * only corrupt the receiver's internal state, but also break thread safety! * Only provided for performance and memory sensitive applications. Do not * modify the returned elements unless you know exactly what you're doing. * This method can be used in a thread safe, clean and performant way * by explicitly synchronizing on the bin, as follows: * *

     * ...
     * synchronized (dynamicBin) { // lock out anybody else
     *     DoubleArrayList elements = dynamicBin.sortedElements_unsafe();
     * 	   // read each element and do something with it, e.g.
     * 	   double[] values = elements.elements(); // zero-copy
     * 	   for (int i=dynamicBin.size(); --i >=0; ) {
     *         foo(values[i]);
     * 	   }			
     * }
     * ...
     * 
* * Concerning the memory required for operations involving sorting, see * {@link #setFixedOrder(boolean)}. * * @return the currently stored elements, sorted ascending. */ protected synchronized DoubleArrayList sortedElements_unsafe() { sort(); if (fixedOrder) return this.sortedElements; return this.elements; } /** * Modifies the receiver to be standardized. Changes each element * x[i] as follows: x[i] = (x[i]-mean)/standardDeviation. */ public synchronized void standardize(double mean, double standardDeviation) { DoubleDescriptive.standardize(this.elements, mean, standardDeviation); clearAllMeasures(); invalidateAll(); this.size = 0; } /** * Returns the sum of all elements, which is Sum( x[i] ). */ public synchronized double sum() { if (!isIncrementalStatValid) updateIncrementalStats(); return this.sum; } /** * Returns the sum of inversions, which is Sum( 1 / x[i] ). */ public synchronized double sumOfInversions() { if (!isSumOfInversionsValid) updateSumOfInversions(); return this.sumOfInversions; } /** * Returns the sum of logarithms, which is Sum( Log(x[i]) ). */ public synchronized double sumOfLogarithms() { if (!isSumOfLogarithmsValid) updateSumOfLogarithms(); return this.sumOfLogarithms; } /** * Returns the k-th order sum of powers, which is * Sum( x[i]k ). * * @param k * the order of the powers. * @return the sum of powers. */ public synchronized double sumOfPowers(int k) { // no chaching for this measure if (k >= -1 && k <= 2) return super.sumOfPowers(k); return DoubleDescriptive.sumOfPowers(this.elements, k); } /** * Returns the sum of squares, which is Sum( x[i] * x[i] ). */ public synchronized double sumOfSquares() { if (!isIncrementalStatValid) updateIncrementalStats(); return this.sum_xx; } /** * Returns a String representation of the receiver. */ public synchronized String toString() { StringBuffer buf = new StringBuffer(super.toString()); DoubleArrayList distinctElements = new DoubleArrayList(); IntArrayList frequencies = new IntArrayList(); frequencies(distinctElements, frequencies); if (distinctElements.size() < 100) { // don't cause unintended floods buf.append("Distinct elements: " + distinctElements + "\n"); buf.append("Frequencies: " + frequencies + "\n"); } else { buf.append("Distinct elements & frequencies not printed (too many)."); } return buf.toString(); } /** * Removes the s smallest and l largest elements from the * receiver. The receivers size will be reduced by s + l elements. * * @param s * the number of smallest elements to trim away (s >= 0 * ). * @param l * the number of largest elements to trim away (l >= 0). */ public synchronized void trim(int s, int l) { DoubleArrayList elems = sortedElements(); clear(); addAllOfFromTo(elems, s, elems.size() - 1 - l); } /** * Returns the trimmed mean. That is the mean of the data if the * s smallest and l largest elements would be * removed from the receiver (they are not removed). * * @param s * the number of smallest elements to trim away (s >= 0 * ). * @param l * the number of largest elements to trim away (l >= 0). * @return the trimmed mean. */ public synchronized double trimmedMean(int s, int l) { // no caching for this parameter. return DoubleDescriptive.trimmedMean(sortedElements_unsafe(), mean(), s, l); } /** * Trims the capacity of the receiver to be the receiver's current size. * (This has nothing to do with trimming away smallest and largest elements. * The method name is used to be consistent with JDK practice.) *

* Releases any superfluos internal memory. An application can use this * operation to minimize the storage of the receiver. Does not affect * functionality. */ public synchronized void trimToSize() { this.elements.trimToSize(); this.sortedElements.clear(); this.sortedElements.trimToSize(); if (fixedOrder) this.isSorted = false; } /** * assertion: isBasicParametersValid == false * */ protected void updateIncrementalStats() { // prepare arguments double[] arguments = new double[4]; arguments[0] = this.min; arguments[1] = this.max; arguments[2] = this.sum; arguments[3] = this.sum_xx; DoubleDescriptive.incrementalUpdate(this.elements, this.size, this.elements.size() - 1, arguments); // store the new parameters back this.min = arguments[0]; this.max = arguments[1]; this.sum = arguments[2]; this.sum_xx = arguments[3]; this.isIncrementalStatValid = true; this.size = this.elements.size(); // next time we don't need to redo // the stuff we have just done... } /** * assertion: isBasicParametersValid == false * */ protected void updateSumOfInversions() { this.sumOfInversions = DoubleDescriptive.sumOfInversions(this.elements, 0, size() - 1); this.isSumOfInversionsValid = true; } /** * */ protected void updateSumOfLogarithms() { this.sumOfLogarithms = DoubleDescriptive.sumOfLogarithms(this.elements, 0, size() - 1); this.isSumOfLogarithmsValid = true; } /** * * * @param element * element to be appended. */ protected void validateAll() { this.isSorted = true; this.isIncrementalStatValid = true; // this.isSkewValid = true; // this.isKurtosisValid = true; this.isSumOfInversionsValid = true; this.isSumOfLogarithmsValid = true; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy