// Source listing: hep.aida.tdouble.bin.DynamicDoubleBin1D (Maven / Gradle / Ivy)
// Artifact: parallelcolt
package hep.aida.tdouble.bin;
import cern.colt.list.tdouble.DoubleArrayList;
import cern.colt.list.tint.IntArrayList;
import cern.jet.random.tdouble.AbstractDoubleDistribution;
import cern.jet.random.tdouble.engine.DoubleRandomEngine;
import cern.jet.stat.tdouble.DoubleDescriptive;
/**
* 1-dimensional rebinnable bin holding double elements; Efficiently
* computes advanced statistics of data sequences. Technically speaking, a
* multiset (or bag) with efficient statistics operations defined upon. First
* see the package summary and javadoc tree view to get the broad picture.
*
* The data filled into a DynamicDoubleBin1D is internally preserved in the
* bin. As a consequence this bin can compute more than only basic statistics.
* On the other hand, if you add huge amounts of elements, you may run out
* of memory (each element takes 8 bytes). If these drawbacks matter, consider
* using {@link StaticDoubleBin1D}, which overcomes them at the expense of limited
* functionality.
*
* This class is fully thread safe (all public methods are synchronized). Thus,
* you can have one or more threads adding to the bin as well as one or more
* threads reading and viewing the statistics of the bin while it is
* filled. For high performance, add data in large chunks (buffers) via
* method addAllOf rather than piecewise via method add.
*
* If your favourite statistics measure is not directly provided by this class,
* check out {@link cern.jet.stat.tdouble.DoubleDescriptive} in combination with
* methods {@link #elements()} and {@link #sortedElements()}.
*
* Implementation: Lazy evaluation, caching, incremental maintenance.
*
* @see cern.jet.stat.tdouble.DoubleDescriptive
* @author [email protected]
* @version 0.9, 03-Jul-99
*/
public class DynamicDoubleBin1D extends QuantileDoubleBin1D {
// WARNING: never use the inherited "this.size" as the element count, even
// though that would be the intuitive reading.
// In this class "this.size" DOES NOT REFLECT the number of elements
// contained in the receiver. Instead it records how many elements the
// incremental statistics computation (see updateIncrementalStats()) has
// already processed. The element count is elements.size(), see size().
/**
 * Serialization version identifier.
 */
private static final long serialVersionUID = 1L;
/**
 * The elements contained in this bin.
 */
protected DoubleArrayList elements = null;
/**
 * The elements contained in this bin, sorted ascending. Only filled by
 * sort() when fixedOrder is true; otherwise "elements" itself is sorted in
 * place.
 */
protected DoubleArrayList sortedElements = null;
/**
 * Preserve element order under all circumstances?
 */
protected boolean fixedOrder = false;
// cached parameters
// protected double skew = 0.0;
// protected double kurtosis = 0.0;
// cache states
// isSorted: true when the list handed out by sortedElements_unsafe() is in
// ascending order; reset by invalidateAll().
protected boolean isSorted = true;
// isIncrementalStatValid: true when min, max, sum and sum_xx reflect all
// stored elements; when false the accessors call updateIncrementalStats().
protected boolean isIncrementalStatValid = true;
// protected boolean isSkewValid = true;
// protected boolean isKurtosisValid = true;
// true when the cached sumOfInversions reflects all stored elements.
protected boolean isSumOfInversionsValid = true;
// true when the cached sumOfLogarithms reflects all stored elements.
protected boolean isSumOfLogarithmsValid = true;
// protected boolean isSumOfPowersValid = true;
/**
 * Creates an empty bin whose elements may be reordered internally, i.e.
 * the bin starts out as if {@link #setFixedOrder(boolean)
 * setFixedOrder(false)} had been called.
 */
public DynamicDoubleBin1D() {
    super();
    // clear() tolerates the element lists still being null at this point.
    this.clear();
    this.fixedOrder = false;
    this.elements = new DoubleArrayList();
    // The sorted copy is only needed in fixed-order mode, so start at capacity 0.
    this.sortedElements = new DoubleArrayList(0);
    // This bin reports sums of inversions and logarithms as available.
    this.hasSumOfInversions = true;
    this.hasSumOfLogarithms = true;
}
/**
 * Appends a single element to the receiver and marks every cached
 * statistic as stale.
 *
 * @param element
 *            the value to append.
 */
public synchronized void add(double element) {
    this.elements.add(element);
    this.invalidateAll();
}
/**
 * Appends the slice of the given list between index from
 * (inclusive) and index to (inclusive) to the receiver and marks
 * every cached statistic as stale.
 *
 * @param list
 *            the list supplying the elements to add.
 * @param from
 *            index of the first element to add (inclusive).
 * @param to
 *            index of the last element to add (inclusive).
 * @throws IndexOutOfBoundsException
 *             if
 *             list.size()>0 && (from<0 || from>to || to>=list.size())
 *             .
 */
public synchronized void addAllOfFromTo(DoubleArrayList list, int from, int to) {
    elements.addAllOfFromTo(list, from, to);
    invalidateAll();
}
/**
 * Folds all elements into a single value: every element is first
 * transformed with f, then the transformed values are combined
 * with aggr. Returns Double.NaN for an empty bin and
 * f(x) for a bin holding the single element x.
 *
 * Note on order: the fold starts with the last stored element and walks
 * towards index 0, so for a non-commutative aggr the result
 * depends on that traversal order.
 *
 * Example:
 *
 * cern.jet.math.tdouble.DoubleFunctions F = cern.jet.math.tdouble.DoubleFunctions.functions;
 * bin = 0 1 2 3
 *
 * // Sum( x[i]*x[i] )
 * bin.aggregate(F.plus,F.square);
 * --> 14
 *
 * For further examples, see the package doc.
 *
 * @param aggr
 *            aggregation function; receives the running aggregate as first
 *            argument and the transformed current element as second
 *            argument.
 * @param f
 *            function transforming each element before aggregation.
 * @return the aggregated measure.
 * @see cern.jet.math.tdouble.DoubleFunctions
 */
public synchronized double aggregate(cern.colt.function.tdouble.DoubleDoubleFunction aggr,
        cern.colt.function.tdouble.DoubleFunction f) {
    final int count = size();
    if (count == 0) {
        return Double.NaN;
    }
    // Seed with the last element, then fold the remaining ones back to front.
    int index = count - 1;
    double result = f.apply(elements.getQuick(index));
    while (--index >= 0) {
        result = aggr.apply(result, f.apply(elements.getQuick(index)));
    }
    return result;
}
/**
 * Empties the receiver: resets the superclass state, drops all stored
 * elements and marks every cache as valid again.
 */
public synchronized void clear() {
    super.clear();
    // The lists may still be null while the constructor is running.
    if (elements != null) {
        elements.clear();
    }
    if (sortedElements != null) {
        sortedElements.clear();
    }
    validateAll();
}
/**
 * Resets the values of all measures by delegating to the superclass; this
 * class currently keeps no additional cached measures of its own.
 */
protected void clearAllMeasures() {
    super.clearAllMeasures();
}
/**
 * Returns a deep copy of the receiver: the element list and the sorted
 * element list of the copy are independent of the receiver's lists.
 *
 * @return a deep copy of the receiver.
 */
public synchronized Object clone() {
    final DynamicDoubleBin1D copy = (DynamicDoubleBin1D) super.clone();
    // Replace the list references taken over from the receiver by private copies.
    if (this.elements != null) {
        copy.elements = copy.elements.copy();
    }
    if (this.sortedElements != null) {
        copy.sortedElements = copy.sortedElements.copy();
    }
    return copy;
}
/**
 * Returns Pearson's correlation coefficient of the receiver and another
 * bin, which is
 * corr(x,y) = covariance(x,y) / (stdDev(x)*stdDev(y)). A
 * correlation coefficient varies between -1 (perfect negative
 * relationship) and +1 (perfect positive relationship).
 *
 * The lock of other is acquired while the receiver's lock is held.
 *
 * @param other
 *            the bin to compare with.
 * @return the correlation.
 * @throws IllegalArgumentException
 *             if size() != other.size().
 */
public synchronized double correlation(DynamicDoubleBin1D other) {
    synchronized (other) {
        final double cov = covariance(other);
        final double ownDeviation = standardDeviation();
        final double otherDeviation = other.standardDeviation();
        return cov / (ownDeviation * otherDeviation);
    }
}
/**
 * Returns the covariance of the receiver and another bin, which is
 * cov(x,y) = (1/size()) * Sum((x[i]-mean(x)) * (y[i]-mean(y))).
 * It is evaluated as (Sum(x[i]*y[i]) - sum(x)*sum(y)/size()) / size(),
 * pairing elements by their index in the stored (unsorted) lists.
 *
 * The lock of other is acquired while the receiver's lock is held.
 *
 * @param other
 *            the bin to compare with.
 * @return the covariance.
 * @throws IllegalArgumentException
 *             if size() != other.size().
 */
public synchronized double covariance(DynamicDoubleBin1D other) {
    synchronized (other) {
        final int n = size();
        if (n != other.size()) {
            throw new IllegalArgumentException("both bins must have same size");
        }
        // Sum of element-wise products, accumulated from the last index down to 0.
        double productSum = 0;
        int index = n;
        while (--index >= 0) {
            productSum += this.elements.getQuick(index) * other.elements.getQuick(index);
        }
        return (productSum - sum() * other.sum() / n) / n;
    }
}
/**
 * Returns an independent copy of the currently stored elements. For the
 * order in which elements are returned, see
 * {@link #setFixedOrder(boolean)}.
 *
 * @return a copy of the currently stored elements.
 */
public synchronized DoubleArrayList elements() {
    // Reading the internal list is safe here: we already hold the lock.
    final DoubleArrayList internal = elements_unsafe();
    return internal.copy();
}
/**
 * Returns the internal element list itself; WARNING: this is not a copy.
 * Improper use of the returned list may corrupt the receiver's internal
 * state and break thread safety. Only provided for performance and memory
 * sensitive applications; do not modify the returned list unless you know
 * exactly what you are doing. The method can be used in a thread safe way
 * by explicitly synchronizing on the bin for as long as the list is in
 * use:
 *
 * ...
 * double sinSum = 0;
 * synchronized (dynamicBin) { // lock out anybody else
 * DoubleArrayList elements = dynamicBin.elements_unsafe();
 * // read each element and do something with it, for example
 * double[] values = elements.elements(); // zero-copy
 * for (int i=dynamicBin.size(); --i >=0; ) {
 * sinSum += Math.sin(values[i]);
 * }
 * }
 * System.out.println(sinSum);
 * ...
 *
 * For the order in which elements are returned, see
 * {@link #setFixedOrder(boolean)}.
 *
 * @return the currently stored elements (the internal list, not a copy).
 */
protected synchronized DoubleArrayList elements_unsafe() {
    return elements;
}
/**
 * Returns whether two bins are equal. They are equal if the other object
 * is a DynamicDoubleBin1D (or a subclass), the superclass considers both
 * equal, and both hold the same elements, order being irrelevant
 * (multiset equality).
 *
 * Definition of equality for multisets: A,B are equal <=> A is a
 * superset of B and B is a superset of A. (Elements must occur the same
 * number of times, order is irrelevant.)
 *
 * The lock of the other bin is acquired while the receiver's lock is held.
 */
public synchronized boolean equals(Object object) {
    if (!(object instanceof DynamicDoubleBin1D)) {
        return false;
    }
    if (!super.equals(object)) {
        return false;
    }
    final DynamicDoubleBin1D that = (DynamicDoubleBin1D) object;
    final double[] ownSorted = sortedElements_unsafe().elements();
    synchronized (that) {
        final double[] otherSorted = that.sortedElements_unsafe().elements();
        final int count = size();
        // Mutual inclusion of the two sorted sequences means multiset equality.
        if (!includes(ownSorted, otherSorted, 0, count, 0, count)) {
            return false;
        }
        return includes(otherSorted, ownSorted, 0, count, 0, count);
    }
}
// Tells whether the ascending range array1[first1..last1) contains every
// element of the ascending range array2[first2..last2), counting duplicates
// (a merge-style scan over both ranges).
private static boolean includes(double[] array1, double[] array2, int first1, int last1, int first2, int last2) {
    int i = first1;
    int j = first2;
    while (i < last1 && j < last2) {
        final double candidate = array1[i];
        final double wanted = array2[j];
        if (wanted < candidate) {
            // wanted can no longer show up in the remainder of array1
            return false;
        }
        if (!(candidate < wanted)) {
            // match: consume the element of array2 as well
            j++;
        }
        i++;
    }
    return j == last2;
}
/**
 * Computes the frequency (number of occurrences, count) of each distinct
 * element. After this call returns, both distinctElements and
 * frequencies have a new size (equal for both), which is the
 * number of distinct elements currently contained.
 *
 * Distinct elements are filled into distinctElements and their
 * counts into frequencies, both starting at index 0 and both
 * sorted ascending by element (in sync). Hence the smallest distinct
 * element and its frequency are at index 0, the second smallest at index
 * 1, ..., the largest at index distinctElements.size()-1.
 *
 * Example:
 * elements = (8,7,6,6,7) --> distinctElements = (6,7,8), frequencies = (2,2,1)
 *
 * @param distinctElements
 *            a list to be filled with the distinct elements; can have any
 *            size.
 * @param frequencies
 *            a list to be filled with the frequencies; can have any size;
 *            set this parameter to null to ignore it.
 */
public synchronized void frequencies(DoubleArrayList distinctElements, IntArrayList frequencies) {
    final DoubleArrayList sorted = sortedElements_unsafe();
    DoubleDescriptive.frequencies(sorted, distinctElements, frequencies);
}
/**
 * Returns a map holding the frequency distribution, that is,
 * (distinctElement,frequency) pairs. The frequency (count) of an element
 * is its number of occurrences.
 *
 * Example:
 * elements = (8,7,6,6,7) --> map.keys = (8,6,7), map.values = (1,2,2)
 *
 * @return a map holding the frequency distribution.
 */
private synchronized cern.colt.map.tdouble.AbstractDoubleIntMap frequencyMap() {
    final cern.colt.map.tdouble.AbstractDoubleIntMap counts = new cern.colt.map.tdouble.OpenDoubleIntHashMap();
    int index = size();
    while (--index >= 0) {
        final double value = this.elements.getQuick(index);
        // presumably get() yields 0 for a key not yet present - TODO confirm
        counts.put(value, 1 + counts.get(value));
    }
    return counts;
}
/**
 * Returns the maximum order k for which sums of powers are
 * retrievable; for this bin that is Integer.MAX_VALUE.
 *
 * @return Integer.MAX_VALUE.
 * @see #hasSumOfPowers(int)
 * @see #sumOfPowers(int)
 */
public int getMaxOrderForSumOfPowers() {
    return Integer.MAX_VALUE;
}
/**
 * Returns the minimum order k for which sums of powers are
 * retrievable; for this bin that is Integer.MIN_VALUE.
 *
 * @return Integer.MIN_VALUE.
 * @see #hasSumOfPowers(int)
 * @see #sumOfPowers(int)
 */
public int getMinOrderForSumOfPowers() {
    return Integer.MIN_VALUE;
}
/**
 * Marks every cached state as stale: sort order, incremental statistics,
 * sum of inversions and sum of logarithms.
 */
protected void invalidateAll() {
    this.isSumOfLogarithmsValid = false;
    this.isSumOfInversionsValid = false;
    this.isIncrementalStatValid = false;
    this.isSorted = false;
}
/**
 * Tells whether a client can obtain all elements added to the receiver,
 * i.e. whether the receiver internally preserves all added elements. This
 * bin does, so the answer is always true; the elements can be
 * obtained via the elements() methods.
 *
 * @return true.
 */
public synchronized boolean isRebinnable() {
    return true;
}
/**
 * Returns the maximum, bringing the incremental statistics up to date
 * first if they are stale.
 */
public synchronized double max() {
    if (!this.isIncrementalStatValid) {
        updateIncrementalStats();
    }
    return max;
}
/**
 * Returns the minimum, bringing the incremental statistics up to date
 * first if they are stale.
 */
public synchronized double min() {
    if (!this.isIncrementalStatValid) {
        updateIncrementalStats();
    }
    return min;
}
/**
 * Returns the moment of k-th order with value c, which
 * is Sum( (x[i]-c)^k ) / size(). The value is recomputed from the
 * stored elements on every call; it is not cached.
 *
 * @param k
 *            the order; any number - can be less than zero, zero or
 *            greater than zero.
 * @param c
 *            any number.
 */
public synchronized double moment(int k, double c) {
    return DoubleDescriptive.moment(elements, k, c);
}
/**
 * Returns the exact phi-quantile; that is, the smallest
 * contained element elem for which holds that phi
 * percent of elements are less than elem. Sorts the elements
 * first if they are not sorted yet.
 *
 * @param phi
 *            must satisfy 0 < phi < 1.
 */
public synchronized double quantile(double phi) {
    final DoubleArrayList sorted = sortedElements_unsafe();
    return DoubleDescriptive.quantile(sorted, phi);
}
/**
 * Returns exactly how many percent of the elements contained in the
 * receiver are <= element. Does linear interpolation if the
 * element is not contained but lies in between two contained elements.
 * Sorts the elements first if they are not sorted yet.
 *
 * @param element
 *            the element to search for.
 * @return the exact percentage phi of elements
 *         <= element (0.0 <= phi <= 1.0).
 */
public synchronized double quantileInverse(double element) {
    final DoubleArrayList sorted = sortedElements_unsafe();
    return DoubleDescriptive.quantileInverse(sorted, element);
}
/**
 * Returns the exact quantiles of the specified percentages. Sorts the
 * elements first if they are not sorted yet.
 *
 * The method holds the receiver's lock for the whole computation, so the
 * sorted element list cannot be modified by another thread while the
 * quantiles are being read from it.
 *
 * @param percentages
 *            the percentages for which quantiles are to be computed. Each
 *            percentage must be in the interval (0.0,1.0].
 *            percentages must be sorted ascending.
 * @return the exact quantiles.
 */
public synchronized DoubleArrayList quantiles(DoubleArrayList percentages) {
    return DoubleDescriptive.quantiles(sortedElements_unsafe(), percentages);
}
/**
 * Removes from the receiver all elements that are contained in the
 * specified list. If anything was removed, all measures are reset and all
 * caches are invalidated, so statistics are rebuilt from scratch on the
 * next request.
 *
 * In fixed-order mode the separately kept sorted copy is updated in place,
 * but only if it was valid before the call. A sorted copy that was already
 * stale (for example because elements were added since the last sort)
 * stays marked as unsorted and is rebuilt on demand.
 *
 * @param list
 *            the elements to be removed.
 * @return true if the receiver changed as a result of the call.
 */
public synchronized boolean removeAllOf(DoubleArrayList list) {
    boolean changed = this.elements.removeAll(list);
    if (changed) {
        // Must be captured before invalidateAll() resets isSorted.
        final boolean sortedCopyWasValid = this.fixedOrder && this.isSorted;
        clearAllMeasures();
        invalidateAll();
        // Forces the incremental statistics to restart at the first element.
        this.size = 0;
        if (sortedCopyWasValid) {
            // Removing elements from a sorted list keeps it a valid sorted
            // view of the remaining elements.
            this.sortedElements.removeAll(list);
            this.isSorted = true;
        }
    }
    return changed;
}
/**
 * Uniformly samples (chooses) n random elements with or
 * without replacement from the contained elements and adds them to the
 * given buffer. If the buffer is connected to a bin, the effect is that
 * the chosen elements are added to the bin connected to the buffer. Also
 * see {@link #buffered(int) buffered}.
 *
 * The buffer is cleared before sampling starts and flushed after sampling
 * has finished, in both sampling modes.
 *
 * @param n
 *            the number of elements to choose.
 * @param withReplacement
 *            true samples with replacement, otherwise samples
 *            without replacement.
 * @param randomGenerator
 *            a random number generator. Set this parameter to
 *            null to use a default random number generator
 *            seeded with the current time.
 * @param buffer
 *            the buffer to which chosen elements will be added.
 * @throws IllegalArgumentException
 *             if !withReplacement && n > size(), or if
 *             withReplacement && n > 0 && size() == 0
 *             (nothing to draw from).
 * @see cern.jet.random.tdouble.sampling
 */
public synchronized void sample(int n, boolean withReplacement, DoubleRandomEngine randomGenerator,
        cern.colt.buffer.tdouble.DoubleBuffer buffer) {
    if (randomGenerator == null)
        randomGenerator = AbstractDoubleDistribution.makeDefaultGenerator();
    buffer.clear();
    final int s = size();
    if (!withReplacement) { // without
        if (n > s)
            throw new IllegalArgumentException("n must be less than or equal to size()");
        cern.jet.random.tdouble.sampling.DoubleRandomSamplingAssistant sampler = new cern.jet.random.tdouble.sampling.DoubleRandomSamplingAssistant(
                n, s, randomGenerator);
        // The assistant decides for each of the s candidates in turn whether
        // it is picked, so every stored element must be offered to it - not
        // just n of them.
        for (int i = s; --i >= 0;) {
            if (sampler.sampleNextElement())
                buffer.add(this.elements.getQuick(i));
        }
    } else { // with
        if (n > 0 && s == 0)
            throw new IllegalArgumentException("cannot sample from an empty bin");
        cern.jet.random.tdouble.DoubleUniform uniform = new cern.jet.random.tdouble.DoubleUniform(randomGenerator);
        for (int i = n; --i >= 0;) {
            buffer.add(this.elements.getQuick(uniform.nextIntFromTo(0, s - 1)));
        }
    }
    // Push whatever is still pending in the buffer to its target, in both modes.
    buffer.flush();
}
/**
* Generic bootstrap resampling. Quite optimized - Don't be afraid to try
* it. Executes resamples resampling steps. In each resampling step
* does the following:
*
* - Uniformly samples (chooses) size() random elements with
* replacement from this and fills them into an auxiliary bin
* b1.
*
- Uniformly samples (chooses) other.size() random elements
* with replacement from other and fills them into another
* auxiliary bin b2.
*
- Executes the comparison function function on both auxiliary
* bins (function.apply(b1,b2)) and adds the result of the function
* to an auxiliary bootstrap bin b3.
*
*
* Finally returns the auxiliary bootstrap bin b3 from which the
* measure of interest can be read off.
*
*
* Background:
*
*
* Also see a more in-depth
* discussion on bootstrapping and related randomization methods. The
* classical statistical test for comparing the means of two samples is the
* t-test. Unfortunately, this test assumes that the two samples each
* come from a normal distribution and that these distributions have the
* same standard deviation. Quite often, however, data has a distribution
* that is non-normal in many ways. In particular, distributions are often
* unsymmetric. For such data, the t-test may produce misleading results and
* should thus not be used. Sometimes asymmetric data can be transformed
* into normally distributed data by taking e.g. the logarithm and the
* t-test will then produce valid results, but this still requires
* postulation of a certain distribution underlying the data, which is often
* not warranted, because too little is known about the data composition.
*
*
* Bootstrap resampling of means differences (and other differences)
* is a robust replacement for the t-test and does not require assumptions
* about the actual distribution of the data. The idea of bootstrapping is
* quite simple: simulation. The only assumption required is that the two
* samples a and b are representative for the underlying
* distribution with respect to the statistic that is being tested - this
* assumption is of course implicit in all statistical tests. We can now
* generate lots of further samples that correspond to the two given ones,
* by sampling with replacement. This process is called
* resampling. A resample can (and usually will) have a different
* mean than the original one and by drawing hundreds or thousands of such
* resamples ar from a and
* br from b we can compute the so-called
* bootstrap distribution of all the differences "mean of
* ar minus mean of br". That
* is, a bootstrap bin filled with the differences. Now we can compute, what
* fraction of these differences is, say, greater than zero. Let's assume we
* have computed 1000 resamples of both a and b and found
* that only 8 of the differences were greater than zero. Then
* 8/1000 or 0.008 is the p-value (probability) for the
* hypothesis that the mean of the distribution underlying a is
* actually larger than the mean of the distribution underlying b.
* From this bootstrap test, we can clearly reject the hypothesis.
*
*
* Instead of using means differences, we can also use other differences,
* for example, the median differences.
*
*
* Instead of p-values we can also read arbitrary confidence intervals from
* the bootstrap bin. For example, 90% of all bootstrap differences
* are left of the value -3.5, hence a left 90% confidence
* interval for the difference would be (3.5,infinity); in other
* words: the difference is 3.5 or larger with probability
* 0.9.
*
*
* Sometimes we would like to compare not only means and medians, but also
* the variability (spread) of two samples. The conventional method of doing
* this is the F-test, which compares the standard deviations. It is
* related to the t-test and, like the latter, assumes the two samples to
* come from a normal distribution. The F-test is very sensitive to data
* with deviations from normality. Instead we can again resort to more
* robust bootstrap resampling and compare a measure of spread, for example
* the inter-quartile range. This way we compute a bootstrap resampling
* of inter-quartile range differences in order to arrive at a test for
* inequality or variability.
*
*
* Example:
*
*
*
*
* // v1,v2 - the two samples to compare against each other
* double[] v1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 21, 22,23,24,25,26,27,28,29,30,31};
* double[] v2 = {10,11,12,13,14,15,16,17,18,19, 20, 30,31,32,33,34,35,36,37,38,39};
* hep.aida.bin.DynamicBin1D X = new hep.aida.bin.DynamicBin1D();
* hep.aida.bin.DynamicBin1D Y = new hep.aida.bin.DynamicBin1D();
* X.addAllOf(new cern.colt.list.DoubleArrayList(v1));
* Y.addAllOf(new cern.colt.list.DoubleArrayList(v2));
* cern.jet.random.engine.RandomEngine random = new cern.jet.random.engine.MersenneTwister();
*
* // bootstrap resampling of differences of means:
* BinBinFunction1D diff = new BinBinFunction1D() {
* public double apply(DynamicBin1D x, DynamicBin1D y) {return x.mean() - y.mean();}
* };
*
* // bootstrap resampling of differences of medians:
* BinBinFunction1D diff = new BinBinFunction1D() {
* public double apply(DynamicBin1D x, DynamicBin1D y) {return x.median() - y.median();}
* };
*
* // bootstrap resampling of differences of inter-quartile ranges:
* BinBinFunction1D diff = new BinBinFunction1D() {
* public double apply(DynamicBin1D x, DynamicBin1D y) {return (x.quantile(0.75)-x.quantile(0.25)) - (y.quantile(0.75)-y.quantile(0.25)); }
* };
*
* DynamicBin1D boot = X.sampleBootstrap(Y,1000,random,diff);
*
* cern.jet.math.Functions F = cern.jet.math.Functions.functions;
* System.out.println("p-value="+ (boot.aggregate(F.plus, F.greater(0)) / boot.size()));
* System.out.println("left 90% confidence interval = ("+boot.quantile(0.9) + ",infinity)");
*
* -->
* // bootstrap resampling of differences of means:
* p-value=0.0080
* left 90% confidence interval = (-3.571428571428573,infinity)
*
* // bootstrap resampling of differences of medians:
* p-value=0.36
* left 90% confidence interval = (5.0,infinity)
*
* // bootstrap resampling of differences of inter-quartile ranges:
* p-value=0.5699
* left 90% confidence interval = (5.0,infinity)
*
*
*
*
*
*
* @param other
* the other bin to compare the receiver against.
* @param resamples
* the number of times resampling shall be done.
* @param randomGenerator
* a random number generator. Set this parameter to null
* to use a default random number generator seeded with the
* current time.
* @param function
* a difference function comparing two samples; takes as first
* argument a sample of this and as second argument a
* sample of other.
* @return a bootstrap bin holding the results of function of each
* resampling step.
* @see cern.colt.GenericPermuting#permutation(long,int)
*/
public synchronized DynamicDoubleBin1D sampleBootstrap(DynamicDoubleBin1D other, int resamples,
        cern.jet.random.tdouble.engine.DoubleRandomEngine randomGenerator, DoubleBinBinFunction1D function) {
    cern.jet.random.tdouble.engine.DoubleRandomEngine engine = randomGenerator;
    if (engine == null) {
        engine = AbstractDoubleDistribution.makeDefaultGenerator();
    }
    // "resamples" can be quite large, so buffer capacities are capped to
    // keep memory consumption bounded.
    final int bufferLimit = 1000;
    final int ownSize = size();
    final int otherSize = other.size();

    // Auxiliary bins receiving one resample of each input bin, plus the
    // result bin collecting one function value per resampling step.
    final DynamicDoubleBin1D ownResample = new DynamicDoubleBin1D();
    final cern.colt.buffer.tdouble.DoubleBuffer ownBuffer = ownResample.buffered(Math.min(bufferLimit, ownSize));
    final DynamicDoubleBin1D otherResample = new DynamicDoubleBin1D();
    final cern.colt.buffer.tdouble.DoubleBuffer otherBuffer = otherResample.buffered(Math.min(bufferLimit,
            otherSize));
    final DynamicDoubleBin1D result = new DynamicDoubleBin1D();
    final cern.colt.buffer.tdouble.DoubleBuffer resultBuffer = result.buffered(Math.min(bufferLimit, resamples));

    for (int step = 0; step < resamples; step++) {
        // Start each step with empty auxiliary bins.
        ownResample.clear();
        otherResample.clear();
        // Draw with replacement, as many elements as each original holds.
        this.sample(ownSize, true, engine, ownBuffer);
        other.sample(otherSize, true, engine, otherBuffer);
        resultBuffer.add(function.apply(ownResample, otherResample));
    }
    resultBuffer.flush();
    return result;
}
/**
 * Determines whether the receiver's internally preserved elements may be
 * reordered or not.
 *
 * - fixedOrder==false allows the order in which elements are
 * returned by method elements() to be different from the order
 * in which elements are added.
 *
 * - fixedOrder==true guarantees that the order in which
 * elements are returned by method elements() is identical to
 * the order in which elements are added. However, this consumes twice as
 * much memory if operations involving sorting are requested. This option
 * is usually only required if a 2-dimensional bin, formed by two
 * 1-dimensional bins, needs to be rebinnable.
 *
 * Naturally, if fixedOrder is set to true you should
 * not already have added elements to the receiver; it should be empty.
 * Elements that were already reordered by an earlier sort cannot be
 * restored to their insertion order.
 *
 * When the mode actually changes, the cached sort state is dropped,
 * because it describes the list used by the previous mode; the next
 * operation that needs sorted elements sorts again.
 *
 * @param fixedOrder
 *            true to preserve insertion order, false to allow
 *            reordering.
 */
public synchronized void setFixedOrder(boolean fixedOrder) {
    if (this.fixedOrder == fixedOrder) {
        return; // nothing changes, keep all caches
    }
    this.fixedOrder = fixedOrder;
    // isSorted referred to the other mode's list (elements vs. sortedElements).
    this.isSorted = false;
}
/**
 * Returns the number of elements contained in the receiver.
 *
 * @return the number of elements contained in the receiver.
 */
public synchronized int size() {
    // Deliberately NOT "this.size": in this class that field counts the
    // elements already processed by the incremental statistics
    // computation, not the elements contained in the receiver.
    return this.elements.size();
}
/**
 * Sorts the elements if they are not already sorted. In fixed-order mode
 * a separate sorted copy is rebuilt; otherwise the element list itself is
 * sorted in place.
 */
protected void sort() {
    if (this.isSorted) {
        return;
    }
    if (!this.fixedOrder) {
        /*
         * Bring the incremental stats up to date first: after sorting we
         * no longer know which elements are still to be processed by
         * updateIncrementalStats() and would otherwise have to rebuild
         * the incremental stats from scratch later.
         */
        updateIncrementalStats();
        invalidateAll();
        this.elements.sort();
        this.isIncrementalStatValid = true;
    } else {
        // Rebuild the sorted copy; the element list keeps its order.
        this.sortedElements.clear();
        this.sortedElements.addAllOfFromTo(this.elements, 0, this.elements.size() - 1);
        this.sortedElements.sort();
    }
    this.isSorted = true;
}
/**
 * Returns an independent copy of the currently stored elements, sorted
 * ascending. For the memory required by operations involving sorting, see
 * {@link #setFixedOrder(boolean)}.
 *
 * @return a copy of the currently stored elements, sorted ascending.
 */
public synchronized DoubleArrayList sortedElements() {
    // Reading the internal list is safe here: we already hold the lock.
    final DoubleArrayList internal = sortedElements_unsafe();
    return internal.copy();
}
/**
 * Returns the internal list of elements, sorted ascending; WARNING: this
 * is not a copy. Improper use of the returned list may corrupt the
 * receiver's internal state and break thread safety. Only provided for
 * performance and memory sensitive applications; do not modify the
 * returned elements unless you know exactly what you are doing. The
 * method can be used in a thread safe way by explicitly synchronizing on
 * the bin for as long as the list is in use:
 *
 * ...
 * synchronized (dynamicBin) { // lock out anybody else
 * DoubleArrayList elements = dynamicBin.sortedElements_unsafe();
 * // read each element and do something with it, e.g.
 * double[] values = elements.elements(); // zero-copy
 * for (int i=dynamicBin.size(); --i >=0; ) {
 * foo(values[i]);
 * }
 * }
 * ...
 *
 * In fixed-order mode the separate sorted copy is returned, otherwise the
 * (now sorted) element list itself. For the memory required by operations
 * involving sorting, see {@link #setFixedOrder(boolean)}.
 *
 * @return the currently stored elements, sorted ascending.
 */
protected synchronized DoubleArrayList sortedElements_unsafe() {
    sort();
    return fixedOrder ? this.sortedElements : this.elements;
}
/**
 * Modifies the receiver to be standardized. Changes each element
 * x[i] as follows:
 * x[i] = (x[i]-mean)/standardDeviation. Afterwards all measures
 * are reset and all caches invalidated, so statistics are rebuilt from
 * scratch on the next request.
 *
 * @param mean
 *            the value subtracted from each element.
 * @param standardDeviation
 *            the value each difference is divided by.
 */
public synchronized void standardize(double mean, double standardDeviation) {
    DoubleDescriptive.standardize(elements, mean, standardDeviation);
    this.clearAllMeasures();
    this.invalidateAll();
    // Forces the incremental statistics to restart at the first element.
    this.size = 0;
}
/**
 * Returns the sum of all elements, which is Sum( x[i] ),
 * bringing the incremental statistics up to date first if they are stale.
 */
public synchronized double sum() {
    if (!this.isIncrementalStatValid) {
        updateIncrementalStats();
    }
    return sum;
}
/**
 * Returns the sum of inversions, which is Sum( 1 / x[i] ). The
 * value is cached and only recomputed when it is stale.
 */
public synchronized double sumOfInversions() {
    if (!this.isSumOfInversionsValid) {
        updateSumOfInversions();
    }
    return sumOfInversions;
}
/**
 * Returns the sum of logarithms, which is Sum( Log(x[i]) ). The
 * value is cached and only recomputed when it is stale.
 */
public synchronized double sumOfLogarithms() {
    if (!this.isSumOfLogarithmsValid) {
        updateSumOfLogarithms();
    }
    return sumOfLogarithms;
}
/**
 * Returns the k-th order sum of powers, which is
 * Sum( x[i]^k ). Orders -1 to 2 are answered by the superclass;
 * every other order is computed from the stored elements on each call (no
 * caching).
 *
 * @param k
 *            the order of the powers.
 * @return the sum of powers.
 */
public synchronized double sumOfPowers(int k) {
    final boolean handledBySuper = k >= -1 && k <= 2;
    if (!handledBySuper) {
        return DoubleDescriptive.sumOfPowers(this.elements, k);
    }
    return super.sumOfPowers(k);
}
/**
 * Returns the sum of squares, which is Sum( x[i] * x[i] ),
 * bringing the incremental statistics up to date first if they are stale.
 */
public synchronized double sumOfSquares() {
    if (!this.isIncrementalStatValid) {
        updateIncrementalStats();
    }
    return sum_xx;
}
/**
 * Returns a String representation of the receiver: the superclass
 * representation followed by the distinct elements and their frequencies,
 * unless there are 100 or more distinct elements.
 */
public synchronized String toString() {
    final StringBuilder text = new StringBuilder(super.toString());
    final DoubleArrayList distinct = new DoubleArrayList();
    final IntArrayList counts = new IntArrayList();
    frequencies(distinct, counts);
    if (distinct.size() >= 100) {
        // don't cause unintended floods
        text.append("Distinct elements & frequencies not printed (too many).");
    } else {
        text.append("Distinct elements: ").append(distinct).append("\n");
        text.append("Frequencies: ").append(counts).append("\n");
    }
    return text.toString();
}
/**
 * Removes the s smallest and l largest elements
 * from the receiver. The receiver's size will be reduced by
 * s + l elements; the remaining elements are stored in
 * ascending order. If s + l equals the current size, the
 * receiver is empty afterwards.
 *
 * The arguments are validated before anything is removed, so an invalid
 * request leaves the receiver unchanged.
 *
 * @param s
 *            the number of smallest elements to trim away (s >= 0
 *            ).
 * @param l
 *            the number of largest elements to trim away (l >= 0).
 * @throws IndexOutOfBoundsException
 *             if s < 0 || l < 0 || s + l > size().
 */
public synchronized void trim(int s, int l) {
    DoubleArrayList elems = sortedElements();
    final int n = elems.size();
    // Check before clear(): failing afterwards would lose all elements.
    // The sum is computed as long to rule out int overflow.
    if (s < 0 || l < 0 || (long) s + (long) l > n) {
        throw new IndexOutOfBoundsException("s: " + s + ", l: " + l + ", size: " + n);
    }
    clear();
    if (s + l < n) {
        addAllOfFromTo(elems, s, n - 1 - l);
    }
}
/**
 * Returns the trimmed mean. That is the mean of the data if the
 * s smallest and l largest elements would be
 * removed from the receiver (they are not removed). The value is
 * recomputed on every call; it is not cached.
 *
 * @param s
 *            the number of smallest elements to trim away (s >= 0
 *            ).
 * @param l
 *            the number of largest elements to trim away (l >= 0).
 * @return the trimmed mean.
 */
public synchronized double trimmedMean(int s, int l) {
    final DoubleArrayList sorted = sortedElements_unsafe();
    final double untrimmedMean = mean();
    return DoubleDescriptive.trimmedMean(sorted, untrimmedMean, s, l);
}
/**
 * Trims the capacity of the receiver to be the receiver's current size.
 * (This has nothing to do with trimming away smallest and largest
 * elements. The method name is used to be consistent with JDK practice.)
 *
 * Releases any superfluous internal memory. An application can use this
 * operation to minimize the storage of the receiver. Does not affect
 * functionality: the sorted copy used in fixed-order mode is dropped and
 * rebuilt on demand.
 */
public synchronized void trimToSize() {
    elements.trimToSize();
    // Give up the sorted copy entirely, then shrink its storage.
    sortedElements.clear();
    sortedElements.trimToSize();
    if (this.fixedOrder) {
        // The sorted copy is gone and must be rebuilt by the next sort().
        this.isSorted = false;
    }
}
/**
 * Brings min, max, sum and sum of squares up to date by processing the
 * elements from index this.size up to the last stored element,
 * then records that all stored elements have been processed.
 */
protected void updateIncrementalStats() {
    // The current values are passed in and updated in place.
    final double[] stats = { this.min, this.max, this.sum, this.sum_xx };
    final int lastIndex = this.elements.size() - 1;
    DoubleDescriptive.incrementalUpdate(this.elements, this.size, lastIndex, stats);
    // Store the new parameters back.
    this.min = stats[0];
    this.max = stats[1];
    this.sum = stats[2];
    this.sum_xx = stats[3];
    // Next time only elements added after this point need processing.
    this.size = this.elements.size();
    this.isIncrementalStatValid = true;
}
/**
 * Recomputes the cached sum of inversions over all stored elements and
 * marks the cache as valid.
 */
protected void updateSumOfInversions() {
    final int lastIndex = size() - 1;
    this.sumOfInversions = DoubleDescriptive.sumOfInversions(this.elements, 0, lastIndex);
    this.isSumOfInversionsValid = true;
}
/**
 * Recomputes the cached sum of logarithms over all stored elements and
 * marks the cache as valid.
 */
protected void updateSumOfLogarithms() {
    final int lastIndex = size() - 1;
    this.sumOfLogarithms = DoubleDescriptive.sumOfLogarithms(this.elements, 0, lastIndex);
    this.isSumOfLogarithmsValid = true;
}
/**
 * Marks every cached state as valid: sort order, incremental statistics,
 * sum of inversions and sum of logarithms.
 */
protected void validateAll() {
    this.isSumOfLogarithmsValid = true;
    this.isSumOfInversionsValid = true;
    this.isIncrementalStatValid = true;
    this.isSorted = true;
}
}