
example.Statistics Maven / Gradle / Ivy
/*
* Zorbage: an algebraic data hierarchy for use in numeric processing.
*
* Copyright (c) 2016-2021 Barry DeZonia All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this list
* of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or other
* materials provided with the distribution.
*
* Neither the name of the nor the names of its contributors may
* be used to endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package example;
import nom.bdezonia.zorbage.algebra.G;
import nom.bdezonia.zorbage.algorithm.ApproxStdDev;
import nom.bdezonia.zorbage.algorithm.ApproxSumOfSquaredDeviationsWithCount;
import nom.bdezonia.zorbage.algorithm.ApproxVariance;
import nom.bdezonia.zorbage.algorithm.BasicStats;
import nom.bdezonia.zorbage.algorithm.ContraharmonicMean;
import nom.bdezonia.zorbage.algorithm.GeometricMean;
import nom.bdezonia.zorbage.algorithm.HarmonicMean;
import nom.bdezonia.zorbage.algorithm.MaxElement;
import nom.bdezonia.zorbage.algorithm.Mean;
import nom.bdezonia.zorbage.algorithm.Median;
import nom.bdezonia.zorbage.algorithm.MinElement;
import nom.bdezonia.zorbage.algorithm.MinMaxElement;
import nom.bdezonia.zorbage.algorithm.NanMaxElement;
import nom.bdezonia.zorbage.algorithm.NanMean;
import nom.bdezonia.zorbage.algorithm.NanMedian;
import nom.bdezonia.zorbage.algorithm.NanMinElement;
import nom.bdezonia.zorbage.algorithm.NanMinMaxElement;
import nom.bdezonia.zorbage.algorithm.NanStdDev;
import nom.bdezonia.zorbage.algorithm.NanSum;
import nom.bdezonia.zorbage.algorithm.NanSumWithCount;
import nom.bdezonia.zorbage.algorithm.NanVariance;
import nom.bdezonia.zorbage.algorithm.NonNanValues;
import nom.bdezonia.zorbage.algorithm.Product;
import nom.bdezonia.zorbage.algorithm.SequenceL0Norm;
import nom.bdezonia.zorbage.algorithm.SequenceL1Norm;
import nom.bdezonia.zorbage.algorithm.SequenceL2Norm;
import nom.bdezonia.zorbage.algorithm.SequenceLInfinityNorm;
import nom.bdezonia.zorbage.algorithm.StdDev;
import nom.bdezonia.zorbage.algorithm.Sum;
import nom.bdezonia.zorbage.algorithm.SumWithCount;
import nom.bdezonia.zorbage.algorithm.SummaryStats;
import nom.bdezonia.zorbage.algorithm.Variance;
import nom.bdezonia.zorbage.datasource.IndexedDataSource;
import nom.bdezonia.zorbage.datasource.ReadOnlyHighPrecisionDataSource;
import nom.bdezonia.zorbage.type.integer.int64.SignedInt64Member;
import nom.bdezonia.zorbage.type.real.float32.Float32Member;
import nom.bdezonia.zorbage.type.real.float64.Float64Member;
import nom.bdezonia.zorbage.type.real.highprec.HighPrecisionMember;
/**
* @author Barry DeZonia
*/
class Statistics {
/*
* As of the writing of this example Zorbage's statistical methods are pretty basic.
* There are plans to add more as collaborators request thing they need related to
* their projects.
*/
/*
* FYI here is one method for getting a quick statistical summary of a set of numbers.
*/
void example1() {
IndexedDataSource data =
nom.bdezonia.zorbage.storage.Storage.allocate(G.FLT.construct(), new float[] {43,7,99,1,2,3,100,55,31});
Float32Member min = G.FLT.construct();
Float32Member q1 = G.FLT.construct();
Float32Member median = G.FLT.construct();
Float32Member mean = G.FLT.construct();
Float32Member q3 = G.FLT.construct();
Float32Member max = G.FLT.construct();
SummaryStats.compute(G.FLT, data, min, q1, median, mean, q3, max);
// If your data contains NaN values you can calc values working around the NaNs
SignedInt64Member numNoData = G.INT64.construct();
SummaryStats.computeSafe(G.FLT, data, min, q1, median, mean, q3, max, numNoData);
}
/*
* FYI here is another method for getting a quick statistical summary of a set of numbers.
*/
void example2() {
IndexedDataSource data =
nom.bdezonia.zorbage.storage.Storage.allocate(G.FLT.construct(), new float[] {43,7,99,1,2,3,100,55,31});
Float32Member mean = G.FLT.construct();
Float32Member stdErrMean = G.FLT.construct();
Float32Member stddev = G.FLT.construct();
Float32Member sampleVariance = G.FLT.construct();
Float32Member sampleSkew = G.FLT.construct();
Float32Member excessKurtosis = G.FLT.construct();
BasicStats.compute(G.FLT, data, mean, stdErrMean, stddev, sampleVariance, sampleSkew, excessKurtosis);
}
/*
* Zorbage can calculate basic statistics from lists of numbers. The numbers can
* be of any of the many types supported in Zorbage.
*/
void example3() {
IndexedDataSource data =
nom.bdezonia.zorbage.storage.Storage.allocate(new Float64Member(), 10);
Float64Member result1 = G.DBL.construct();
Float64Member result2 = G.DBL.construct();
Mean.compute(G.DBL, data, result1);
Median.compute(G.DBL, data, result2);
StdDev.compute(G.DBL, data, result1);
Variance.compute(G.DBL, data, result1);
Sum.compute(G.DBL, data, result1);
SumWithCount.compute(G.DBL, data, result1, result2);
Product.compute(G.DBL, data, result1);
MinElement.compute(G.DBL, data, result1);
MaxElement.compute(G.DBL, data, result2);
MinMaxElement.compute(G.DBL, data, result1, result2);
SequenceL0Norm.compute(G.DBL, G.DBL, data, result1);
SequenceL1Norm.compute(G.DBL, G.DBL, data, result1);
SequenceL2Norm.compute(G.DBL, G.DBL, data, result1);
SequenceLInfinityNorm.compute(G.DBL, G.DBL, data, result1);
HarmonicMean.compute(G.DBL, data, result1);
GeometricMean.compute(G.DBL, data, result1);
ContraharmonicMean.compute(G.DBL, data, result1);
}
/*
* One of the aspects of the above listed routines is that they are naive algorithms
* that compute the mathematically correct result. In practice this can lead to
* overflows, underflows, and rounding errors.
*
* One way to avoid these issues is to compute the values using high precision
* floating point numbers in the calculations. These floating point numbers do not
* overflow, underflow, or lose precision. You can wrap any list in a filter that
* converts values read from a list into a high precision float. One can use the
* same naive algorithms and guarantee you will get the right results.
*
*/
void example4() {
IndexedDataSource data =
nom.bdezonia.zorbage.storage.Storage.allocate(new Float64Member(), 10);
IndexedDataSource filtered =
new ReadOnlyHighPrecisionDataSource<>(G.DBL, data);
HighPrecisionMember result1 = G.HP.construct();
HighPrecisionMember result2 = G.HP.construct();
Mean.compute(G.HP, filtered, result1);
Median.compute(G.HP, filtered, result2);
StdDev.compute(G.HP, filtered, result1);
Variance.compute(G.HP, filtered, result1);
Sum.compute(G.HP, filtered, result1);
SumWithCount.compute(G.HP, filtered, result1, result2);
Product.compute(G.HP, filtered, result1);
MinElement.compute(G.HP, filtered, result1);
MaxElement.compute(G.HP, filtered, result2);
MinMaxElement.compute(G.HP, filtered, result1, result2);
SequenceL0Norm.compute(G.HP, G.HP, filtered, result1);
SequenceL1Norm.compute(G.HP, G.HP, filtered, result1);
SequenceL2Norm.compute(G.HP, G.HP, filtered, result1);
SequenceLInfinityNorm.compute(G.HP, G.HP, filtered, result1);
HarmonicMean.compute(G.HP, filtered, result1);
GeometricMean.compute(G.HP, filtered, result1);
ContraharmonicMean.compute(G.HP, filtered, result1);
}
/*
* The high precision approach is useful when you have a large dataset. It does
* use a few more cpu cycles to calculate. If cpu cycles are at a premium you can
* use the "approximate" algorithms. These algorithms are written to avoid overflows
* while working in the native type. It's faster but the results are only approximate.
* They are plenty accurate but they may differ in the last places and may be
* significantly off for numbers whose square can exceed floating point limits.
*/
void example5() {
IndexedDataSource data =
nom.bdezonia.zorbage.storage.Storage.allocate(new Float64Member(), 10);
Float64Member result = G.DBL.construct();
ApproxStdDev.compute(G.DBL, data, result);
ApproxVariance.compute(G.DBL, data, result);
Float64Member avg = G.DBL.construct();
Float64Member sumSq = G.DBL.construct();
Float64Member count = G.DBL.construct();
ApproxSumOfSquaredDeviationsWithCount.compute(G.DBL, data, avg, sumSq, count);
}
/*
* Finally Zorbage provides methods that will calculate statistics on datasets that
* include NaN values. Most of the algorithms take no more space than the naive
* algorithms. They basically ignore NaN values and calculate the statistics upon
* the remaining data.
*/
void example6() {
IndexedDataSource data =
nom.bdezonia.zorbage.storage.Storage.allocate(new Float64Member(), 10);
Float64Member result1 = G.DBL.construct();
Float64Member result2 = G.DBL.construct();
NanMean.compute(G.DBL, data, result1);
NanMedian.compute(G.DBL, data, result1);
NanStdDev.compute(G.DBL, data, result1);
NanVariance.compute(G.DBL, data, result1);
NanSum.compute(G.DBL, data, result1);
NanSumWithCount.compute(G.DBL, data, result1, result2);
NanMinElement.compute(G.DBL, data, result1);
NanMaxElement.compute(G.DBL, data, result2);
NanMinMaxElement.compute(G.DBL, data, result1, result2);
}
/*
* Those NaN oriented algorithms use the naive statistical algorithms. You can use
* the method NonNanValues to get a list of values that are not NaN and then pass
* them to a naive or approximate algorithms and even convert them to high precision
* floats if you need. This takes some more memory to do but you have the flexibility
* you might want.
*/
void example7() {
IndexedDataSource data =
nom.bdezonia.zorbage.storage.Storage.allocate(new Float64Member(), 10);
IndexedDataSource nonNan = NonNanValues.compute(G.DBL, data);
// find an approximate variance value of the nonNan data
Float64Member result1 = G.DBL.construct();
ApproxVariance.compute(G.DBL, nonNan, result1);
// find an exact variance value of the nonNan data
IndexedDataSource hiPrec =
new ReadOnlyHighPrecisionDataSource<>(G.DBL, nonNan);
HighPrecisionMember result2 = G.HP.construct();
Variance.compute(G.HP, hiPrec, result2);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy