![JAR search and dependency download from the Maven repository](/logo.png)
com.github.TKnudsen.ComplexDataObject.model.tools.StatisticsSupport Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of complex-data-object Show documentation
Show all versions of complex-data-object Show documentation
A library that models real-world objects in Java, referred to as ComplexDataObjects. Other features: IO and preprocessing of ComplexDataObjects.
The newest version!
package com.github.TKnudsen.ComplexDataObject.model.tools;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
/**
*
* Title: StatisticsSupport
*
*
*
* Description: extension of apache commons math stat descriptive
* DescriptiveStatistics. DescriptiveStatistics maintains the input data in
* memory and has the capability of producing "rolling" statistics computed from
* a "window" consisting of the most recently added values.
*
* Aggregate Statistics Included: min, max, mean, geometric mean, n, sum, sum of
* squares, standard deviation, variance, percentiles, skewness, kurtosis,
* median
*
* "Rolling" capability? Yes
*
* Values stored? Yes
*
*
* *
*
* Copyright: (c) 2012-2020 Juergen Bernard,
* https://github.com/TKnudsen/ComplexDataObject
*
*
* @author Juergen Bernard
* @version 1.08
*
*/
public class StatisticsSupport extends DescriptiveStatistics implements Iterable {
/**
*
*/
private static final long serialVersionUID = -4338838213221265737L;
private double median = Double.NaN;
private int count = -1;
private int uniqueObservations = -1;
/**
* NANs are removed!
*
* @param vector
*/
public StatisticsSupport(Number[] vector) {
for (int i = 0; i < vector.length; i++)
if (!Double.isNaN(vector[i].doubleValue()))
addValue(vector[i].doubleValue());
}
/**
* NANs are removed!
*
* @param vector
*/
public StatisticsSupport(double[] vector) {
for (int i = 0; i < vector.length; i++)
if (!Double.isNaN(vector[i]))
addValue(vector[i]);
}
/**
* NANs are removed!
*
* @param values
*/
public StatisticsSupport(Collection extends Number> values) {
Iterator extends Number> iterator = values.iterator();
while (iterator.hasNext()) {
Number n = iterator.next();
if (n != null) {
Double d = n.doubleValue();
if (!Double.isNaN(d))
addValue(d);
}
}
}
/**
* NANs are removed!
*
* @param values
*/
public StatisticsSupport(List values) {
for (int i = 0; i < values.size(); i++)
if (!Double.isNaN(values.get(i)))
addValue(values.get(i));
}
/**
* NANs are removed!
*
* @param values
*/
public StatisticsSupport(Set values) {
Iterator iterator = values.iterator();
while (iterator.hasNext()) {
double d = iterator.next();
if (!Double.isNaN(d))
addValue(d);
}
}
/**
*
* @param quantile the median is called by typing 50.0, not by 0.5!
* @return
*/
public double[] getOutliers(double quantile) {
double dNotOutmin = this.getPercentile(quantile);
double dNotOutmax = this.getPercentile(100.0 - quantile);
List outliers = new ArrayList<>();
for (int i = 0; i < getValues().length; i++)
if (getValues()[i] < dNotOutmin || getValues()[i] > dNotOutmax)
outliers.add(getValues()[i]);
double[] ret = new double[outliers.size()];
for (int i = 0; i < outliers.size(); i++)
ret[i] = outliers.get(i);
return ret;
}
private void resetValues() {
this.median = Double.NaN;
this.count = -1;
uniqueObservations = -1;
}
@Override
public void addValue(double v) {
resetValues();
super.addValue(v);
}
/**
* Adds a list of values. may be slow for large data sets. Requires testing.
*
* @param values
*/
public void addAll(List values) {
for (Double d : values)
addValue(d);
}
public double getMedian() {
if (Double.isNaN(median))
median = getPercentile(50);
return median;
}
public double getMean() {
return super.getMean();
}
public int getCount() {
if (count == -1)
count = getValues().length;
return count;
}
@Override
public double getMax() {
return super.getMax();
}
@Override
public double getMin() {
return super.getMin();
}
/**
* Must be between 0 and 100
*
* @param percent
* @return
*/
public double getPercentile(int percent) {
return getPercentile((double) percent);
}
/**
* Must be between 0 and 100
*
* @param percent
* @return
*/
public double getPercentile(double percent) {
if (percent <= 0)
return getMin();
if (percent >= 100)
return getMax();
return super.getPercentile((double) percent);
}
@Override
public double getStandardDeviation() {
return super.getStandardDeviation();
}
@Override
public double getVariance() {
return super.getVariance();
}
/**
* Number of different values
*
* @return
*/
public int getCountUniqueObservations() {
if (uniqueObservations == -1) {
List list = DataConversion.doublePrimitivesToList(getValues());
@SuppressWarnings({ "rawtypes", "unchecked" })
Set uniqueValues = new HashSet(list);
uniqueObservations = uniqueValues.size();
}
return uniqueObservations;
}
@Override
public double[] getValues() {
return super.getValues();
}
/**
* Predicts if a given variable is discrete by examining the ratio
* #uniques/#elements.
*
* @param percent the variable which values are checked.
* @return true if the ratio is smaller than the given parameter (0.01 means
* 1%).
*/
public boolean isLikelyDiscrete(double ratio) {
int uniqueObservations = getCountUniqueObservations();
double ratioObserved = (double) uniqueObservations / getValues().length;
return (ratioObserved < ratio);
}
@Override
public Iterator iterator() {
return DataConversion.doublePrimitivesToList(getValues()).iterator();
}
/**
* Calculates the Entropy for the value distribution. Should only be applied for
* positive values. Negative values will be inverted.
*
* @return
*/
public double getEntropy() {
if (getCount() == 0)
return 0;
double entropy = 0.0;
for (Iterator iter = iterator(); iter.hasNext();) {
Double d = iter.next();
if (d > 0)
entropy -= (d * Math.log(d));
else if (d < 0)
entropy -= (Math.abs(d) * Math.log(Math.abs(d)));
}
entropy /= Math.log(2.0);
return entropy;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy