jaitools.numeric.SampleStats Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jt-all Show documentation
Show all versions of jt-all Show documentation
Provides a single jar containing all JAI-tools modules which you can
use instead of including individual modules in your project. Note:
It does not include the Jiffle scripting language or Jiffle image
operator.
The newest version!
/*
* Copyright 2009-2011 Michael Bedward
*
* This file is part of jai-tools.
*
* jai-tools is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* jai-tools is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with jai-tools. If not, see <http://www.gnu.org/licenses/>.
*
*/
package jaitools.numeric;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.SortedSet;
import jaitools.CollectionFactory;
/**
* A collection of static methods to calculate summary statistics for
* a sample of double-valued data. This class is used by both Jiffle
* and the KernelStats operator.
*
* @author Michael Bedward
* @author Daniele Romagnoli, GeoSolutions S.A.S.
* @since 1.0
* @version $Id: SampleStats.java 1610 2011-03-31 04:44:28Z michael.bedward $
*/
public class SampleStats {
/**
* Return the maximum of the given values.
*
* @param values sample values
* @param ignoreNaN specifies whether to ignore NaN values
* @return max value or Double.NaN if the sample is empty
*/
public static double max(Double[] values, boolean ignoreNaN) {
if (values == null || values.length == 0) {
return Double.NaN;
} else if (values.length == 1) {
return values[0];
}
SortedSet set = CollectionFactory.sortedSet();
set.addAll(Arrays.asList(values));
if (ignoreNaN) set.remove(Double.NaN);
return set.last();
}
/**
* Return the mean of the given values.
*
* @param values sample values
* @param ignoreNaN specifies whether to ignore NaN values
* @return mean value or Double.NaN if the sample is empty
*/
public static double mean(Double[] values, boolean ignoreNaN) {
if (values == null || values.length == 0) {
return Double.NaN;
} else if (values.length == 1) {
return values[0];
}
double sum = 0.0d;
int n = 0;
for (Double val : values) {
if (val.isNaN()) {
if (!ignoreNaN) return Double.NaN;
} else {
sum += val;
n++ ;
}
}
return sum / n;
}
/**
* Calculates the minimum of the given values.
*
* @param values sample values
* @param ignoreNaN specifies whether to ignore NaN values
* @return min value or Double.NaN if the sample is empty
*/
public static double min(Double[] values, boolean ignoreNaN) {
if (values == null || values.length == 0) {
return Double.NaN;
} else if (values.length == 1) {
return values[0];
}
SortedSet set = CollectionFactory.sortedSet();
set.addAll(Arrays.asList(values));
if (ignoreNaN) set.remove(Double.NaN);
return set.first();
}
/**
* Calculates the median of the given values. For a sample with an odd
* number of elements the median is the mid-point value of the
* sorted sample. For an even number of elements it is the mean of
* the two values on either side of the mid-point.
*
* @param values sample values (need not be pre-sorted)
* @param ignoreNaN specifies whether to ignore NaN values
* @return median value or Double.NaN if the sample is empty
*/
@SuppressWarnings("empty-statement")
public static double median(Double[] values, boolean ignoreNaN) {
if (values == null) {
return Double.NaN;
}
List nonNaNValues = CollectionFactory.list();
nonNaNValues.addAll(Arrays.asList(values));
if (ignoreNaN) {
while (nonNaNValues.remove(Double.NaN)) /* deliberately empty */ ;
}
if (nonNaNValues.isEmpty()) {
return Double.NaN;
} else if (nonNaNValues.size() == 1) {
return nonNaNValues.get(0);
} else if (nonNaNValues.size() == 2) {
return (nonNaNValues.get(0) + nonNaNValues.get(1)) / 2;
}
Collections.sort(nonNaNValues);
int midHi = nonNaNValues.size() / 2;
int midLo = midHi - 1;
boolean even = nonNaNValues.size() % 2 == 0;
Double result = 0.0d;
int k = 0;
for (Double val : nonNaNValues) {
if (k == midHi) {
if (!even) {
return val;
} else {
result += val;
return result / 2;
}
} else if (even && k == midLo) {
result += val;
}
k++ ;
}
return 0; // to suppress compiler warning
}
/**
* Calculates the empirical mode (highest frequency value) of the given values.
* Double.NaN values are ignored. If more than one data value occurs with
* maximum frequency the following tie-break rules are used:
*
* - for an odd number of tied values, return their median
*
- for an even number of tied values, return the value below
* the mid-point of the sorted list of tied values
*
* This ensures that the calculated mode occurs in the sample data.
* Whether or not the mode is meaningful for the sample is up to the user !
*
* @param values sample values
* @param ignoreNaN specifies whether to ignore NaN values
* @return calculated mode or Double.NaN if the sample is empty
*/
@SuppressWarnings("empty-statement")
public static double mode(Double[] values, boolean ignoreNaN) {
if (values == null) {
return Double.NaN;
}
List list = CollectionFactory.list();
list.addAll(Arrays.asList(values));
if (ignoreNaN) {
while (list.remove(Double.NaN)) /* deliberately empty */ ;
}
if (list.isEmpty()) {
return Double.NaN;
} else if (list.size() == 1) {
return list.get(0);
}
Collections.sort(list);
List uniqueValues = CollectionFactory.list();
List freq = CollectionFactory.list();
Double curVal = list.get(0);
int curFreq = 1;
int maxFreq = 1;
for (int i = 1; i < list.size(); i++) {
if (CompareOp.aequal(curVal, list.get(i))) {
curFreq++ ;
} else {
uniqueValues.add(curVal);
freq.add(curFreq);
curVal = list.get(i);
if (curFreq > maxFreq) maxFreq = curFreq;
curFreq = 1;
}
}
uniqueValues.add(curVal);
freq.add(curFreq);
if (curFreq > maxFreq) maxFreq = curFreq;
List maxFreqIndices = CollectionFactory.list();
int k = 0;
for (Integer f : freq) {
if (f == maxFreq) {
maxFreqIndices.add(k);
}
k++ ;
}
if (maxFreqIndices.size() == 1) {
return uniqueValues.get(maxFreqIndices.get(0));
}
boolean even = maxFreqIndices.size() % 2 == 0;
int i = maxFreqIndices.size() / 2;
if (even) i-- ;
return uniqueValues.get(maxFreqIndices.get(i));
}
/**
* Calculates the range (max - min) of a set of values.
*
* @param values sample values
* @param ignoreNaN specifies whether to ignore NaN values
* @return the range or Double.NaN if the set is empty
*/
public static double range(Double[] values, boolean ignoreNaN) {
if (values == null || values.length == 0) {
return Double.NaN;
} else if (values.length == 1) {
return 0d;
}
SortedSet set = CollectionFactory.sortedSet();
set.addAll(Arrays.asList(values));
if (ignoreNaN) set.remove(Double.NaN);
return set.last() - set.first();
}
/**
* Calculates sample variance using the running sample algorithm
* of Welford (1962) described by Knuth in The Art of Computer
* Programming (3rd ed) Vol.2 p.232
*
* @param values sample values
* @param ignoreNaN specifies whether to ignore NaN values
* @return sample variance
*/
public static double variance(Double[] values, boolean ignoreNaN) {
if (values.length < 2) {
return Double.NaN;
}
double mNew, mOld = 0.0d, s = 0.0d;
int n = 0;
for (int i = 0; i < values.length; i++) {
if (Double.isNaN(values[i])) {
if (!ignoreNaN) {
return Double.NaN;
}
} else {
n++;
if (n == 1) {
mNew = mOld = values[i];
} else {
mNew = mOld + (values[i] - mOld) / n;
s = s + (values[i] - mOld) * (values[i] - mNew);
mOld = mNew;
}
}
}
if (n > 1) {
return s / (n - 1);
} else if (n == 1) {
return 0.0d;
} else {
return Double.NaN;
}
}
/**
* Calculates sample standard deviation. This is a convenience
* method that calls {@linkplain #variance(java.lang.Double[], boolean) }
* and returns the square-root of the result
*
* @param values sample values
* @param ignoreNaN specifies whether to ignore NaN values
* @return sample standard deviation as a double
*/
public static double sdev(Double[] values, boolean ignoreNaN) {
double var = variance(values, ignoreNaN);
return (Double.isNaN(var) ? Double.NaN : Math.sqrt(var));
}
/**
* Calculates the sum of the values.
*
* @param values sample values
* @param ignoreNaN specifies whether to ignore NaN values
* @return sum of the values
*/
public static double sum(Double[] values, boolean ignoreNaN) {
double sum = 0.0d;
for (int i = 0; i < values.length; i++) {
if (Double.isNaN(values[i])) {
if (!ignoreNaN) {
return Double.NaN;
}
} else {
sum = sum + values[i];
}
}
return sum;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy