
// prerna.math.StatisticsUtilityMethods Maven / Gradle / Ivy
// The newest version!
/*******************************************************************************
* Copyright 2015 Defense Health Agency (DHA)
*
* If your use of this software does not include any GPLv2 components:
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ----------------------------------------------------------------------------
* If your use of this software includes any GPLv2 components:
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*******************************************************************************/
package prerna.math;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.Arrays;
import prerna.util.ArrayUtilityMethods;
/**
 * Static helper methods for basic descriptive statistics over primitive and boxed
 * numeric arrays: rank-based quantiles, extrema, sums, means, sample standard
 * deviation, skewness, z-scores, entropy, and a "KS statistic" based uniformity check.
 *
 * <p>Unless a method says otherwise, a {@code null} or zero-length data array is
 * rejected with an {@link IllegalArgumentException}. Methods that accept an
 * "is ordered/sorted" flag and receive {@code false} for a primitive array sort the
 * CALLER'S array in place (except where a copy is explicitly made).
 */
public final class StatisticsUtilityMethods {

    private static final String ILLEGAL_ARGS_ERR = "The data array either is null or does not contain any data.";

    /** Not instantiable: every member is static. */
    private StatisticsUtilityMethods(){
    }

    /**
     * Returns the element at rank {@code floor(length * lowerPercent / 100)} of the ordered data.
     * The rank is clamped to the valid index range, so {@code lowerPercent = 100} yields the
     * largest value instead of running past the end of the array.
     *
     * @param values        the data; sorted in place when {@code isOrdered} is false
     * @param lowerPercent  the percentage (0-100) of values that should lie below the result
     * @param isOrdered     true if {@code values} is already in ascending order
     * @return the value at the computed rank
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double quartile(final double[] values, final double lowerPercent, final boolean isOrdered) {
        if (values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        // Rank order the values if not already ordered
        if(!isOrdered) {
            Arrays.sort(values);
        }
        return values[percentileIndex(values.length, lowerPercent)];
    }

    /**
     * Same as {@link #quartile(double[], double, boolean)} for boxed data, after passing the
     * array through {@code ArrayUtilityMethods.removeAllNulls} (presumably dropping null
     * entries; that helper is not visible here).
     *
     * @param values        the data, possibly containing nulls
     * @param lowerPercent  the percentage (0-100) of values that should lie below the result
     * @param isOrdered     true if the data is already in ascending order
     * @return the value at the computed rank among the remaining entries
     * @throws IllegalArgumentException if {@code values} is null or empty, or nothing is left
     *         once the nulls have been removed
     */
    public static Double quartileIgnoringNull(Double[] values, final double lowerPercent, final boolean isOrdered) {
        if (values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        // remove the null values
        values = (Double[]) ArrayUtilityMethods.removeAllNulls(values);
        // an all-null input leaves nothing to index into
        if (values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        // Rank order the values if not already ordered
        if(!isOrdered) {
            values = ArrayUtilityMethods.sortDoubleWrapperArr(values);
        }
        return values[percentileIndex(values.length, lowerPercent)];
    }

    /** Maps a percentage onto an index of an array of {@code length} elements, clamped to [0, length - 1]. */
    private static int percentileIndex(final int length, final double lowerPercent) {
        int index = (int) Math.floor(length * lowerPercent / 100);
        if(index < 0) {
            return 0;
        }
        if(index >= length) {
            return length - 1;
        }
        return index;
    }

    /**
     * Finds the smallest element of the array.
     *
     * @param values the data
     * @return the minimum value
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getMinimumValue(final double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double minValue = values[0];
        for(int index = 1; index < values.length; index++) {
            if(minValue > values[index]) {
                minValue = values[index];
            }
        }
        return minValue;
    }

    /**
     * Finds the smallest element of the boxed array. Elements are unboxed, so a null
     * entry results in a {@link NullPointerException}; use
     * {@link #getMinimumValueIgnoringNull(Double[])} when nulls may be present.
     *
     * @param values the data
     * @return the minimum value
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getMinimumValue(final Double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double minValue = values[0];
        for(int index = 1; index < values.length; index++) {
            if(minValue > values[index]) {
                minValue = values[index];
            }
        }
        return minValue;
    }

    /**
     * Finds the smallest non-null element of the boxed array.
     *
     * @param values the data, possibly containing nulls
     * @return the minimum non-null value, or {@code null} if every entry is null
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static Double getMinimumValueIgnoringNull(final Double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        Double minValue = null;
        for(int index = 0; index < values.length; index++) {
            Double candidate = values[index];
            if(candidate != null && (minValue == null || minValue > candidate)) {
                minValue = candidate;
            }
        }
        return minValue;
    }

    /**
     * Finds the largest element of the array.
     *
     * @param values the data
     * @return the maximum value
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getMaximumValue(final double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double maxValue = values[0];
        for(int index = 1; index < values.length; index++) {
            if(maxValue < values[index]) {
                maxValue = values[index];
            }
        }
        return maxValue;
    }

    /**
     * Finds the largest element of the boxed array. Elements are unboxed, so a null
     * entry results in a {@link NullPointerException}; use
     * {@link #getMaximumValueIgnoringNull(Double[])} when nulls may be present.
     *
     * @param values the data
     * @return the maximum value
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getMaximumValue(final Double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double maxValue = values[0];
        for(int index = 1; index < values.length; index++) {
            if(maxValue < values[index]) {
                maxValue = values[index];
            }
        }
        return maxValue;
    }

    /**
     * Finds the largest non-null element of the boxed array.
     *
     * @param values the data, possibly containing nulls
     * @return the maximum non-null value, or {@code null} if every entry is null
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static Double getMaximumValueIgnoringNull(final Double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        Double maxValue = null;
        for(int index = 0; index < values.length; index++) {
            Double candidate = values[index];
            if(candidate != null && (maxValue == null || maxValue < candidate)) {
                maxValue = candidate;
            }
        }
        return maxValue;
    }

    /**
     * Finds the smallest element of the array.
     *
     * @param values the data
     * @return the minimum value
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static int getMinimumValue(final int[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        int minValue = values[0];
        for(int index = 1; index < values.length; index++) {
            if(minValue > values[index]) {
                minValue = values[index];
            }
        }
        return minValue;
    }

    /**
     * Finds the largest element of the array.
     *
     * @param values the data
     * @return the maximum value
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static int getMaximumValue(final int[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        int maxValue = values[0];
        for(int index = 1; index < values.length; index++) {
            if(maxValue < values[index]) {
                maxValue = values[index];
            }
        }
        return maxValue;
    }

    /**
     * Adds up all elements using {@code int} arithmetic (the total wraps around on overflow).
     *
     * @param values the data
     * @return the sum of the elements
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static int getSum(final int[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        int sum = values[0];
        for(int index = 1; index < values.length; index++) {
            sum += values[index];
        }
        return sum;
    }

    /**
     * Adds up all elements.
     *
     * @param values the data
     * @return the sum of the elements
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getSum(final double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double sum = values[0];
        for(int index = 1; index < values.length; index++) {
            sum += values[index];
        }
        return sum;
    }

    /**
     * Adds up all elements using {@code int} arithmetic (the total wraps around on overflow).
     * Elements are unboxed, so a null entry results in a {@link NullPointerException}.
     *
     * @param values the data
     * @return the sum of the elements
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static int getSum(final Integer[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        int sum = values[0];
        for(int index = 1; index < values.length; index++) {
            sum += values[index];
        }
        return sum;
    }

    /**
     * Adds up all elements. Elements are unboxed, so a null entry results in a
     * {@link NullPointerException}; use {@link #getSumIgnoringNull(Double[])} when nulls
     * may be present.
     *
     * @param values the data
     * @return the sum of the elements
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getSum(final Double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double sum = values[0];
        for(int index = 1; index < values.length; index++) {
            sum += values[index];
        }
        return sum;
    }

    /**
     * Adds up all non-null elements.
     *
     * @param values the data, possibly containing nulls
     * @return the sum of the non-null elements; 0.0 if every entry is null
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getSumIgnoringNull(final Double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double sum = 0.0;
        for(int index = 0; index < values.length; index++) {
            if(values[index] != null) {
                sum += values[index];
            }
        }
        return sum;
    }

    /**
     * Computes the arithmetic mean.
     *
     * @param values the data
     * @return sum of the elements divided by their count
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getAverage(final double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        return getSum(values) / values.length;
    }

    /**
     * Computes the arithmetic mean of the non-null elements.
     *
     * @param values the data, possibly containing nulls
     * @return sum of the non-null elements divided by their count
     * @throws IllegalArgumentException if {@code values} is null or empty, or holds only nulls
     */
    public static double getAverageIgnoringNull(final Double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        int nonNullSize = 0;
        double sum = 0.0;
        for(int index = 0; index < values.length; index++) {
            if(values[index] != null) {
                sum += values[index];
                nonNullSize++;
            }
        }
        if(nonNullSize == 0) {
            throw new IllegalArgumentException("nonNullSize can not be 0");
        }
        return sum/nonNullSize;
    }

    /**
     * Adds up all elements that are not positive or negative infinity.
     *
     * @param values the data
     * @return the sum of the non-infinite elements; 0.0 if there are none
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getSumIgnoringInfinity(final double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double sum = 0.0;
        for(int index = 0; index < values.length; index++) {
            double val = values[index];
            if(!Double.isInfinite(val)) {
                sum += val;
            }
        }
        return sum;
    }

    /**
     * Computes the arithmetic mean of the elements that are not infinite.
     *
     * @param values the data
     * @return sum of the non-infinite elements divided by their count
     * @throws IllegalArgumentException if {@code values} is null or empty, or holds only
     *         infinite values
     */
    public static double getAverageIgnoringInfinity(final double[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        int counter = 0;
        double sum = 0.0;
        for(int index = 0; index < values.length; index++) {
            double val = values[index];
            if(!Double.isInfinite(val)) {
                sum += val;
                counter++;
            }
        }
        if(counter == 0) {
            throw new IllegalArgumentException("counter can not be 0");
        }
        return sum/counter;
    }

    /**
     * Computes the sample standard deviation (divisor {@code n - 1}).
     * A single-element array yields {@code NaN} because the divisor is zero.
     *
     * @param values the data
     * @return the sample standard deviation
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getSampleStandardDeviation(final double[] values) {
        if( values == null || values.length < 1) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double avg = getAverage(values);
        double squaredDeviations = 0;
        for(int index = 0; index < values.length; index++) {
            double deviation = values[index] - avg;
            squaredDeviations += deviation * deviation;
        }
        return Math.sqrt(squaredDeviations/(values.length - 1));
    }

    /**
     * Computes the sample standard deviation (divisor {@code n - 1}) of the non-null
     * elements, where {@code n} is the number of non-null elements. Every element,
     * including the first, contributes to the result. A single non-null element yields
     * {@code NaN} because the divisor is zero.
     *
     * @param values the data, possibly containing nulls
     * @return the sample standard deviation of the non-null elements
     * @throws IllegalArgumentException if {@code values} is null or empty, or holds only nulls
     */
    public static double getSampleStandardDeviationIgnoringNull(final Double[] values) {
        if( values == null || values.length < 1) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double avg = getAverageIgnoringNull(values);
        int nonNullSize = 0;
        double squaredDeviations = 0;
        // start at 0: the first element must be counted like any other
        for(int index = 0; index < values.length; index++) {
            if(values[index] != null) {
                double deviation = values[index] - avg;
                squaredDeviations += deviation * deviation;
                nonNullSize++;
            }
        }
        return Math.sqrt(squaredDeviations/(nonNullSize - 1));
    }

    /**
     * Computes the sample standard deviation (divisor {@code n - 1}) of the elements that
     * are not infinite, where {@code n} is the number of such elements. A single
     * non-infinite element yields {@code NaN} because the divisor is zero.
     *
     * @param values the data
     * @return the sample standard deviation of the non-infinite elements
     * @throws IllegalArgumentException if {@code values} is null or empty, or holds only
     *         infinite values
     */
    public static double getSampleStandardDeviationIgnoringInfinity(final double[] values) {
        if( values == null || values.length < 1) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double avg = getAverageIgnoringInfinity(values);
        int counter = 0;
        double squaredDeviations = 0;
        for(int index = 0; index < values.length; index++) {
            double val = values[index];
            if(!Double.isInfinite(val)) {
                double deviation = val - avg;
                squaredDeviations += deviation * deviation;
                counter++;
            }
        }
        return Math.sqrt(squaredDeviations/(counter - 1));
    }

    /**
     * Returns the median: the middle element for an odd count, the mean of the two middle
     * elements for an even count.
     *
     * @param values    the data; sorted in place when {@code isSorted} is false
     * @param isSorted  true if {@code values} is already in ascending order
     * @return the median value
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getMedian(final double[] values, boolean isSorted) {
        if( values == null || values.length < 1) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        if(!isSorted) {
            Arrays.sort(values);
        }
        int middle = values.length/2;
        if (values.length % 2 == 1) {
            return values[middle];
        }
        return (values[middle-1] + values[middle]) / 2.0;
    }

    /**
     * Computes the sample skewness as
     * {@code n / ((n-1)(n-2)) * sum(((x - mean) / stdev)^3)} using the sample standard
     * deviation. With fewer than three elements the coefficient's divisor is zero and the
     * result is not a finite number.
     *
     * @param values    the data (not modified)
     * @param isSorted  unused by this method; kept for signature compatibility
     * @return the skewness of the data
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double getSkewness(final double[] values, boolean isSorted) {
        if( values == null || values.length < 1) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        int numValues = values.length;
        double mean = getAverage(values);
        double stdev = getSampleStandardDeviation(values);
        double skewness = 0;
        for(int i = 0; i < numValues; i++) {
            skewness += Math.pow( (values[i] - mean)/stdev, 3.0);
        }
        return skewnessCoefficient(numValues) * skewness;
    }

    /**
     * Computes the sample skewness of the non-null elements as
     * {@code n / ((n-1)(n-2)) * sum((x - mean)^3) / stdev^3}, where {@code n} counts the
     * non-null elements. With fewer than three non-null elements the result is not a
     * finite number.
     *
     * @param values    the data, possibly containing nulls
     * @param isSorted  when false the array is first passed through
     *                  {@code ArrayUtilityMethods.sortDoubleWrapperArr} (helper not visible
     *                  here; the caller's array reference is not reassigned)
     * @return the skewness of the non-null data
     * @throws IllegalArgumentException if {@code values} is null or empty, or holds only nulls
     */
    public static double getSkewnessIgnoringNull(Double[] values, boolean isSorted) {
        if( values == null || values.length < 1) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        if(!isSorted){
            values = ArrayUtilityMethods.sortDoubleWrapperArr(values);
        }
        int numValues = values.length;
        int nonNullSize = 0;
        double sum = 0.0;
        for(int index = 0; index < numValues; index++) {
            if(values[index] != null) {
                sum += values[index];
                nonNullSize++;
            }
        }
        if(nonNullSize == 0) {
            throw new IllegalArgumentException("nonNullSize can not be 0");
        }
        double mean = sum/nonNullSize;
        double stdev = getSampleStandardDeviationIgnoringNull(values);
        double skewness = 0;
        for(int i = 0; i < numValues; i++) {
            if(values[i] != null) {
                skewness += Math.pow( (values[i] - mean), 3.0);
            }
        }
        return skewnessCoefficient(nonNullSize) * skewness / Math.pow(stdev, 3);
    }

    /** Returns {@code n / ((n-1)(n-2))}; the product is formed in {@code long} so it cannot overflow. */
    private static double skewnessCoefficient(final int n) {
        return (double) n / ( (long) (n - 1) * (n - 2) );
    }

    /**
     * Computes {@code (x - mean) / sampleStdev} for every element, in the input's order.
     * There is no guard for a zero or undefined standard deviation, so constant or
     * single-element data produces non-finite results.
     *
     * @param values     the data (not modified; a sorted copy is used for mean and stdev)
     * @param isOrdered  true if {@code values} is already in ascending order
     * @return an array of z-scores parallel to {@code values}
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double[] calculateZScores(final double[] values, final boolean isOrdered) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double[] newValues = values.clone();
        if(!isOrdered){
            Arrays.sort(newValues);
        }
        int numValues = values.length;
        double avg = getAverage(newValues);
        double stdev = getSampleStandardDeviation(newValues);
        double[] zScore = new double[numValues];
        for(int i = 0; i < numValues; i++) {
            zScore[i] = (values[i] - avg)/stdev;
        }
        return zScore;
    }

    /**
     * Computes z-scores using the mean and sample standard deviation of the non-infinite
     * elements. Infinite inputs map to {@code NaN}; when the standard deviation is exactly
     * zero every finite input maps to 0.
     *
     * @param values     the data (not modified; a sorted copy is used for mean and stdev)
     * @param isOrdered  true if {@code values} is already in ascending order
     * @return an array of z-scores parallel to {@code values}
     * @throws IllegalArgumentException if {@code values} is null or empty, or holds only
     *         infinite values
     */
    public static double[] calculateZScoresIgnoringInfinity(final double[] values, final boolean isOrdered) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        double[] newValues = values.clone();
        if(!isOrdered){
            Arrays.sort(newValues);
        }
        int numValues = values.length;
        double avg = getAverageIgnoringInfinity(newValues);
        double stdev = getSampleStandardDeviationIgnoringInfinity(newValues);
        double[] zScore = new double[numValues];
        for(int i = 0; i < numValues; i++) {
            if(Double.isInfinite(values[i])) {
                zScore[i] = Double.NaN;
            } else if(stdev == 0) {
                zScore[i] = 0;
            } else {
                zScore[i] = (values[i] - avg)/stdev;
            }
        }
        return zScore;
    }

    /**
     * Builds a z-score axis for the data: the z-score of the smallest value, the whole
     * numbers lying between the extremes, and the z-score of the largest value. Whole
     * numbers closer than 5% of the total span to either extreme are left out. If the
     * only whole number in range is shared by both ends, a one-element array holding it
     * is returned.
     *
     * @param values     the data; sorted in place when {@code isOrdered} is false
     * @param isOrdered  true if {@code values} is already in ascending order
     * @return the z-score range values in ascending order
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double[] calculateZScoreRange(final double[] values, final boolean isOrdered) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        if(!isOrdered){
            Arrays.sort(values);
        }
        double minVal = values[0];
        double maxVal = values[values.length - 1];
        double avg = getAverage(values);
        double stdev = getSampleStandardDeviation(values);
        return buildZScoreRange((minVal - avg)/stdev, (maxVal - avg)/stdev);
    }

    /**
     * Formats the result of {@link #calculateZScoreRange(double[], boolean)} with the
     * pattern {@code "#.##"}.
     *
     * @param values     the data; sorted in place when {@code isOrdered} is false
     * @param isOrdered  true if {@code values} is already in ascending order
     * @return the z-score range values as formatted strings
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static String[] getZScoreRangeAsString(final double[] values, final boolean isOrdered) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        if(!isOrdered){
            Arrays.sort(values);
        }
        return formatZScores(calculateZScoreRange(values, true));
    }

    /**
     * Boxed-array variant of {@link #calculateZScoreRange(double[], boolean)} that uses the
     * null-ignoring mean and standard deviation. The first and last entries of the ordered
     * array are unboxed as the extremes, so they must be non-null (NOTE(review): whether
     * {@code ArrayUtilityMethods.sortDoubleWrapperArr} guarantees that is not visible here).
     *
     * @param values     the data
     * @param isOrdered  true if {@code values} is already in ascending order
     * @return the z-score range values in ascending order
     * @throws IllegalArgumentException if {@code values} is null or empty, or holds only nulls
     */
    public static double[] calculateZScoreRange(Double[] values, final boolean isOrdered) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        if(!isOrdered){
            values = ArrayUtilityMethods.sortDoubleWrapperArr(values);
        }
        double minVal = values[0];
        double maxVal = values[values.length - 1];
        double avg = getAverageIgnoringNull(values);
        double stdev = getSampleStandardDeviationIgnoringNull(values);
        return buildZScoreRange((minVal - avg)/stdev, (maxVal - avg)/stdev);
    }

    /**
     * Formats the result of {@link #calculateZScoreRange(Double[], boolean)} with the
     * pattern {@code "#.##"}.
     *
     * @param values     the data
     * @param isOrdered  true if {@code values} is already in ascending order
     * @return the z-score range values as formatted strings
     * @throws IllegalArgumentException if {@code values} is null or empty, or holds only nulls
     */
    public static String[] getZScoreRangeAsStringIgnoringNull(Double[] values, final boolean isOrdered) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        if(!isOrdered){
            values = ArrayUtilityMethods.sortDoubleWrapperArr(values);
        }
        return formatZScores(calculateZScoreRange(values, true));
    }

    /** Assembles {minZScore, whole numbers in between, maxZScore}, skipping whole numbers within 5% of an end. */
    private static double[] buildZScoreRange(final double minZScore, final double maxZScore) {
        int start = (int) Math.ceil(minZScore);
        int end = (int) Math.floor(maxZScore);
        if(start == end) {
            return new double[]{start};
        }
        double span = maxZScore - minZScore;
        if( (start - minZScore)/span < 0.05 ) {
            start++;
        }
        if( (maxZScore - end)/span < 0.05 ) {
            end--;
        }
        double[] zScore = new double[end - start + 3]; //+3 due to minZScore, maxZScore, and including the end value
        zScore[0] = minZScore;
        zScore[zScore.length - 1] = maxZScore;
        int counter = 1;
        for(int index = start; index <= end; index++){
            zScore[counter] = index;
            counter++;
        }
        return zScore;
    }

    /** Renders each value with the {@code "#.##"} decimal pattern. */
    private static String[] formatZScores(final double[] zScoreVals) {
        NumberFormat formatter = new DecimalFormat("#.##");
        String[] zScoreValsAsString = new String[zScoreVals.length];
        for(int i = 0; i < zScoreVals.length; i++) {
            zScoreValsAsString[i] = formatter.format(zScoreVals[i]);
        }
        return zScoreValsAsString;
    }

    /**
     * Calculates entropy (base 2) from an array of counts: each non-zero count is turned
     * into a probability by dividing by the total, and {@code -sum(p * log2(p))} is returned.
     * The total is accumulated in {@code double} so that large counts cannot wrap around.
     *
     * @param values the counts
     * @return the entropy; 0 when there is a single count, or when
     *         {@code ArrayUtilityMethods.removeAllZeroValues} leaves exactly one entry
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double calculateEntropy(final int[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        // if only one value, then entropy is 0
        if(values.length == 1) {
            return 0;
        }
        if(ArrayUtilityMethods.removeAllZeroValues(values).length == 1) {
            return 0;
        }
        double sum = 0;
        for(int index = 0; index < values.length; index++) {
            sum += values[index];
        }
        double entropy = 0;
        for(int index = 0; index < values.length; index++) {
            double val = values[index];
            if(val != 0) {
                double prob = val / sum;
                entropy += prob * logBase2(prob);
            }
        }
        entropy *= -1;
        return entropy;
    }

    /**
     * Calculates the entropy of the counts divided by the number of entries in the array
     * (zero counts included in that number).
     *
     * @param values the counts
     * @return entropy divided by {@code values.length}; 0 under the same conditions as
     *         {@link #calculateEntropy(int[])}
     * @throws IllegalArgumentException if {@code values} is null or empty
     */
    public static double calculateEntropyDensity(final int[] values) {
        if( values == null || values.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        // if only one value, then entropy is 0
        if(values.length == 1) {
            return 0;
        }
        if(ArrayUtilityMethods.removeAllZeroValues(values).length == 1) {
            return 0;
        }
        int uniqueVals = values.length;
        return calculateEntropy(values) / uniqueVals;
    }

    /**
     * Generate the log base 2 of a given input
     * @param x The value to take the log base 2 of
     * @return The log base 2 of the value inputted
     */
    public static double logBase2(final double x) {
        return Math.log(x) / Math.log(2);
    }

    /**
     * Samples {@code p} randomly positioned windows of {@code N} consecutive values, sorts
     * each window, averages {@link #calculateAverageKSStat(int[], int)} over the windows and
     * compares the overall average against 0.40925. Progress is printed to standard output.
     *
     * @param valueArr  the data (not modified; windows are copied)
     * @param p         number of random windows to sample
     * @param N         number of consecutive values per window
     * @param m         subset size used within each window
     * @param alpha     significance level; only exactly 0.05 is supported
     * @return true if the array has at most {@code m} values, or if {@code alpha} is 0.05
     *         and the averaged statistic is below 0.40925; false otherwise (including an
     *         unsupported {@code alpha})
     * @throws IllegalArgumentException if the array holds fewer than {@code N} values, since
     *         no full window can be taken from it
     */
    public static boolean areValuesUniformlyDistributed(final int[] valueArr, final int p, final int N, final int m, final double alpha) {
        double totalKSStat = 0.0;
        int numValues = valueArr.length;
        if(numValues <= m) {
            System.out.println("\nNumber of values is less than m, could be parent.");
            return true;
        }
        if(numValues < N) {
            throw new IllegalArgumentException("N (" + N + ") is greater than the number of values in the dataset ("
                    + numValues + "). Please decrease N.");
        }
        // valid window starts are 0 .. numValues - N inclusive
        int numStarts = numValues - N + 1;
        for(int i = 0; i < p; i++) {
            int startIndex = (int)(Math.random() * numStarts);
            int[] randomStartValueArr = Arrays.copyOfRange(valueArr,startIndex,startIndex + N);
            Arrays.sort(randomStartValueArr);
            double ksStat = calculateAverageKSStat(randomStartValueArr,m);
            totalKSStat += ksStat;
            System.out.println("Starting at index " + startIndex + " the average KSStat for all subsets is " + ksStat);
        }
        double averageKSStat = totalKSStat / p;
        System.out.println("Overall KSStat for randomly generated starts is " + averageKSStat);
        if(alpha == 0.05) {
            return averageKSStat < 0.40925;
        }
        System.out.println("Alpha is not valid. Please enter either 0.05.");
        return false;
    }

    /**
     * Splits the array into consecutive subsets of {@code m} values (a trailing partial
     * subset is dropped) and averages {@link #calculateKSStat(int[])} over them. When the
     * array holds fewer than {@code m} values there are no subsets and the result is
     * {@code NaN}.
     *
     * @param valueArr  the data; each subset is expected in ascending order
     * @param m         number of values in each subset; must be positive
     * @return the mean statistic over all full subsets
     * @throws IllegalArgumentException if {@code m} is not positive
     */
    public static double calculateAverageKSStat(final int[] valueArr, final int m) {
        if(m <= 0) {
            throw new IllegalArgumentException("m must be a positive subset size but was " + m);
        }
        double totalKSStat = 0.0;
        int numValues = valueArr.length;
        int numSets = (int)Math.floor(numValues * 1.0 /m);
        for(int i = 0; i < numSets; i++){
            int[] subsetValueArr = Arrays.copyOfRange(valueArr,i*m,(i+1)*m);
            totalKSStat += calculateKSStat(subsetValueArr);
        }
        return totalKSStat / numSets;
    }

    /**
     * Computes a distance statistic for the array: each value is normalized using the
     * first element as minimum and the last element as maximum (so ascending input is
     * expected), compared against the positions {@code i/n} and {@code (i+1)/n}, and the
     * largest distance found is scaled by {@code sqrt(n)}.
     *
     * @param valueArr the data, expected in ascending order
     * @return the scaled maximum distance; 0.0 when the first and last elements are equal
     * @throws IllegalArgumentException if {@code valueArr} is null or empty
     */
    public static double calculateKSStat(final int[] valueArr) {
        if(valueArr == null || valueArr.length == 0) {
            throw new IllegalArgumentException(ILLEGAL_ARGS_ERR);
        }
        // Normalize all values
        int numValues = valueArr.length;
        double minValue = valueArr[0];
        double maxValue = valueArr[numValues - 1];
        //TODO what happens when min and max are equal. is it fine?
        if(minValue == maxValue) {
            return 0.0;
        }
        // smallest gap between neighbours, capped by the full range
        double minDiff = maxValue - minValue;
        for(int i = 1; i < numValues; i++) {
            // widen before subtracting so extreme ints cannot overflow
            double diff = (double) valueArr[i] - valueArr[i-1];
            if(diff < minDiff) {
                minDiff = diff;
            }
        }
        double maxDistance = 0;
        for(int i = 0; i < numValues; i++) {
            double firstVal = (valueArr[i] - minValue + minDiff) / (maxValue- minValue + minDiff);
            double dist1 = Math.abs(firstVal - ((i * 1.0) / numValues));
            double dist2 = Math.abs(firstVal - ((i + 1.0) / numValues));
            double distance = Math.max(dist1,dist2);
            if(distance > maxDistance) {
                maxDistance = distance;
            }
        }
        return maxDistance * Math.sqrt(numValues);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy