
net.maizegenetics.analysis.numericaltransform.BoxCoxTransformation Maven / Gradle / Ivy
package net.maizegenetics.analysis.numericaltransform;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Random;
import java.util.stream.Collectors;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest;
import net.maizegenetics.phenotype.Phenotype;
import net.maizegenetics.phenotype.PhenotypeAttribute;
import net.maizegenetics.phenotype.PhenotypeUtils;
import net.maizegenetics.phenotype.Phenotype.ATTRIBUTE_TYPE;
import net.maizegenetics.taxa.Taxon;
public class BoxCoxTransformation {
public Phenotype applyBoxCox(Phenotype pheno, boolean addSmallVal, long randomSeed,double startLambda, double endLambda, double stepLambda) throws Exception{
//Get the variable names and put it into a list
ArrayList attributes = (ArrayList)pheno.attributeListOfType(ATTRIBUTE_TYPE.data);
//get the covariates
attributes.addAll((ArrayList)pheno.attributeListOfType(ATTRIBUTE_TYPE.covariate));
ArrayList phenoNames = (ArrayList)attributes.stream().map((phenoAttr) -> phenoAttr.name()).collect(Collectors.toList());
//Loop through the List get the unique names
ArrayList taxonList = (ArrayList)pheno.taxaAttribute().allTaxaAsList();
ArrayList uniqueTaxaNames =(ArrayList) pheno.taxaAttribute().allTaxaAsList().stream()
.map((taxon) -> taxon.getName()).collect(Collectors.toList());
//loop through the all the values and find the minimum value
double minValue = Double.MAX_VALUE;
HashMap> phenoToDataList = new HashMap>();
for(String varName : phenoNames) {
ArrayList currentPhenoValues = new ArrayList();
for(int i = 0; i < taxonList.size(); i++) {
double currentValue = ((Float)pheno.value(i,pheno.attributeIndexForName(varName))).doubleValue();
currentPhenoValues.add(currentValue);
if( currentValue < minValue && currentValue > 0) {
minValue = currentValue;
}
}
phenoToDataList.put(varName,currentPhenoValues);
}
if(!addSmallVal) {
minValue = 0.0;
}
else {
System.out.println(minValue);
}
//Loop through the Phenotype Attributes and Box Cox them
double[][] fullTransformedVals = new double[uniqueTaxaNames.size()][phenoNames.size()];
for(int phenoNameIndex = 0; phenoNameIndex < phenoNames.size(); phenoNameIndex++){
double[] boxCoxValues = computeBoxCox(phenoToDataList.get(phenoNames.get(phenoNameIndex)),minValue,randomSeed,startLambda, endLambda, stepLambda);
for(int taxaIndex = 0; taxaIndex < uniqueTaxaNames.size(); taxaIndex++) {
fullTransformedVals[taxaIndex][phenoNameIndex] = boxCoxValues[taxaIndex];
}
}
//Create 2-D ArrayList to store all of the transformed values
ArrayList> avgValues = new ArrayList>();
for(int taxaCounter = 0; taxaCounter < fullTransformedVals.length; taxaCounter++) {
ArrayList currentTaxaVals = new ArrayList();
for(int phenoCounter = 0; phenoCounter < fullTransformedVals[taxaCounter].length; phenoCounter++) {
currentTaxaVals.add(fullTransformedVals[taxaCounter][phenoCounter]);
}
avgValues.add(currentTaxaVals);
}
ArrayList originalTypes = new ArrayList<>();
for(int i = 0; i < pheno.attributeListCopy().size(); i++) {
originalTypes.add(pheno.attributeType(i));
}
try {
//Take the taxa List, the variable name list and the matrix and send to PhenoUtils package to get a phenotype back
return PhenotypeUtils.createPhenotypeFromTransform(uniqueTaxaNames, phenoNames, avgValues,pheno.name()+"_BoxCoxTransformed",originalTypes);
}
catch(Exception e) {
throw e;
}
}
public static double[] computeBoxCox(ArrayList phenotypeValues,double minimumValue,long randomSeed, double startLambda, double endLambda, double stepLambda) {
//add in the small value if needed
//Select a random small value where 0 addedSmallPhenoValues = (ArrayList) phenotypeValues.stream().map((currentVal) -> {
return rand.nextDouble() * 0.5 * minimumValue + currentVal;
}).collect(Collectors.toList());
//Check attribute to make sure we have variation
if(!attributeHasVariation(addedSmallPhenoValues)) {
//if we dont we need to return the original value as box cox will not do anything
double[] valuesToReturn = new double[addedSmallPhenoValues.size()];
for(int i = 0; i < addedSmallPhenoValues.size(); i++) {
valuesToReturn[i] = addedSmallPhenoValues.get(i);
}
return valuesToReturn;
}
else {
//Loop through lambdas and compute the KST
double currentLambda = startLambda;
double[] currentValues = new double[addedSmallPhenoValues.size()];
double bestTestStat = Double.MAX_VALUE;
double bestMean = 0.0;
double bestStDev = 0.0;
//Loop through lambdas -5 to 5 increasing by .2 each time
for(double lambda = startLambda; lambda < endLambda; lambda += stepLambda) {
//create a new array to hold the values
double[] transformedValues = new double[addedSmallPhenoValues.size()];
//get the transformed values
for(int i = 0; i < addedSmallPhenoValues.size(); i++) {
transformedValues[i] = boxCoxTransform(addedSmallPhenoValues.get(i), lambda);
}
//Check to see how well it fits a normal distribution
//Use the following method
//
//Get Mean and standard deviation of the transformed data
//Create a Normal Distribution using these input params
//Use KolmogorovSmirnovTest to see how well the data fits the normal distribution
//Pick the one with the lowest values
StandardDeviation sdev = new StandardDeviation();
double sampleStandardDev = sdev.evaluate(transformedValues);
//calculate the mean
Mean meanObj = new Mean();
double meanVal = meanObj.evaluate(transformedValues);
//Skip this lambda if we have a negative sample standard deviation
if(sampleStandardDev<=0.0 || Double.isNaN(sampleStandardDev) || Double.isNaN(meanVal)) {
continue;
}
//Compute kst
NormalDistribution normDist = new NormalDistribution(meanVal, sampleStandardDev);
KolmogorovSmirnovTest kst = new KolmogorovSmirnovTest();
//Get the statistic
double testStat = kst.kolmogorovSmirnovStatistic(normDist, transformedValues);
//If it is less than our current best, we update the statistics
if(testStat < bestTestStat) {
currentLambda = lambda;
currentValues = transformedValues;
bestTestStat = testStat;
bestMean = meanVal;
bestStDev = sampleStandardDev;
}
}
return currentValues;
}
}
private static double boxCoxTransform(double value, double lambda) {
if(lambda == 0) {
//do the natural log
return Math.log(value);
}
else {
//Compute (value^lamdba - 1)/lambda
return (Math.pow(value, lambda) - 1.0)/lambda;
}
}
private static boolean attributeHasVariation(ArrayList phenoAttribute) {
double prevVal = phenoAttribute.get(0);
for(int i = 1; i < phenoAttribute.size(); i++) {
if(phenoAttribute.get(i) != prevVal) {
return true;
}
}
return false;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy