All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.numericaltransform.BoxCoxTransformation Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

The newest version!
package net.maizegenetics.analysis.numericaltransform;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Random;
import java.util.stream.Collectors;

import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest;

import net.maizegenetics.phenotype.Phenotype;
import net.maizegenetics.phenotype.PhenotypeAttribute;
import net.maizegenetics.phenotype.PhenotypeUtils;
import net.maizegenetics.phenotype.Phenotype.ATTRIBUTE_TYPE;
import net.maizegenetics.taxa.Taxon;

public class BoxCoxTransformation {

    public Phenotype applyBoxCox(Phenotype pheno, boolean addSmallVal, long randomSeed,double startLambda, double endLambda, double stepLambda) throws Exception{
      //Get the variable names and put it into a list
        ArrayList attributes = (ArrayList)pheno.attributeListOfType(ATTRIBUTE_TYPE.data);
        
        //get the covariates
        attributes.addAll((ArrayList)pheno.attributeListOfType(ATTRIBUTE_TYPE.covariate));
        
        ArrayList phenoNames = (ArrayList)attributes.stream().map((phenoAttr) -> phenoAttr.name()).collect(Collectors.toList());
        
        //Loop through the List get the unique names
        ArrayList taxonList = (ArrayList)pheno.taxaAttribute().allTaxaAsList();
        ArrayList uniqueTaxaNames =(ArrayList) pheno.taxaAttribute().allTaxaAsList().stream()
                                                                .map((taxon) -> taxon.getName()).collect(Collectors.toList());
        
        //loop through the all the values and find the minimum value
        double minValue = Double.MAX_VALUE;
        
        HashMap> phenoToDataList = new HashMap>();
        
        for(String varName : phenoNames) {
            ArrayList currentPhenoValues = new ArrayList();
            for(int i = 0; i < taxonList.size(); i++) {
                double currentValue = ((Float)pheno.value(i,pheno.attributeIndexForName(varName))).doubleValue();
                currentPhenoValues.add(currentValue);
                if( currentValue < minValue && currentValue > 0) {
                    minValue = currentValue;
                }
            }
            phenoToDataList.put(varName,currentPhenoValues);
        }
        
        if(!addSmallVal) {
            minValue = 0.0;
        }
        else {
            System.out.println(minValue);
        }
        
       //Loop through the Phenotype Attributes and Box Cox them
        double[][] fullTransformedVals = new double[uniqueTaxaNames.size()][phenoNames.size()];

        for(int phenoNameIndex = 0; phenoNameIndex < phenoNames.size(); phenoNameIndex++){
            double[] boxCoxValues = computeBoxCox(phenoToDataList.get(phenoNames.get(phenoNameIndex)),minValue,randomSeed,startLambda, endLambda, stepLambda);
            for(int taxaIndex = 0; taxaIndex < uniqueTaxaNames.size(); taxaIndex++) {
                fullTransformedVals[taxaIndex][phenoNameIndex] = boxCoxValues[taxaIndex];
            }
        }
        
        //Create 2-D ArrayList to store all of the transformed values
        ArrayList> avgValues = new ArrayList>();
        for(int taxaCounter = 0; taxaCounter < fullTransformedVals.length; taxaCounter++) {
            ArrayList currentTaxaVals = new ArrayList();
            for(int phenoCounter = 0; phenoCounter < fullTransformedVals[taxaCounter].length; phenoCounter++) {
                currentTaxaVals.add(fullTransformedVals[taxaCounter][phenoCounter]);
            }
            avgValues.add(currentTaxaVals);
        }
        ArrayList originalTypes = new ArrayList<>();
        for(int i = 0; i < pheno.attributeListCopy().size(); i++) {
            originalTypes.add(pheno.attributeType(i));
        }
        
        try {
            //Take the taxa List, the variable name list and the matrix and send to PhenoUtils package to get a phenotype back
            return PhenotypeUtils.createPhenotypeFromTransform(uniqueTaxaNames, phenoNames, avgValues,pheno.name()+"_BoxCoxTransformed",originalTypes);
        }
        catch(Exception e) {
            throw e;
        }
        
    }
    
    public static double[] computeBoxCox(ArrayList phenotypeValues,double minimumValue,long randomSeed, double startLambda, double endLambda, double stepLambda) {
        //add in the small value if needed
        //Select a random small value where 0 addedSmallPhenoValues = (ArrayList) phenotypeValues.stream().map((currentVal) -> {
            return rand.nextDouble() * 0.5 * minimumValue + currentVal;
        }).collect(Collectors.toList());
       
        //Check attribute to make sure we have variation
        if(!attributeHasVariation(addedSmallPhenoValues)) {
            //if we dont we need to return the original value as box cox will not do anything
            double[] valuesToReturn = new double[addedSmallPhenoValues.size()];
            for(int i = 0; i < addedSmallPhenoValues.size(); i++) {
                valuesToReturn[i] = addedSmallPhenoValues.get(i);
            }
            return valuesToReturn;
        }
        else {
            //Loop through lambdas and compute the KST
            double currentLambda = startLambda;
            double[] currentValues = new double[addedSmallPhenoValues.size()];
            double bestTestStat = Double.MAX_VALUE;
            
            double bestMean = 0.0;
            double bestStDev = 0.0;
            
            //Loop through lambdas -5 to 5 increasing by .2 each time
            for(double lambda = startLambda; lambda < endLambda; lambda += stepLambda) {
                //create a new array to hold the values
                double[] transformedValues = new double[addedSmallPhenoValues.size()];
                
                //get the transformed values
                for(int i = 0; i < addedSmallPhenoValues.size(); i++) {
                    transformedValues[i] = boxCoxTransform(addedSmallPhenoValues.get(i), lambda);
                }
                
                //Check to see how well it fits a normal distribution
                //Use the following method
                //
                //Get Mean and standard deviation of the transformed data
                //Create a Normal Distribution using these input params
                //Use KolmogorovSmirnovTest to see how well the data fits the normal distribution
                //Pick the one with the lowest values
                StandardDeviation sdev = new StandardDeviation();
                double sampleStandardDev = sdev.evaluate(transformedValues);

                //calculate the mean
                Mean meanObj = new Mean();
                double meanVal = meanObj.evaluate(transformedValues);
                
                //Skip this lambda if we have a negative sample standard deviation
                if(sampleStandardDev<=0.0 || Double.isNaN(sampleStandardDev) || Double.isNaN(meanVal)) {
                    continue;
                }
                //Compute kst
                NormalDistribution normDist = new NormalDistribution(meanVal, sampleStandardDev);
                KolmogorovSmirnovTest kst = new KolmogorovSmirnovTest();
                
                //Get the statistic
                double testStat = kst.kolmogorovSmirnovStatistic(normDist, transformedValues);
                
                //If it is less than our current best, we update the statistics
                if(testStat < bestTestStat) {
                    currentLambda = lambda;
                    currentValues = transformedValues;
                    bestTestStat = testStat;
                    bestMean = meanVal;
                    bestStDev = sampleStandardDev;
                }
            }
            return currentValues;
        }
    }
    
    
    private static double boxCoxTransform(double value, double lambda) {
        if(lambda == 0) {
            //do the natural log
            return Math.log(value);
        }
        else {
            //Compute (value^lamdba - 1)/lambda
            return (Math.pow(value, lambda) - 1.0)/lambda;
        }
    }
    
    private static boolean attributeHasVariation(ArrayList phenoAttribute) {
        double prevVal = phenoAttribute.get(0);
        
        for(int i = 1; i < phenoAttribute.size(); i++) { 
            if(phenoAttribute.get(i) != prevVal) {
                return true;
            }
        }
        return false;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy