All Downloads are FREE. Search and download functionalities are using the official Maven repository.

umcg.genetica.math.stats.ZScores Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package umcg.genetica.math.stats;

import JSci.maths.statistics.NormalDistribution;
import cern.jet.random.tdouble.StudentT;
//import cern.jet.random.tdouble.engine.DRand;
//import cern.jet.random.tdouble.engine.DoubleRandomEngine;
import cern.jet.stat.tdouble.Probability;

/**
 *
 * @author juha
 */
public class ZScores {

    private static NormalDistribution normDist;

    /**
     *
     * Calculates a weighted Z-score according to Whitlock's paper:
     * http://www.ncbi.nlm.nih.gov/pubmed/16135132 Square root of the sample
     * size is used as the weight for each test.
     *
     * @param zScores Z-scores from individual tests
     * @param sampleSizes sample sizes of these tests
     * @return
     */
    public static double getWeightedZ(float[] zScores, int[] sampleSizes) {
        if (zScores.length != sampleSizes.length) {
            throw new IllegalArgumentException("Zscores and sample sizes should have same length!");
        }
        double weightedZ = 0;
        double sampleSizeSum = 0;
        int nrNans = 0;
        for (int j = 0; j < zScores.length; j++) {
            if (!Float.isNaN(zScores[j])) {
                nrNans++;
                weightedZ += Math.sqrt(sampleSizes[j]) * zScores[j];
                sampleSizeSum += sampleSizes[j];
            }
        }
        
        weightedZ /= Math.sqrt(sampleSizeSum);
        return weightedZ;
    }
    /**
     *
     * Calculates a weighted Z-score according to Whitlock's paper:
     * http://www.ncbi.nlm.nih.gov/pubmed/16135132 Square root of the sample
     * size is used as the weight for each test.
     *
     * @param zScores Z-scores from individual tests
     * @param sampleSizes sample sizes of these tests
     * @return
     */
    public static double getWeightedZ(double[] zScores, int[] sampleSizes) {
        if (zScores.length != sampleSizes.length) {
            throw new IllegalArgumentException("Zscores and sample sizes should have same length!");
        }
        double weightedZ = 0;
        double sampleSizeSum = 0;
        int nrNans = 0;
        for (int j = 0; j < zScores.length; j++) {
            if (!Double.isNaN(zScores[j])) {
                nrNans++;
                weightedZ += Math.sqrt(sampleSizes[j]) * zScores[j];
                sampleSizeSum += sampleSizes[j];
            }
        }
        
        weightedZ /= Math.sqrt(sampleSizeSum);
        return weightedZ;
    }

    /**
     *
     * Calculates a weighted Z-score according to Whitlock's paper:
     * http://www.ncbi.nlm.nih.gov/pubmed/16135132 Square root of the sample
     * size is used as the weight for each test.
     *
     * @param zScores Z-scores from individual tests
     * @param sampleSizes sample sizes of these tests
     * @return
     */
    public static double getWeightedZ(double[] zScores, int[] sampleSizes, double[] weights) {
        if (zScores.length != sampleSizes.length) {
            throw new IllegalArgumentException("Zscores and sample sizes should have same length!");
        }
        double weightedZ = 0;
        double sampleSizeSum = 0;
        for (int j = 0; j < zScores.length; j++) {
            if (!Double.isNaN(zScores[j])) {
                weightedZ += Math.sqrt(sampleSizes[j]) * weights[j] * zScores[j];
                sampleSizeSum += (sampleSizes[j] * weights[j]);
            }
        }
        weightedZ /= Math.sqrt(sampleSizeSum);
        return weightedZ;
    }

    /**
     *
     * Returns the p-value corresponding to the given Z-score assuming a two
     * tailed-test.
     *
     * @param z A Z-score to convert
     * @return The p-value corresponding to the Z-score
     */
    public static double zToP(double z) {

        if (normDist == null) {
            normDist = new NormalDistribution();
            System.out.println("Creating new Normal Dist");
        }
        double p;
        if (z > 0) {
            p = normDist.cumulative(-z);
        } else {
            p = normDist.cumulative(z);
        }
        if (p > 0.5) {
            p = 1 - p;
        }
        p *= 2.0d;

        return p;
    }

    /**
     *
     * Returns the absolute Z-score for a given p-value using a normal
     * distribution.
     *
     * @param p p-value
     * @return absolute Z-score
     */
    public static double pToZ(double p) {
        if(p == 1d){
            return 0;
        } else if (p < 0 || p > 1d){
            throw new IllegalStateException("P-value should be between 0 and 1.");
        } else if (p == 0d){
            return 40d;
        }
        
        return Probability.normalInverse(p);
    }

    /**
     *
     * Returns the Z-score for a given correlation coefficient from Student's t
     * distribution.
     *
     * @param correlation correlation coefficient (r)
     * @param nrSamples number of samples used
     * @return Z-score
     */
    public static double correlationToZ(double correlation, int nrSamples, StudentT tDist) {

        double t = correlation / (Math.sqrt((1 - correlation * correlation) / (double) (nrSamples - 2)));
        double pValue = 0;
        double zScore = 0;
        if (t < 0) {
            pValue = tDist.cdf(t);
            if (pValue < 2.0E-323) {
                pValue = 2.0E-323;
            }
            zScore = Probability.normalInverse(pValue);
        } else {
            pValue = tDist.cdf(-t); //Take two sided P-Value
            if (pValue < 2.0E-323) {
                pValue = 2.0E-323;
            }
            zScore = -Probability.normalInverse(pValue);
        }
        return zScore;
    }

    /**
     *
     * Returns the two-sided p-value for a given correlation coefficient from
     * Student's t distribution.
     *
     * @param correlation correlation coefficient (r)
     * @param nrSamples number of samples used
     * @return Z-score
     */
    public static double correlationToP(double correlation, int nrSamples, StudentT tDist) {

        double t = correlation / (Math.sqrt((1 - correlation * correlation) / (double) (nrSamples - 2)));
        double pValue = 0;
        if (t < 0) {
            pValue = tDist.cdf(t);
            if (pValue < 2.0E-323) {
                pValue = 2.0E-323;
            }
        } else {
            pValue = tDist.cdf(-t); //Take two sided P-Value
            if (pValue < 2.0E-323) {
                pValue = 2.0E-323;
            }
        }
        return pValue;
    }
//    private static void zScoreToCorrelation(int nrSamples) {
//        if (m_zScoreToCorrelation == null || m_zScoreToCorrelation.length != nrSamples) {
//            //Fast look-up service to determine P-Value for individual Spearman correlation coefficient, given sample size:
//            cern.jet.random.StudentT tDistColt = new cern.jet.random.StudentT(nrSamples - 2, (new cern.jet.random.engine.DRand()));
//
//            double[] zScoreToCorrelation = new double[2000001];
//            for (int corrInt = 0; corrInt < 2000001; corrInt++) {
//                double correlation = (double) (corrInt - 1000000) / 1000000d;
//                double t = correlation / (Math.sqrt((1 - correlation * correlation) / (double) (nrSamples - 2)));
//                double pValue = 0;
//                double zScore = 0;
//                if (t < 0) {
//                    pValue = tDistColt.cdf(t);
//                    if (pValue < 2.0E-323) {
//                        pValue = 2.0E-323;
//                    }
//                    zScore = cern.jet.stat.Probability.normalInverse(pValue);
//                } else {
//                    pValue = tDistColt.cdf(-t); //Take two sided P-Value
//                    if (pValue < 2.0E-323) {
//                        pValue = 2.0E-323;
//                    }
//                    zScore = -cern.jet.stat.Probability.normalInverse(pValue);
//                }
//                int zScoreInt = (int) Math.round(zScore * 10000d + 1000000d);
//                if (zScoreInt < 0) {
//                    zScoreInt = 0;
//                }
//                if (zScoreInt > 2000000) {
//                    zScoreInt = 2000000;
//                }
//                zScoreToCorrelation[zScoreInt] = correlation;
//            }
//            for (int i = 0; i < zScoreToCorrelation.length; i++) {
//
//                double zForBin = (i - 1000000d) / 10000;
//                double p = zToP(zForBin);
//
//                double r = 1;
//                if (p > 0.2 && p < 1) {
//                    System.out.println(p+"\t"+nrSamples);
//                    double t = cern.jet.stat.Probability.studentTInverse(p, nrSamples);
//                    // solve the eqn:
//                    int nrSamplesDf = nrSamples - 2;
//                    double tsquared = t * t;
//                    double tsquaredDF = nrSamplesDf * tsquared;
//                    r = -1 / Math.sqrt(1 + (((double) nrSamples / t)*((double) nrSamples / t) ));
//                    System.out.println(i + "\t" + zForBin + "\t" + p + "\t" + r + "\t" + zScoreToCorrelation[i]);
//                }
//                
//                // System.out.println(zScoreToCorrelation[i] + "\t" + r);
//                // t = r / (Math.sqrt(1-r2/nrSamplesDf));
//                // t * Math.sqrt(1-r2/nrSamplesDf) = r
//                // t2 * ((1-r2)/nrSamplesDf) = r2;
//                // t2 * nrSamplesDf * (1-r2) = r2 * nrSamplesDf;
//                // tsquaredDF - tsquaredDF * r2 = r2 * nrSamplesDf;
//                // tsquaredDF - (tsquaredDF-nrSamplesDf)*r2 = 0;
//                // tsquaredDF / (tsquaredDF-nrSamplesDf) - r2 = 0;
//                // sqrt(tsquaredDF / (tsquaredDF-nrSamplesDf))) = r;
//
//            }
//
//            m_zScoreToCorrelation = zScoreToCorrelation;
//        }
//
//
//    }
    private static JSci.maths.statistics.TDistribution tDist = null;
    private static final double FINDROOT_ACCURACY = 1.0e-15;
    private static final int FINDROOT_MAX_ITERATIONS = 15000;

    public static double zScoreToCorrelation(double obsZ, int nrSamples) {
        if (tDist == null || tDist.getDegreesOfFreedom() != nrSamples - 2) {
            tDist = new JSci.maths.statistics.TDistribution(nrSamples - 2);
        }


        //For a given Z-Score calculate the correlation:
        double obsPValue = zToP(obsZ);

        if (obsPValue == 0) {
            obsPValue = Double.MIN_VALUE;
        } else if (obsPValue == 0.5) {
            return 0;
        } else {
            obsPValue /= 2; // cannot divide 0 or close to 0 by 2
        }

//        System.out.println(obsZ + "\t" + obsPValue);


        double obsT = inverseT(obsPValue);
        if (obsZ < 0) {
            obsT *= -1;
        }
        double corr = Math.sqrt(obsT * obsT / (obsT * obsT + nrSamples));
        if (obsT > 0) {
            corr = -corr;
        }
//        System.out.println("p " + obsPValue + "\tz " + obsZ + "\tc " + corr + "\tt" + obsT);
        if (Double.isNaN(corr)) {
            return 0;
        }
        return corr;
    }

    public static double extrapolateZScore(int originalSampleSize, int newSampleSize, double originalZ) {
        double correlation = zScoreToCorrelation(originalZ, originalSampleSize);
        Correlation.correlationToZScore(newSampleSize);
        double newZ = Correlation.convertCorrelationToZScore(newSampleSize, correlation);
//        System.out.println(newZ);
        return newZ;
    }

    private static double inverseT(double probability) {
        if (probability < 0 || probability > 1) {
            throw new IllegalArgumentException("Error: probability out of normal range (should be 0 >= x <= 1): " + probability);
        }
        if (probability == 0.0) {
            return -Double.MAX_VALUE;
        }
        if (probability == 1.0) {
            return Double.MAX_VALUE;
        }
        if (probability == 0.5) {
            return 0.0;
        }
        return findRoot(probability, 0.0, -0.5 * Double.MAX_VALUE, 0.5 * Double.MAX_VALUE);
    }

    private static double findRoot(double prob, double guess, double xLo, double xHi) {
        double x = guess, xNew = guess;
        double error, pdf, dx = 1.0;
        int i = 0;
        while (Math.abs(dx) > FINDROOT_ACCURACY && i++ < FINDROOT_MAX_ITERATIONS) {
            // Apply Newton-Raphson step
            error = tDist.cumulative(x) - prob;
            if (error < 0.0) {
                xLo = x;
            } else {
                xHi = x;
            }
            pdf = tDist.probability(x);
            if (pdf != 0.0) { // Avoid division by zero
                dx = error / pdf;
                xNew = x - dx;
            }
            // If the Newton-Raphson fails to converge (which for example may be the
            // case if the initial guess is to rough) we apply a bisection
            // step to determine a more narrow interval around the root.
            if (xNew < xLo || xNew > xHi || pdf == 0.0) {
                xNew = (xLo + xHi) / 2.0;
                dx = xNew - x;
            }
            x = xNew;
        }
        return x;
    }

    public static double betaToZ(double b, double se, double n) {
        return b / (se * Math.sqrt(n));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy