All Downloads are FREE. Search and download functionalities are using the official Maven repository.

umcg.genetica.math.stats.FDRUtils Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package umcg.genetica.math.stats;

import JSci.maths.ArrayMath;
import JSci.maths.statistics.NormalDistribution;
import java.io.IOException;
import java.util.Arrays;
import umcg.genetica.math.matrix.DoubleMatrixDataset;

/**
 *
 * @author juha
 */
public class FDRUtils {

    private static NormalDistribution normDist;

    public enum CloneArrays {

        CLONE_BOTH, DONT_CLONE, CLONE_REAL
    }

    public enum SortArrays {

        SORT_DESCENDING, ALREADY_SORTED_DESCENDING
    }

    public enum AbsoluteValues {

        USE_ABSOLUTE, USE_AS_IS
    }

    // prevent instantiation, only use static factory methods
    private FDRUtils() {
        normDist = null;
    }

    /**
     *
     * Returns the threshold value in the whole of given real data matrix that
     * corresponds to the given false discovery rate against the null
     * distribution from given permuted data matrix.
     *
     * @param real real data [x][y]
     * @param permuted permuted data [x][y]
     * @param fdr false discovery rate to find
     * @param c should the data be cloned or not (because the arrays are
     * modified)
     * @param s should the data be sorted first or are they already sorted
     * @param a should absolute values be used
     * @return largest threshold value in real data so that FDR is not larger
     * than the given one
     */
    public static double getGlobalThreshold(double[][] real, double[][] permuted, double fdr, CloneArrays c, SortArrays s, AbsoluteValues a) {

        double[][][] perm3d = new double[1][][];
        perm3d[0] = permuted;
        return getGlobalThreshold(real, perm3d, fdr, c, s, a);
    }

    /**
     *
     * Calculates the number of significant Z-scores per each row in the given Z-score matrix. Z-scores are converted to p-values and the given p-value threshold is used as cutoff for significance. A two-tailed test is assumed.
     *
     * @param data a Z-score matrix
     * @param threshold p-value threshold to use
     * @param s should the data be sorted first or are they already sorted
     * @param a should absolute values be used
     * @return an array with the number of significant (according to the given p-value threshold) values in the matrix per row
     */
    public static int[] getNrSignificant(double[][] data, double threshold, SortArrays s, AbsoluteValues a) {

        switch (s) {
            case SORT_DESCENDING:
                data = sortDesc(data);
                break;
        }

        switch (a) {
            case USE_ABSOLUTE:
                data = ArrayMath.abs(data);
                break;
        }

        int[] nrSignificant = new int[data.length];
        for (int gene = 0; gene < data.length; gene++) {
            for (int term = 0; term < data[0].length; term++) {
                if (zToP(data[gene][term]) < threshold) {
                    nrSignificant[gene]++;
                } else {
                    break;
                }
            }
        }

        return nrSignificant;
    }

    /**
     *
     * Returns the p-value corresponding to the given Z-score assuming a two tailed-test.
     *
     * @param z A Z-score to convert
     * @return The p-value corresponding to the Z-score
     */
    public static double zToP(double z) {

        if (normDist == null) {
            normDist = new NormalDistribution();
        }
        double p;
        if (z > 0) {
            p = normDist.cumulative(-z);
        } else {
            p = normDist.cumulative(z);
        }
        if (p > 0.5) {
            p = 1 - p;
        }
        p *= 2.0d;

        return p;
    }

    /**
     *
     * Returns the threshold value in the whole of given real data matrix that
     * corresponds to the given false discovery rate against the null
     * distribution from given permuted data matrices.
     *
     * @param real real data [x][y]
     * @param permuted permuted data [permutation][x][y]
     * @param fdr false discovery rate to find
     * @param c should the data be cloned or not (because the arrays are
     * modified)
     * @param s should the data be sorted first or are they already sorted
     * @param a should absolute values be used
     * @return largest threshold value in real data so that FDR is not larger
     * than the given one
     */
    public static double getGlobalThreshold(double[][] real, double[][][] permuted, double fdr, CloneArrays c, SortArrays s, AbsoluteValues a) {

        checkNulls(real, permuted);
        checkSizes(real, permuted);
        checkFDR(fdr);

        int nrPermutations = permuted.length;
        double[][] realData = real;
        double[][][] permData = permuted;
        switch (c) {
            case CLONE_BOTH:
                realData = real.clone();
                permData = permuted.clone();
                break;
            case CLONE_REAL:
                realData = real.clone();
                break;
        }

        switch (s) {
            case SORT_DESCENDING:
                realData = sortDesc(realData);
                permData = sortDesc(permData);
                break;
        }

        switch (a) {
            case USE_ABSOLUTE:
                realData = ArrayMath.abs(realData);
                for (int p = 0; p < nrPermutations; p++) {
                    permData[p] = ArrayMath.abs(permData[p]);
                }
                break;
        }

        double threshold = -1;
        int nrSignificantInReal = 0;
        int nrSignificantInPerm = 0;
        int[] realColsTraversed = new int[realData.length];
        int[][] permColsTraversed = new int[nrPermutations][realData.length];
        double foundFDR = -1;

        while (foundFDR < fdr) {

            // get next largest value i.e. threshold from real data
            // knowing that the data are sorted both by rows and columns
            double realThreshold = 0;
            int nextMaxRow = -1;
            for (int i = 0; i < realColsTraversed.length; i++) {
                if (realData[i][realColsTraversed[i]] > realThreshold) {
                    realThreshold = realData[i][realColsTraversed[i]];
                    nextMaxRow = i;
                }
                if (realColsTraversed[i] == 0) {
                    break;
                }
            }
            realColsTraversed[nextMaxRow]++;
            nrSignificantInReal++;

            for (int p = 0; p < nrPermutations; p++) {
                for (int i = 0; i < permColsTraversed[0].length; i++) {
                    while (permData[p][i][permColsTraversed[p][i]] > realThreshold) {
                        nrSignificantInPerm++;
                        permColsTraversed[p][i]++;
                    }
                    if (permColsTraversed[p][i] == 0) {
                        break;
                    }
                }
            }

            foundFDR = (double) nrSignificantInPerm / (double) nrPermutations / (double) nrSignificantInReal;
            if (foundFDR <= fdr) {
                threshold = realThreshold;
            }
        }

        return threshold;
    }

    /**
     *
     * Returns the threshold values per row in the given real data matrix that
     * corresponds to the given false discovery rate against the null
     * distribution from rows of given permuted data matrix.
     *
     * @param real real data [x][y]
     * @param permuted permuted data [x][y]
     * @param fdr false discovery rate to find
     * @param c should the data be cloned or not (because the arrays are
     * modified)
     * @param s should the data be sorted first or are they already sorted
     * @param a should absolute values be used
     * @return largest threshold values per row in real data so that FDR is not
     * larger than the given one
     */
    public static double[] getThresholdsPerRow(double[][] real, double[][] permuted, double fdr, CloneArrays c, SortArrays s, AbsoluteValues a) {

        double[][][] perm3d = new double[1][][];
        perm3d[0] = permuted;
        return getThresholdsPerRow(real, perm3d, fdr, c, s, a);
    }

    /**
     *
     * Returns the threshold values per row in the given real data matrix that
     * corresponds to the given false discovery rate against the null
     * distribution from rows of given permuted data matrices.
     *
     * @param real real data [x][y]
     * @param permuted permuted data [permutation][x][y]
     * @param fdr false discovery rate to find
     * @param c should the data be cloned or not (because the arrays are
     * modified)
     * @param s should the data be sorted first or are they already sorted
     * @param a should absolute values be used
     * @return largest threshold values per row in real data so that FDR is not
     * larger than the given one
     */
    public static double[] getThresholdsPerRow(double[][] real, double[][][] permuted, double fdr, CloneArrays c, SortArrays s, AbsoluteValues a) {

        checkNulls(real, permuted);
        checkSizes(real, permuted);
        checkFDR(fdr);

        int nrPermutations = permuted.length;
        double[][] realData = real;
        double[][][] permData = permuted;
        switch (c) {
            case CLONE_BOTH:
                realData = real.clone();
                permData = permuted.clone();
                break;
            case CLONE_REAL:
                realData = real.clone();
                break;
        }

        switch (a) {
            case USE_ABSOLUTE:
                realData = ArrayMath.abs(realData);
                for (int p = 0; p < nrPermutations; p++) {
                    permData[p] = ArrayMath.abs(permData[p]);
                }
                break;
        }

        double[] thresholds = new double[realData.length];

        for (int row = 0; row < realData.length; row++) {

            if (s == SortArrays.SORT_DESCENDING) {
                Arrays.sort(realData[row]);
                realData[row] = ArrayMath.invert(realData[row]);
                for (int p = 0; p < nrPermutations; p++) {
                    Arrays.sort(permData[p][row]);
                    permData[p][row] = ArrayMath.invert(permData[p][row]);
                }
            }

            double[] thresholdPerm = new double[nrPermutations];
            double foundFDR = 1;

            for (int p = 0; p < nrPermutations; p++) {
                int nrSignificantInReal = 0;
                thresholdPerm[p] = realData[row][0];
                for (int i = 0; i < realData[0].length; i++) {
                    double currentThreshold = realData[row][i];
                    nrSignificantInReal++;
                    int nrSignificantInPerm = 0;
                    for (int j = 0; j < permData[0][0].length; j++) {
                        if (permData[p][row][j] > currentThreshold) {
                            nrSignificantInPerm++;
                        } else {
                            break;
                        }
                    }

                    foundFDR = nrSignificantInPerm / nrSignificantInReal;

                    if (foundFDR > fdr) {
                        break;
                    } else {
                        thresholdPerm[p] = currentThreshold;
                    }
                }
            }

            Arrays.sort(thresholdPerm);
            thresholds[row] = thresholdPerm[(int) (0.8 * (double) nrPermutations) - 1];

        }

        return thresholds;
    }

    public static int[] getNrSignificant(double[][] data, double[] thresholds, CloneArrays c, SortArrays s, AbsoluteValues a) {

        int[] nrSignificant = new int[thresholds.length];
        double[][] realData = data;
        switch (c) {
            case CLONE_BOTH:
                realData = data.clone();
                break;
            case CLONE_REAL:
                realData = data.clone();
                break;
        }

        switch (a) {
            case USE_ABSOLUTE:
                realData = ArrayMath.abs(realData);
                break;
        }

        if (s == SortArrays.SORT_DESCENDING) {
            for (int row = 0; row < realData.length; row++) {
                Arrays.sort(realData[row]);
                realData[row] = ArrayMath.invert(realData[row]);
            }
        }

        for (int row = 0; row < realData.length; row++) {
            int nrSignificantThisRow = 0;
            for (int i = 0; i < realData[0].length; i++) {
                if (realData[row][i] >= thresholds[row]) {
                    nrSignificantThisRow++;
                }
            }
            nrSignificant[row] = nrSignificantThisRow;
        }
        return nrSignificant;
    }

    public static double[] getThresholdsPerRow(double[][] real, String[] permutedFiles, double fdr, double mvp, CloneArrays c, SortArrays s, AbsoluteValues a) throws IOException {

        checkFDR(fdr);

        int nrPermutations = permutedFiles.length;
        double[][] realData = real;
        switch (c) {
            case CLONE_BOTH:
                realData = real.clone();
                break;
            case CLONE_REAL:
                realData = real.clone();
                break;
        }

        switch (a) {
            case USE_ABSOLUTE:
                realData = ArrayMath.abs(realData);
                break;
        }

        double[] thresholds = new double[realData.length];

        if (s == SortArrays.SORT_DESCENDING) {
            for (int row = 0; row < realData.length; row++) {
                Arrays.sort(realData[row]);
                realData[row] = ArrayMath.invert(realData[row]);
            }
        }

        double[][] thresholdRowPerm = new double[realData.length][nrPermutations];

        for (int p = 0; p < nrPermutations; p++) {

            System.out.println("Processing permutation " + p);

            DoubleMatrixDataset permDataset = new DoubleMatrixDataset(permutedFiles[p]);
            double[][] permData = permDataset.getRawDataTransposed();
            if (a == AbsoluteValues.USE_ABSOLUTE) {
                permData = ArrayMath.abs(permData);
            }

            double foundFDR = 1;
            for (int row = 0; row < realData.length; row++) {

                if (s == SortArrays.SORT_DESCENDING) {
                    Arrays.sort(permData[row]);
                    permData[row] = ArrayMath.invert(permData[row]);
                }

                int nrSignificantInReal = 0;
                thresholdRowPerm[row][p] = Double.MAX_VALUE;
                for (int i = 0; i < realData[0].length; i++) {
                    double currentThreshold = realData[row][i];
                    nrSignificantInReal++;
                    int nrSignificantInPerm = 0;
                    for (int j = 0; j < permData[0].length; j++) {
                        if (permData[row][j] > currentThreshold) {
                            nrSignificantInPerm++;
                        } else {
                            break;
                        }
                    }
                    foundFDR = (double) nrSignificantInPerm / nrSignificantInReal;
//                    System.out.println(nrSignificantInReal + "\t" + nrSignificantInPerm + "\t" + foundFDR);
                    if (row == 16846) {
//                        System.out.println("ENSG00000129170 " + nrSignificantInReal + " " + nrSignificantInPerm + " " + foundFDR);
                    }
                    if (foundFDR > fdr) {
                        break;
                    } else {
                        thresholdRowPerm[row][p] = currentThreshold;
                    }
                }
            }
        }

        // 80 % confidence
        for (int row = 0; row < realData.length; row++) {
            Arrays.sort(thresholdRowPerm[row]);
            thresholds[row] = thresholdRowPerm[row][(int) (mvp * (double) nrPermutations) - 1];
        }

        return thresholds;
    }

    /**
     *
     * Sorts the given array both by rows and by columns.
     *
     * @param array array to be sorted
     * @return sorted array
     */
    private static double[][] sortDesc(double[][] array) {
        // sort data wrt each row
        for (int i = 0; i < array.length; i++) {
            Arrays.sort(array[i]);
            array[i] = ArrayMath.invert(array[i]);
        }
        // sort data wrt each column
        array = ArrayMath.transpose(array);
        for (int i = 0; i < array.length; i++) {
            Arrays.sort(array[i]);
            array[i] = ArrayMath.invert(array[i]);
        }
        array = ArrayMath.transpose(array);

//        for (int i = 0; i < 10; i++) {
//            for (int j = 0; j < 10; j++) {
//                System.out.print(array[i][j] + "\t");
//            }
//            System.out.println();
//        }

        return array;
    }

    /**
     *
     * Sorts the given arrays both by rows and by columns.
     *
     * @param array arrays to be sorted ([array][row][col])
     * @return sorted arrays
     */
    private static double[][][] sortDesc(double[][][] array) {

        for (int p = 0; p < array.length; p++) {
            // sort data wrt each row
            for (int i = 0; i < array[p].length; i++) {
                Arrays.sort(array[p][i]);
                array[p][i] = ArrayMath.invert(array[p][i]);
            }
            // sort data wrt each column
            array[p] = ArrayMath.transpose(array[p]);
            for (int i = 0; i < array[p].length; i++) {
                Arrays.sort(array[p][i]);
                array[p][i] = ArrayMath.invert(array[p][i]);
            }
            array[p] = ArrayMath.transpose(array[p]);
        }

        return array;
    }

    private static void checkNulls(double[][] real, double[][][] permuted) {
        if (real == null) {
            throw new IllegalArgumentException("Null real data given.");
        }
        if (permuted == null) {
            throw new IllegalArgumentException("Null permuted data given.");
        }
        for (int i = 0; i < permuted.length; i++) {
            if (permuted[i] == null) {
                throw new IllegalArgumentException("Null permuted data given at index " + i + ".");
            }
        }

    }

    private static void checkSizes(double[][] real, double[][][] permuted) {
        if (real.length != permuted[0].length || real[0].length != permuted[0][0].length) {
            throw new IllegalArgumentException("Matrix sizes (real and permuted) must be the same. Real: " + real.length + " x " + real[0].length
                    + ", permuted: " + permuted[0].length + " x " + permuted[0][0].length);
        }
    }

    private static void checkFDR(double fdr) {
        if (fdr < Double.MIN_VALUE || fdr >= 1) {
            throw new IllegalArgumentException("FDR must be larger than 0 and smaller than 1: " + fdr);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy