All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eqtlmappingpipeline.mixupmapper.MixupMapperVisualization Maven / Gradle / Ivy

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package eqtlmappingpipeline.mixupmapper;

import eqtlmappingpipeline.graphics.curve.ROCCurve;
import eqtlmappingpipeline.graphics.histogram.MultiFrequencyDistributionHistogram;
import eqtlmappingpipeline.graphics.map.MixupMapperHeatMap;
import eqtlmappingpipeline.mixupmapper.containers.Trio;
import eqtlmappingpipeline.mixupmapper.stat.dist.Bin;
import eqtlmappingpipeline.mixupmapper.stat.dist.DiscreteDist;
import java.io.IOException;
import java.util.HashMap;
import java.util.ArrayList;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.math.matrix.DoubleMatrixDataset;
import umcg.genetica.math.stats.TTest;

/**
 *
 * @author harmjan
 */
public class MixupMapperVisualization {

    private double[][] TPR = null;
    private double[][] FPR = null;
    private double[] AUC;
    private boolean loadedFamilyData;
    // For use in calculating log base 10. A log times this is a log base 10.
    private static final double LOG10SCALE = 1 / Math.log(10);
    private HashMap sampleToTrio;
    private HashMap etg;
    private HashMap gte;
    private TextFile textOutput;

    // handy static methods
    public void createROC(DoubleMatrixDataset result, HashMap genotypeToTrait, HashMap traitToGenotype, String outputLoc) throws IOException {
        textOutput = new TextFile(outputLoc + "ROC.txt", TextFile.W);

        etg = traitToGenotype;
        gte = genotypeToTrait;

        String[] rowNames = result.rowObjects.toArray(new String[0]);
        String[] colNames = result.colObjects.toArray(new String[0]);

        double[][] matrix = result.rawData;

        int numPositives = traitToGenotype.size();

        int matrixSize = rowNames.length * colNames.length;
        int numTP = numPositives;
        int numTN = matrixSize - numTP;

        int tp = 0;
        int tn = 0;

        double[] truePosValues = new double[numTP];
        double[] trueNegValues = new double[numTN];

        if (loadedFamilyData) {

            int numParents = 0;
            for (int r = 0; r < matrix.length; r++) {

                String rowName = rowNames[r];
                String coupledGenotype = etg.get(rowName);


                Trio relatives = sampleToTrio.get(coupledGenotype);

                boolean isChild = false;

                if (relatives != null) {

                    if (relatives.child.sampleName.equals(coupledGenotype)) {
                        isChild = true;
                    }

                    // check over all columns for values of relatives
                    for (int c = 0; c < matrix[matrix.length - 1].length; c++) {

                        String colName = colNames[c];
                        boolean valueIsFromRelative = false;
                        if (isChild) {
                            if (colName.equals(relatives.parent1.sampleName) || colName.equals(relatives.parent2.sampleName)) {
                                valueIsFromRelative = true;
                            }
                        } else {
                            if (colName.equals(relatives.child.sampleName)) {
                                valueIsFromRelative = true;
                            }
                        }


                        if (valueIsFromRelative) {
                            numParents++;
                        }
                    }
                }
            }

            System.out.println("Num relatives: " + numParents);
            // System.exit(0);


            if (numParents > 0) {
                double[] truePosMinFamilyValues = new double[numTP];
                double[] trueNegMinFamilyValues = new double[numTN - (numParents)];
                double[] parentValues = new double[numParents];

                numParents = 0;
                int tpc = 0;
                int tnc = 0;

//                outLog.log(matrixSize + " - " + truePosMinFamilyValues.length + " - "+ trueNegMinFamilyValues.length + " - "+ parentValues.length);

                for (int r = 0; r < matrix.length; r++) {

                    String rowName = rowNames[r];
                    String coupledGenotype = etg.get(rowName);


                    Trio relatives = sampleToTrio.get(coupledGenotype);


                    boolean isChild = false;

                    if (relatives.child.sampleName.equals(coupledGenotype)) {
                        isChild = true;
                    }

                    for (int c = 0; c < matrix[matrix.length - 1].length; c++) {
                        String colName = colNames[c];
                        boolean valueIsFromRelative = false;

                        if (isChild) {
                            if (colName.equals(relatives.parent1.sampleName) || colName.equals(relatives.parent2.sampleName)) {
                                valueIsFromRelative = true;
                            }
                        } else {
                            if (colName.equals(relatives.child.sampleName)) {
                                valueIsFromRelative = true;
                            }
                        }

                        if (valueIsFromRelative) {
                            parentValues[numParents] = matrix[r][c];
                            numParents++;
                        } else if (etg.get(rowName) != null && etg.get(rowName).equals(colName)) {
                            truePosMinFamilyValues[tpc] = matrix[r][c];
                            tpc++;
                        } else {
                            trueNegMinFamilyValues[tnc] = matrix[r][c];
                            tnc++;
                        }
                    }
                }
                drawDistributionsWithFamilyData(truePosMinFamilyValues, trueNegMinFamilyValues, parentValues, outputLoc);
            }
        }

        for (int row = 0; row < rowNames.length; row++) {
            String rowName = rowNames[row];

            for (int col = 0; col < colNames.length; col++) {
                String colName = colNames[col];

                if (etg.get(rowName) != null && etg.get(rowName).equals(colName)) {
                    truePosValues[tp] = matrix[row][col];
                    tp++;
                } else {
                    trueNegValues[tn] = matrix[row][col];
                    tn++;
                }
            }

        }

        // create distribution for the best matches of this matrix
        double[] lowestVals = new double[numPositives];
        double[] restVals = new double[numTN];
        int tnProcessed = 0;
        for (int col = 0; col < colNames.length; col++) {
            double lowestForThisCol = Double.MAX_VALUE;
            int lowestRow = -1;
            for (int row = 0; row < rowNames.length; row++) {
                if (matrix[row][col] < lowestForThisCol) {
                    lowestRow = row;
                    lowestForThisCol = matrix[row][col];
                }
            }
            for (int row = 0; row < rowNames.length; row++) {
                if (row == lowestRow) {
                    lowestVals[col] = matrix[row][col];
                } else {
                    restVals[tnProcessed] = matrix[row][col];
                    tnProcessed++;
                }
            }
        }

        drawROCCurve(lowestVals, restVals, true, outputLoc);
        drawROCCurve(truePosValues, trueNegValues, false, outputLoc);

        // now plot the ROC
        textOutput.close();
    }

    public void plotHeatMap(DoubleMatrixDataset result, String plotTitle, String subTitle, String xAxisLabel, String yAxisLabel, double[] expPC1EigenVector, double[] genPC1EigenVector, String outdir) {
        int numRows = result.rowObjects.size();
        int numCols = result.colObjects.size();
        MixupMapperHeatMap hm = new MixupMapperHeatMap((numCols * 10) + 50, (numRows * 10) + 50, true, outdir+"HeatMap.pdf");

        hm.setLabels(result.rowObjects.toArray(new String[0]), result.colObjects.toArray(new String[0]));
        hm.setAxisLabels(xAxisLabel, yAxisLabel);
        // hm.sortByLabels(true, true);
        hm.setPC1EigenVector(genPC1EigenVector, expPC1EigenVector);

        hm.plot(result.rawData);
        hm.setTitle(plotTitle);
        if(subTitle!=null){
            hm.setSubTitle(subTitle);
        }
        

        hm.draw(outdir+"HeatMap.pdf");
        hm.close();
    }

    private void drawDistributionsWithFamilyData(double[] truePosValues, double[] trueNegValues, double[] parentValues, String outputLoc) {
        int numBins = 100;

        double[] tmpVals = new double[3];
        tmpVals[0] = JSci.maths.ArrayMath.max(truePosValues);
        tmpVals[1] = JSci.maths.ArrayMath.max(trueNegValues);
        tmpVals[2] = JSci.maths.ArrayMath.max(parentValues);

        // get the max for the distributions
        double distmax = JSci.maths.ArrayMath.max(tmpVals);

        tmpVals[0] = JSci.maths.ArrayMath.min(truePosValues);
        tmpVals[1] = JSci.maths.ArrayMath.min(trueNegValues);
        tmpVals[2] = JSci.maths.ArrayMath.min(parentValues);
        // get the min for the distributions

        double distmin = JSci.maths.ArrayMath.min(tmpVals);

        distmin = Math.floor(distmin);
        distmax = Math.ceil(distmax);

        double mu0 = JSci.maths.ArrayMath.mean(trueNegValues);
        double mu1 = JSci.maths.ArrayMath.mean(truePosValues);
        double mu2 = JSci.maths.ArrayMath.mean(parentValues);

        double sd0 = JSci.maths.ArrayMath.standardDeviation(trueNegValues);

        double var0 = JSci.maths.ArrayMath.variance(trueNegValues);
        double sd1 = JSci.maths.ArrayMath.standardDeviation(truePosValues);
        double var1 = JSci.maths.ArrayMath.variance(truePosValues);

        double sd2 = JSci.maths.ArrayMath.standardDeviation(parentValues);
        double var2 = JSci.maths.ArrayMath.variance(parentValues);


        double absmu = Math.abs(mu1 - mu0);
        double absmutpvsp = Math.abs(mu1 - mu2);

        double abssd = ((sd0 + sd1) / 2);
        double SNR = absmu / abssd;


        double absvar = (var0 + var1) / 2;
        double SNR2 = absmu / absvar;

        double absvartpvsp = (var1 + var2) / 2;
        double SNR2tpvsp = absmutpvsp / absvartpvsp;

        double SNRTPVsParents = absmutpvsp / ((sd0 + sd2) / 2);


        double test = TTest.test(trueNegValues, truePosValues);
        System.out.println("The SNR/distance between the original sample assignment frequency distribution and other sample pair frequency distribution:\t" + SNR2);
        System.out.println("The SNR/distance between the true positive and 1st degree family distributions:\t" + SNR2tpvsp);

        // make a true positive distribution
        DiscreteDist tp = new DiscreteDist();
        tp.createDist(distmin, distmax, numBins);

        // make a true negative distribution
        DiscreteDist tn = new DiscreteDist();
        tn.createDist(distmin, distmax, numBins);

        DiscreteDist pv = new DiscreteDist();
        pv.createDist(distmin, distmax, numBins);

        java.util.Arrays.sort(truePosValues);
        java.util.Arrays.sort(trueNegValues);
        java.util.Arrays.sort(parentValues);

        double range = 0.0;
        if (distmin < 0) {
            range = distmax + Math.abs(distmin);
        } else {
            range = distmax - distmin;
        }

        double binInterval = range / numBins;

        // put the true positive values in to a bin --> convert to frequency distribution
        for (int i = 0; i < truePosValues.length; i++) {
            double val = 0.0;
            if (distmin < 0.0) {
                val = truePosValues[i] + Math.abs(distmin);
            } else {
                val = truePosValues[i] - distmin;
            }

            int binNum = (int) Math.floor(val / binInterval);

            if (binNum == numBins) {
                binNum = numBins - 1;
            }
            tp.addToBin(binNum);
            // comp.addToBin(binNum);
        }

        // put the true negative values in to a bin --> convert to frequency distribution
        for (int i = 0; i < trueNegValues.length; i++) {
            double val = 0.0;
            if (distmin < 0.0) {
                val = trueNegValues[i] + Math.abs(distmin);
            } else {
                val = trueNegValues[i] - distmin;
            }
            int binNum = (int) Math.floor(val / binInterval);
            if (binNum == numBins) {
                binNum = numBins - 1;
            }
            tn.addToBin(binNum);
            // comp.addToBin(binNum);

        }

        for (int i = 0; i < parentValues.length; i++) {
            double val = 0.0;
            if (distmin < 0.0) {
                val = parentValues[i] + Math.abs(distmin);
            } else {
                val = parentValues[i] - distmin;
            }

            int binNum = (int) Math.floor(val / binInterval);

            if (binNum == numBins) {
                binNum = numBins - 1;
            }
            pv.addToBin(binNum);
            // comp.addToBin(binNum);
        }

        // create the distribution objects
        DiscreteDist[] dists = new DiscreteDist[3];
        tp.convertToFrequency(truePosValues.length);
        tn.convertToFrequency(trueNegValues.length);
        pv.convertToFrequency(parentValues.length);

        dists[0] = tp;
        dists[2] = pv;
        dists[1] = tn;

        // plot the distribution histogram based on frequency
        MultiFrequencyDistributionHistogram mfh = new MultiFrequencyDistributionHistogram(1100, 1100, true, outputLoc + "Distribution-Frequency-WithRelatives.pdf");
        mfh.plot(dists);
        mfh.draw(outputLoc + "Distribution-Frequency-WithRelatives.png");
        mfh.close();
    }

    private void drawROCCurve(double[] truePosValues, double[] trueNegValues, boolean bestmatches, String outputLoc) throws IOException {
        if (trueNegValues.length == 0 || truePosValues.length == 0) {
            System.out.println("WARNING: cannot determine ROCs since there are no values for true positives or true negatives: (" + truePosValues.length + " - " + trueNegValues.length + ")");
        } else {
            int numBins = 100;

            double[] tmpVals = new double[2];
            tmpVals[0] = JSci.maths.ArrayMath.max(truePosValues);
            tmpVals[1] = JSci.maths.ArrayMath.max(trueNegValues);

            // get the max for the distributions
            double distmax = JSci.maths.ArrayMath.max(tmpVals);

            tmpVals[0] = JSci.maths.ArrayMath.min(truePosValues);
            tmpVals[1] = JSci.maths.ArrayMath.min(trueNegValues);

            // get the min for the distributions
            double distmin = JSci.maths.ArrayMath.min(tmpVals);

            distmin = Math.floor(distmin);
            distmax = Math.ceil(distmax);

            double mu0 = JSci.maths.ArrayMath.mean(trueNegValues);
            double mu1 = JSci.maths.ArrayMath.mean(truePosValues);

            double sd0 = JSci.maths.ArrayMath.standardDeviation(trueNegValues);
            double var0 = JSci.maths.ArrayMath.variance(trueNegValues);

            double sd1 = JSci.maths.ArrayMath.standardDeviation(truePosValues);
            double var1 = JSci.maths.ArrayMath.variance(truePosValues);

            double absmu = Math.abs(mu1 - mu0);
            double abssd = ((sd0 + sd1) / 2);

            double SNR = absmu / abssd;

            double absvar = (var0 + var1) / 2;
            double SNR2 = absmu / absvar;


            double test = TTest.test(trueNegValues, truePosValues);
            System.out.println("P-value of T-test between original sample assignments and other sample pairs: " + test);
            if (bestmatches) {
                System.out.println("The SNR/distance between the best match frequency distribution and other sample pair frequency distribution:\t" + SNR2);
            } else {
                System.out.println("The SNR/distance between the original sample assignment frequency distribution and other sample pair frequency distribution:\t" + SNR2);
            }


            // make a true positive distribution
            DiscreteDist tp = new DiscreteDist();
            tp.createDist(distmin, distmax, numBins);

            // make a true negative distribution
            DiscreteDist tn = new DiscreteDist();
            tn.createDist(distmin, distmax, numBins);

//	    System.out.println("Sorting TP");
            java.util.Arrays.sort(truePosValues);

//	    System.out.println("Sorting FP");
            java.util.Arrays.sort(trueNegValues);

            double range = 0.0;
            if (distmin < 0) {
                range = distmax + Math.abs(distmin);
            } else {
                range = distmax - distmin;
            }

            double binInterval = range / numBins;

            // put the true positive values in to a bin --> convert to frequency distribution
//	    System.out.println("Binning TP");
            for (int i = 0; i < truePosValues.length; i++) {
                double val = 0.0;
                if (distmin < 0.0) {
                    val = truePosValues[i] + Math.abs(distmin);
                } else {
                    val = truePosValues[i] - distmin;
                }

                int binNum = (int) Math.floor(val / binInterval);

                if (binNum == numBins) {
                    binNum = numBins - 1;
                }
                tp.addToBin(binNum);
                // comp.addToBin(binNum);
            }

            // put the true negative values in to a bin --> convert to frequency distribution
//	    System.out.println("Binning FP");
            for (int i = 0; i < trueNegValues.length; i++) {
                double val = 0.0;
                if (distmin < 0.0) {
                    val = trueNegValues[i] + Math.abs(distmin);
                } else {
                    val = trueNegValues[i] - distmin;
                }
                int binNum = (int) Math.floor(val / binInterval);
                if (binNum == numBins) {
                    binNum = numBins - 1;
                }
                tn.addToBin(binNum);
                // comp.addToBin(binNum);

            }

            // create the distribution objects
            DiscreteDist[] dists = new DiscreteDist[2];
            tp.convertToFrequency(truePosValues.length);
            tn.convertToFrequency(trueNegValues.length);

            dists[0] = tp;
            dists[1] = tn;

            // plot the distribution histogram based on frequency
            MultiFrequencyDistributionHistogram mfh = null;
            if (bestmatches) {
                mfh = new MultiFrequencyDistributionHistogram(1100, 1100, true, outputLoc + "Distribution-Frequency-BestMatches.pdf");
            } else {
                mfh = new MultiFrequencyDistributionHistogram(1100, 1100, true, outputLoc + "Distribution-Frequency.pdf");
            }

            mfh.plot(dists);

            if (bestmatches) {
                mfh.draw(outputLoc + "Distribution-Frequency-BestMatches.pdf");

            } else {
                mfh.draw(outputLoc + "Distribution-Frequency.pdf");

            }
            mfh.close();

            // calculate the cumulative value for each bin
            tp.calcCumulative();
            tn.calcCumulative();

            // convert to frequencies
            tp.convertToFrequency(truePosValues.length);
            tn.convertToFrequency(trueNegValues.length);

            // reset the bin iterator
            tp.resetIterator();
            tn.resetIterator();

            int numPos = truePosValues.length;
            int numNeg = trueNegValues.length;

            // create new true postive rate and true negative rate bins
            TPR = new double[6][numBins];
            FPR = new double[6][numBins];
            AUC = new double[6];

            // determine the ROC for a priori chance between 0 and 1.0 with increments of 0.05
            for (int i = 1; i < 6; i++) {
                tp.resetIterator();
                tn.resetIterator();

                double apriori = i * 0.20;

                double[] tmpTPR = new double[numBins];
                double[] tmpFPR = new double[numBins];


                double tmpAUC = 0;
                int currentBinNum = 0;

                double prevTPR = 0.0; // y-axis
                double prevFPR = 0.0; // x-axis
                double curTPR = 0.0;
                double curFPR = 0.0;

                while (tp.hasNext()) {
                    Bin tpBin = tp.getNext();
                    Bin tnBin = tn.getNext();

                    double TP = tpBin.getCumulativeFrequency();
                    double FN = 1 - TP;

                    double FP = tnBin.getCumulativeFrequency();
                    double TN = 1 - FP;

                    curTPR = (TP * apriori) / (TP * apriori + FN);
                    // curTPR = (TP * apriori) / (TP * apriori + FN);
                    curFPR = FP / (FP + TN);

                    tmpTPR[currentBinNum] = curTPR;
                    tmpFPR[currentBinNum] = curFPR;



                    if (currentBinNum != 0) {
                        prevTPR = tmpTPR[currentBinNum - 1]; // y-axis
                        prevFPR = tmpFPR[currentBinNum - 1]; // x-axis
                        double squareArea = (curFPR - prevFPR) * (prevTPR); // square area
                        double triangleArea = (curFPR - prevFPR) * (curTPR - prevTPR) * 0.5;
                        tmpAUC += (squareArea + triangleArea);
                    }



                    textOutput.write(i + "\t" + apriori + "\t" + tmpTPR[currentBinNum] + "\t" + tmpFPR[currentBinNum]);
                    currentBinNum++;
                }

                // put the values for this a priori chance in the array
                FPR[i] = tmpFPR;
                TPR[i] = tmpTPR;
                AUC[i] = tmpAUC;
            }

            // plot curve based on the a priori chances
            if (!bestmatches) {
                ROCCurve roc = new ROCCurve(1024, 1024, true, outputLoc + "ROC.pdf");
                roc.plot(TPR, FPR, AUC, 0.20);
                roc.draw(outputLoc + "ROC.pdf");
                roc.close();
            }
        }
    }

    public void loadTruePositives(HashMap genotypeToExpression, HashMap expressionToGenotype) {
        etg = expressionToGenotype;
        gte = genotypeToExpression;
    }

    public void loadTruePositives(String inFile) throws IOException {
        gte = new HashMap();
        etg = new HashMap();

        TextFile in = new TextFile(inFile, TextFile.R);
        String line = "";
        while ((line = in.readLine()) != null) {
            String[] lineElems = line.split("\t");
            if (lineElems.length > 1) {
                gte.put(lineElems[0], lineElems[1]);
                etg.put(lineElems[1], lineElems[0]);
            }
        }
        in.close();

    }

    public void loadFamilyData(HashMap sampleToTrio) {
        this.sampleToTrio = sampleToTrio;

        this.loadedFamilyData = true;
    }

    private boolean isRelative(String colName, String coupledGenotype, ArrayList relatives) {
        boolean isRelative = false;
        for (int i = 0; i < relatives.size(); i++) {
            String relative = relatives.get(i);
            if (relative.equals(colName) && !relative.equals(coupledGenotype)) {
                isRelative = true;
                break;
            }
        }

        return isRelative;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy