All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eqtlmappingpipeline.causalinference.IVAnalysis Maven / Gradle / Ivy

The newest version!
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package eqtlmappingpipeline.causalinference;

import eqtlmappingpipeline.metaqtl3.containers.Settings;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import umcg.genetica.console.ConsoleGUIElems;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.containers.Triple;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.SNPLoader;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDataset;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDatasetSettings;
import umcg.genetica.math.stats.Correlation;
import umcg.genetica.math.stats.Regression;
import umcg.genetica.math.stats.TwoStepLeastSquares;

/**
 *
 * @author harmjan
 */
public class IVAnalysis {

    protected HashSet> snpProbeCombos = new HashSet>();
    protected Settings m_settings;
    // snpProbeCombinationList (SNP cis trans)
    protected TriTyperGeneticalGenomicsDataset[] m_gg;
    protected final String outDir;

    public IVAnalysis(String xmlSettingsFile,
            String ingt, String inexp, String inexpplatform, String inexpannot,
            String gte, String out, int perm, String snpProbeCombinationList, boolean parametric) throws IOException, Exception {

        if (xmlSettingsFile == null && (ingt == null || inexp == null)) {
            throw new IllegalArgumentException("Supply settingsfile for IV Analysis");
        }

        if (snpProbeCombinationList == null) {
            throw new IllegalArgumentException("Supply SNP probe combination file for IV Analysis");
        }

        loadSNPProbeCombos(snpProbeCombinationList);
        if (snpProbeCombos.isEmpty()) {
            throw new IllegalArgumentException("SNP Probe combination file is empty!");
        }

        initializeDatasets(xmlSettingsFile, ingt, inexp, inexpplatform, inexpannot, gte, out, perm, parametric);

        if (!out.endsWith("/")) {
            out += "/";
        }

        Gpio.createDir(out);
        this.outDir = out;


    }

    private void loadSNPProbeCombos(String snpProbeCombinationList) throws IOException {
        System.out.println("Loading SNP-Cis-Trans combo's from file: " + snpProbeCombinationList);
        TextFile tf = new TextFile(snpProbeCombinationList, TextFile.R);
        String[] elems = tf.readLineElems(TextFile.tab);
        while (elems != null) {
            if (elems.length >= 3) {
                Triple t = new Triple(elems[0], elems[1], elems[2]);
                snpProbeCombos.add(t);
            }
            elems = tf.readLineElems(TextFile.tab);
        }
        tf.close();
        System.out.println("Loaded " + snpProbeCombos.size() + " combinations.");
    }

    private void initializeDatasets(String xmlSettingsFile,
            String ingt, String inexp, String inexpplatform, String inexpannot,
            String gte, String out, int perm, boolean parametric) throws IOException, Exception {


        if (m_settings == null && xmlSettingsFile == null && ingt != null) {

            // check the settings
            boolean settingsOk = true;
            if (inexp == null || inexp.trim().length() == 0) {
                System.err.println("ERROR: you did not specify a gene expression file.");
                settingsOk = false;
            }

            if (inexpannot != null && inexpannot.trim().length() != 0) {
                if (inexpplatform == null || inexpplatform.trim().length() == 0) {
                    System.err.println("ERROR: you specified " + inexpannot + " but you did not specify the platform (using --inexpplatform)!");
                    settingsOk = false;
                }
            }

            if (out == null || out.trim().length() == 0) {
                System.err.println("ERROR: you did not specify an output directory.");
                settingsOk = false;
            } else if (!out.endsWith("/")) {
                out += "/";
            }

            if (!settingsOk) {
                System.out.println();
                System.exit(0);
            }

            m_settings = new Settings();
            TriTyperGeneticalGenomicsDatasetSettings s = new TriTyperGeneticalGenomicsDatasetSettings();

            s.name = "Dataset";
            s.expressionLocation = inexp;
            s.expressionplatform = inexpplatform;
            s.probeannotation = inexpannot;
            s.genotypeLocation = ingt;
            s.genotypeToExpressionCoupling = gte;
            s.cisAnalysis = true;
            s.transAnalysis = true;

            if (parametric) {
                System.out.println("Running parametric analysis.");
                m_settings.performParametricAnalysis = true;
            }

            m_settings.cisAnalysis = true;
            m_settings.transAnalysis = true;

            boolean cistrans = false;
            if (m_settings.cisAnalysis && m_settings.transAnalysis) {
                m_settings.confineProbesToProbesMappingToAnyChromosome = true;
            }

            m_settings.datasetSettings = new ArrayList();

            m_settings.regressOutEQTLEffectFileName = null;
            m_settings.datasetSettings.add(s);
            m_settings.nrThreads = 1;
            m_settings.cisAnalysis = true;
            m_settings.transAnalysis = true;
            m_settings.nrPermutationsFDR = perm;

            if (!Gpio.exists(out)) {
                Gpio.createDir(out);
            }

            m_settings.outputReportsDir = out;
            m_settings.createTEXTOutputFiles = true;
            m_settings.createBinaryOutputFiles = false;

        } else if (m_settings == null && xmlSettingsFile != null) {
            // parse settings
            m_settings = new Settings();
            m_settings.load(xmlSettingsFile);
        } else if (m_settings == null) {
            System.out.println("ERROR: No input specified");
            System.exit(0);
        }

        // initialize dataset
        if (!m_settings.cisAnalysis && !m_settings.transAnalysis) {
            System.err.println("! WARNING: defaulting to CIS analysis (override with --trans or --trans and --cis))");
            m_settings.cisAnalysis = true;
        }

        m_settings.writeSettingsToDisk();


        int numDatasets = m_settings.datasetSettings.size();
        m_gg = new TriTyperGeneticalGenomicsDataset[numDatasets];

        int nrOfDatasetsWithGeneExpressionData = 0;
        for (int i = 0; i < numDatasets; i++) {

            System.out.println("- Loading dataset: " + m_settings.datasetSettings.get(i).name + "");
            m_settings.datasetSettings.get(i).confineProbesToProbesMappingToAnyChromosome = m_settings.confineProbesToProbesMappingToAnyChromosome;
            System.out.println(ConsoleGUIElems.LINE);
            m_gg[i] = new TriTyperGeneticalGenomicsDataset(m_settings.datasetSettings.get(i));

            if (m_gg[i].isExpressionDataLoadedCorrectly()) {
                nrOfDatasetsWithGeneExpressionData++;
            }

        }

        if (nrOfDatasetsWithGeneExpressionData == 0) {
            System.out.println("Error: none of your datasets contain any gene expression data for the settings you have specified");
            System.exit(0);
        }


    }

    public void run() throws IOException {
        for (int d = 0; d < m_gg.length; d++) {
            // now test all triples
            SNPLoader snpLoader = m_gg[d].getGenotypeData().createSNPLoader();
            int[] indWGA = m_gg[d].getExpressionToGenotypeIdArray();

            for (int perm = 0; perm < m_settings.nrPermutationsFDR + 1; perm++) {
                String outfile;
                if (perm == 0) {
                    outfile = outDir + m_gg[d].getSettings().name + "_IVAnalysis-RealData.txt";
                } else {
                    outfile = outDir + m_gg[d].getSettings().name + "_IVAnalysis-PermutationRound-" + perm + ".txt";
                    m_gg[d].permuteSampleLables(m_settings.randomNumberGenerator);
                }
                TextFile out = new TextFile(outfile, TextFile.W);
                Iterator> it = snpProbeCombos.iterator();
                Triple next = it.next();
                ProgressBar pb = new ProgressBar(snpProbeCombos.size(), "Running IV Analysis - Permutation " + perm);
//                out.writeln("SNP\tSNPId\tCisProbe\tCisProbeId\tCisGeneName\tTransProbe\tTransProbeId\tTransGeneName\tSNPTrans-Beta\tSNPTrans-Alpha\tSNPTrans-SE\tCisTrans-Beta\tCisTrans-Alpha\tCisTrans-SE\tIV-Beta\tIV-Alpha\tIV-SE\tcorcistrans\tr2cistrans\tcorResCisResTrans\tr2rescisrestrans\tcorCisResTrans\tcorSNPTrans\tcorSNPResTrans\tcorrSNPResCisResTrans");
                out.writeln("SNP\t"
                        + "CisArrayAddress\t"
                        + "CisGeneName"
                        + "TransArrayAddress\t"
                        + "TransGeneName"
                        + "cisEQTL\t"
                        + "r2cisEQTL\t"
                        + "transEQTL\t"
                        + "r2TransEQTL\t"
                        + "corCisTrans\t"
                        + "r2CisTrans\t"
                        + "corResCisResTrans\t"
                        + "r2ResCisResTrans\t"
                        + "IV-Beta\t"
                        + "IV-SE");
                while (next != null) {
                    String snp = next.getLeft();
                    String cisprobe = next.getMiddle();
                    String transprobe = next.getRight();

                    Integer snpId = m_gg[d].getGenotypeData().getSnpToSNPId().get(snp);
                    Integer cisProbeId = m_gg[d].getExpressionData().getProbeToId().get(cisprobe);
                    Integer transProbeId = m_gg[d].getExpressionData().getProbeToId().get(transprobe);

                    if (snpId == -9 || cisProbeId == -9 || transProbeId == -9) {
//                        out.writeln(snp + "\t" + snpId + "\t" + cisprobe + "\t" + cisProbeId + "\t" + null + "\t" + transprobe + "\t" + transProbeId + "\t" + null + "\t" + null + "\t" + null + "\t" + null + "\t" + null + "\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA");
                    } else {

                        SNP snpObj = m_gg[d].getGenotypeData().getSNPObject(snpId);
                        snpLoader.loadGenotypes(snpObj);
                        snpLoader.loadDosage(snpObj);



                        double[] origCisVals = m_gg[d].getExpressionData().getMatrix()[cisProbeId];
                        double[] origTransVals = m_gg[d].getExpressionData().getMatrix()[transProbeId];

                        int nrequal = 0;
                        int calledGenotypes = 0;
                        for (int i = 0; i < m_gg[d].getExpressionData().getIndividuals().length; i++) {
                            int genotypeId = indWGA[i];
                            short gt = snpObj.getGenotypes()[genotypeId];
//                            if(m_gg[d].getExpressionData().getIndividuals()[i].equals(m_gg[d].getGenotypeData().getIndividuals()[genotypeId])){
//                                nrequal++;
//                            }
//                            System.out.println(m_gg[d].getExpressionData().getIndividuals()[i] + "\t" + m_gg[d].getGenotypeData().getIndividuals()[genotypeId]);
                            if (genotypeId > -1 && gt > -1) {
                                calledGenotypes++;

                            }
                        }
//                        System.out.println(nrequal+" inds equal..");


                        double[] genotypes = new double[calledGenotypes];
                        double[] cisvals = new double[calledGenotypes];
                        double[] transvals = new double[calledGenotypes];

                        calledGenotypes = 0;
                        for (int i = 0; i < m_gg[d].getExpressionData().getIndividuals().length; i++) {
                            int genotypeId = indWGA[i];
                            short gt = snpObj.getGenotypes()[genotypeId];
                            if (genotypeId > -1 && gt > -1) {
                                genotypes[calledGenotypes] = snpObj.getDosageValues()[genotypeId];
                                cisvals[calledGenotypes] = origCisVals[i];
                                transvals[calledGenotypes] = origTransVals[i];
                                calledGenotypes++;
                            }
                        }




                        double corrCisTrans = JSci.maths.ArrayMath.correlation(cisvals, transvals);

                        double[] snpCisRCs = Regression.getLinearRegressionCoefficients(genotypes, cisvals);
                        double[] snpTransRCs = Regression.getLinearRegressionCoefficients(genotypes, transvals);

                        double[] resCis = new double[cisvals.length];
                        double[] resTrans = new double[cisvals.length];
                        for (int i = 0; i < resCis.length; i++) {
                            resCis[i] = cisvals[i] - snpCisRCs[0] * genotypes[i];
                            resTrans[i] = transvals[i] - snpTransRCs[0] * genotypes[i];
                        }

                        double corrResCisResTrans = JSci.maths.ArrayMath.correlation(resCis, resTrans);



                        double[] cisTransRCs = Regression.getLinearRegressionCoefficients(cisvals, transvals);
                        double[] resCisTransRCs = Regression.getLinearRegressionCoefficients(resCis, transvals);
                        double[] resCisTrans = new double[cisvals.length];
                        double[] resResCisTrans = new double[cisvals.length];
                        for (int i = 0; i < resCisTrans.length; i++) {
                            resCisTrans[i] = transvals[i] - (cisTransRCs[0] * cisvals[i]);
                            resResCisTrans[i] = transvals[i] - (resCisTransRCs[0] * cisvals[i]);
                        }

                        double corrCisResTrans = Correlation.correlate(cisvals, resCisTrans);

                        double transEQTL = Correlation.correlate(genotypes, transvals);
                        double corrSNPResTrans = Correlation.correlate(genotypes, resCisTrans);
                        double corrSNPResCisResTrans = Correlation.correlate(genotypes, resResCisTrans);


//                        if (snp.equals("rs12718597") && transprobe.equals("4900309")) {
////                            for (int i = 0; i < genotypes.length; i++) {
////                                System.out.println(genotypes[i] + "\t" + cisvals[i] + "\t" + transvals[i]);
////                            }
//                            double[] result = TwoStepLeastSquares.tsls(transvals, cisvals, genotypes, true);
//                            System.exit(0);
//                        }

//                        double m1 = JSci.maths.ArrayMath.mean(genotypes);
//                        double m2 = JSci.maths.ArrayMath.mean(transvals);
//                        double m3 = JSci.maths.ArrayMath.mean(cisvals);
//
//                        double s1 = JSci.maths.ArrayMath.standardDeviation(genotypes);
//                        double s2 = JSci.maths.ArrayMath.standardDeviation(transvals);
//                        double s3 = JSci.maths.ArrayMath.standardDeviation(cisvals);
//
//                        for (int i = 0; i < genotypes.length; i++) {
//                            genotypes[i] -= m1;
//                            genotypes[i] /= s1;
//
//                            transvals[i] -= m2;
//                            transvals[i] /= s2;
//
//                            cisvals[i] -= m3;
//                            cisvals[i] /= s3;
//                        }
//
//                        RankDoubleArray rda = new RankDoubleArray();
//                        genotypes = rda.rank(genotypes);
//                        cisvals = rda.rank(cisvals);
//                        transvals = rda.rank(transvals);

                        // we now have all the values we need.. perform two-step OLS

                        // perform regression of snp against trans probe
//                        double[] stats = Regression.getLinearRegressionCoefficients(genotypes, transvals);
//                        double[] stats2 = Regression.getLinearRegressionCoefficients(cisvals, transvals);

                        double[] result = TwoStepLeastSquares.tsls(transvals, cisvals, genotypes);

                        double cisEQTL = Correlation.correlate(genotypes, cisvals);

//                        out.writeln(snp + "\t" + snpId + "\t" + cisprobe + "\t" + cisProbeId + "\t" + m_gg[d].getExpressionData().getAnnotation()[cisProbeId] + "\t" + transprobe + "\t" + transProbeId + "\t" + m_gg[d].getExpressionData().getAnnotation()[transProbeId] + "\t" + stats[0] + "\t" + stats[1] + "\t" + stats[2] + "\t" + stats2[0] + "\t" + stats2[1] + "\t" + stats2[2] + "\t" + result[0] + "\t" + result[1] + "\t" + result[2] + "\t" + corrCisTrans + "\t" + r2CisTrans + "\t" + corrResCisResTrans + "\t" + r2ResCisResTrans + "\t" + corrCisResTrans + "\t" + corSNPTrans + "\t" + corrSNPResTrans+"\t"+corrSNPResCisResTrans);

                        out.write(snp
                                + "\t" + cisprobe
                                + "\t" + m_gg[d].getExpressionData().getAnnotation()[cisProbeId]
                                + "\t" + transprobe
                                + "\t" + m_gg[d].getExpressionData().getAnnotation()[transProbeId]
                                + "\t" + cisEQTL
                                + "\t" + (cisEQTL * cisEQTL)
                                + "\t" + transEQTL
                                + "\t" + (transEQTL * transEQTL)
                                + "\t" + corrCisTrans
                                + "\t" + (corrCisTrans * corrCisTrans)
                                + "\t" + corrResCisResTrans
                                + "\t" + (corrResCisResTrans * corrResCisResTrans)
                                + "\t" + result[0]
                                + "\t" + result[2]);
                        snpObj.clearGenotypes();
                    }

                    if (it.hasNext()) {
                        next = it.next();
                    } else {
                        next = null;
                    }
                    pb.iterate();
                }
                pb.close();
                out.close();
            }

            snpLoader.close();


        }

    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy