eqtlmappingpipeline.conditionalanalysis.ConditionalAnalysis Maven / Gradle / Ivy
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package eqtlmappingpipeline.conditionalanalysis;
import eqtlmappingpipeline.metaqtl3.EQTLRegression;
import eqtlmappingpipeline.metaqtl3.FDR;
import eqtlmappingpipeline.metaqtl3.MetaQTL3;
import gnu.trove.set.hash.THashSet;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import umcg.genetica.console.ConsoleGUIElems;
import umcg.genetica.containers.Pair;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.EQTL;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.SNPLoader;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDataset;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDatasetSettings;
import umcg.genetica.io.trityper.QTLTextFile;
import umcg.genetica.io.trityper.util.ChrAnnotation;
import umcg.genetica.io.trityper.util.DetermineLD;
import umcg.genetica.text.Strings;
/**
*
* @author harmjan
*/
public class ConditionalAnalysis extends MetaQTL3 {
private boolean skipinitialSNPMapping = false;
private boolean skipAlleQTLMapping = false;
public void setSkipInitialSNPMapping() {
skipinitialSNPMapping = true;
}
public void run(String xmlSettingsFile, String texttoreplace, String texttoreplacewith,
String ingt, String inexp, String inexpplatform, String inexpannot, String gte,
String out, boolean cis, boolean trans, int perm, boolean textout, boolean binout, String snpfile, Integer threads) throws IOException, Exception {
initialize(xmlSettingsFile, texttoreplace, texttoreplacewith, null, null, ingt, inexp, inexpplatform, inexpannot, gte, out, cis, trans, perm, textout, binout, snpfile, threads, null, null, null, true, true, null, null);
double fdrthreshold = m_settings.fdrCutOff;
m_settings.provideBetasAndStandardErrors = true;
m_settings.provideFoldChangeData = true;
String origOutputDir = m_settings.outputReportsDir;
m_settings.outputReportsDir = origOutputDir + "SNP-Initial" + Gpio.getFileSeparator();
m_settings.plotOutputDirectory = origOutputDir + "SNP-Initial" + Gpio.getFileSeparator() + "plots" + Gpio.getFileSeparator();
Gpio.createDir(m_settings.plotOutputDirectory);
Gpio.createDir(m_settings.outputReportsDir);
String[] gzfiles = Gpio.getListOfFiles(origOutputDir, "gz");
for (String filename : gzfiles) {
File file = new File(filename);
String f = file.getName();
Gpio.moveFile(filename, m_settings.outputReportsDir + f);
}
System.out.println(m_settings.outputReportsDir + " is used for output");
if (!skipinitialSNPMapping && !skipAlleQTLMapping) {
// run the eQTL analysis over the SNPs --> determine strongest probes for each SNP
mapEQTLs();
}
HashSet tsSNPConfine = m_settings.tsSNPsConfine;
// take the significant eQTL Probes
String fdrSignificantFile = "eQTLsFDR" + fdrthreshold + "-ProbeLevel.txt";
String fdrAllFile = "eQTLsFDR-ProbeLevel.txt.gz";
FDR.FDRMethod fdrType = m_settings.fdrType;
if (fdrType.equals(FDR.FDRMethod.GENELEVEL)) {
fdrSignificantFile = "eQTLsFDR" + fdrthreshold + "-GeneLevel.txt";
fdrAllFile = "eQTLsFDR-GeneLevel.txt.gz";
} else if (fdrType.equals(FDR.FDRMethod.ALL)) {
fdrSignificantFile = "eQTLsFDR" + fdrthreshold + "-ProbeLevel.txt";
fdrAllFile = "eQTLsFDR-ProbeLevel.txt.gz";
} else if (fdrType.equals(FDR.FDRMethod.FULL)) {
fdrSignificantFile = "eQTLsFDR" + fdrthreshold + ".txt";
fdrAllFile = "eQTLsFDR.txt.gz";
}
QTLTextFile etf = new QTLTextFile(origOutputDir + "/SNP-Initial/" + fdrSignificantFile, QTLTextFile.R);
EQTL[] eQTLsSNPsUnconditional = etf.read();
etf.close();
System.out.println("Loaded: " + eQTLsSNPsUnconditional.length + " eQTL from " + origOutputDir + "/SNP-Initial/" + fdrSignificantFile);
// run the eQTL analysis over the probes -- > get strongest effect per probe
m_settings.outputReportsDir = origOutputDir + "/Probe-Initial/";
m_settings.plotOutputDirectory = origOutputDir + "/Probe-Initial/plots/";
Gpio.createDir(m_settings.outputReportsDir);
Gpio.createDir(m_settings.plotOutputDirectory);
m_settings.tsSNPsConfine = null;
m_settings.tsProbesConfine = new THashSet();
HashMap> esnpProbePairs = new HashMap>();
HashMap> gsnpProbePairs = new HashMap>();
for (EQTL e : eQTLsSNPsUnconditional) {
m_settings.tsProbesConfine.add(e.getProbe());
HashSet probes = gsnpProbePairs.get(e.getRsName());
if (probes == null) {
probes = new HashSet();
}
probes.add(e.getProbe());
gsnpProbePairs.put(e.getRsName(), probes);
}
for (TriTyperGeneticalGenomicsDatasetSettings s : m_settings.datasetSettings) {
s.tsProbesConfine = m_settings.tsProbesConfine;
}
System.out.println(m_settings.tsProbesConfine.size() + "\tprobes have significant effects.");
if (!skipAlleQTLMapping) {
reinit(null);
mapEQTLs();
}
etf = new QTLTextFile(origOutputDir + "/Probe-Initial/" + fdrAllFile, QTLTextFile.R);
EQTL[] eQTLsProbesUnconditional = etf.read();
etf.close();
HashSet eSNPsConfine = new HashSet();
HashMap probeeQTLMap = new HashMap();
for (EQTL e : eQTLsProbesUnconditional) {
if (!probeeQTLMap.containsKey(e.getProbe())) {
probeeQTLMap.put(e.getProbe(), e);
HashSet probes = esnpProbePairs.get(e.getRsName());
if (probes == null) {
probes = new HashSet();
}
eSNPsConfine.add(e.getRsName());
probes.add(e.getProbe());
esnpProbePairs.put(e.getRsName(), probes);
}
}
//eQTLsProbesUnconditional
// run conditional analysis: test only SNP/Probe pairs, condition on top probe effect
m_settings.outputReportsDir = origOutputDir + "/SNPs-Conditional/";
m_settings.plotOutputDirectory = origOutputDir + "/SNPs-Conditional/plots/";
Gpio.createDir(m_settings.plotOutputDirectory);
Gpio.createDir(m_settings.outputReportsDir);
ArrayList eQTLsToRegress = new ArrayList();
HashSet probes = new HashSet();
for (EQTL e : eQTLsProbesUnconditional) {
if (!probes.contains(e.getProbe())) {
eQTLsToRegress.add(e);
probes.add(e.getProbe());
}
}
EQTL[] eQTLsToRegressArr = eQTLsToRegress.toArray(new EQTL[0]);
// m_settings.regressOutEQTLEffectFileName = origOutputDir + "/Probe-Initial/eQTLProbesFDR" + fdrthreshold + "-ProbeLevel.txt";
m_settings.tsSNPsConfine = tsSNPConfine;
m_settings.tsSNPProbeCombinationsConfine = gsnpProbePairs;
m_settings.performEQTLAnalysisOnSNPProbeCombinationSubset = true;
if (!skipAlleQTLMapping) {
reinit(eQTLsToRegressArr);
mapEQTLs();
}
// run conditional analysis: test ony SNP/Probe pairs, condition on snp effect
m_settings.outputReportsDir = origOutputDir + "/Probes-Conditional/";
m_settings.plotOutputDirectory = origOutputDir + "/Probes-Conditional/plots/";
Gpio.createDir(m_settings.plotOutputDirectory);
Gpio.createDir(m_settings.outputReportsDir);
// m_settings.regressOutEQTLEffectFileName = origOutputDir + "/SNP-Initial/eQTLProbesFDR" + fdrthreshold + "-ProbeLevel.txt";
eQTLsToRegress = new ArrayList();
probes = new HashSet();
for (EQTL e : eQTLsSNPsUnconditional) {
if (!probes.contains(e.getProbe())) {
eQTLsToRegress.add(e);
probes.add(e.getProbe());
}
}
eQTLsToRegressArr = eQTLsToRegress.toArray(new EQTL[0]);
m_settings.tsSNPsConfine = eSNPsConfine;
m_settings.tsSNPProbeCombinationsConfine = esnpProbePairs;
m_settings.performEQTLAnalysisOnSNPProbeCombinationSubset = true;
if (!skipAlleQTLMapping) {
reinit(eQTLsToRegressArr);
mapEQTLs();
}
System.out.println(ConsoleGUIElems.DOUBLELINE);
System.out.println("Done with eQTL mappings.. Now summarizing results.");
// now summarize the results in some clever way.
HashMap, Double> fdrSNPsConditional = readFDRFile(origOutputDir + "/SNPs-Conditional/"+fdrAllFile);
HashMap, Double> fdrProbesConditional = readFDRFile(origOutputDir + "/Probes-Conditional/"+fdrAllFile);
HashMap, EQTL> snpConditionaleQTLMap = readEQTLFile(origOutputDir + "/SNPs-Conditional/"+fdrAllFile);
HashMap, EQTL> probeConditionaleQTLMap = readEQTLFile(origOutputDir + "/Probes-Conditional/"+fdrAllFile);
// HashMap, Double> fdrSNPsConditional = readFDRFile(origOutputDir + "/SNPs-Conditional/eQTLsFDR.txt.gz");
// HashMap, Double> fdrSNPsConditional = readFDRFile(origOutputDir + "/SNPs-Conditional/eQTLsFDR.txt.gz");
DetermineLD ld = new DetermineLD();
TextFile outfile = new TextFile(origOutputDir + "/ConditionalAnalysis.txt", TextFile.W);
outfile.writeln("Probe\tProbe Chr\tProbe ChrPos\tProbe HUGO\t"
+ "gSNP\tgSNP Chr\tgSNP ChrPos\tgSNP alleles\tgSNP allele assessed\t"
+ "eSNP\teSNP Chr\teSNP ChrPos\teSNP alleles\teSNP allele assessed\t"
+ "LD (rsquared, dprime)\t"
+ "eSNP N\tgSNP N\t"
+ "gSNP-PVal\teSNP-PVal\tgSNP-PVal-Conditional\teSNP-PVal-Conditional\t"
+ "gSNP-ZMeta\teSNP-ZMeta\tgSNP-ZMeta-Conditional\teSNP-ZMeta-Conditional\t"
+ "gSNP-BMeta\teSNP-BMeta\tgSNP-BMeta-Conditional\teSNP-BMeta-Conditional\t"
+ "gSNP-B\teSNP-B\tgSNP-B-Conditional\teSNP-B-Conditional\t"
// + "gSNP-FC\teSNP-FC\tgSNP-FC-Conditional\teSNP-FC-Conditional\t"
+ "gSNP-FDR\teSNP-FDR\tgSNP-FDR-Conditional\teSNP-FDR-Conditional\t");
SNPLoader[] loaders = new SNPLoader[m_gg.length];
for (int d = 0; d < m_gg.length; d++) {
loaders[d] = m_gg[d].getGenotypeData().createSNPLoader();
}
for (EQTL snpUnconditionalEQTL : eQTLsSNPsUnconditional) {
EQTL probeunconditionaleqtl = probeeQTLMap.get(snpUnconditionalEQTL.getProbe());
if (probeunconditionaleqtl == null) {
System.err.println("ERROR: probe " + snpUnconditionalEQTL.getProbe() + " was not tested in probe-unconditional analysis.");
System.exit(0);
}
Pair gSNPPair = new Pair(snpUnconditionalEQTL.getRsName(), snpUnconditionalEQTL.getProbe());
Pair eSNPPair = new Pair(probeunconditionaleqtl.getRsName(), probeunconditionaleqtl.getProbe());
// System.out.println(e.getRsName() + "\t" + e.getProbe() + "\t" + probeunconditionaleqtl.getRsName());
EQTL snpconditionaleqtl = snpConditionaleQTLMap.get(gSNPPair);
EQTL probeconditionaleqtl = probeConditionaleQTLMap.get(eSNPPair);
String gsnp = snpUnconditionalEQTL.getRsName();
String esnp = probeunconditionaleqtl.getRsName();
String ldstr = "";
String genename = snpUnconditionalEQTL.getProbeHUGO();
if (genename == null) {
genename = "-";
}
String output = snpUnconditionalEQTL.getProbe() + "\t" + ChrAnnotation.parseByte(snpUnconditionalEQTL.getProbeChr()) + "\t" + snpUnconditionalEQTL.getProbeChrPos() + "\t" + genename
+ "\t" + gsnp + "\t" + ChrAnnotation.parseByte(snpUnconditionalEQTL.getRsChr()) + "\t" + snpUnconditionalEQTL.getRsChrPos() + "\t" + snpUnconditionalEQTL.getAlleles() + "\t" + snpUnconditionalEQTL.getAlleleAssessed()
+ "\t" + esnp + "\t" + ChrAnnotation.parseByte(probeunconditionaleqtl.getRsChr()) + "\t" + probeunconditionaleqtl.getRsChrPos() + "\t" + probeunconditionaleqtl.getAlleles() + "\t" + probeunconditionaleqtl.getAlleleAssessed() + "\t";
if (gsnp.equals(esnp)) {
ldstr = "1";
} else {
String[] ldStrArr = new String[m_gg.length];
for (int ds = 0; ds < m_gg.length; ds++) {
ldStrArr[ds] = "-";
TriTyperGeneticalGenomicsDataset d = m_gg[ds];
Integer gsnpid = d.getGenotypeData().getSnpToSNPId().get(gsnp);
Integer esnpid = d.getGenotypeData().getSnpToSNPId().get(esnp);
if (gsnpid != -9 && esnpid != -9) {
SNP gsnpobj = d.getGenotypeData().getSNPObject(gsnpid);
SNP esnpobj = d.getGenotypeData().getSNPObject(esnpid);
if (gsnpobj != null && esnpobj != null) {
loaders[ds].loadGenotypes(gsnpobj);
loaders[ds].loadGenotypes(esnpobj);
double r2 = ld.getRSquared(gsnpobj, esnpobj, d.getGenotypeData(), DetermineLD.RETURN_R_SQUARED, DetermineLD.INCLUDE_CASES_AND_CONTROLS, false);
double dprime = ld.getRSquared(gsnpobj, esnpobj, d.getGenotypeData(), DetermineLD.RETURN_D_PRIME, DetermineLD.INCLUDE_CASES_AND_CONTROLS, false);
ldStrArr[ds] = "" + r2 + ", " + dprime;
}
gsnpobj.clearGenotypes();
esnpobj.clearGenotypes();
}
}
ldstr = Strings.concat(ldStrArr, Strings.semicolon);
}
Integer[] samples = snpUnconditionalEQTL.getDatasetsSamples();
int sampleSizeSNPUnconditional = 0;
for (int i = 0; i < samples.length; i++) {
if (samples[i] != null) {
sampleSizeSNPUnconditional += samples[i];
}
}
samples = probeunconditionaleqtl.getDatasetsSamples();
int sampleSizeProbeUnconditional = 0;
for (int i = 0; i < samples.length; i++) {
if (samples[i] != null) {
sampleSizeProbeUnconditional += samples[i];
}
}
String eSNPSample = "" + sampleSizeSNPUnconditional;
String gSNPSample = "" + sampleSizeProbeUnconditional;
String probeunconditionalpval = "" + probeunconditionaleqtl.getPvalue();
String snpconditionalpval = "-";
String snpconditionalz = "-";
String snpconditionalmetabeta = "-";
String snpconditionalbeta = "-";
String snpconditionalfc = "-";
String snpconditionalfdr = "-";
String probeconditionalpval = "-";
String probeconditionalz = "-";
String probeconditionalmetabeta = "-";
String probeconditionalbeta = "-";
String probeconditionalfc = "-";
String probeconditionalfdr = "-";
if (snpconditionaleqtl != null) {
snpconditionalpval = "" + snpconditionaleqtl.getPvalue();
snpconditionalmetabeta = snpconditionaleqtl.getMetaBeta();
snpconditionalbeta = snpconditionaleqtl.getBeta();
snpconditionalfc = snpconditionaleqtl.getFC();
snpconditionalz = "" + snpconditionaleqtl.getZscore();
snpconditionalfdr = "" + fdrSNPsConditional.get(gSNPPair);
}
if (probeconditionaleqtl != null) {
probeconditionalpval = "" + probeconditionaleqtl.getPvalue();
probeconditionalmetabeta = probeconditionaleqtl.getMetaBeta();
probeconditionalbeta = probeconditionaleqtl.getBeta();
probeconditionalfc = probeconditionaleqtl.getFC();
probeconditionalz = "" + probeconditionaleqtl.getZscore();
probeconditionalfdr = "" + fdrProbesConditional.get(eSNPPair);
}
output += ldstr
+ "\t" + eSNPSample
+ "\t" + gSNPSample
+ "\t" + snpUnconditionalEQTL.getPvalue()
+ "\t" + probeunconditionalpval
+ "\t" + snpconditionalpval
+ "\t" + probeconditionalpval
+ "\t" + snpUnconditionalEQTL.getZscore()
+ "\t" + probeunconditionaleqtl.getZscore()
+ "\t" + snpconditionalz
+ "\t" + probeconditionalz
+ "\t" + snpUnconditionalEQTL.getMetaBeta()
+ "\t" + probeunconditionaleqtl.getMetaBeta()
+ "\t" + snpconditionalmetabeta
+ "\t" + probeconditionalmetabeta
+ "\t" + snpUnconditionalEQTL.getBeta()
+ "\t" + probeunconditionaleqtl.getBeta()
+ "\t" + snpconditionalbeta
+ "\t" + probeconditionalbeta
+ "\t" + snpUnconditionalEQTL.getFDR()
+ "\t" + probeunconditionaleqtl.getFDR()
+ "\t" + snpconditionalfdr
+ "\t" + probeconditionalfdr;
outfile.writeln(output);
System.out.println(output);
}
for (int ds = 0; ds < m_gg.length; ds++) {
loaders[ds].close();
loaders[ds].close();
}
outfile.close();
}
private void reinit(EQTL[] regressthis) throws IOException, Exception {
m_gg = null;
int numDatasets = m_settings.datasetSettings.size();
m_gg = new TriTyperGeneticalGenomicsDataset[numDatasets];
numAvailableInds = 0;
for (int i = 0; i < numDatasets; i++) {
System.out.println("- Loading dataset: " + m_settings.datasetSettings.get(i).name + "");
System.out.println(ConsoleGUIElems.LINE);
m_gg[i] = new TriTyperGeneticalGenomicsDataset(m_settings.datasetSettings.get(i));
}
for (int i = 0; i < numDatasets; i++) {
if (!m_settings.performParametricAnalysis) {
m_gg[i].getExpressionData().rankAllExpressionData(m_settings.equalRankForTies);
}
m_gg[i].getExpressionData().calcAndSubtractMean();
m_gg[i].getExpressionData().calcMeanAndVariance();
numAvailableInds += m_gg[i].getExpressionToGenotypeIdArray().length;
}
if (regressthis != null && regressthis.length > 0) {
EQTLRegression eqr = new EQTLRegression();
eqr.regressOutEQTLEffects(regressthis, m_gg);
}
System.out.println(ConsoleGUIElems.LINE);
System.out.println("");
// sort datasets on size to increase efficiency of random reads..
// for some reason, it is faster to load the largest dataset first.
Arrays.sort(m_gg, Collections.reverseOrder());
System.out.println("Accumulating available data...");
System.out.print(ConsoleGUIElems.LINE);
createSNPList();
createProbeList();
// create WorkPackage objects
determineSNPProbeCombinations();
if (m_workPackages == null || m_workPackages.length == 0) {
System.err.println("Error: No work detected");
System.exit(0);
}
// determine number of threadss
if (m_settings.nrThreads == null) {
m_settings.nrThreads = Runtime.getRuntime().availableProcessors();
} else {
int numProcs = Runtime.getRuntime().availableProcessors();
if (m_settings.nrThreads > numProcs || m_settings.nrThreads < 1) {
m_settings.nrThreads = numProcs;
}
}
if (m_workPackages.length < m_settings.nrThreads) {
m_settings.nrThreads = m_workPackages.length;
}
printSummary();
}
public void runGetResultsForAllSNPs(String xmlSettingsFile, String texttoreplace, String texttoreplacewith,
String ingt, String inexp, String inexpplatform, String inexpannot, String gte,
String out, boolean cis, boolean trans, int perm, boolean textout, boolean binout, String snpfile, Integer threads) throws IOException, Exception {
// initialize..
// initialize(xmlSettingsFile, texttoreplace, texttoreplacewith, ingt, inexp, inexpplatform, inexpannot, gte, out, cis, trans, perm, textout, binout, snpfile, threads, null, null, null);
//
// double fdrthreshold = m_settings.fdrCutOff;
// m_settings.provideBetasAndStandardErrors = true;
// m_settings.provideFoldChangeData = true;
// String origOutputDir = m_settings.outputReportsDir;
// m_settings.outputReportsDir = origOutputDir + "/SNP-Initial/";
// m_settings.plotOutputDirectory = origOutputDir + "/SNP-Initial/plots/";
// Gpio.createDir(m_settings.plotOutputDirectory);
// Gpio.createDir(m_settings.outputReportsDir);
//
// String[] gzfiles = Gpio.getListOfFiles(origOutputDir, "gz");
// for (String filename : gzfiles) {
// String f = filename.replace(origOutputDir, "");
// Gpio.moveFile(filename, m_settings.outputReportsDir + f);
// }
//
//
// System.out.println(m_settings.outputReportsDir + " is used for output");
//
// if (!skipinitialSNPMapping) {
// // run the eQTL analysis over the SNPs --> determine strongest probes for each SNP
// mapEQTLs();
// }
//
// HashSet tsSNPConfine = m_settings.tsSNPsConfine;
//
// // the result is a set of SNP probe combo's. This is a SNP-centric analysis, so we want to load the top probe per SNP, irrespective of the significance.
// HashSet probesToTest = new HashSet();
// TextFile tf = new TextFile(origOutputDir + "/SNP-Initial/eQTLs.txt", TextFile.R);
// tf.readLine();
// String[] elems = tf.readLineElems(TextFile.tab);
// while (elems != null) {
// probesToTest.add(elems[4]);
// elems = tf.readLineElems(TextFile.tab);
// }
// tf.close();
//
//
// // run the eQTL analysis over the probes -- > get strongest effect per probe
// m_settings.outputReportsDir = origOutputDir + "/Probe-Initial/";
// m_settings.plotOutputDirectory = origOutputDir + "/Probe-Initial/plots/";
// Gpio.createDir(m_settings.outputReportsDir);
// Gpio.createDir(m_settings.plotOutputDirectory);
// m_settings.tsSNPsConfine = null;
// m_settings.tsProbesConfine = probesToTest;
// for (TriTyperGeneticalGenomicsDatasetSettings s : m_settings.datasetSettings) {
// s.tsProbesConfine = m_settings.tsProbesConfine;
// }
// System.out.println(m_settings.tsProbesConfine.size() + "\tprobes have significant effects.");
// reinit();
// mapEQTLs();
//
// // CONDITIONAL ANALYSES
// // now determine top effect per probe..
// // we'll regress out those effects later on.
// runConditional(origOutputDir + "/SNP-Initial/", origOutputDir + "/Probe-Initial/", origOutputDir + "/SNP-Conditional/");
// // now repeat the same procedure, but now conditioning for the gSNP effect
// runConditional(origOutputDir + "/Probe-Initial/", origOutputDir + "/SNP-Initial/", origOutputDir + "/Probes-Conditional/");
//
//
// // now summarize the hell out of the results...
//
// // read initial snp probe pairs
// eQTLObject[] snpInitial = readeQTLs(origOutputDir + "/SNP-Initial/eQTLsFDR.txt");
// // read top effects for probes
// eQTLObject[] probeInitial = readeQTLs(origOutputDir + "/Probe-Initial/eQTLsFDR.txt");
// // read snp probe after conditioning
// eQTLObject[] snpConditional = readeQTLs(origOutputDir + "/SNP-Conditional/eQTLsFDR.txt");
// eQTLObject[] probeConditional = readeQTLs(origOutputDir + "/Probe-Conditional/eQTLsFDR.txt");
//
// TextFile outfile = new TextFile(origOutputDir + "/ConditionalAnalysis.txt", TextFile.W);
//
// String header = "probe\thugo\tgsnp\talleles\talleleassessed"
// + "\tesnp\tesnpalleles\tesnpallelesasssesed"
// + "\tgsnpp\tesnpp\tgsnpcondp\tesnpcondp"
// + "\tgsnpmetab\tesnpmetab\tgsnpmetabcond\tesnpmetabcond"
// + "\tgsnpb\tesnpb\tgsnpbcond\tesnpbcond"
// + "\tgsnpss\tesnpss\tgsnpbconss\tesnpbconss"
// + "\tgsnpsFDR\tesnpsFDR\tgsnpbconsFDR\tesnpbconsFDR";
// outfile.writeln(header);
//
// for (eQTLObject ei : snpInitial) {
// // iterate through probeintitial
// eQTLObject matchingpi = null;
// eQTLObject matchingsc = null;
// eQTLObject matchingpc = null;
// for (eQTLObject pi : probeInitial) {
// if (pi.probe.equals(ei.probe)) {
// // this is the thing to print
// matchingpi = pi;
// break;
// }
// }
//
// // now the snp conditional on top probe
// for (eQTLObject ec : snpConditional) {
// if (ec.snp.equals(ei.snp) && ec.snp.equals(ei.snp)) {
// matchingsc = ec;
// break;
// }
// }
//
// // now the probe conditional on top snp
// for (eQTLObject pc : probeConditional) {
// if (pc.snp.equals(matchingpi.snp) && pc.probe.equals(matchingpi.probe)) {
// matchingpc = pc;
// break;
// }
// }
//
// String output = ei.probe
// + "\t" + ei.hugo
// + "\t" + ei.snp
// + "\t" + ei.alleles
// + "\t" + ei.alleleAssessed
// + "\t" + matchingpi.snp
// + "\t" + matchingpi.alleles
// + "\t" + matchingpi.alleleAssessed
// + "\t" + ei.p
// + "\t" + matchingpi.p
// + "\t" + matchingsc.p
// + "\t" + matchingpc.p
// + "\t" + ei.metab
// + "\t" + matchingpi.metab
// + "\t" + matchingsc.metab
// + "\t" + matchingpc.metab
// + "\t" + ei.b
// + "\t" + matchingpi.b
// + "\t" + matchingsc.b
// + "\t" + matchingpc.b
// + "\t" + ei.samplesize
// + "\t" + matchingpi.samplesize
// + "\t" + matchingsc.samplesize
// + "\t" + matchingpc.samplesize
// + "\t" + ei.FDR
// + "\t" + matchingpi.FDR
// + "\t" + matchingsc.FDR
// + "\t" + matchingpc.FDR;
//
//
// outfile.writeln(output);
//
//
// }
// outfile.close();
}
private eQTLObject[] readeQTLs(String eqtlfile) throws IOException {
ArrayList output = new ArrayList();
TextFile tf = new TextFile(eqtlfile, TextFile.R);
tf.readLine(); // skip header
String[] elems = tf.readLineElems(TextFile.tab);
while (elems != null) {
eQTLObject e = new eQTLObject();
e.p = elems[0];
e.snp = elems[1];
e.snpchr = elems[2];
e.snpchrpos = elems[3];
e.probe = elems[4];
e.probechr = elems[5];
e.probechrpos = elems[6];
e.alleleAssessed = elems[QTLTextFile.ASESSEDALLELE];
e.alleles = elems[QTLTextFile.ASESSEDALLELE - 1];
e.samplesize = elems[QTLTextFile.DATASETSIZE];
e.metab = elems[QTLTextFile.METAB];
e.metaz = elems[QTLTextFile.METAZ];
e.b = elems[QTLTextFile.DATASETB];
e.hugo = elems[QTLTextFile.HUGO];
e.FDR = elems[elems.length - 1];
output.add(e);
elems = tf.readLineElems(TextFile.tab);
}
tf.close();
//
//
return output.toArray(new eQTLObject[0]);
}
private HashMap, Double> readFDRFile(String file) throws IOException {
HashMap, Double> output = new HashMap, Double>();
TextFile tfSNPsCond = new TextFile(file, TextFile.R);
tfSNPsCond.readLine();
String[] elems = tfSNPsCond.readLineElems(TextFile.tab);
while (elems != null) {
String snp = elems[1];
String probe = elems[4];
String fdrStr = elems[elems.length - 1];
Double fdr = Double.parseDouble(fdrStr);
output.put(new Pair(snp, probe), fdr);
elems = tfSNPsCond.readLineElems(TextFile.tab);
}
tfSNPsCond.close();
return output;
}
private HashMap, EQTL> readEQTLFile(String eqtlfilename) throws IOException {
HashMap, EQTL> output = new HashMap, EQTL>();
TextFile efile = new TextFile(eqtlfilename, TextFile.R);
efile.readLine();
String[] elems = efile.readLineElems(TextFile.tab);
while (elems != null) {
EQTL e = new EQTL();
e.setPvalue(Double.parseDouble(elems[0]));
e.setRsName(elems[1]);
e.setRsChr(ChrAnnotation.parseChr(elems[2]));
e.setRsChrPos(Integer.parseInt(elems[3]));
e.setProbe(elems[4]);
e.setProbeChr(ChrAnnotation.parseChr(elems[5]));
e.setProbeChrPos(Integer.parseInt(elems[6]));
e.setAlleles(elems[8]);
e.setAlleleAssessed(elems[9]);
if (elems.length > 20) {
if (!elems[20].equals("" + null)) {
e.setFC(elems[20]);
}
}
e.setZscore(Double.parseDouble(elems[QTLTextFile.METAZ]));
e.setMetaBeta(elems[QTLTextFile.METAB]);
e.setBeta(elems[QTLTextFile.DATASETB]);
output.put(new Pair(elems[1], elems[4]), e);
elems = efile.readLineElems(TextFile.tab);
}
efile.close();
return output;
}
void setSkipAllEQTLMapping() {
skipAlleQTLMapping = true;
}
private class eQTLObject {
String p;
String snp;
String probe;
String metaz;
String metab;
String z;
String b;
String hugo;
String samplesize;
String alleleAssessed;
String alleles;
String probechr;
String probechrpos;
String snpchr;
String snpchrpos;
String FDR;
/*
Probe
Probe Chr
Probe ChrPos
Probe HUGO
gSNP
gSNP Chr
gSNP ChrPos
gSNP alleles
gSNP allele assessed
eSNP
eSNP Chr
eSNP ChrPos
eSNP alleles
eSNP allele assessed
LD
gSNP-PVal
eSNP-PVal
gSNP-PVal-Conditional
eSNP-PVal-Conditional
gSNP-ZMeta
eSNP-ZMeta
gSNP-ZMeta-Conditional
eSNP-ZMeta-Conditional
gSNP-BMeta
eSNP-BMeta
gSNP-BMeta-Conditional
eSNP-BMeta-Conditional
gSNP-B
eSNP-B
gSNP-B-Conditional
eSNP-B-Conditional
gSNP-FC
eSNP-FC
gSNP-FC-Conditional
eSNP-FC-Conditional
gSNP-FDR
eSNP-FDR
gSNP-FDR-Conditional
eSNP-FDR-Conditional
*/
}
// private void runConditional(String input, String condition, String output) throws IOException, Exception {
// // now determine top effect per probe..
// // we'll regress out those effects later on.
// determineTopProbeEffect(condition);
// // determine SNP-Probe combo's to test
// HashMap> snpProbePairs = determineSNPProbeCombos(input + "/eQTLs.txt");
// // run conditional analysis: test only SNP/Probe pairs, condition on top probe effect
// m_settings.outputReportsDir = output;
// m_settings.plotOutputDirectory = output + "/plots/";
// Gpio.createDir(m_settings.plotOutputDirectory);
// Gpio.createDir(m_settings.outputReportsDir);
// m_settings.regressOutEQTLEffectFileName = condition + "/eQTLsTopProbeEffects.txt";
// m_settings.tsSNPProbeCombinationsConfine = snpProbePairs;
// m_settings.performEQTLAnalysisOnSNPProbeCombinationSubset = true;
// reinit();
// mapEQTLs();
// }
private void determineTopProbeEffect(String outdir) throws IOException {
TextFile efile = new TextFile(outdir + "/eQTLs.txt", TextFile.R);
TextFile efileout = new TextFile(outdir + "/eQTLsTopProbeEffects.txt", TextFile.W);
efileout.writeln(efile.readLine());
String[] elems = efile.readLineElems(TextFile.tab);
HashSet probesVisited = new HashSet();
while (elems != null) {
if (!probesVisited.contains(elems[4])) {
efileout.writeln(Strings.concat(elems, Strings.tab));
probesVisited.add(elems[4]);
}
elems = efile.readLineElems(TextFile.tab);
}
efileout.close();
efile.close();
}
private HashMap> determineSNPProbeCombos(String filename) throws IOException {
HashMap> gsnpProbePairs = new HashMap>();
TextFile efile = new TextFile(filename, TextFile.R);
efile.readLine();
String[] elems = efile.readLineElems(TextFile.tab);
while (elems != null) {
String snp = elems[1];
String probe = elems[4];
HashSet probes = gsnpProbePairs.get(snp);
if (probes == null) {
probes = new HashSet();
}
probes.add(probe);
gsnpProbePairs.put(snp, probes);
elems = efile.readLineElems(TextFile.tab);
}
efile.close();
return gsnpProbePairs;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy