![JAR search and dependency download from the Maven repository](/logo.png)
eqtlmappingpipeline.binarymeta.meta.MetaAnalyze Maven / Gradle / Ivy
///*
// * To change this template, choose Tools | Templates
// * and open the template in the editor.
// */
package eqtlmappingpipeline.binarymeta.meta;
//
//import eqtlmappingpipeline.gpio.binary.Dataset;
import eqtlmappingpipeline.binarymeta.meta.graphics.ZScorePlot;
import eqtlmappingpipeline.metaqtl3.FDR;
import eqtlmappingpipeline.metaqtl3.graphics.EQTLDotPlot;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.EQTL;
import umcg.genetica.io.trityper.bin.BinaryResultDataset;
import umcg.genetica.io.trityper.bin.BinaryResultProbe;
import umcg.genetica.io.trityper.bin.BinaryResultSNP;
import umcg.genetica.io.trityper.probeannotation.ProbeTranslation;
import umcg.genetica.math.stats.Descriptives;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.zip.DataFormatException;
///**
// *
// * @author harm-jan
// */
public class MetaAnalyze {
protected static MetaSettings m_settings;
protected BinaryResultDataset[] ds;
protected ArrayList probes;
protected ArrayList snps;
protected Integer[][] snpTranslation;
protected int[] pvaluedistribution;
protected EQTL[] eQTLBuffer;
protected EQTL[] finalEQTLBuffer;
protected int nrInFinalBuffer = 0;
protected double pvaluethreshold;
protected ArrayList snpChr;
protected ArrayList snpChrPos;
protected ProbeTranslation probeTranslation;
protected Integer[][] probeTranslationLookupTable;
public static String header = "PValue\t"
+ "SNPName\t"
+ "SNPChr\t"
+ "SNPChrPos\t"
+ "ProbeName\t"
+ "ProbeChr\t"
+ "ProbeCenterChrPos\t"
+ "CisTrans\t"
+ "SNPType\t"
+ "AlleleAssessed\t"
+ "OverallZScore\t"
+ "DatasetsWhereSNPProbePairIsAvailableAndPassesQC\t"
+ "DatasetsZScores\t"
+ "DatasetsNrSamples\t"
+ "IncludedDatasetsMeanProbeExpression\t"
+ "IncludedDatasetsProbeExpressionVariance\t"
+ "HGNCName\t"
+ "IncludedDatasetsCorrelationCoefficient";
protected double[] zsumPerSNP;
protected int[] zsumSNPsNumberOfProbes;
protected double[] zsumPerProbe;
protected int[] zsumProbesNumberOfSNPs;
protected ZScorePlot zs;
protected TextFile zscoretable;
protected HashSet uniqueProbes;
protected HashSet uniqueSNPs;
protected int nrTotalSamples;
protected int numSNPs;
protected int numProbes;
private HashSet probeListToAnalyze;
public void init(String settingsFile, String texttoreplace, String replacetextwith) throws IOException {
m_settings = new MetaSettings();
m_settings.parse(settingsFile, texttoreplace, replacetextwith);
probeTranslation = new ProbeTranslation();
probeTranslation.load(m_settings.getProbetranslationfile());
}
public void analyze() throws IOException, DataFormatException, Exception {
System.out.println("");
System.out.println("Starting analysis!");
String[] datasets = new String[m_settings.getDatasetnames().size()];
for (int i = 0; i < m_settings.getDatasetnames().size(); i++) {
datasets[i] = m_settings.getDatasetnames().get(i);
}
if (!m_settings.getOutput().endsWith("/")) {
m_settings.setOutput(m_settings.getOutput() + "/MetaAnalysis/");
}
if (!Gpio.exists(m_settings.getOutput())) {
Gpio.createDir(m_settings.getOutput());
}
m_settings.save();
String[] locations = new String[m_settings.getDatasetnames().size()];
for (int i = 0; i < locations.length; i++) {
locations[i] = m_settings.getDatasetlocations().get(i);
}
int permstart = 0;
int permstop = m_settings.getNrPermutations() + 1;
if (m_settings.getRunonlypermutation() > -1) {
permstart = m_settings.getRunonlypermutation();
permstop = m_settings.getRunonlypermutation() + m_settings.getNrPermutations();
}
System.out.println(permstart + " - " + permstop);
for (int perm = permstart; perm < permstop; perm++) {
ds = new BinaryResultDataset[m_settings.getDatasetlocations().size()];
runCalculationRound(perm, locations, datasets, -1);
}
if (m_settings.getRunonlypermutation() == -1) {
if (m_settings.getNrPermutations() > 0) {
FDR.calculateFDR(m_settings.getOutput(), m_settings.getNrPermutations(), m_settings.getFinalEQTLBufferMaxLength(), m_settings.getFdrthreshold(), true, null, null, FDR.FDRMethod.ALL, true);
EQTLDotPlot edp = new EQTLDotPlot();
edp.draw(m_settings.getOutput() + "/eQTLsFDR" + m_settings.getFdrthreshold() + ".txt", m_settings.getOutput() + "/DotPlot-FDR" + m_settings.getFdrthreshold() + ".pdf", EQTLDotPlot.Output.PDF); // "/eQTLsFDR" + fdrCutOff + ".txt", outputReportsDir + "/eQTLsFDR" + fdrCutOff + "DotPlot.png"
edp = null;
}
}
}
protected void initdatasets(String[] locations, int perm, int dToUse) throws IOException {
int numProbes = probeTranslation.getNumProbes();
System.out.println(numProbes + " probes found in translation table. Now matching probes across datasets..");
probeTranslationLookupTable = new Integer[ds.length][numProbes];
HashSet probesPresentInDatasets = new HashSet();
// m_settings.getSNPSelection();
HashSet selectedSNPs = null;
// if (m_settings.getSNPSelection() != null) {
// System.out.println("Selecting SNPs from: " + m_settings.getSNPSelection());
// selectedSNPs = new HashSet();
// TextFile stf = new TextFile(m_settings.getSNPSelection(), TextFile.R);
// selectedSNPs.addAll(stf.readAsArrayList());
// stf.close();
// System.out.println("Selected " + selectedSNPs.size() + " unique SNPs from file.");
// }
HashMap> selectedSNPProbePairs = null;
if (m_settings.getSNPProbeSelection() != null) {
System.out.println("Selecting SNP-probe pairs from: " + m_settings.getSNPProbeSelection());
selectedSNPProbePairs = new HashMap>();
selectedSNPs = new HashSet();
TextFile stf = new TextFile(m_settings.getSNPProbeSelection(), TextFile.R);
int ctr = 0;
String[] felems = stf.readLineElems(TextFile.tab);
while (felems != null) {
String snp = felems[0].intern();
String probe = felems[1].intern();
HashSet probesForSNP = selectedSNPProbePairs.get(snp);
if (probesForSNP == null) {
probesForSNP = new HashSet();
}
probesForSNP.add(probe.intern());
selectedSNPs.add(snp.intern());
selectedSNPProbePairs.put(snp.intern(), probesForSNP);
ctr++;
felems = stf.readLineElems(TextFile.tab);
}
stf.close();
System.out.println("Selected " + ctr + " unique SNPs from file.");
}
HashSet probesToInclude = null;
if (m_settings.getProbeselection() != null) {
TextFile tf = new TextFile(m_settings.getProbeselection(), TextFile.R);
ArrayList probesSelected = tf.readAsArrayList();
probesToInclude = new HashSet();
probesToInclude.addAll(probesSelected);
System.out.println(probesSelected.size() + " probes selected from file: " + m_settings.getProbeselection());
tf.close();
}
for (int d = 0; d < ds.length; d++) {
int probeAnnotationToUse = d;
if (dToUse != -1) {
probeAnnotationToUse = dToUse;
}
ds[d] = new BinaryResultDataset(locations[d], m_settings.getDatasetPrefix().get(probeAnnotationToUse), perm);
BinaryResultProbe[] dsProbes = ds[d].getProbes();
BinaryResultSNP[] dsSNPs = ds[d].getSnps();
nrTotalSamples += ds[d].getMaxNrSamples();
for (BinaryResultProbe p : dsProbes) {
Integer newProbeId = probeTranslation.getProbeId(m_settings.getDatasetannotations().get(probeAnnotationToUse) + p.getName());
if (newProbeId == null) {
System.out.println(m_settings.getDatasetannotations().get(probeAnnotationToUse) + "\t" + p.getName() + " probe not present in annotationfile...?");
System.exit(0);
}
if (probesToInclude == null || probesToInclude.contains("" + newProbeId)) {
probesPresentInDatasets.add(newProbeId);
probeTranslationLookupTable[d][newProbeId] = p.getId();
} else {
probeTranslationLookupTable[d][newProbeId] = null;
}
}
for (BinaryResultSNP s : dsSNPs) {
if (!uniqueSNPs.contains(s.getName().intern()) && (selectedSNPs == null || selectedSNPs.contains(s.getName().intern()))) {
snps.add(s.getName().intern());
snpChr.add(s.getChr());
snpChrPos.add(s.getChrpos());
uniqueSNPs.add(s.getName().intern());
}
}
// ds[d].clearProbeObjects();
}
TextFile probesPresentFile = new TextFile(m_settings.getOutput() + "ProbesPresentInAtLeastOneDataset.txt", TextFile.W);
System.out.println(probesPresentInDatasets.size() + "\tunique probes present in all datasets.");
Integer[] presentNrs = probesPresentInDatasets.toArray(new Integer[0]);
for (Integer i : presentNrs) {
probesPresentFile.writeln("" + i);
}
probesPresentFile.close();
int selectedprobes = 0;
// if (m_settings.getProbeselection() != null) {
// TextFile tf = new TextFile(m_settings.getProbeselection(), TextFile.R);
//
// String[] probesSelected = tf.readAsArray();
//
// tf.close();
//
//// for(int d=0; d();
// probeListToAnalyze.addAll(Arrays.asList(probesSelected));
//
// System.out.println(probeListToAnalyze.size() + " unique probes selected for meta-analysis, from the file " + m_settings.getProbeselection());
// int probePresenceCounter = 0;
// for (int q = 0; q < probeTranslationLookupTable[0].length; q++) {
// if (probeListToAnalyze != null) {
// if (!probeListToAnalyze.contains("" + q)) {
//
// for (int d = 0; d < ds.length; d++) {
// probeTranslationLookupTable[d][q] = null;
// }
// } else {
// probePresenceCounter++;
// }
// }
// }
// System.out.println(probePresenceCounter + "\tprobes selected.");
//
// } else {
for (int q = 0; q < probeTranslationLookupTable[0].length; q++) {
int probePresenceCounter = 0;
if (probeListToAnalyze != null) {
if (!probeListToAnalyze.contains("" + q)) {
for (int d = 0; d < ds.length; d++) {
probeTranslationLookupTable[d][q] = null;
}
}
}
for (int i = 0; i < ds.length; i++) {
if (probeTranslationLookupTable[i][q] != null && ds[i].getMaxNrSamples() > m_settings.getProbeAndSNPPresenceFilterSampleThreshold()) {
probePresenceCounter++;
}
}
if (m_settings.getProbeDatasetPresenceThreshold() > 0 && probePresenceCounter < m_settings.getProbeDatasetPresenceThreshold()) {
for (int d = 0; d < ds.length; d++) {
probeTranslationLookupTable[d][q] = null;
}
} else if (probePresenceCounter > 0) {
selectedprobes++;
}
}
System.out.println("Selected " + selectedprobes + " probes from at least " + m_settings.getProbeDatasetPresenceThreshold() + " datasets of at least " + m_settings.getProbeAndSNPPresenceFilterSampleThreshold() + " samples.");
// }
// numProbes = uniqueProbes.size();
numSNPs = uniqueSNPs.size();
initSNPTranslation();
}
protected void initSNPTranslation() throws IOException {
snpTranslation = new Integer[ds.length][numSNPs];
for (int d = 0; d < ds.length; d++) {
BinaryResultProbe[] dsProbes = ds[d].getProbes();
BinaryResultSNP[] dsSNPs = ds[d].getSnps();
for (int i = 0; i < snps.size(); i++) {
BinaryResultSNP s = ds[d].getStringToSNP().get(snps.get(i));
if (s != null) {
snpTranslation[d][i] = s.getId();
} else {
snpTranslation[d][i] = null;
}
}
}
int selectedsnps = 0;
HashSet selectedSNPs = null;
if (m_settings.getSNPSelection() != null) {
System.out.println("Selecting SNPs from: " + m_settings.getSNPSelection());
selectedSNPs = new HashSet();
TextFile stf = new TextFile(m_settings.getSNPSelection(), TextFile.R);
selectedSNPs.addAll(stf.readAsArrayList());
stf.close();
System.out.println("Selected " + selectedSNPs.size() + " unique SNPs from file.");
}
TextFile selectedSNPFile = new TextFile(m_settings.getOutput() + "/SelectedSNPs.txt", TextFile.W);
for (int s = 0; s < numSNPs; s++) {
String snpName = snps.get(s);
int snppresencecounter = 0;
for (int d = 0; d < ds.length; d++) {
if (snpTranslation[d][s] != null && ds[d].getMaxNrSamples() >= m_settings.getProbeAndSNPPresenceFilterSampleThreshold()) {
snppresencecounter++;
}
}
if (m_settings.getSnpDatasetPresenceThreshold() > 0 && snppresencecounter < m_settings.getSnpDatasetPresenceThreshold() || (selectedSNPs != null && !selectedSNPs.contains(snpName))) {
for (int d = 0; d < ds.length; d++) {
snpTranslation[d][s] = null;
}
} else if (snppresencecounter > 0) {
selectedSNPFile.writeln(snps.get(s));
selectedsnps++;
}
}
selectedSNPFile.close();
System.out.println("Selected " + selectedsnps + " snps from at least " + m_settings.getSnpDatasetPresenceThreshold() + " datasets of at least " + m_settings.getProbeAndSNPPresenceFilterSampleThreshold() + " samples.");
}
protected void runCalculationRound(int perm, String[] locations, String[] datasets, int dToUse) throws IOException, Exception {
pvaluedistribution = null;
eQTLBuffer = null;
finalEQTLBuffer = null;
nrInFinalBuffer = 0;
uniqueProbes = new HashSet();
uniqueSNPs = new HashSet();
int numDatasets = ds.length;
probes = new ArrayList();
snps = new ArrayList();
snpChr = new ArrayList();
snpChrPos = new ArrayList();
nrTotalSamples = 0;
String[] probeName = probeTranslation.getProbes();
probes.addAll(Arrays.asList(probeName));
initdatasets(locations, perm, dToUse);
String zsName = null;
if (m_settings.isMakezscoreplot()) {
zs = new ZScorePlot();
String[] datasets2 = new String[datasets.length + 1];
System.arraycopy(datasets, 0, datasets2, 0, datasets.length);
datasets2[datasets2.length - 1] = "Meta-Analysis";
if (perm > 0) {
zsName = m_settings.getOutput() + "ZScoreComparison-PermutationRound" + perm;
} else {
zsName = m_settings.getOutput() + "ZScoreComparison";
}
zs.init(numDatasets + 1, datasets2, true, zsName);
}
Descriptives.lookupSqrt(nrTotalSamples);
pvaluedistribution = new int[m_settings.getNrOfBins()];
eQTLBuffer = new EQTL[10000];
finalEQTLBuffer = new EQTL[0];
pvaluethreshold = Double.MAX_VALUE;
zsumPerSNP = new double[snps.size()];
zsumSNPsNumberOfProbes = new int[snps.size()];
zsumPerProbe = new double[probes.size()];
zsumProbesNumberOfSNPs = new int[probes.size()];
System.out.println("Performing the meta-analysis now: ");
// System.out.println(snps.size() + "\t unique SNPs present in at least " + m_settings.snpDatasetPresenceThreshold + " datasets");
// System.out.println(probes.size() + "\t unique Probespresent in at least " + m_settings.probeDatasetPresenceThreshold + " datasets");
System.out.println(nrTotalSamples + "\t total samples");
if (m_settings.isMakezscoretable()) {
if (perm == 0) {
zscoretable = new TextFile(m_settings.getOutput() + "metazscoretable.txt.gz", TextFile.W, (10 * 1048576));
} else {
zscoretable = new TextFile(m_settings.getOutput() + "metazscoretable-Permutation" + perm + ".txt.gz", TextFile.W, (10 * 1048576));
}
StringBuilder zscoreout = new StringBuilder();
zscoreout.append("SNP\tAlleleCoding\tAssessedAllele");
for (int i = 0; i < probes.size(); i++) {
zscoreout.append("\t").append(probes.get(i));
}
zscoretable.writeln(zscoreout.toString());
}
HashMap> selectedSNPProbePairs = null;
if (m_settings.getSNPProbeSelection() != null) {
System.out.println("Selecting SNP-probe pairs from: " + m_settings.getSNPProbeSelection());
selectedSNPProbePairs = new HashMap>();
TextFile stf = new TextFile(m_settings.getSNPProbeSelection(), TextFile.R);
int ctr = 0;
String[] felems = stf.readLineElems(TextFile.tab);
while (felems != null) {
String snp = felems[0];
String probe = felems[1];
HashSet probesForSNP = selectedSNPProbePairs.get(snp);
if (probesForSNP == null) {
probesForSNP = new HashSet();
}
probesForSNP.add(probe);
selectedSNPProbePairs.put(snp, probesForSNP);
ctr++;
felems = stf.readLineElems(TextFile.tab);
}
stf.close();
System.out.println("Selected " + ctr + " unique SNPs from file.");
}
/// init calculation pool,
int nrProcs = Runtime.getRuntime().availableProcessors();
if (m_settings.getNrThresds() > 0) {
if (m_settings.getNrThresds() > nrProcs) {
m_settings.setNrThresds(nrProcs);
}
nrProcs = m_settings.getNrThresds();
}
System.out.println("Using " + nrProcs + " threads :)");
MetaAnalysisCalculationThread[] calcPool = new MetaAnalysisCalculationThread[nrProcs];
LinkedBlockingQueue loaderQueue = new LinkedBlockingQueue(nrProcs);
MetaAnalysisLoaderThread loaderThread = new MetaAnalysisLoaderThread(loaderQueue, snpTranslation, snps, ds);
loaderThread.setName("Loader");
loaderThread.start();
PValueThreshold p = new PValueThreshold();
LinkedBlockingQueue resultQueue = new LinkedBlockingQueue(nrProcs);
MetaAnalysisResultThread resultThread = new MetaAnalysisResultThread(resultQueue, m_settings, datasets, perm, zscoretable, p, snps, selectedSNPProbePairs, probes);
resultThread.setName("Result");
resultThread.start();
for (int i = 0; i < nrProcs; i++) {
calcPool[i] = new MetaAnalysisCalculationThread(loaderQueue, resultQueue, snps, probes, snpChr, snpChrPos, ds, snpTranslation, probeTranslationLookupTable, probeTranslation, m_settings, zs, p);
calcPool[i].setName("MetaCalc-" + i);
calcPool[i].start();
}
// kill the threads
try {
loaderThread.join();
MetaAnalysisWorkPackage poison = new MetaAnalysisWorkPackage(0, 0);
poison.poisonTheWell();
for (int threadNum = 0; threadNum < calcPool.length; threadNum++) {
try {
loaderQueue.put(poison);
} catch (InterruptedException ex) {
ex.printStackTrace();
}
}
for (int threadNum = 0; threadNum < calcPool.length; threadNum++) {
calcPool[threadNum].join();
}
resultQueue.put(poison);
resultThread.join();
} catch (InterruptedException e) {
System.err.println("Exception: Thread main interrupted.");
}
if (m_settings.isMakezscoretable()) {
// if (perm == 0) {
zscoretable.close();
// }
}
if (zs != null) {
zs.write(zsName);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy