
eqtlmappingpipeline.metaqtl3.ResultProcessorThread Maven / Gradle / Ivy
The newest version!
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package eqtlmappingpipeline.metaqtl3;
import eqtlmappingpipeline.metaqtl3.containers.Settings;
import cern.colt.matrix.tint.IntMatrix2D;
import eqtlmappingpipeline.metaqtl3.containers.QTL;
import eqtlmappingpipeline.metaqtl3.containers.Result;
import eqtlmappingpipeline.metaqtl3.containers.WorkPackage;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.LinkedBlockingQueue;
import javax.xml.bind.annotation.adapters.HexBinaryAdapter;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.io.bin.BinaryFile;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDataset;
import umcg.genetica.io.trityper.QTLTextFile;
import umcg.genetica.io.trityper.util.BaseAnnot;
/**
*
* @author harmjan
*/
public class ResultProcessorThread extends Thread {
// private BinaryResultProbeSummary[] m_dsProbeSummary = null;
// private BinaryResultSNPSummary[] m_dsSNPSummary = null;
// private BinaryGZipFloatMatrix[] m_dsZScoreMatrix = null;
// private double m_pvaluethreshold = 2;
// private int m_maxNrResults = 150000;
// private final int m_totalNumberOfProbes;
// private Result[] m_BinaryBuffer;
// private final double m_pvaluePlotThreshold;
// private final static char m_tab = '\t';
// private int nrProcessed;
// private QTL[] tmpEQTLBuffer;
// private int m_eQTLBufferCounter = 0;
// private int m_result_counter = 0;
// private int nrSet;
// private int nrInFinalBuffer = 0;
// private TextFile[] zScoreBinaryFile;
// private TextFile zScoreMetaAnalysisFile;
// private int m_numdatasets = 0;
long nrZ = 0;
private boolean m_createBinaryFiles = false;
private TriTyperGeneticalGenomicsDataset[] m_gg = null;
private boolean m_cisOnly;
private IntMatrix2D m_probeTranslation;
private int m_midpointprobedist;
private final String m_outputdir;
private final boolean m_permuting;
private final int m_permutationround;
private final boolean m_createTEXTFiles;
private final String[] m_probeList;
private final LinkedBlockingQueue m_queue;
private final WorkPackage[] m_availableWorkPackages;
private long nrTestsPerformed = 0;
private QTL[] finalEQTLs;
private double maxSavedPvalue = -Double.MAX_VALUE;
private int locationToStoreResult = 0;
private boolean bufferHasOverFlown = false;
private boolean sorted = false;
private int m_maxResults = 0;
public double highestP = Double.MAX_VALUE;
private int nrSNPsTested = 0;
private final boolean m_useAbsoluteZScore;
private BinaryFile[] zScoreBinaryFile;
private BinaryFile zScoreMetaAnalysisFile;
private TextFile zScoreMetaAnalysisRowNamesFile;
private TextFile[] zScoreRowNamesFile;
public ResultProcessorThread(int nrThreads, LinkedBlockingQueue queue, boolean chargeOutput,
TriTyperGeneticalGenomicsDataset[] gg, Settings settings, IntMatrix2D pprobeTranslation,
boolean permuting, int round, String[] snplist, String[] probelist, WorkPackage[] allPackages) {
m_availableWorkPackages = allPackages;
m_createBinaryFiles = settings.createBinaryOutputFiles;
m_createTEXTFiles = settings.createTEXTOutputFiles;
m_useAbsoluteZScore = settings.useAbsoluteZScorePValue;
m_queue = queue;
m_outputdir = settings.outputReportsDir;
m_permuting = permuting;
m_permutationround = round;
m_probeTranslation = pprobeTranslation;
m_gg = gg;
m_midpointprobedist = settings.ciseQTLAnalysMaxSNPProbeMidPointDistance;
m_cisOnly = (settings.cisAnalysis && !settings.transAnalysis);
m_probeList = probelist;
m_maxResults = settings.maxNrMostSignificantEQTLs;
int tmpbuffersize = (m_maxResults / 10);
if (tmpbuffersize == 0) {
tmpbuffersize = 10;
} else if (tmpbuffersize > 250000) {
tmpbuffersize = 250000;
}
// m_totalNumberOfProbes = probelist.length;
// m_pvaluePlotThreshold = settings.plotOutputPValueCutOff;
// tmpEQTLBuffer = new QTL[tmpbuffersize];
// m_result_counter = 0;
// m_numdatasets = m_gg.length;
finalEQTLs = new QTL[(m_maxResults + tmpbuffersize)];
nrSNPsTested = 0;
}
@Override
public void run() {
// nrProcessed = 0;
try {
if (m_createBinaryFiles) {
zScoreBinaryFile = new BinaryFile[m_gg.length];
zScoreRowNamesFile = new TextFile[m_gg.length];
if (m_gg.length > 1) {
String metaAnalysisFileName = m_outputdir + "MetaAnalysis";
if (m_permuting) {
metaAnalysisFileName += "-PermutationRound-" + m_permutationround;
}
zScoreMetaAnalysisFile = new BinaryFile(metaAnalysisFileName + ".dat", BinaryFile.W);
// write magic number
if (m_cisOnly) {
zScoreMetaAnalysisFile.writeInt(1);
} else {
zScoreMetaAnalysisFile.writeInt(0);
}
zScoreMetaAnalysisRowNamesFile = new TextFile(metaAnalysisFileName + "-RowNames.txt.gz", TextFile.W);
zScoreMetaAnalysisRowNamesFile.writeln("SNP\tAlleles\tMinorAllele\tAlleleAssessed\tNrCalled");
TextFile tf = new TextFile(metaAnalysisFileName + "-ColNames.txt.gz", TextFile.W);
tf.writeList(Arrays.asList(m_probeList));
tf.close();
}
for (int d = 0; d < m_gg.length; d++) {
String fileName = m_outputdir + m_gg[d].getSettings().name;
if (m_permuting) {
fileName += "-PermutationRound-" + m_permutationround;
}
zScoreBinaryFile[d] = new BinaryFile(fileName + ".dat", BinaryFile.W);
// write magic number
if (m_cisOnly) {
zScoreBinaryFile[d].writeInt(1);
} else {
zScoreBinaryFile[d].writeInt(0);
}
TextFile tf = new TextFile(fileName + "-ColNames.txt.gz", TextFile.W);
tf.writeList(Arrays.asList(m_probeList));
tf.close();
zScoreRowNamesFile[d] = new TextFile(fileName + "-RowNames.txt.gz", TextFile.W);
zScoreRowNamesFile[d].writeln("SNP\tAlleles\tMinorAllele\tAlleleAssessed\tNrCalled\tMaf\tHWE\tCallRate");
}
}
ProgressBar progressbar = new ProgressBar(m_availableWorkPackages.length);
boolean poison = false;
while (!poison) {
WorkPackage wp = m_queue.take();
Result r = wp.results;
if (wp.getHasResults()) {
nrSNPsTested++;
}
if (r.poison) {
poison = true;
} else if (r.pvalues != null) {
nrTestsPerformed += wp.getNumTested();
double[] pvalues = r.pvalues;
//Is this working?
if (m_createBinaryFiles && !poison) {
writeBinaryResult(r);
}
if (m_createTEXTFiles && !poison) {
// classic textual output.
for (int p = 0; p < pvalues.length; p++) {
double pval = pvalues[p];
if (!Double.isNaN(pval) && pval <= highestP) {
double[][] corr = r.correlations;
double[] correlations = new double[corr.length];
double[] zscores = new double[corr.length];
int[] samples = new int[corr.length];
double[] fc = new double[corr.length];
double[] beta = new double[corr.length];
double[] betase = new double[corr.length];
for (int d = 0; d < correlations.length; d++) {
if (Double.isNaN(corr[d][p])) {
correlations[d] = Double.NaN;
zscores[d] = Double.NaN;
samples[d] = -9;
fc[d] = Double.NaN;
beta[d] = Double.NaN;
betase[d] = Double.NaN;
} else {
correlations[d] = corr[d][p];
if (m_useAbsoluteZScore) {
zscores[d] = Math.abs(r.zscores[d][p]);
} else {
zscores[d] = r.zscores[d][p];
}
samples[d] = r.numSamples[d];
fc[d] = r.fc[d][p];
beta[d] = r.beta[d][p];
betase[d] = r.se[d][p];
}
}
//
byte allele = -1;
byte[] alleles = null;
SNP[] snps = wp.getSnps();
for (int d = 0; d < snps.length; d++) {
if (snps[d] != null) {
allele = snps[d].getMinorAllele();
alleles = snps[d].getAlleles();
break;
}
}
if (alleles == null) {
System.err.println("SNP has null alleles: ");
for (int d = 0; d < snps.length; d++) {
if (snps[d] != null) {
allele = snps[d].getMinorAllele();
System.err.println(allele);
alleles = snps[d].getAlleles();
System.err.println(alleles);
break;
}
}
}
double Zfinal = r.finalZScore[p];
double finalbeta = r.finalBeta[p];
double finalbetase = r.finalBetaSe[p];
int pid;
if (m_cisOnly) {
pid = wp.getProbes()[p];
} else {
pid = p;
}
addEQTL(pid, wp.getId(), pval, Zfinal, correlations, zscores, samples, alleles, allele, fc, beta, betase, finalbeta, finalbetase);
}
}
}
}
if (wp.results != null) {
wp.clearResults();
}
progressbar.iterate();
}
progressbar.close();
//Is this working?
if (m_createBinaryFiles) {
String fileName = "check";
if (m_permuting) {
fileName += "-PermutationRound-" + m_permutationround;
}
fileName += ".md5";
HexBinaryAdapter md5Parser = new HexBinaryAdapter();
BufferedWriter md5writer = new BufferedWriter(new FileWriter(m_outputdir + fileName));
for (int d = 0; d < m_gg.length; d++) {
zScoreBinaryFile[d].close();
fileName = m_gg[d].getSettings().name;
if (m_permuting) {
fileName += "-PermutationRound-" + m_permutationround;
}
fileName += ".dat";
md5writer.write(md5Parser.marshal(zScoreBinaryFile[d].getWrittenHash()) + " " + fileName + '\n');
zScoreRowNamesFile[d].close();
}
if (m_gg.length > 1) {
zScoreMetaAnalysisFile.close();
fileName = "MetaAnalysis";
if (m_permuting) {
fileName += "-PermutationRound-" + m_permutationround;
}
fileName += ".dat";
md5writer.write(md5Parser.marshal(zScoreMetaAnalysisFile.getWrittenHash()) + " " + fileName + '\n');
zScoreMetaAnalysisRowNamesFile.close();
}
md5writer.close();
}
if (m_createTEXTFiles) {
if (!sorted) {
if (locationToStoreResult != 0) {
Arrays.sort(finalEQTLs, 0, locationToStoreResult);
// SmoothSort.sort(finalEQTLs, 0, locationToStoreResult);
// inplaceArrayQuickSort.sort(finalEQTLs, 0, locationToStoreResult);
}
}
writeTextResults();
}
} catch (IOException e1) {
e1.printStackTrace();
} catch (InterruptedException e2) {
e2.printStackTrace();
}
}
private void writeBinaryResult(Result r) throws IOException {
if (r != null) {
int[] numSamples = null;
try {
numSamples = r.numSamples;
} catch (NullPointerException e) {
System.out.println("ERROR: null result?");
}
int wpId = r.wpid;
WorkPackage currentWP = m_availableWorkPackages[wpId];
double[][] zscores = r.zscores;
if (zscores != null) {
SNP[] snps = currentWP.getSnps();
int numDatasets = zscores.length;
double[] finalZscores = r.finalZScore;
StringBuilder snpoutput = null;
// if we're doing a meta-analysis, write the meta-analysis Z to a separate binaryFile
if (m_gg.length > 1) {
int totalSampleNr = 0;
String snpname = null;
for (int d = 0; d < numDatasets; d++) {
if (snps[d] != null) {
snpname = snps[d].getName();
byte[] alleles = snps[d].getAlleles();
byte minorAllele = snps[d].getMinorAllele();
byte alleleassessed = alleles[1];
if (currentWP.getFlipSNPAlleles()[d]) {
alleleassessed = alleles[0];
}
if (snpoutput == null) {
snpoutput = new StringBuilder();
snpoutput.append(snpname);
snpoutput.append("\t");
snpoutput.append(BaseAnnot.getAllelesDescription(alleles));
snpoutput.append("\t");
snpoutput.append(BaseAnnot.toString(minorAllele));
snpoutput.append("\t");
snpoutput.append(BaseAnnot.toString(alleleassessed));
}
totalSampleNr += r.numSamples[d];
}
}
StringBuilder sb = null;
for (int p = 0; p < finalZscores.length; p++) {
float z = (float) finalZscores[p];
if (m_cisOnly) {
int[] probes = currentWP.getProbes();
int probeId = probes[p];
String probeName = m_probeList[probeId];
if (sb == null) {
sb = new StringBuilder();
} else {
sb.append("\t");
}
sb.append(probeName);
zScoreMetaAnalysisFile.writeFloat(z);
} else {
zScoreMetaAnalysisFile.writeFloat(z);
}
}
if (snpoutput != null) {
snpoutput.append("\t");
snpoutput.append(totalSampleNr);
snpoutput.append("\t-\t-\t-\t");
snpoutput.append(finalZscores.length);
snpoutput.append("\t");
if (sb != null) {
snpoutput.append(sb.toString());
} else {
snpoutput.append("-");
}
zScoreMetaAnalysisRowNamesFile.writeln(snpoutput.toString());
}
}
for (int d = 0; d < numDatasets; d++) {
double[] datasetZScores = zscores[d];
SNP datasetSNP = snps[d];
if (datasetSNP != null) {
BinaryFile outfile = zScoreBinaryFile[d];
String snpname = datasetSNP.getName();
byte[] alleles = datasetSNP.getAlleles();
byte minorAllele = datasetSNP.getMinorAllele();
byte alleleassessed = alleles[1];
double hwe = datasetSNP.getHWEP();
double cr = datasetSNP.getCR();
double maf = datasetSNP.getMAF();
if (currentWP.getFlipSNPAlleles()[d]) {
alleleassessed = alleles[0];
}
TextFile snpfile = zScoreRowNamesFile[d];
StringBuilder sb = null;
for (int p = 0; p < datasetZScores.length; p++) {
float z = (float) datasetZScores[p];
if (currentWP.getFlipSNPAlleles()[d]) {
z *= -1;
}
// System.out.println(p + "\t" + alleleassessed + "\t" + m_probeList[p] + "\t" + z + "\t" + currentWP.getFlipSNPAlleles()[d]);
if (m_cisOnly) {
// take into account that not all probes have been tested..
int[] probes = currentWP.getProbes();
int probeId = probes[p];
String probeName = m_probeList[probeId];
outfile.writeFloat(z);
if (sb == null) {
sb = new StringBuilder();
} else {
sb.append("\t");
}
sb.append(probeName);
} else {
outfile.writeFloat(z);
}
}
StringBuilder buffer = new StringBuilder();
buffer.append(snpname)
.append("\t")
.append(BaseAnnot.getAllelesDescription(alleles))
.append("\t")
.append(BaseAnnot.toString(minorAllele))
.append("\t")
.append(BaseAnnot.toString(alleleassessed))
.append("\t")
.append(datasetSNP.getNrCalled())
.append("\t")
.append(maf)
.append("\t")
.append(hwe)
.append("\t")
.append(cr)
.append("\t")
.append(datasetZScores.length)
.append("\t");
if (sb != null) {
buffer.append(sb.toString());
} else {
buffer.append("-");
}
snpfile.writeln(buffer.toString());
}
}
}
}
}
private void addEQTL(int pid, int sid, double pval, double zscore, double[] correlations, double[] zscores, int[] numSamples, byte[] alleles, byte assessedAllele, double[] fc, double[] beta, double[] betase, double finalbeta, double finalbetase) {
if (bufferHasOverFlown) {
if (pval <= maxSavedPvalue) {
sorted = false;
finalEQTLs[locationToStoreResult] = new QTL(pval, pid, sid, assessedAllele, zscore, alleles, zscores, numSamples, correlations, fc, beta, betase, finalbeta, finalbetase);
locationToStoreResult++;
if (locationToStoreResult == finalEQTLs.length) {
Arrays.sort(finalEQTLs);
// SmoothSort.sort(finalEQTLs);
// inplaceArrayQuickSort.sort(finalEQTLs);
sorted = true;
locationToStoreResult = m_maxResults;
maxSavedPvalue = finalEQTLs[(m_maxResults - 1)].getPvalue();
}
}
} else {
if (pval > maxSavedPvalue) {
maxSavedPvalue = pval;
}
finalEQTLs[locationToStoreResult] = new QTL(pval, pid, sid, assessedAllele, zscore, alleles, zscores, numSamples, correlations, fc, beta, betase, finalbeta, finalbetase);
locationToStoreResult++;
if (locationToStoreResult == m_maxResults) {
bufferHasOverFlown = true;
}
}
}
private void writeTextResults() throws IOException {
int nrOfEntriesToWrite = m_maxResults;
if (locationToStoreResult < m_maxResults) {
nrOfEntriesToWrite = locationToStoreResult;
}
System.out.println("Writing " + nrOfEntriesToWrite + " results out of " + nrTestsPerformed + " tests performed. " + nrSNPsTested + " SNPs finally tested.");
if (m_permuting) {
TextFile gz = new TextFile((m_outputdir + "PermutedEQTLsPermutationRound" + m_permutationround + ".txt.gz"), TextFile.W);
gz.writeln("PValue\tSNP\tProbe\tGene\tAlleles\tAlleleAssessed\tZScore");
for (int i = 0; i < nrOfEntriesToWrite; i++) {
gz.writeln(finalEQTLs[i].getPermutationDescription(m_availableWorkPackages, m_probeTranslation, m_gg, m_midpointprobedist));
}
gz.close();
} else {
QTLTextFile et = new QTLTextFile((m_outputdir + "eQTLs.txt.gz"), QTLTextFile.W);
for (int i = 0; i < nrOfEntriesToWrite; i++) {
et.writeln(finalEQTLs[i].getDescription(m_availableWorkPackages, m_probeTranslation, m_gg, m_midpointprobedist));
}
et.close();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy