eqtlmappingpipeline.qcpca.QCPCA Maven / Gradle / Ivy
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package eqtlmappingpipeline.qcpca;
import eqtlmappingpipeline.graphics.ScatterPlot;
import umcg.genetica.math.PCA;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.containers.SortableSNP;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.SNPLoader;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDataset;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDatasetSettings;
import umcg.genetica.io.trityper.util.DetermineLD;
import umcg.genetica.math.matrix.DoubleMatrixDataset;
import umcg.genetica.math.stats.Log2Transform;
import umcg.genetica.math.stats.Descriptives;
import umcg.genetica.math.stats.QuantileNormalization;
/**
*
* @author harmjan
*/
public class QCPCA {
private boolean useCorrelationMatrix = false;
private boolean LDpruning = false;
// test
public void run(String expressionLoc, String expressionPlatform, String genotypeLoc, String gte, String probeannotation, String outputdirectory, String prunedSNPListFile) {
try {
if (!outputdirectory.endsWith("/")) {
outputdirectory += "/";
}
Gpio.createDir(outputdirectory);
TriTyperGeneticalGenomicsDatasetSettings settings = new TriTyperGeneticalGenomicsDatasetSettings();
settings.expressionLocation = expressionLoc;
settings.expressionplatform = expressionPlatform;
settings.genotypeLocation = genotypeLoc;
settings.genotypeToExpressionCoupling = gte;
settings.probeannotation = probeannotation;
TriTyperGeneticalGenomicsDataset ds = new TriTyperGeneticalGenomicsDataset(settings);
SNPLoader loader = ds.getGenotypeData().createSNPLoader();
ArrayList ldSNPs;
int numsamples = ds.getTotalGGSamples();
int[] indWGA = ds.getExpressionToGenotypeIdArray();
if (prunedSNPListFile == null && LDpruning) {
ldSNPs = pruneSNPsByLDThreshold(ds, loader);
} else if (prunedSNPListFile != null) {
ldSNPs = loadPrunedSNPListFromFile(prunedSNPListFile, ds);
} else {
// PCA based SNP window multiple regression pruning
ldSNPs = pruneSNPsByMLRegressionPCA(ds, loader);
}
System.out.println("Copying data to array");
ProgressBar pb = new ProgressBar(ldSNPs.size());
double[][] datatmp = new double[numsamples][ldSNPs.size()];
HashSet snpsWoData = new HashSet();
for (int i = 0; i < ldSNPs.size(); i++) {
Integer snpID = ldSNPs.get(i);
// System.out.println(snpID);
SNP snpObj = ds.getGenotypeData().getSNPObject(snpID);
loader.loadGenotypes(snpObj);
if (loader.hasDosageInformation()) {
loader.loadDosage(snpObj);
}
double[] snpdata = getSNPData(loader, numsamples, indWGA, snpObj, false);
if (snpdata != null) {
for (int j = 0; j < snpdata.length; j++) {
datatmp[j][i] = snpdata[j];
}
} else {
snpsWoData.add(i);
}
pb.iterate();
}
pb.close();
double[][] datafinal = null;
if (snpsWoData.size() > 0) {
System.out.println("Detected " + snpsWoData.size() + " SNPs not passing QC, out of " + ldSNPs.size());
int numsnpswdata = ldSNPs.size() - snpsWoData.size();
datafinal = new double[numsamples][numsnpswdata];
int snpcounter = 0;
for (int i = 0; i < ldSNPs.size(); i++) {
if (!snpsWoData.contains(i)) {
for (int j = 0; j < datatmp.length; j++) {
datatmp[j][snpcounter] = datatmp[j][i];
}
snpcounter++;
}
}
} else {
datafinal = datatmp;
}
double[][] correlationmatrix = new double[numsamples][numsamples];
if (useCorrelationMatrix) {
correlationmatrix = calculatecorrelationmatrix(datafinal, true);
} else {
for (int i = 0; i < numsamples; i++) {
double[] snpsi = datafinal[i];
for (int j = i + 1; j < numsamples; j++) {
double[] snpsj = datafinal[j];
int nrSNPsWithGenotypeDataAvailableForBothSamples = 0;
double ibsCount = 0;
for (int s = 0; s < snpsi.length; s++) {
if (snpsi[s] != -1 && snpsj[s] != -1) {
double ibsVal = 0;
if (snpsi[s] == snpsj[s]) {
ibsVal = 1.0d;
} else {
if (Math.abs(snpsi[s] - snpsj[s]) == 1) {
ibsVal = 0.5d;
}
}
ibsCount += ibsVal;
nrSNPsWithGenotypeDataAvailableForBothSamples++;
}
}
double corr = (double) ibsCount / (double) nrSNPsWithGenotypeDataAvailableForBothSamples;
correlationmatrix[i][j] = corr;
correlationmatrix[j][i] = corr;
pb.iterate();
}
correlationmatrix[i][i] = 1.0;
}
}
TextFile corMat = new TextFile(outputdirectory + "snpcorrmat.txt", TextFile.W);
for (int i = 0; i < correlationmatrix.length; i++) {
String output = "";
for (int j = 0; j < correlationmatrix.length; j++) {
output += "\t" + correlationmatrix[i][j];
}
corMat.write(output + "\n");
}
corMat.close();
pb.close();
for (int i = 0; i < 10; i++) {
String output = "";
for (int j = 0; j < 10; j++) {
output += "\t" + correlationmatrix[i][j];
}
System.out.println(output);
}
System.out.println("Performing eigenvalue decomposition");
Jama.EigenvalueDecomposition eig = PCA.eigenValueDecomposition(correlationmatrix);
System.out.println("Getting eigenvalues");
double[] eigenValues = PCA.getRealEigenvalues(eig);
System.out.println("Getting eigenvariance");
double genVarPC1 = PCA.getEigenValueVar(eigenValues, 1);
System.out.println("Getting eigenvector");
double[] PC1GenEigenVector = PCA.getEigenVector(eig, eigenValues, 1);
System.out.println("Getting eigenvector");
double[] PC2GenEigenVector = PCA.getEigenVector(eig, eigenValues, 2);
TextFile eigenvectorsout = new TextFile(outputdirectory + "PCAOverSamplesEigenvalues.txt.gz", TextFile.W);
double cumVarPCA = 0;
for (int pca = 0; pca < numsamples; pca++) {
double varPCA = PCA.getEigenValueVar(eigenValues, pca);
int pcaNr = pca + 1;
cumVarPCA += varPCA;
eigenvectorsout.write(pcaNr + "\t" + varPCA + "\t" + cumVarPCA + "\n");
System.out.println("PCA:\t" + pcaNr + "\t" + eigenValues[eigenValues.length - 1 - pca] + "\t" + cumVarPCA);
}
eigenvectorsout.close();
System.out.println("Done");
System.out.println(genVarPC1);
for (int i = 1; i < 11; i++) {
ScatterPlot scat = new ScatterPlot();
scat.draw(PCA.getEigenVector(eig, eigenValues, i), PCA.getEigenVector(eig, eigenValues, i + 1), "PC" + i, "PC" + (i + 1), "Genetic Eigenvalues", outputdirectory + "SNP-");
}
TextFile out = new TextFile(outputdirectory + "EigenVectors-SNPs.txt", TextFile.W);
for (int i = 0; i < numsamples; i++) {
String probeCoefficients = "";
for (int pc = 1; pc <= numsamples - 1; pc++) {
probeCoefficients += "\t" + PCA.getEigenVector(eig, eigenValues, pc)[i];
}
// System.out.println(ds.getExpressionData().getIndividuals()[i]+"\t"+ds.getGenotypeData().getIndividuals()[indWGA[i]]+probeCoefficients);
out.write(ds.getExpressionData().getIndividuals()[i] + "\t" + ds.getGenotypeData().getIndividuals()[indWGA[i]] + probeCoefficients + "\n");
}
out.close();
// EXPRESSION DATA!
double[][] rawData = ds.getExpressionData().getMatrix();
DoubleMatrixDataset dataset = new DoubleMatrixDataset(rawData.length, rawData[rawData.length - 1].length);
QuantileNormalization.quantilenormalize(rawData);
Log2Transform.log2transform(rawData);
dataset.rowObjects = Arrays.asList(ds.getExpressionData().getProbes());
dataset.colObjects = Arrays.asList(ds.getExpressionData().getIndividuals());
dataset.rawData = rawData;
int nrProbes = dataset.rowObjects.size();
int nrSamples = dataset.colObjects.size();
System.out.println("Standardizing probe mean and standard deviation");
for (int p = 0; p < dataset.rowObjects.size(); p++) {
double mean = Descriptives.mean(rawData[p]);
double stdev = Math.sqrt(Descriptives.variance(rawData[p], mean));
for (int s = 0; s < dataset.colObjects.size(); s++) {
rawData[p][s] -= mean;
// rawData[p][s] /= stdev; // do not scale each probe for stdev: this will remove the variation captured by the
}
}
System.out.println("- Standardizing sample mean and standard deviation");
for (int s = 0; s < nrSamples; s++) {
double[] vals = new double[nrProbes];
for (int p = 0; p < nrProbes; p++) {
vals[p] = dataset.rawData[p][s];
}
double mean = Descriptives.mean(vals);
for (int p = 0; p < nrProbes; p++) {
vals[p] -= mean;
}
double var = Descriptives.variance(vals, mean);
double stdev = Math.sqrt(var);
for (int p = 0; p < nrProbes; p++) {
dataset.rawData[p][s] = (vals[p] / stdev);
}
}
System.out.print("- Calculating correlations between all " + nrSamples + " samples: ");
double[][] correlationMatrix = new double[nrSamples][nrSamples];
double probeCountMinusOne = nrProbes - 1;
ProgressBar pv2 = new ProgressBar(nrSamples * nrSamples);
for (int f = 0; f < nrSamples; f++) {
for (int g = f; g < nrSamples; g++) {
double covarianceInterim = 0;
for (int p = 0; p < nrProbes; p++) {
covarianceInterim += dataset.rawData[p][f] * dataset.rawData[p][g];
}
double covariance = covarianceInterim / probeCountMinusOne;
correlationMatrix[f][g] = covariance;
correlationMatrix[g][f] = covariance;
pv2.iterate();
pv2.iterate();
}
}
pv2.close();
System.out.println("100%");
System.out.println("Performing eigenvalue decomposition");
Jama.EigenvalueDecomposition eigExp = PCA.eigenValueDecomposition(correlationmatrix);
System.out.println("Getting eigenvalues");
double[] eigenValuesExp = PCA.getRealEigenvalues(eigExp);
System.out.println("Getting eigenvariance");
double expVarPC1 = PCA.getEigenValueVar(eigenValuesExp, 1);
System.out.println("Getting eigenvector");
double[] PC1ExpEigenVector = PCA.getEigenVector(eigExp, eigenValuesExp, 1);
System.out.println("Getting eigenvector");
double[] PC2ExpEigenVector = PCA.getEigenVector(eigExp, eigenValuesExp, 2);
double[][] correlationmatrix2 = new double[numsamples][numsamples];
pb = new ProgressBar(numsamples * numsamples);
pb.print();
for (int i = 1; i < 11; i++) {
ScatterPlot scat = new ScatterPlot();
scat.draw(PCA.getEigenVector(eigExp, eigenValuesExp, i), PCA.getEigenVector(eigExp, eigenValuesExp, i + 1), "PC" + i, "PC" + (i + 1), "Expression Eigenvalues", outputdirectory + "Exp-");
}
if (numsamples > 100) {
numsamples = 100;
}
double bonferroni = 0.05 / (numsamples * numsamples);
System.out.println("Determining significant correlations between genetic PCs and expression PCs");
System.out.println("Threshold: " + bonferroni);
for (int pc = 1; pc <= numsamples - 1; pc++) {
double[] genEig = PCA.getEigenVector(eig, eigenValues, pc);
for (int pc2 = pc; pc2 <= numsamples - 1; pc2++) {
double[] expEig = PCA.getEigenVector(eigExp, eigenValuesExp, pc2);
double corr = JSci.maths.ArrayMath.correlation(genEig, expEig);
correlationmatrix2[pc][pc2] = corr;
correlationmatrix2[pc2][pc] = corr;
ScatterPlot scat = new ScatterPlot();
int df = numsamples - 2;
double t = corr / Math.sqrt(((1 - (corr * corr)) / df));
// sqrt[(1—r2)/(N—2)]
cern.jet.random.tdouble.StudentT tDistColt = new cern.jet.random.tdouble.StudentT(df, (new cern.jet.random.tdouble.engine.DRand()));
double tTestPValue1 = tDistColt.cdf(t);
if (tTestPValue1 < bonferroni) {
System.out.println(corr + "\t" + t + "\t" + tTestPValue1);
scat.draw(genEig, expEig, "SNP" + pc, "EXP" + pc2, "SNP vs Gene expression PC" + pc + ", corr: " + corr + ", pval: " + tTestPValue1, outputdirectory + "SNPvsEXP");
}
pb.iterate();
}
}
pb.close();
out = new TextFile(outputdirectory + "SNP-PCvsExp-PC.txt", TextFile.W);
for (int i = 0; i < numsamples; i++) {
String probeCoefficients = "";
for (int pc = 1; pc <= numsamples - 1; pc++) {
probeCoefficients += "\t" + correlationmatrix2[i][pc];
}
// System.out.println(ds.getExpressionData().getIndividuals()[i]+"\t"+ds.getGenotypeData().getIndividuals()[indWGA[i]]+probeCoefficients);
out.write(i + probeCoefficients + "\n");
}
out.close();
} catch (Exception e) {
e.printStackTrace();
}
}
private double[] getSampleData(double[][] expressiondata, int sample) {
double[] data = new double[expressiondata.length];
for (int i = 0; i < expressiondata.length; i++) {
data[i] = expressiondata[i][sample];
}
return data;
}
public double[] getSNPData(SNPLoader loader, int numsamples, int[] indWGA, SNP snpObj, boolean normalizegenotypedata) throws IOException {
// System.out.println("Loading snp\t" +snpObj.getId()+"\t"+snpObj.getName());
loader.loadGenotypes(snpObj);
if (loader.hasDosageInformation()) {
loader.loadDosage(snpObj);
}
// if (snpObj.passesQC() && snpObj.getMAF() > 0.05 && snpObj.getCR() > 0.95 && snpObj.getHWEP() > 0.0001) {
double[] tmpData = new double[numsamples];
if (normalizegenotypedata) {
if (loader.hasDosageInformation()) {
double[] genotypes = snpObj.getDosageValues();
int numinds = 0;
for (int i = 0; i < indWGA.length; i++) {
if (indWGA[i] != -1) {
tmpData[numinds] = genotypes[indWGA[i]];
numinds++;
}
}
double mean = JSci.maths.ArrayMath.mean(tmpData);
double stdev = JSci.maths.ArrayMath.standardDeviation(tmpData);
for (int i = 0; i < tmpData.length; i++) {
tmpData[i] -= mean;
tmpData[i] /= stdev;
}
} else {
byte[] genotypes = snpObj.getGenotypes();
int numinds = 0;
int numIndsWithoutGenotypes = 0;
for (int i = 0; i < indWGA.length; i++) {
if (indWGA[i] != -1) {
if (genotypes[indWGA[i]] == -1) {
numIndsWithoutGenotypes++;
}
tmpData[numinds] = genotypes[indWGA[i]];
numinds++;
}
}
double[] tmpData2 = new double[numsamples - numIndsWithoutGenotypes];
int j = 0;
for (int i = 0; i < tmpData.length; i++) {
if (tmpData[i] > 0) {
tmpData2[j] = tmpData[i];
j++;
}
}
double mean = JSci.maths.ArrayMath.mean(tmpData2);
double stdev = JSci.maths.ArrayMath.standardDeviation(tmpData2);
for (int i = 0; i < tmpData.length; i++) {
if (tmpData[i] < 0) {
tmpData[i] = mean;
}
tmpData[i] -= mean;
tmpData[i] /= stdev;
}
}
} else {
byte[] genotypes = snpObj.getGenotypes();
int numinds = 0;
int numIndsWithoutGenotypes = 0;
for (int i = 0; i < indWGA.length; i++) {
if (indWGA[i] != -1) {
if (genotypes[indWGA[i]] == -1) {
numIndsWithoutGenotypes++;
}
tmpData[numinds] = genotypes[indWGA[i]];
numinds++;
}
}
}
snpObj.clearGenotypes();
return tmpData;
// } else {
// return null;
// }
}
private ArrayList loadPrunedSNPListFromFile(String prunedSNPListFile, TriTyperGeneticalGenomicsDataset ds) throws IOException {
System.out.println("Loading list of pruned SNPs from text file: " + prunedSNPListFile);
TextFile tf = new TextFile(prunedSNPListFile, TextFile.R);
String[] list = tf.readAsArray();
tf.close();
ArrayList ldSNPs = new ArrayList();
for (String s : list) {
Integer snpId = ds.getGenotypeData().getSnpToSNPId().get(s);
if (snpId != -9) {
ldSNPs.add(snpId);
}
}
System.out.println(ldSNPs.size() + " out of " + list.length + " SNPs in the pruned SNP list detected.");
return ldSNPs;
}
private ArrayList pruneSNPsByLDThreshold(TriTyperGeneticalGenomicsDataset ds, SNPLoader loader) {
System.out.println("Pruning SNPs for LD");
ArrayList ldSNPs = new ArrayList();
DetermineLD ldcalc = new DetermineLD();
try {
for (int chr = 1; chr < 23; chr++) {
HashSet snpsVisited = new HashSet();
ArrayList snpsForChr = getSortedListOfSNPsForChr(chr, ds);
int numSNPsAfterPruning = 0;
if (snpsForChr == null) {
System.out.println("No SNPs for Chr: " + chr);
} else {
int startsnpnum = 0;
// sort SNPs..
ProgressBar pb = new ProgressBar(snpsForChr.size());
for (startsnpnum = 0; startsnpnum < snpsForChr.size(); startsnpnum++) {
int snpID = snpsForChr.get(startsnpnum);
// int snpID = snpsForChr.get(s);
if (snpsVisited.contains(snpID)) {
// skip this SNP: it belongs to a different LD block
} else {
SNP snpObj = ds.getGenotypeData().getSNPObject(snpID);
loader.loadGenotypes(snpObj);
if (loader.hasDosageInformation()) {
loader.loadDosage(snpObj);
}
if (snpObj.passesQC() && snpObj.getMAF() > 0.05 && snpObj.getCR() > 0.95 && snpObj.getHWEP() > 0.0001) {
// add this SNP to the dataset for calculating correlations
// data.add();
ldSNPs.add(snpID);
for (int querysnpnum = startsnpnum + 1; querysnpnum < snpsForChr.size(); querysnpnum++) {
int snpID2 = snpsForChr.get(querysnpnum);
SNP snpObj2 = ds.getGenotypeData().getSNPObject(snpID2);
loader.loadGenotypes(snpObj2);
if (loader.hasDosageInformation()) {
loader.loadDosage(snpObj2);
}
if (snpObj2.passesQC() && snpObj2.getMAF() > 0.05 && snpObj2.getCR() > 0.95 && snpObj2.getHWEP() > 0.0001) {
double r2 = ldcalc.getRSquared(snpObj, snpObj2, ds.getGenotypeData(), ldcalc.RETURN_R_SQUARED, ldcalc.INCLUDE_CASES_AND_CONTROLS, false);
double dp = ldcalc.getRSquared(snpObj, snpObj2, ds.getGenotypeData(), ldcalc.RETURN_D_PRIME, ldcalc.INCLUDE_CASES_AND_CONTROLS, false);
if (r2 < 0.1 && dp < 0.5) {
//
snpObj2.clearGenotypes();
startsnpnum = querysnpnum;
break;
} else {
snpObj2.clearGenotypes();
snpsVisited.add(snpID2);
}
} else {
snpObj2.clearGenotypes();
snpsVisited.add(snpID2);
}
pb.iterate();
} // pairwise comparison
}
snpObj.clearGenotypes();
} // end HWE, MAF and CR filter
snpsVisited.add(snpID);
numSNPsAfterPruning++;
pb.iterate();
}
pb.close();
System.out.println(numSNPsAfterPruning + " SNPs left after pruning, out of " + snpsForChr.size() + "\t" + ldSNPs.size() + " total.");
}
}
} catch (IOException e) {
e.printStackTrace();
}
System.out.println(ldSNPs.size() + " pruned SNPs");
return ldSNPs;
}
private ArrayList getSortedListOfSNPsForChr(int chr, TriTyperGeneticalGenomicsDataset ds) {
String[] snps = ds.getGenotypeData().getSNPs();
ArrayList snpsOnChr = new ArrayList();
int numsnps = 0;
for (int i = 0; i < snps.length; i++) {
byte snpchr = ds.getGenotypeData().getChr(i);
if (snpchr == chr) {
snpsOnChr.add(i);
}
}
ArrayList snpsSorted = new ArrayList();
for (Integer i : snpsOnChr) {
int chrPos = ds.getGenotypeData().getChrPos(i);
if (chrPos > -1) {
snpsSorted.add(new SortableSNP(null, i, (byte) chr, chrPos, SortableSNP.SORTBY.CHRPOS));
}
}
int numTotalOnChr = snpsOnChr.size();
Collections.sort(snpsSorted);
//check sorting
snpsOnChr = new ArrayList();
for (SortableSNP s : snpsSorted) {
snpsOnChr.add(s.id);
}
// check sorting
int prevpos = -1;
for (Integer i : snpsOnChr) {
Integer chrPos = ds.getGenotypeData().getChrPos(i);
if (prevpos == -1) {
prevpos = chrPos;
} else {
if (chrPos >= prevpos) {
// its ok
} else {
System.out.println("SNPs are not sorted!!");
for (int j = 0; j < snpsOnChr.size(); j++) {
Integer snpid = snpsOnChr.get(j);
System.out.println(j + "\t" + snpsOnChr.get(j) + "\t" + ds.getGenotypeData().getChrPos(snpid));
}
System.exit(0);
}
}
}
System.out.println("Chr " + chr + " has " + snpsOnChr.size() + " SNPs with annotation, out of " + numTotalOnChr);
return snpsOnChr;
}
private ArrayList pruneSNPsByMLRegressionPCA(TriTyperGeneticalGenomicsDataset ds, SNPLoader loader) throws IOException {
ArrayList ldSNPs = new ArrayList();
int numsamples = ds.getTotalGGSamples();
int[] indWGA = ds.getExpressionToGenotypeIdArray();
int windowsize = 50;
int windowshift = 5;
int vifthreshold = 2;
double[][] snpdata = new double[windowsize][numsamples];
int totalafterpruning = 0;
for (int chr = 1; chr < 23; chr++) {
HashSet visitedSNPs = new HashSet();
ArrayList sortedSNPs = getSortedListOfSNPsForChr(chr, ds);
if (sortedSNPs.size() < windowsize) {
System.out.println("Chromosome " + chr + " has less than " + windowsize + " SNPs for pruning");
} else {
int numwindowsremainder = sortedSNPs.size() % windowsize;
int numwindows = (sortedSNPs.size() - numwindowsremainder) / windowsize;
System.out.println("Pruning SNPs for chromosome: " + chr);
ProgressBar pb = new ProgressBar(sortedSNPs.size());
// double[][] correlationmatrix = new double[windowsize][windowsize];
int window = 0;
for (int startsnp = 0; startsnp + windowsize < sortedSNPs.size(); startsnp += windowshift) {
// System.out.println(startsnp+"\t"+sortedSNPs.size());
ArrayList snpsInThisWindow = new ArrayList();
int s = 0;
int currentsnp = startsnp;
boolean fullwindow = true;
while (s < windowsize) { //for(int s = startsnp; s= query){
//// pcscores[pc][sample] += snpdata[snp+1][sample] * probecoefficient;
//// } else {
// pcscores[pc][sample] += snpdata[snp][sample] * probecoefficient;
//// }
// }
// }
// }
//
//
// double sum = 0;
// // for(int sample=0; sample<10; sample++){
// // System.out.println(snp1+"\t"+sample+"\t"+snpdata[snp1][sample]+"\t"+pcscores[snp1][sample]);
// // }
// for(int snp1=0; snp1= snp1) {
pcscores[pc][sample] += snpdata[snp + 1][sample] * probecoefficient;
} else {
pcscores[pc][sample] += snpdata[snp][sample] * probecoefficient;
}
}
}
}
double sum = 0;
// for(int snp1=0; snp1= query){
// pcscores[pc][sample] += snpdata[snp+1][sample] * probecoefficient;
// } else {
// pcscores[pc][sample] += snpdata[snp][sample] * probecoefficient;
// }
//
// }
// }
// }
//
//
// double sum = 0;
// // for(int sample=0; sample<10; sample++){
// // System.out.println(snp1+"\t"+sample+"\t"+snpdata[snp1][sample]+"\t"+pcscores[snp1][sample]);
// // }
// for(int snp2=0; snp2 1E-5){
// System.out.println("Innerproduct > 1E-5 for covariates "+i+" and "+j);
//
// }
//
// System.out.println((i+1)+"\t"+(j+1)+"\t"+sum+"\t"+corr);
// }
//
// }
}
pb.set(startsnp);
}
pb.close();
}
System.out.println("SNPs after pruning: " + visitedSNPs.size());
totalafterpruning += visitedSNPs.size();
}
System.out.println(totalafterpruning);
System.exit(0);
return ldSNPs;
}
private double[][] calculatecorrelationmatrix(double[][] data, boolean verbose) {
return calculatecorrelationmatrix(data, verbose, null);
}
private double[][] calculatecorrelationmatrix(double[][] data, boolean verbose, Integer skip) {
ProgressBar pb = null;
if (verbose) {
System.out.println("Calculating correlation matrix");
pb = new ProgressBar(data.length);
pb.print();
}
int numrows = data.length;
double[][] correlationmatrix = null;
if (skip != null) {
correlationmatrix = new double[numrows - 1][numrows - 1];
int rows = 0;
for (int i = 0; i < numrows; i++) {
if (i != skip) {
int cols = 0;
for (int j = i + 1; j < numrows; j++) {
if (j != skip) {
double corr = JSci.maths.ArrayMath.correlation(data[i], data[j]);
correlationmatrix[rows][cols] = corr;
correlationmatrix[cols][rows] = corr;
cols++;
}
}
correlationmatrix[rows][rows] = 1.0;
rows++;
}
}
} else {
correlationmatrix = new double[numrows][numrows];
for (int i = 0; i < numrows; i++) {
for (int j = i + 1; j < numrows; j++) {
double corr = JSci.maths.ArrayMath.correlation(data[i], data[j]);
correlationmatrix[i][j] = corr;
correlationmatrix[j][i] = corr;
}
if (verbose) {
pb.iterate();
}
correlationmatrix[i][i] = 1.0;
}
if (verbose) {
pb.close();
}
}
return correlationmatrix;
}
}
// case vs control analysis
// if (1==2) {
//
// boolean[] isCase = new boolean[ds.getGenotypeData().getIndividuals().length];
// int nrCases = 0;
// int nrControls = 0;
// for (int s=0; s 0.05 && snpObj.getCR() > 0.95 && snpObj.getHWEP() > 0.0001){
// int[][] twoByTwo = new int[2][2];
// for (int s=0; s= 0){
// tmpData2[j] = tmpData[i];
// j++;
// }
// }
//
// double mean = JSci.maths.ArrayMath.mean(tmpData2);
// double stdev = JSci.maths.ArrayMath.standardDeviation(tmpData2);
//
// for(int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy