
// imputationtool.postprocessing.CompareImputedSNPsToRealSNPs (listing header from the source browser: Maven / Gradle / Ivy)
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package imputationtool.postprocessing;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.collections.primitives.ArrayDoubleList;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.SNPLoader;
import umcg.genetica.io.trityper.TriTyperGenotypeData;
import umcg.genetica.io.trityper.util.BaseAnnot;
/**
*
* @author harmjan
*/
public class CompareImputedSNPsToRealSNPs {
private final TriTyperGenotypeData ggDataset1;
private final TriTyperGenotypeData ggDataset2;
/**
 * Command-line entry point: runs a single comparison between two TriTyper datasets.
 *
 * <p>When exactly four arguments are given they are handed to the constructor in order
 * (dataset1 location, dataset1 name, dataset2 location, dataset2 name). With any other
 * argument count the built-in default comparison ("original imputed vs omni imputed 100g")
 * is run, which is what this method did unconditionally before.
 *
 * <p>The comparison is carried out by the constructor itself, so the created object is not
 * kept.
 *
 * @param args optionally {@code dataset1 dataset1Name dataset2 dataset2Name}
 * @throws IOException propagated from the constructor
 */
public static void main(String[] args) throws IOException {
    final String root = "D:\\UMCG\\SAT-VAT-Liver-Muscle-ImputeTriTyper\\";

    // Values noted by the original author for earlier runs. Each entry lists
    // dataset1 [name] vs dataset2 [name]; ROOT stands for the directory in 'root' above.
    //   LiverOmni vs LiverCyto                 -> 0.9997752239290337
    //       ROOT + "Liver\\LiverOmni\\" [Unimputed]   vs ROOT + "Liver\\LiverCyto\\" [Imputed]
    //   unimputed merged vs LiverCyto          -> 0.6492976314532056
    //       ROOT + "Liver\\LiverMerged\\" [Unimputed] vs ROOT + "Liver\\LiverCyto\\" [Imputed]
    //   unimputed merged vs LiverOmni          -> 0.3528032918756224
    //       ROOT + "Liver\\LiverMerged\\" [Unimputed] vs ROOT + "Liver\\LiverOmni\\" [Imputed]
    //   LiverCyto vs original imputed          -> 0.9961702580911673
    //       ROOT + "Liver\\LiverCyto\\" [Unimputed]   vs ROOT [Imputed]
    //   LiverOmni vs original imputed          -> 0.9982103900136859
    //       ROOT + "Liver\\LiverOmni\\" [Unimputed]   vs ROOT [Imputed]
    //   LiverCyto vs omni imputed 100g         -> 0.9956201693043798
    //       ROOT + "Liver\\LiverCyto\\" [Unimputed]   vs ROOT + "Liver\\LiverImputed100G\\" [Imputed]
    //   LiverOmni vs omni imputed 100g         -> 0.9959821144437336
    //       ROOT + "Liver\\LiverOmni\\" [Unimputed]   vs ROOT + "Liver\\LiverImputed100G\\" [Imputed]
    //   original imputed vs omni imputed 100g  -> 0.9893960960146694   (the default run below)
    //       ROOT [Imputed_1]                          vs ROOT + "Liver\\LiverImputed100G\\" [Imputed]

    if (args != null && args.length == 4) {
        // Caller supplied both datasets explicitly.
        new CompareImputedSNPsToRealSNPs(args[0], args[1], args[2], args[3]);
        return;
    }

    // Default: original imputed vs omni imputed 100g.
    new CompareImputedSNPsToRealSNPs(root, "Imputed_1", root + "Liver\\LiverImputed100G\\", "Imputed");
}
/**
 * Starts a comparison between two fixed Rotterdam Study dataset directories, labelled
 * "Unimputed" and "Imputed", by constructing a new {@code CompareImputedSNPsToRealSNPs}.
 *
 * <p>The new instance is discarded once its constructor returns.
 *
 * @param args not read by this method
 * @throws IOException propagated from the constructor
 */
public void run(String[] args) throws IOException {
    final String unimputedDir = "/Data/GeneticalGenomicsDatasets/RotterdamStudy/TriTyper773Samples-GenotypesQCedRotterdam/";
    final String imputedDir = "/Data/GeneticalGenomicsDatasets/RotterdamStudy/MachImputed/TriTyperFixed/";
    new CompareImputedSNPsToRealSNPs(unimputedDir, "Unimputed", imputedDir, "Imputed");
}
/**
 * Loads two TriTyper genotype datasets and compares them. All of the work happens in this
 * constructor and the results are printed to standard output: a tab-separated totals line,
 * followed by the correlation between the two datasets' minor allele frequencies and between
 * their HWEP values.
 *
 * <p>NOTE(review): this copy of the method is damaged. Two {@code for} statements below are
 * cut off mid-expression, so the loop bodies, the else-branch of the size check and the
 * declarations of several locals used further down (snp1, snp2, loader1, loader2, maf1, maf2,
 * hwep1, hwep2, q and the counters) are not visible here. Restore the missing text from the
 * upstream source before building.
 *
 * @param dataset1 location passed to {@code TriTyperGenotypeData.load} for the first dataset
 * @param dataset1Name label for the first dataset; not referenced in the visible lines
 * @param dataset2 location passed to {@code TriTyperGenotypeData.load} for the second dataset
 * @param dataset2Name label for the second dataset; not referenced in the visible lines
 * @throws IOException declared by the constructor; presumably raised while loading or reading
 *         the datasets (confirm against the TriTyper classes)
 */
public CompareImputedSNPsToRealSNPs(String dataset1, String dataset1Name, String dataset2, String dataset2Name) throws IOException {
// Load both datasets from the given locations.
ggDataset1 = new TriTyperGenotypeData();
ggDataset1.load(dataset1);
ggDataset2 = new TriTyperGenotypeData();
ggDataset2.load(dataset2);
// Added because SNPs may be missing from either dataset.
ArrayList snpsTmp = new ArrayList();
// Branch on which dataset lists fewer SNPs: the longer SNP list goes into a HashSet
// (testSnps) and the shorter one is taken as the array to walk.
// Presumably the walk collects the SNPs present in both into snpsTmp; the loop body is
// missing from this copy, so confirm against the upstream source.
if (ggDataset1.getSNPs().length < ggDataset2.getSNPs().length) {
List list = Arrays.asList(ggDataset2.getSNPs());
HashSet testSnps = new HashSet(list);
// The List view is not needed once the set has been built.
list = null;
String[] snps = ggDataset1.getSNPs();
// NOTE(review): the next line is truncated. Everything between the loop condition and the
// start of the mirrored branch (which uses dataset1 for the set and dataset2 for the
// array) has been lost.
for(int i=0; i list = Arrays.asList(ggDataset1.getSNPs());
HashSet testSnps = new HashSet(list);
list = null;
String[] snps = ggDataset2.getSNPs();
// NOTE(review): the next line is truncated as well. The rest of this loop, the set-up of
// the loaders, lists and counters, and the start of the per-SNP comparison are missing.
// The tail "0 && concordant == 0){" appears to belong to the commented-out check below.
for(int i=0; i 0 && concordant == 0){
// System.out.println("0 Concordant!");
// }
}
// Record the MAF pair only when both values are real numbers (neither is NaN).
if(!Double.isNaN(snp1.getMAF())&& !Double.isNaN(snp2.getMAF())){
maf1.add(snp1.getMAF());
maf2.add(snp2.getMAF());
}
// HWEP values are recorded without a NaN check.
hwep1.add(snp1.getHWEP());
hwep2.add(snp2.getHWEP());
// System.out.println("Num shared: "+ concordant +" / "+ called);
// Done with this SNP pair; clear the genotypes held by both SNP objects.
snp1.clearGenotypes();
snp2.clearGenotypes();
}
}
// Progress message each time the counter q reaches a multiple of 10000.
q++;
if (q % 10000 == 0) {
System.out.println(q + "snps parsed");
}
}
// Close both loaders before reporting.
loader1.close();
loader2.close();
// Totals line (tab-separated): total called, total concordant, concordant / called,
// nrIncompatible, nrAT.
System.out.println("Total called: " + numTotalCalled + "\t" + numTotalConcordant + "\t" + ((double) numTotalConcordant / numTotalCalled) + "\t" + nrIncompatible + "\t" + nrAT);
// Correlation between the two datasets' recorded MAF values, then between their HWEP values.
System.out.println("Correlations between minor allel frequencies: "+JSci.maths.ArrayMath.correlation(maf1.toArray(), maf2.toArray()));
System.out.println("Correlations between hwep "+JSci.maths.ArrayMath.correlation(hwep1.toArray(), hwep2.toArray()));
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (footer from the source-listing page, not part of the class)