All Downloads are FREE. Search and download functionalities are using the official Maven repository.

imputationtool.postprocessing.CompareImputedSNPsToRealSNPs Maven / Gradle / Ivy

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package imputationtool.postprocessing;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.collections.primitives.ArrayDoubleList;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.SNPLoader;
import umcg.genetica.io.trityper.TriTyperGenotypeData;
import umcg.genetica.io.trityper.util.BaseAnnot;

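/**
 * Loads two TriTyper genotype datasets, compares them, and prints summary
 * figures (called/concordant totals and the correlations of minor allele
 * frequencies and HWE p-values between the datasets) to standard output.
 *
 * @author harmjan
 */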
public class CompareImputedSNPsToRealSNPs {

    private final TriTyperGenotypeData ggDataset1;
    private final TriTyperGenotypeData ggDataset2;  
    
    /**
     * Command-line entry point: runs a single, hard-coded comparison
     * ("original imputed" vs "omni imputed 100g"). The {@code args} array is
     * not read.
     *
     * <p>Figures noted by the original author next to earlier comparisons
     * (the source does not state what the figure measures):
     * <pre>
     *   LiverOmni        vs LiverCyto           0.9997752239290337
     *   unimputed merged vs LiverCyto           0.6492976314532056
     *   unimputed merged vs LiverOmni           0.3528032918756224
     *   LiverCyto        vs original imputed    0.9961702580911673
     *   LiverOmni        vs original imputed    0.9982103900136859
     *   LiverCyto        vs omni imputed 100g   0.9956201693043798
     *   LiverOmni        vs omni imputed 100g   0.9959821144437336
     *   original imputed vs omni imputed 100g   0.9893960960146694
     * </pre>
     *
     * @param args ignored
     * @throws IOException propagated from the comparison constructor
     */
    public static void main(String[] args) throws IOException {
        final String originalImputedDir = "D:\\UMCG\\SAT-VAT-Liver-Muscle-ImputeTriTyper\\";
        final String omniImputed100gDir = "D:\\UMCG\\SAT-VAT-Liver-Muscle-ImputeTriTyper\\Liver\\LiverImputed100G\\";

        // All the work happens inside the constructor; the instance itself is not needed afterwards.
        new CompareImputedSNPsToRealSNPs(originalImputedDir, "Imputed_1", omniImputed100gDir, "Imputed");
    }
    
    /**
     * Runs the comparison on a fixed pair of Rotterdam Study dataset
     * directories (labelled "Unimputed" and "Imputed"). The {@code args}
     * array is not read.
     *
     * @param args ignored
     * @throws IOException propagated from the comparison constructor
     */
    public void run(String[] args) throws IOException {
        final String unimputedDir = "/Data/GeneticalGenomicsDatasets/RotterdamStudy/TriTyper773Samples-GenotypesQCedRotterdam/";
        final String imputedDir = "/Data/GeneticalGenomicsDatasets/RotterdamStudy/MachImputed/TriTyperFixed/";

        // The constructor performs the whole comparison; the new instance is discarded.
        new CompareImputedSNPsToRealSNPs(unimputedDir, "Unimputed", imputedDir, "Imputed");
    }

    /**
     * Loads the two TriTyper datasets and performs the whole comparison,
     * printing the results to standard output.
     *
     * NOTE(review): the text of this constructor is incomplete in the
     * available source. Two {@code for} headers below are cut off mid-line,
     * and the code that declares the loaders, the counters and the MAF/HWEP
     * lists, as well as the per-SNP comparison itself, is not visible. The
     * comments here describe only what the remaining lines show.
     *
     * @param dataset1 location passed to {@code TriTyperGenotypeData.load} for the first dataset
     * @param dataset1Name label for the first dataset (no use is visible in the remaining code)
     * @param dataset2 location passed to {@code TriTyperGenotypeData.load} for the second dataset
     * @param dataset2Name label for the second dataset (no use is visible in the remaining code)
     * @throws IOException declared by the constructor; presumably raised while loading or reading the datasets
     */
    public CompareImputedSNPsToRealSNPs(String dataset1, String dataset1Name, String dataset2, String dataset2Name) throws IOException {

        ggDataset1 = new TriTyperGenotypeData();
        ggDataset1.load(dataset1);

        ggDataset2 = new TriTyperGenotypeData();
        ggDataset2.load(dataset2);

        //Added because of potentially missing snps in both.
        ArrayList snpsTmp = new ArrayList();
        // When dataset 1 has fewer SNPs: put dataset 2's SNP names in a set and
        // walk over dataset 1's names.
        if (ggDataset1.getSNPs().length < ggDataset2.getSNPs().length) {
            
            List list = Arrays.asList(ggDataset2.getSNPs());
            HashSet testSnps = new HashSet(list);
            list = null;
            String[] snps = ggDataset1.getSNPs();
            
            // NOTE(review): the next line is truncated in the available source. The rest of
            // this loop and the start of the opposite branch (set built from dataset 1,
            // walking dataset 2) appear to be missing -- presumably the loop collects the
            // shared SNPs into snpsTmp; confirm against the original file.
            for(int i=0; i list = Arrays.asList(ggDataset1.getSNPs());
            HashSet testSnps = new HashSet(list);
            list = null;
            String[] snps = ggDataset2.getSNPs();
            
            // NOTE(review): this line is truncated as well; the loop body and the whole
            // per-SNP genotype comparison that should follow are not visible.
            for(int i=0; i 0 && concordant == 0){
//                        System.out.println("0 Concordant!");
//                    }



                    }

                    
                    // Record the MAF pair only when both values are defined, so maf1 and
                    // maf2 stay the same length.
                    if(!Double.isNaN(snp1.getMAF())&& !Double.isNaN(snp2.getMAF())){
                        maf1.add(snp1.getMAF());
                        maf2.add(snp2.getMAF());
                    }
                    
                    
                    // HWEP values are added without a NaN check, unlike the MAF values above.
                    hwep1.add(snp1.getHWEP());
                    hwep2.add(snp2.getHWEP());
                    
//                  System.out.println("Num shared: "+ concordant +" / "+ called);

                    // Done with this SNP pair: clear the genotypes held by both SNP objects.
                    snp1.clearGenotypes();
                    snp2.clearGenotypes();

                }
            }

            q++;

            // Progress message every 10000 iterations.
            if (q % 10000 == 0) {
                System.out.println(q + "snps parsed");
            }
        }

        loader1.close();
        loader2.close();
        // Summary line, tab-separated: total called, total concordant, concordant/called ratio,
        // nrIncompatible, nrAT.
        System.out.println("Total called: " + numTotalCalled + "\t" + numTotalConcordant + "\t" + ((double) numTotalConcordant / numTotalCalled) + "\t" + nrIncompatible + "\t" + nrAT);
        // Correlation between the two datasets of the collected MAF values and of the HWEP values.
        System.out.println("Correlations between minor allel frequencies: "+JSci.maths.ArrayMath.correlation(maf1.toArray(), maf2.toArray()));
        System.out.println("Correlations between hwep "+JSci.maths.ArrayMath.correlation(hwep1.toArray(), hwep2.toArray()));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy