All Downloads are FREE. Search and download functionality uses the official Maven repository.

umcg.genetica.gwas.Dependifier Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package umcg.genetica.gwas;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.SNPLoader;
import umcg.genetica.io.trityper.TriTyperGenotypeData;
import umcg.genetica.io.trityper.util.DetermineLD;

/**
 *
 * @author harm-jan
 */
public class Dependifier {

    private final SNPLoader loader;
    private final TriTyperGenotypeData genotypeData;
    private final DetermineLD ldCalc = new DetermineLD();
    private final String[] allSNPsInReference;
    private HashSet haystack;

    public Dependifier(String datadir) throws IOException {
        genotypeData = new TriTyperGenotypeData();
        genotypeData.load(datadir);
        loader = genotypeData.createSNPLoader();
        allSNPsInReference = genotypeData.getSNPs();
    }

    public Dependifier(TriTyperGenotypeData dataset, SNPLoader loader) {
        this.loader = loader;
        this.genotypeData = dataset;
        allSNPsInReference = genotypeData.getSNPs();
    }

    public HashMap> dependifyReturnProxiesPerSNP(String[] inputsnps, double proxyldthreshold, int maxproxydistance) throws IOException {
        HashMap> proxies = new HashMap>();
        for (String snp : inputsnps) {
            // include the query itself as well in the final proxy list..
            proxies.put(snp, findProxiesForSNP(snp, proxyldthreshold, maxproxydistance));
        }
        return proxies;
    }

    public HashSet dependify(String[] inputsnps, double proxyldthreshold, int maxproxydistance) throws IOException {
        HashSet proxies = new HashSet();
        for (String snp : inputsnps) {
            // include the query itself as well in the final proxy list..
            proxies.addAll(findProxiesForSNP(snp, proxyldthreshold, maxproxydistance));
        }
        return proxies;
    }

    // find SNPs in needles that are in LD with haystack
    public HashSet dependify(String[] needles, String[] haystack, double proxyldthreshold, int maxproxydistance) throws IOException {
        HashSet proxies = new HashSet();
        this.haystack = new HashSet();
        this.haystack.addAll(Arrays.asList(haystack));
        for (String snp : needles) {
            proxies.addAll(findProxiesForSNP(snp, proxyldthreshold, maxproxydistance));
        }
        this.haystack = null; // clean this up after use..
        return proxies;
    }

    public HashMap> dependifyReturnProxiesPerSNP(String[] needles, String[] haystack, double proxyldthreshold, int maxproxydistance) throws IOException {
        HashMap> proxies = new HashMap>();
        this.haystack = new HashSet();
        this.haystack.addAll(Arrays.asList(haystack));
        for (String snp : needles) {
            proxies.put(snp, findProxiesForSNP(snp, proxyldthreshold, maxproxydistance));
        }
        this.haystack = null; // clean this up after use..
        return proxies;
    }

    public HashSet findProxiesForSNP(String snp, double proxyldthreshold, int maxproxydistance) throws IOException {
        HashSet proxies = new HashSet();
        Integer snpIdInReference = genotypeData.getSnpToSNPId().get(snp);
        if (snpIdInReference != -9) {

            SNP snpObj = genotypeData.getSNPObject(snpIdInReference);
            byte chr = snpObj.getChr();
            int chrPos = snpObj.getChrPos();

            // if the SNP has a valid mapping..
            if (chr > 0 && chrPos > 0) {

                HashSet allSNPsWithinMaxDistance = new HashSet();
                for (int i = 0; i < allSNPsInReference.length; i++) {
                    // check whether we want to limit our search within a preset list of SNPs
                    if (haystack == null || haystack.contains(allSNPsInReference[i])) {
                        byte chr2 = genotypeData.getChr(i);
                        if (chr == chr2) {
                            int chrPos2 = genotypeData.getChrPos(i);
                            if (Math.abs(chrPos2 - chrPos) < maxproxydistance) {
                                allSNPsWithinMaxDistance.add(i);
                            }
                        }
                    }
                }
                // if there are any snps to test within maxdistance
                if (!allSNPsWithinMaxDistance.isEmpty()) {
                    loader.loadGenotypes(snpObj);
                    for (Integer i : allSNPsWithinMaxDistance) {
                        SNP snpObj2 = genotypeData.getSNPObject(i);
                        loader.loadGenotypes(snpObj2);
                        if (snpObj.getMAF() > 0 && snpObj2.getMAF() > 0) {
                            double r2 = ldCalc.getRSquared(snpObj, snpObj2, genotypeData, DetermineLD.RETURN_R_SQUARED, DetermineLD.INCLUDE_CASES_AND_CONTROLS, false);
                            if (!Double.isNaN(r2) && r2 >= proxyldthreshold) {
                                proxies.add(snpObj2.getName());
                            }
                        }
                        snpObj2.clearGenotypes();
                    }
                    snpObj.clearGenotypes();
                }
            }
            snpObj = null;
        }
        return proxies;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy