All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eqtlmappingpipeline.metaqtl4.MetaQTL4 Maven / Gradle / Ivy

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package eqtlmappingpipeline.metaqtl4;

import gnu.trove.map.hash.TObjectIntHashMap;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.log4j.Logger;
import org.molgenis.genotype.RandomAccessGenotypeData;
import org.molgenis.genotype.variant.GeneticVariant;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.containers.Pair;
import umcg.genetica.containers.Triple;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.util.ChrAnnotation;
import umcg.genetica.math.stats.Correlation;

/**
 * MetaQTL4 - Experimental Skunkworx edition
 *
 * @author harmjan
 */
public class MetaQTL4 {

    private Set traitsToInclude = null;
    private Set variantsToInclude = null;
    private Set> genotypeTraitCombinations = null;
    private final MetaQTL4Settings m_settings;
    private MetaQTL4TraitAnnotation traitAnnotation;
    private MetaQTL4Dataset[] datasets;
    private Integer[][] traitIndex;
    private ArrayList availableTraits;
    private TObjectIntHashMap availableTraitsHash; //int as value
    private GeneticVariant[][] geneticVariantIndex;
    private static final Logger LOG = Logger.getLogger(MetaQTL4.class);

    public MetaQTL4(MetaQTL4Settings settings) throws IOException, Exception {
        LOG.info("WARNING: MetaQTL4 is experimental code!");
        m_settings = settings;
        initialize();
        run();
    }

    public MetaQTL4(String settings, String replaceText, String testToReplaceWith) throws IOException, Exception {
        LOG.warn("WARNING: MetaQTL4 is experimental code!");
        m_settings = new MetaQTL4Settings(settings, replaceText, testToReplaceWith);
        initialize();
        run();
    }

    public final void initialize() throws IOException {

        // load the probe annotation (only for the platforms that are loaded)..
        HashSet platforms = new HashSet();
        for (int d = 0; d < m_settings.getNrDatasets(); d++) {
            platforms.add(m_settings.getDatasetSettings().get(d).getTraitPlatform());
        }
        traitAnnotation = new MetaQTL4TraitAnnotation(m_settings.getProbeAnnotationFile(), platforms);

        if (m_settings.getConfineProbeFile() != null) {
            if (!Gpio.existsAndReadable(m_settings.getConfineProbeFile())) {
                throw new IOException("Failed to read file with probes to include: " + m_settings.getConfineProbeFile().getAbsolutePath());
            }
            traitsToInclude = loadMetaTraitList(m_settings.getConfineProbeFile());
        }

        if (m_settings.getConfineSNPFile() != null) {
            if (!Gpio.existsAndReadable(m_settings.getConfineSNPFile())) {
                throw new IOException("Failed to read file with SNPs to include: " + m_settings.getConfineSNPFile().getAbsolutePath());
            }
            TextFile tf = new TextFile(m_settings.getConfineSNPFile(), TextFile.R);
            variantsToInclude = tf.readAsSet(0, TextFile.tab);
            tf.close();
        }

        // load list of specific SNP-trait combinations
        if (m_settings.getSNPProbeConfineFile() != null) {
            if (!Gpio.existsAndReadable(m_settings.getSNPProbeConfineFile())) {
                throw new IOException("Failed to read file with SNP Probe combinations to include: " + m_settings.getSNPProbeConfineFile().getAbsolutePath());
            }
            Triple, HashSet, HashSet>> combinations = loadgenotypeMarkerCombinations(m_settings.getSNPProbeConfineFile());
            variantsToInclude = combinations.getLeft();
            traitsToInclude = combinations.getMiddle();
            genotypeTraitCombinations = combinations.getRight();
        }

        // PROBE FILTER...
        // TODO: there is one exception for the probe filter, 
        // which is when all the datasets are on the same platform. 
        // will implement this at a later stage, or not at all.
        LOG.info("Running probe filter");
        runProbeFilter();

        // load the datasets..
        datasets = new MetaQTL4Dataset[m_settings.getNrDatasets()];
        if (datasets.length == 0) {
            throw new IllegalStateException("Error: no dataset information provided.");
        }
        for (int d = 0; d < datasets.length; d++) {
            HashSet platformSpecificProbesToInclude = null;
            if (traitsToInclude != null) {
                String platform = m_settings.getDatasetSettings().get(d).getTraitPlatform();
                Integer platformId = traitAnnotation.getPlatformId(platform);
                platformSpecificProbesToInclude = new HashSet();
                for (MetaQTL4MetaTrait metaprobe : traitsToInclude) {
                    String platformProbe = metaprobe.getPlatformIds()[platformId];
                    if (platformProbe != null) {
                        platformSpecificProbesToInclude.add(platformProbe);
                    }
                }
            }

            datasets[d] = new MetaQTL4Dataset(m_settings.getDatasetSettings().get(d), platformSpecificProbesToInclude);
            if (m_settings.isPerformParametricAnalysis()) {
                datasets[d].rankTraitData(m_settings.isEqualRankForTies());
            }
        }

        // create probe index
        LOG.info("Creating trait index");
        createTraitIndex();

        // create SNP index
        LOG.info("Create SNP index");
        createSNPIndex();

        // create lookup table for ZScores 
        // TODO: put this in a nice test encapsulation thing
        int nrSamples = 0;
        for (MetaQTL4Dataset dataset : datasets) {
            nrSamples += dataset.getGenotypeToTraitCouplingInt().length;
        }
        LOG.info("Meta-analysis will have " + nrSamples + " samples");
        Correlation.correlationToZScore(nrSamples);
    }

    public final void run() {
        // create threadpool
        LOG.info("Running software!");
        int nrPermutations = m_settings.getNrPermutationsFDR();

        // initialize random seed array
        long[] randomizationSeeds = new long[nrPermutations];
        Random rand = new Random();
        for (int permutation = 0; permutation < nrPermutations; permutation++) {
            randomizationSeeds[permutation] = rand.nextLong();
        }

        // create result thread
        ExecutorService resultPool = Executors.newFixedThreadPool(1);
        CompletionService resultPoolService = new ExecutorCompletionService(resultPool);

        // run threads
        int bufferSize = 100;
        int nrThreads = m_settings.getNrThreads();
        ExecutorService threadPool = Executors.newFixedThreadPool(nrThreads);
        CompletionService pool = new ExecutorCompletionService(threadPool);
        int start = 0;
        int stop = geneticVariantIndex.length;
        MetaQTL4ExecutionTask task = new MetaQTL4ExecutionTask(nrThreads, randomizationSeeds, availableTraits, availableTraitsHash, datasets, geneticVariantIndex, m_settings, traitAnnotation, traitIndex, traitsToInclude, variantsToInclude, start, stop, bufferSize, resultPoolService);
        
//        MetaQTL4CorrelationTask task = new MetaQTL4CorrelationTask(nrThreads, distributionSize, randomizationSeeds, availableTraits, availableTraitsHash, datasets, geneticVariantIndex, m_settings, traitAnnotation, traitIndex, traitsToInclude, variantsToInclude, 1);
        pool.submit(task);

        int returned = 0;
        while (returned < nrThreads) {
            try {
                Boolean result = pool.take().get();
                if (result != null) {
                    if(result){
                    returned++;    
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        // close threadpool
        threadPool.shutdown();

        // perform FDR estimation
        // shutdown
    }

    private Set loadMetaTraitList(File location) throws IOException {
        TextFile tf = new TextFile(location, TextFile.R);
        Set list = tf.readAsSet(0, TextFile.tab);

        Set traits = new HashSet();
        for (String s : list) {
            MetaQTL4MetaTrait t = traitAnnotation.getMetaTraitNameToObj().get(s);
            if (t != null) {
                traits.add(t);
            }
        }

        tf.close();
        return traits;
    }

    private Triple, HashSet, HashSet>> loadgenotypeMarkerCombinations(File file) throws IOException {
        HashSet> pairs = new HashSet>();
        HashSet snps = new HashSet();
        HashSet probes = new HashSet();
        TextFile tf = new TextFile(file, TextFile.R);
        String[] elems = tf.readLineElems(TextFile.tab);
        while (elems != null) {
            String snp = elems[0];
            String probe = elems[1];

            MetaQTL4MetaTrait t = traitAnnotation.getMetaTraitNameToObj().get(probe);
            if (t != null) {
                Pair p = new Pair(snp, t);
                pairs.add(p);
                snps.add(snp);
                probes.add(t);
            }
            elems = tf.readLineElems(TextFile.tab);
        }
        tf.close();

        return new Triple, HashSet, HashSet>>(snps, probes, pairs);
    }

    private void runProbeFilter() {
        // TODO: improve this code..
        // if we confine to a single chromosome, throw out all the other probes...
        if (m_settings.getConfineToProbesThatMapToChromosome() != null) {
            HashSet finalTraitList = new HashSet();
            if (traitsToInclude != null) {
                for (MetaQTL4MetaTrait metaprobe : traitsToInclude) {
                    String chr = metaprobe.getChr();
                    String chrToInclude = "" + m_settings.getConfineToProbesThatMapToChromosome();
                    if (chr.equals(chrToInclude) && (!m_settings.isExpressionDataLoadOnlyProbesThatMapToChromosome() || (m_settings.isExpressionDataLoadOnlyProbesThatMapToChromosome() && MetaQTL4MetaTrait.mapsToChromosome()))) {
                        finalTraitList.add(metaprobe);
                    }
                }
            } else {
                MetaQTL4MetaTrait[] metaProbes = traitAnnotation.getMetatraits().toArray(new MetaQTL4MetaTrait[0]);
                for (MetaQTL4MetaTrait metaprobe : metaProbes) {
                    String chr = metaprobe.getChr();
                    String chrToInclude = "" + m_settings.getConfineToProbesThatMapToChromosome();
                    if (chr.equals(chrToInclude) && (!m_settings.isExpressionDataLoadOnlyProbesThatMapToChromosome() || (m_settings.isExpressionDataLoadOnlyProbesThatMapToChromosome() && MetaQTL4MetaTrait.mapsToChromosome()))) {
                        finalTraitList.add(metaprobe);
                    }
                }
            }
            traitsToInclude = finalTraitList;

            if (genotypeTraitCombinations != null) {
                // find out whether there are some snp-trait combo's that we cannot test now..
                HashSet> finalgenotypetraitpairs = new HashSet>();
                for (Pair p : genotypeTraitCombinations) {
                    MetaQTL4MetaTrait probe = p.getRight();
                    if (traitsToInclude.contains(probe)) {
                        finalgenotypetraitpairs.add(p);
                    }
                }
                genotypeTraitCombinations = finalgenotypetraitpairs;
            }
        }

        if (traitsToInclude != null && traitsToInclude.isEmpty()) {
            throw new IllegalStateException("No traits remain after filtering.");
        }

        if (genotypeTraitCombinations != null && genotypeTraitCombinations.isEmpty()) {
            throw new IllegalStateException("No snp-trait combinations remain after filtering.");
        }
    }

    // create a map from metaProbeId to dataset probes
    private void createTraitIndex() {
        // link them together in an index
        HashSet tmpAvailableTraits = new HashSet();
        for (int d = 0; d < datasets.length; d++) {
            String platform = m_settings.getDatasetSettings().get(d).getTraitPlatform();
            Integer platformId = traitAnnotation.getPlatformId(platform);
            String[] availableTraitsInDataset = datasets[d].getTraits();
            LOG.info(m_settings.getDatasetSettings().get(d).getName() + " has " + availableTraitsInDataset.length + " traits on platform " + platform + " (" + platformId + ")");
            for (String trait : availableTraitsInDataset) {
                MetaQTL4MetaTrait metatrait = traitAnnotation.getTraitForPlatformId(platformId, trait);
                if (metatrait != null) {
                    String chr = metatrait.getChr();
                    if (m_settings.isCisAnalysis() && m_settings.isTransAnalysis()) {
                        // include all traits when performing a cis+trans analysis
                        tmpAvailableTraits.add(metatrait);
                    } else {
                        // otherwise, only include probes with a proper  annotation.
                        // should we include ChrY traits?
                        byte chrByte = ChrAnnotation.parseChr(chr);
                        if (chr.equals("X") || (chrByte < 23 && chrByte > 0)) {
                            tmpAvailableTraits.add(metatrait);
                        }
                    }
                }
            }
        }

        availableTraits = new ArrayList(tmpAvailableTraits);
        availableTraitsHash = new TObjectIntHashMap();
        traitIndex = new Integer[datasets.length][availableTraits.size()];

        for (int ds = 0; ds < datasets.length; ds++) {
            Integer platformId = traitAnnotation.getPlatformId(m_settings.getDatasetSettings().get(ds).getTraitPlatform());
            for (int i = 0; i < availableTraits.size(); i++) {
                MetaQTL4MetaTrait trait = availableTraits.get(i);
                availableTraitsHash.put(trait, i);
                String probeName = trait.getPlatformIds()[platformId];
                if (probeName != null) {
                    traitIndex[ds][i] = datasets[ds].getTraitId(probeName);
                }
            }
        }

        LOG.info(availableTraits.size() + " traits available to test.");

    }

    private void createSNPIndex() {

        if (traitIndex == null) {
            createTraitIndex();
        }

        // if this is a cis-eQTL analysis, get all the unique variants within the probes vicinity
        LOG.info("Indexing SNPs");
        int distance = m_settings.getCiseQTLAnalysMaxSNPProbeMidPointDistance();
        Set> uniquePositions = new HashSet>();

        // at a later stage, we may want to include gene-begin and gene-end as well..
        if (m_settings.isCisAnalysis() && !m_settings.isTransAnalysis()) {
            LOG.info("Performing cis-eQTL analysis, so keeping only SNPs within " + m_settings.getCiseQTLAnalysMaxSNPProbeMidPointDistance());
            LOG.info("");
            int tctr = 0;
            HashMap, Integer> allPositions = new HashMap, Integer>();
            for (int d = 0; d < datasets.length; d++) {
                RandomAccessGenotypeData data = datasets[d].getGenotypeData();
                ProgressBar pb = new ProgressBar(availableTraits.size(), "Dataset " + d);
                for (MetaQTL4MetaTrait t : availableTraits) {
                    String chr = t.getChr();
                    int midpoint = t.getChrMidpoint();

                    Iterable variants = data.getVariantsByRange(chr, midpoint - distance, midpoint + distance);
                    for (GeneticVariant variant : variants) {
                        String primaryId = variant.getPrimaryVariantId();
                        if (variantsToInclude == null || variantsToInclude.contains(primaryId)) {
                            // we don't need to check the variant chromosome.. This was done already when creating the trait index
                            Pair position = new Pair(variant.getSequenceName(), variant.getStartPos());
                            Integer ctr = allPositions.get(position);
                            if (ctr == null) {
                                ctr = 1;
                            } else {
                                ctr++;
                            }
                            allPositions.put(position, ctr);
                        }
                    }

                    pb.iterate();
                    tctr++;
                }
                pb.close();
            }

            LOG.info(allPositions.size() + " variants amongst all datasets.");

            if (m_settings.getConfineSNPsToSNPsPresentInAllDatasets() != null && m_settings.getConfineSNPsToSNPsPresentInAllDatasets()) {
                Set> tmpPositions = new HashSet>();
                for (Pair pair : allPositions.keySet()) {
                    Integer ctr = allPositions.get(pair);
                    if (ctr == datasets.length) {
                        // include snp
                        tmpPositions.add(pair);
                    }
                }
                uniquePositions = tmpPositions;
            } else {
                uniquePositions = allPositions.keySet();
            }
            allPositions = null;

        } else { // if cistrans or trans.. include all the SNPs..
            // per chromosome.. (for huge datasets, this saves alot of Integer objects)
            for (int i = 0; i < 23; i++) {
                String chr = "" + i;
                if (i == 23) {
                    chr = "X";
                }

                // get all the unique positions for this chromosome.
                HashMap allPositions = new HashMap();
                for (int d = 0; d < datasets.length; d++) {
                    RandomAccessGenotypeData data = datasets[d].getGenotypeData();
                    Iterable variants = data.getSequenceGeneticVariants(chr);
                    for (GeneticVariant variant : variants) {
                        String primaryId = variant.getPrimaryVariantId();
                        if (variantsToInclude == null || variantsToInclude.contains(primaryId)) {
                            // this time we do need to check the chromosome, etc
                            String variantSequence = variant.getSequenceName();
                            if (variantSequence.equals(chr)) {
                                Integer position = variant.getStartPos();
                                if (position > 0) {
                                    Integer ctr = allPositions.get(position);
                                    if (ctr == null) {
                                        ctr = 1;
                                    } else {
                                        ctr++;
                                    }
                                    allPositions.put(position, ctr);
                                }
                            }
                        }
                    }
                }

                for (Integer position : allPositions.keySet()) {
                    Integer ctr = allPositions.get(position);
                    if (m_settings.getConfineSNPsToSNPsPresentInAllDatasets() != null
                            && (m_settings.getConfineSNPsToSNPsPresentInAllDatasets() && ctr == datasets.length) || (!m_settings.getConfineSNPsToSNPsPresentInAllDatasets())) {
                        uniquePositions.add(new Pair(chr, position));
                    }
                }
            }
        }
        // ready to index
        int numberFinalPositions = uniquePositions.size();
        if (numberFinalPositions == 0) {
            System.err.println("Error: no SNPs found to test");
        }
        LOG.info("Creating final index");
        geneticVariantIndex = new GeneticVariant[datasets.length][numberFinalPositions];
        ProgressBar pb2 = new ProgressBar(datasets.length * numberFinalPositions);
        for (int dataset = 0; dataset < datasets.length; dataset++) {
            int ctr = 0;
            RandomAccessGenotypeData data = datasets[dataset].getGenotypeData();
            for (Pair pair : uniquePositions) {
                geneticVariantIndex[dataset][ctr] = data.getSnpVariantByPos(pair.getLeft(), pair.getRight());
                ctr++;
                pb2.iterate();
            }
        }
        pb2.close();
        LOG.info("Done");
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy