All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.walkers.coverage.DepthOfCoverageStats Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.walkers.coverage;

import org.broadinstitute.hellbender.utils.BaseUtils;

import java.util.*;

/**
 * A class for storing summarized coverage statistics for DepthOfCoverage.
 */
public class DepthOfCoverageStats {

    ////////////////////////////////////////////////////////////////////////////////////
    // STANDARD DATA
    ////////////////////////////////////////////////////////////////////////////////////

    private Map granularHistogramBySample; // holds the counts per each bin
    private Map totalCoverages; // holds total coverage per sample
    private int[] binLeftEndpoints; // describes the left endpoint for each bin
    private long[][] locusCoverageCounts; // holds counts of number of bases with >=X samples at >=Y coverage
    private boolean tabulateLocusCounts = false;
    private long nLoci; // number of loci seen
    private long totalDepthOfCoverage;
    private boolean includeDeletions;

    ////////////////////////////////////////////////////////////////////////////////////
    // TEMPORARY DATA ( not worth re-instantiating )
    ////////////////////////////////////////////////////////////////////////////////////

    private int[] locusHistogram; // holds a histogram for each locus; reset after each update() call
    private int totalLocusDepth; // holds the total depth of coverage for each locus; reset after each update() call

    ////////////////////////////////////////////////////////////////////////////////////
    // INITIALIZATION METHODS
    ////////////////////////////////////////////////////////////////////////////////////

    /**
     *
     * @param leftEndpoints Array of the left endpoints for desired coverage bins to track. See {@link}
     * @param includeDeletions whether to count deletions at a site as coverage in the total
     * @param dontComputeLocusTable if true then skip calculations for per-locus output tables
     */
    public DepthOfCoverageStats(int[] leftEndpoints, boolean includeDeletions, boolean dontComputeLocusTable) {
        this.binLeftEndpoints = leftEndpoints;
        this.granularHistogramBySample = new HashMap<>();
        this.totalCoverages = new HashMap<>();
        this.nLoci = 0;
        this.totalLocusDepth = 0;
        this.totalDepthOfCoverage = 0;
        this.includeDeletions = includeDeletions;
        if ( ! dontComputeLocusTable ) {
            initializeLocusCounts();
        }
    }

    // Adds to granularHistogramBySample a per-locus histogram for the given sample to track
    void initializeSample(String sample) {
        if ( granularHistogramBySample.containsKey(sample) ) {
            return;
        }
        // Note: we add 1 here because bin left endpoints has values for ever bin except for the first one (which covers everything below the defined lower bound)
        granularHistogramBySample.put(sample, new long[this.binLeftEndpoints.length+1]);
        totalCoverages.put(sample,0L);
    }

    // Create the per-locus coverage counting arrays and histogram
    void initializeLocusCounts() {
        // Note: we add 1 here because bin left endpoints has values for ever bin except for the first one (which covers everything below the defined lower bound)
        locusCoverageCounts = new long[granularHistogramBySample.size()][binLeftEndpoints.length+1];
        locusHistogram = new int[binLeftEndpoints.length+1];
        tabulateLocusCounts = true;
    }

    ////////////////////////////////////////////////////////////////////////////////////
    // UPDATE METHODS
    ////////////////////////////////////////////////////////////////////////////////////

    private void updateDepths(Map depthBySample) {
        int coverageThresholdIndex;
        for ( String sample : granularHistogramBySample.keySet() ) {
            if ( depthBySample.containsKey(sample) ) {
                coverageThresholdIndex = updateSample(sample,depthBySample.get(sample));
                totalLocusDepth += depthBySample.get(sample);
            } else {
                coverageThresholdIndex = updateSample(sample,0);
            }

            if ( tabulateLocusCounts ) {
                for ( int i = 0; i <= coverageThresholdIndex; i ++ ) {
                    locusHistogram[i]++;
                }
            }
        }
        updateLocusCounts(locusHistogram);

        nLoci++;
        totalDepthOfCoverage += totalLocusDepth;
        totalLocusDepth = 0;
    }

    public void update(Map countsBySample) {
        if ( countsBySample == null ) {
            this.updateDepths(new HashMap<>(1));
            return;
        }
        final HashMap depthBySample = new HashMap();
        for ( Map.Entry entry : countsBySample.entrySet() ) {
            int total = 0;
            int[] counts = entry.getValue();
            for ( byte base : BaseUtils.BASES_EXTENDED ) {
                if ( includeDeletions || base != BaseUtils.Base.D.base ) { // note basesAreEqual assigns TRUE to (N,D) as both have simple index -1
                    total += counts[BaseUtils.extendedBaseToBaseIndex(base)];
                }
            }
            depthBySample.put(entry.getKey(),total);
        }
        
        this.updateDepths(depthBySample);
    }

    // returns the index of the last bin that the given depth exceeds the left endpoint of
    private int updateSample(final String sample, final int depth) {
        totalCoverages.put(sample,totalCoverages.get(sample)+depth);

        long[] granularBins = granularHistogramBySample.get(sample);
        for ( int b = 0; b < binLeftEndpoints.length; b ++ ) {
            if ( depth < binLeftEndpoints[b] ) {
                granularBins[b]++;
                return b;
            }
        }

        granularBins[binLeftEndpoints.length]++; // greater than all left-endpoints
        return binLeftEndpoints.length;
    }

    public void merge(DepthOfCoverageStats newStats) {
        this.mergeSamples(newStats);
        if ( this.tabulateLocusCounts && newStats.tabulateLocusCounts ) {
            this.mergeLocusCounts(newStats.getLocusCounts());
        }
        nLoci += newStats.getTotalLoci();
        totalDepthOfCoverage += newStats.getTotalCoverage();
    }

    private void mergeSamples(DepthOfCoverageStats otherStats) {
        Map otherHistogram = otherStats.getHistograms();
        Map otherMeans = otherStats.getMeans();
        for ( String s : granularHistogramBySample.keySet() ) {
            long[] internalCounts = granularHistogramBySample.get(s);
            long[] externalCounts = otherHistogram.get(s);
            for ( int b = 0; b < internalCounts.length; b++ ) {
                internalCounts[b] += externalCounts[b];
            }

            this.totalCoverages.put(s, this.totalCoverages.get(s) + otherStats.totalCoverages.get(s));
        }
    }

    private void mergeLocusCounts( long[][] otherCounts ) {
        for ( int a = 0; a < locusCoverageCounts.length; a ++ ) {
            for ( int b = 0; b < locusCoverageCounts[0].length; b ++ ) {
                locusCoverageCounts[a][b] += otherCounts[a][b];
            }
        }
    }

    /*
     * Update locus counts -- takes an array in which the number of samples
     * with depth ABOVE [i] is held. So if the bin left endpoints were 2, 5, 10
     * then we'd have an array that represented:
     * [# samples with depth 0 - inf], [# samples with depth 2 - inf],
     * [# samples with depth 5 - inf], [# samples with depth 10-inf];
     *
     * this is
     * @argument cumulativeSamplesByDepthBin - see above
     */
    private void updateLocusCounts(int[] cumulativeSamplesByDepthBin) {
        if ( tabulateLocusCounts ) {
            for ( int bin = 0; bin < cumulativeSamplesByDepthBin.length; bin ++ ) {
                int numSamples = cumulativeSamplesByDepthBin[bin];
                for ( int i = 0; i < numSamples; i ++ ) {
                    locusCoverageCounts[i][bin]++;
                }

                cumulativeSamplesByDepthBin[bin] = 0; // reset counts in advance of next update()
            }
        }
    }

    ////////////////////////////////////////////////////////////////////////////////////
    // ACCESSOR METHODS
    ////////////////////////////////////////////////////////////////////////////////////

    // Return the per-sample histograms of depths
    public Map getHistograms() {
        return granularHistogramBySample;
    }

    // Return array with the counts of number of bases with >=X samples at >=Y coverage
    public long[][] getLocusCounts() {
        return locusCoverageCounts;
    }

    // Return the key used to interpret the histogram bin edges
    public int[] getEndpoints() {
        return binLeftEndpoints;
    }

    public Map getMeans() {
        HashMap means = new HashMap();
        for ( String s : granularHistogramBySample.keySet() ) {
            means.put(s,( (double)totalCoverages.get(s))/( (double) nLoci ));
        }

        return means;
    }

    // Returns the total coverage (sum of coverage at each base) per sample
    public Map getTotals() {
        return totalCoverages;
    }

    // Returns the number of loci counted
    public long getTotalLoci() {
        return nLoci;
    }

    /**
     * @return  An unordered set of all the samples covered by this stats object
     */
    public Set getAllSamples() {
        return granularHistogramBySample.keySet();
    }

    // Returns the mean of coverage for all loci seen
    public double getTotalMeanCoverage() {
        return ( (double) totalDepthOfCoverage )/ ( (double) nLoci );
    }

    // Returns the total coverage across all samples (sum of coverage at each base)
    public long getTotalCoverage() {
        return totalDepthOfCoverage;
    }

    /**
     * Return a list of the cumulative depth counts as a perportion of total number of loci
     * @param sample
     * @return
     */
    public double[] getCoverageProportions(String sample) {
        long[] hist = granularHistogramBySample.get(sample);
        double[] distribution = new double[hist.length];
        long count = 0;
        for ( int i = hist.length-1; i >= 0; i -- ) {
            count += hist[i];
            distribution[i] = ( (double) count) / nLoci;
        }

        return distribution;
    }

    public int value2bin(int value) {
        for ( int index = 0; index < binLeftEndpoints.length; index++ ) {
            if ( binLeftEndpoints[index] > value ) {
                return index;
            }
        }

        return binLeftEndpoints.length-1;
    }

    /**
     * Returns a count of the number of loci summarized in this object
     */
    public long getNumberOfLociCovered() {
        return nLoci;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy