All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.metrics.MultiLevelCollector Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.metrics;

import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import org.broadinstitute.hellbender.exceptions.GATKException;

import java.util.*;

/**
 * MultiLevelCollector handles accumulating Metrics at different MetricAccumulationLevels(ALL_READS, SAMPLE, LIBRARY, READ_GROUP).
 * Based on the accumulationLevels and readGroup records passed to its constructor, MultiLevelCollector
 * will instantiate the number of PerUnitMetricCollector's needed to generate metrics for each of the levels provided.
 *
 * To Use:
 *
 * Instantiate a MultiLevelCollector and call setup(see thoughts about extending MultiLevelCollector below)
 * setup will create the underlying classes that will handle the accumulation level logic.
 * Pass all reads you wish to collect data against to MultiLevelCollector via the acceptRecord method
 * Call finish and use addAllLevelsToFile to add all of the metrics at each accumulation level to the given file.
 *
 * Extend MultiLevelCollector and implement makeArg and makeChildCollector
 * You will most likely want to make a class that extends PerUnitMetricCollector.  This class should do the work of keeping
 * track of values for one specific "accumulation unit" (e.g. for one library, or for one read group depending on what levels
 * you are accumulating at).
 *
 * If a record has any expensive calculations to be done (that don't need to be done differently depending
 * on what sample/library/read group the read is for) then create a container class for the results of these calculations and pass
 * this class as the ARGTYPE of both the PerUnitMetricCollector and MultiLevelCollector.  You can then do these calculations in the makeArg
 * method and they will only be done once per record.
 *
 * @param  The type of metrics being collected
 * @param  If there is are Histograms related to metrics of type  then  is the key value to these Histograms
 * @param  The type of argument passed to individual PerUnitMetricCollector (see SAMRecordMultilevelCollector and PerUnitMetricCollector)
 */
public abstract class MultiLevelCollector, ARGTYPE>  {

    public static final String UNKNOWN = "unknown";
    //The collector that will accept all records (allReads is NULL if !calculateAll)
    private PerUnitMetricCollector allReadCollector;

    //A list of Distributor that is at most length 4, 1 for each (ALL_READS, SAMPLE, LIBRARY, READ_GROUP) accumulation levels
    //these will be listed in the order in which their children would be added to a metric file
    private List outputOrderedDistributors;

    //Convert the current SAMRecord and the ReferenceSequence for that record into an ARGTYPE object
    //see accept record for use
    protected abstract ARGTYPE makeArg(final SAMRecord samRec, final ReferenceSequence refSeq);

    /**
     * Construct a PerUnitMetricCollector with the given arguments.
     * @param sample If aggregating by ALL_READS this will be null, otherwise the sample that will be used to identify
     *               this collector
     * @param library If aggregating by SAMPLE this will be null, otherwise the library that will be used to identify
     *               this collector
     * @param readGroup If aggregating by LIBRARY this will be null, otherwise the readGroup that will be used to identify
     *                  this collector
     * @return A PerUnitMetricCollector parameterized by the given arguments
     */
    protected abstract PerUnitMetricCollector makeChildCollector(final String sample, final String library, final String readGroup);

    //These are exposed here (rather than being encapsulated in the Distributor subclasses below in order
    //to provide subclasses with an explicit point to add initialization (specific to accumulation level) for
    //a PerUnitMetricCollector it is creating
    protected PerUnitMetricCollector makeAllReadCollector() {
        return makeChildCollector(null, null, null);
    }
    protected PerUnitMetricCollector makeSampleCollector(final SAMReadGroupRecord rg) {
        return makeChildCollector(rg.getSample(), null, null);
    }
    protected PerUnitMetricCollector makeLibraryCollector(final SAMReadGroupRecord rg) {
        return makeChildCollector(rg.getSample(), rg.getLibrary(), null);
    }
    protected PerUnitMetricCollector makeReadGroupCollector(final SAMReadGroupRecord rg) {
        return makeChildCollector(rg.getSample(), rg.getLibrary(), rg.getPlatformUnit());
    }

    /**
     * Distributors group PerUnitMetricCollectors based on a AccumulationLevel.  Their structure mimics
     * PerUnitMetricCollectors but instead of adding records to metrics they identify which
     * PerUnitMetricCollector should receive a specific record and distribute records on to the that collector
     *
     * There were will be 0 or 1 Distributors for each of the following MetriAcummulationLevels:
     * ALL_READS, SAMPLE, LIBRARY, READ_GROUP
     */
    private abstract class Distributor {
        //A Map mapping the key for a specific record (as determined by getKey) to the appropriate collector
        private final Map> collectors;

        //Given a SAMReadGroupRecord, return the key that identifies the collector for the corresponding SAMRecord
        protected abstract String getKey(final SAMReadGroupRecord rg);

        //Make a PerUnitMetricCollector for this given Distributor
        protected abstract PerUnitMetricCollector makeCollector(final SAMReadGroupRecord rg);

        protected abstract PerUnitMetricCollector makeUnknownCollector();

        public Distributor(final List rgRecs) {
            collectors = new LinkedHashMap<>();
            for(final SAMReadGroupRecord rg : rgRecs) {
                final String key = getKey(rg);
                if(!collectors.containsKey(key)) {
                    collectors.put(key, makeCollector(rg));
                }
            }
        }

        /** Call finish on each PerUnitMetricCollector in this Aggregate Collector */
        public void finish() {
            for(final PerUnitMetricCollector collector : collectors.values()) {
                collector.finish();
            }
        }

        /** Call acceptRecord(args) on the record collector identified by getKey */
        public void acceptRecord(final ARGTYPE args, final SAMReadGroupRecord rg) {

            String key = UNKNOWN;
            if(rg != null) {
                final String computedKey = getKey(rg);
                if(computedKey != null) {
                    key = computedKey;
                }
            }
            PerUnitMetricCollector collector = collectors.get(key);
            if (collector == null) {
                if (!UNKNOWN.equals(key)) {
                    throw new GATKException("Could not find collector for " + key);
                }
                collector = makeUnknownCollector();
                collectors.put(key, collector);
            }
            collector.acceptRecord(args);
        }

        /** Add all records to the MetricsFile passed in, this will happen in the order they were
         * found in the input ReadGroup records */
        public void addToFile(final MetricsFile file) {
            for(final PerUnitMetricCollector collector : collectors.values()) {
                collector.addMetricsToFile(file);
            }
        }
    }

    /** A dummy Distributor to handle the ALL_READS accumulation level.  No distribution is required
     * since there should only ever be one PerUnitMetricCollector for ALL_READS.
     */
    private class AllReadsDistributor extends Distributor {

        public AllReadsDistributor(final List rgRecs) {
            super(new ArrayList<>());
            makeCollector(null);
        }

        @Override
        protected String getKey(SAMReadGroupRecord rg) {
            return null;
        }

        @Override
        public void acceptRecord(final ARGTYPE args, final SAMReadGroupRecord rg) {
            allReadCollector.acceptRecord(args);
        }

        @Override
        protected PerUnitMetricCollector makeCollector(final SAMReadGroupRecord rg) {
            allReadCollector = makeAllReadCollector();
            return allReadCollector;
        }

        @Override
        protected PerUnitMetricCollector makeUnknownCollector() {
            throw new UnsupportedOperationException("Should not happen");
        }

        @Override
        public void finish() {
            allReadCollector.finish();
        }

        @Override
        public void addToFile(final MetricsFile file) {
            allReadCollector.addMetricsToFile(file);
        }
    }

    //Discriminates between records based on sample name, and calls acceptRecord on the appropriate PerUnitMetricCollectors
    private class SampleDistributor extends Distributor {
        public SampleDistributor(final List rgRecs) {
            super(rgRecs);
        }

        @Override
        protected String getKey(SAMReadGroupRecord rg) {
            return rg.getSample();
        }

        @Override
        protected PerUnitMetricCollector makeCollector(SAMReadGroupRecord rg) {
            return makeSampleCollector(rg);
        }

        @Override
        protected PerUnitMetricCollector makeUnknownCollector() {
            return makeChildCollector(UNKNOWN, null, null);
        }
    }

    //Discriminates between records based on library name, and calls acceptRecord on the appropriate PerUnitMetricCollectors
    private class LibraryDistributor extends Distributor {
        public LibraryDistributor(final List rgRecs) {
            super(rgRecs);
        }

        @Override
        protected String getKey(SAMReadGroupRecord rg) {
            return rg.getLibrary();
        }

        @Override
        protected PerUnitMetricCollector makeCollector(SAMReadGroupRecord rg) {
            return makeLibraryCollector(rg);
        }

        @Override
        protected PerUnitMetricCollector makeUnknownCollector() {
            return makeChildCollector(UNKNOWN, UNKNOWN, null);
        }
    }

    //Discriminates between records based on read group name, and calls acceptRecord on the appropriate PerUnitMetricCollectors
    private class ReadGroupCollector extends Distributor {
        public ReadGroupCollector(final List rgRecs) {
            super(rgRecs);
        }

        @Override
        protected String getKey(SAMReadGroupRecord rg) {
            return rg.getPlatformUnit();
        }

        @Override
        protected PerUnitMetricCollector makeCollector(SAMReadGroupRecord rg) {
            return makeReadGroupCollector(rg);
        }

        @Override
        protected PerUnitMetricCollector makeUnknownCollector() {
            return makeChildCollector(UNKNOWN, UNKNOWN, UNKNOWN);
        }
    }

    /**
     * Use an init method so that overloaded methods in subclasses can pass use parameters that are initialized in their constructor
     * @param accumulationLevels PerUnitMetricCollectors will only be created for the levels identified by accumulationLevels
     * @param samRgRecords PerUnitMetricCollectors will be created for each of the different samples, libraries, and
     *                     readGroups found in the records depending on the accumulationLevels provided
     */
    protected void setup(final Set accumulationLevels, final List samRgRecords) {
        outputOrderedDistributors = new ArrayList<>(4);
        if(accumulationLevels.contains(MetricAccumulationLevel.ALL_READS)) {
            outputOrderedDistributors.add(new AllReadsDistributor(samRgRecords));
        }
        if (accumulationLevels.contains(MetricAccumulationLevel.SAMPLE)) {
            outputOrderedDistributors.add(new SampleDistributor(samRgRecords));
        }

        if(accumulationLevels.contains(MetricAccumulationLevel.LIBRARY)) {
            outputOrderedDistributors.add(new LibraryDistributor(samRgRecords));
        }

        if(accumulationLevels.contains(MetricAccumulationLevel.READ_GROUP)) {
            outputOrderedDistributors.add(new ReadGroupCollector(samRgRecords));
        }
    }

    /**
     * Construct a argument of ARGTYPE using the given SAMRecord and ReferenceSequence then pass
     * this value to all collectors that should include this record
     */
    public void acceptRecord(final SAMRecord record, final ReferenceSequence refSeq) {
        final ARGTYPE arg = makeArg(record, refSeq);

        for(final Distributor collector : outputOrderedDistributors) {
            collector.acceptRecord(arg, record.getReadGroup());
        }
    }

    /**
     * Call finish on all PerUnitMetricCollectors
     */
    public void finish() {
        for(final Distributor collector : outputOrderedDistributors) {
            collector.finish();
        }
    }

    /** Get the PerUnitMetricCollector that collects reads for all levels */
    public PerUnitMetricCollector getAllReadsCollector() {
        return allReadCollector;
    }

    /** Add all metrics to the given file in the following MetricAccumulationLevel order
     *  ALL_READS, SAMPLE, LIBRARY, READ_GROUP.
     */
    public void addAllLevelsToFile(final MetricsFile file) {
        for(final Distributor collector : outputOrderedDistributors) {
            collector.addToFile(file);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy