All Downloads are FREE. Search and download functionalities are using the official Maven repository.

picard.analysis.FastWgsMetricsCollector Maven / Gradle / Ivy

There is a newer version: 3.2.0
Show newest version
/*
 * The MIT License
 *
 * Copyright (c) 2016 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package picard.analysis;

import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.AbstractLocusInfo;
import htsjdk.samtools.util.AbstractRecordAndOffset;
import htsjdk.samtools.util.EdgingRecordAndOffset;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.SequenceUtil;

import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.HashMap;

/**
 * Class represents fast algorithm for collecting data from AbstractLocusInfo
 * with a list of aligned {@link htsjdk.samtools.util.EdgingRecordAndOffset} objects. According to the algorithm
 * we receive only two {@link htsjdk.samtools.util.EdgingRecordAndOffset} objects for each alignment block of a read:
 * one for the start of block and one for the end. When meeting a {@link htsjdk.samtools.util.EdgingRecordAndOffset}
 * with type {@link htsjdk.samtools.util.EdgingRecordAndOffset.Type#BEGIN}, all information from the alignment block is accumulated in the collector,
 * read name is added to a map holding the names of processed reads for detecting overlapping positions.
 * When meeting a {@link htsjdk.samtools.util.EdgingRecordAndOffset} with type {@link htsjdk.samtools.util.EdgingRecordAndOffset.Type#END},
 * the read name is removed from the map with names of processed reads.
 * @author [email protected], EPAM Systems, Inc. 
 */

public class FastWgsMetricsCollector extends AbstractWgsMetricsCollector {

    /**
     * Index of the sequence of the previous processed AbstractLocusInfo
     */
    private int previousSequenceIndex;

    /**
     * Manager for {@link this#pileupSize} Counter.
     */
    private final CounterManager counterManager;
    /**
     * Counter, accumulating the data on coverage for the calculation of base quality histogram.
     */
    private final CounterManager.Counter pileupSize;
    /**
     * Counter, accumulating the data on unfiltered coverage (includes all but quality 2 bases)
     * for the calculation of theoretical het sensitivity.
     */
    private final CounterManager.Counter unfilteredDepthSize;

    /**
     * Map, holding information on currently processed reads, that possibly have overlapping regions.
     */
    private Map> readsNames;

    /**
     * Determines the size of created {@link picard.analysis.CounterManager.Counter} objects. The bigger {@link picard.analysis.CounterManager.Counter} objects
     * are created, the less rebasing in {@link picard.analysis.CounterManager.Counter} will occur.
     */
    private final int ARRAY_SIZE_PER_READ_LENGTH = 2000;

    /**
     * Creates a collector and initializes the inner data structures
     *
     * @param collectWgsMetrics CollectWgsMetrics, that creates this collector
     * @param coverageCap       coverage cap
     */
    public FastWgsMetricsCollector(CollectWgsMetrics collectWgsMetrics, int coverageCap, final IntervalList intervals) {
        super(collectWgsMetrics, coverageCap, intervals);
        this.previousSequenceIndex = -1;
        this.counterManager = new CounterManager(collectWgsMetrics.READ_LENGTH * ARRAY_SIZE_PER_READ_LENGTH, collectWgsMetrics.READ_LENGTH);
        this.pileupSize = counterManager.newCounter();
        this.unfilteredDepthSize = counterManager.newCounter();
    }

    @Override
    public void addInfo(final AbstractLocusInfo info, final ReferenceSequence ref, boolean referenceBaseN) {
        prepareCollector(info);
        for (final EdgingRecordAndOffset record : info.getRecordAndOffsets()) {
            final String readName = record.getReadName();
            Optional> recordsAndOffsetsForName = Optional.ofNullable((Set)readsNames.get(readName));
            if (record.getType() == EdgingRecordAndOffset.Type.BEGIN) {
                processRecord(info.getPosition(), ref, record, recordsAndOffsetsForName.orElse(new HashSet<>()));
            } else {
                recordsAndOffsetsForName.ifPresent(
                        edgingRecordAndOffsets -> removeRecordFromMap(record,
                                edgingRecordAndOffsets));
            }
        }
        if (!referenceBaseN) {
            final int readNamesSize = pileupSize.get(info.getPosition());
            final int highQualityDepth = Math.min(readNamesSize, coverageCap);
            if (highQualityDepth < readNamesSize) {
                basesExcludedByCapping += readNamesSize - coverageCap;
            }
            highQualityDepthHistogramArray[highQualityDepth]++;
            unfilteredDepthHistogramArray[unfilteredDepthSize.get(info.getPosition())]++;
        }
    }

    private void processRecord(int position, ReferenceSequence ref, EdgingRecordAndOffset record, Set recordsAndOffsetsForName) {
        long processedLoci = counter;
        readsNames.put(record.getReadName(), recordsAndOffsetsForName);
        final byte[] qualities = record.getBaseQualities();
        final byte[] bases = record.getRecord().getReadBases();
        for (int i = 0; i < record.getLength(); i++) {
            final int index = i + position;
            if (isReferenceBaseN(index, ref)) {
                continue;
            }
            final byte quality = qualities[i + record.getOffset()];
            if (quality <= 2) {
                basesExcludedByBaseq++;
            } else {
                if (unfilteredDepthSize.get(index) < coverageCap) {
                    unfilteredBaseQHistogramArray[quality]++;
                    unfilteredDepthSize.increment(index);
                }
                if (quality < collectWgsMetrics.MINIMUM_BASE_QUALITY || SequenceUtil.isNoCall(bases[i + record.getOffset()])){
                    basesExcludedByBaseq++;
                } else {
                    final int bsq = excludeByQuality(recordsAndOffsetsForName, index);
                    if (recordsAndOffsetsForName.size() - bsq > 0) {
                        basesExcludedByOverlap++;
                    } else {
                        pileupSize.increment(index);
                    }
                }
            }
            if (isTimeToStop(++processedLoci)) {
                break;
            }
        }
        recordsAndOffsetsForName.add(record);
    }

    private void removeRecordFromMap(EdgingRecordAndOffset record, Set recordsAndOffsetsForName) {
        if (recordsAndOffsetsForName.size() == 1) {
            readsNames.remove(record.getReadName());
        } else {
            recordsAndOffsetsForName.remove(record.getStart());
        }
    }

    /**
     * Prepares the accumulator objects to process a new {@link htsjdk.samtools.util.AbstractLocusInfo}.
     * If we switch to a new sequence, all accumulators are cleared.
     *
     * @param info the next {@link htsjdk.samtools.util.AbstractLocusInfo} to process
     */
    private void prepareCollector(AbstractLocusInfo info) {
        if (readsNames == null) {
            readsNames = new HashMap<>();
        }
        if (previousSequenceIndex != info.getSequenceIndex()) {
            readsNames.clear();
            counterManager.clear();
            previousSequenceIndex = info.getSequenceIndex();
        }
        counterManager.checkOutOfBounds(info.getPosition());
    }

    private int excludeByQuality(final Set setForName, int position) {
        int bsq = 0;
        for (EdgingRecordAndOffset recordAndOffset : setForName) {
            if (position - recordAndOffset.getRefPos() >= recordAndOffset.getLength()
                    || recordAndOffset.getBaseQuality(position) < collectWgsMetrics.MINIMUM_BASE_QUALITY) {
                bsq++;
            }
        }
        return bsq;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy