/*
* The MIT License
*
* Copyright (c) 2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.analysis;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.AbstractLocusInfo;
import htsjdk.samtools.util.AbstractRecordAndOffset;
import htsjdk.samtools.util.EdgingRecordAndOffset;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.SequenceUtil;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.HashMap;
/**
 * Implements a fast algorithm for collecting data from an {@link htsjdk.samtools.util.AbstractLocusInfo}
 * with a list of aligned {@link htsjdk.samtools.util.EdgingRecordAndOffset} objects. The algorithm receives only two
 * {@link htsjdk.samtools.util.EdgingRecordAndOffset} objects per alignment block of a read: one for the start of
 * the block and one for the end. When an {@link htsjdk.samtools.util.EdgingRecordAndOffset} of type
 * {@link htsjdk.samtools.util.EdgingRecordAndOffset.Type#BEGIN} is encountered, all information from the alignment block
 * is accumulated in the collector and the read name is added to a map of currently processed reads, which is used
 * to detect overlapping positions. When an {@link htsjdk.samtools.util.EdgingRecordAndOffset} of type
 * {@link htsjdk.samtools.util.EdgingRecordAndOffset.Type#END} is encountered, the read name is removed from that map.
* @author [email protected], EPAM Systems, Inc.
*/
public class FastWgsMetricsCollector extends AbstractWgsMetricsCollector<EdgingRecordAndOffset> {
/**
 * Sequence index of the previously processed {@link htsjdk.samtools.util.AbstractLocusInfo}.
*/
private int previousSequenceIndex;
/**
 * Manager for the {@link #pileupSize} and {@link #unfilteredDepthSize} counters.
*/
private final CounterManager counterManager;
/**
 * Counter accumulating coverage data for the base quality histogram calculation.
*/
private final CounterManager.Counter pileupSize;
/**
 * Counter accumulating unfiltered coverage data (all bases except quality 2)
 * for the theoretical het sensitivity calculation.
*/
private final CounterManager.Counter unfilteredDepthSize;
/**
 * Map holding the currently processed reads that may have overlapping regions, keyed by read name.
*/
    private Map<String, Set<EdgingRecordAndOffset>> readsNames;
/**
 * Determines the size of the created {@link picard.analysis.CounterManager.Counter} objects: the larger the
 * {@link picard.analysis.CounterManager.Counter} objects, the less often rebasing occurs in them.
*/
private final int ARRAY_SIZE_PER_READ_LENGTH = 2000;
/**
* Creates a collector and initializes the inner data structures
*
 * @param collectWgsMetrics the {@link CollectWgsMetrics} instance that creates this collector
 * @param coverageCap       coverage cap
 * @param intervals         the intervals over which metrics are collected
*/
public FastWgsMetricsCollector(CollectWgsMetrics collectWgsMetrics, int coverageCap, final IntervalList intervals) {
super(collectWgsMetrics, coverageCap, intervals);
this.previousSequenceIndex = -1;
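        // Counter arrays are sized to READ_LENGTH * ARRAY_SIZE_PER_READ_LENGTH positions, so rebasing stays rare.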
this.counterManager = new CounterManager(collectWgsMetrics.READ_LENGTH * ARRAY_SIZE_PER_READ_LENGTH, collectWgsMetrics.READ_LENGTH);
this.pileupSize = counterManager.newCounter();
this.unfilteredDepthSize = counterManager.newCounter();
}
@Override
    public void addInfo(final AbstractLocusInfo<EdgingRecordAndOffset> info, final ReferenceSequence ref, boolean referenceBaseN) {
prepareCollector(info);
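        // Each alignment block contributes exactly two records: accumulate on BEGIN, retire the read name on END.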
for (final EdgingRecordAndOffset record : info.getRecordAndOffsets()) {
final String readName = record.getReadName();
            Optional<Set<EdgingRecordAndOffset>> recordsAndOffsetsForName = Optional.ofNullable(readsNames.get(readName));
if (record.getType() == EdgingRecordAndOffset.Type.BEGIN) {
processRecord(info.getPosition(), ref, record, recordsAndOffsetsForName.orElse(new HashSet<>()));
} else {
recordsAndOffsetsForName.ifPresent(
edgingRecordAndOffsets -> removeRecordFromMap(record,
edgingRecordAndOffsets));
}
}
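        // Update the depth histograms for this locus unless the reference base is an N; high-quality depth is
        // capped at coverageCap and the excess bases are counted as excluded by capping.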
if (!referenceBaseN) {
final int readNamesSize = pileupSize.get(info.getPosition());
final int highQualityDepth = Math.min(readNamesSize, coverageCap);
if (highQualityDepth < readNamesSize) {
basesExcludedByCapping += readNamesSize - coverageCap;
}
highQualityDepthHistogramArray[highQualityDepth]++;
unfilteredDepthHistogramArray[unfilteredDepthSize.get(info.getPosition())]++;
}
}
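    /**
     * Accumulates data for one {@code BEGIN} record: every position of its alignment block is filtered by base
     * quality, checked for overlap with records already seen for the same read name, and counted in the pileup
     * and unfiltered depth counters.
     *
     * @param position                 reference position of the current locus
     * @param ref                      the reference sequence
     * @param record                   the {@code BEGIN} record whose alignment block is processed
     * @param recordsAndOffsetsForName records already seen for the same read name, used to detect overlaps
     */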
    private void processRecord(int position, ReferenceSequence ref, EdgingRecordAndOffset record, Set<EdgingRecordAndOffset> recordsAndOffsetsForName) {
long processedLoci = counter;
readsNames.put(record.getReadName(), recordsAndOffsetsForName);
final byte[] qualities = record.getBaseQualities();
final byte[] bases = record.getRecord().getReadBases();
for (int i = 0; i < record.getLength(); i++) {
final int index = i + position;
if (isReferenceBaseN(index, ref)) {
continue;
}
final byte quality = qualities[i + record.getOffset()];
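            // Quality <= 2 bases are excluded from both the filtered and the unfiltered (het sensitivity) counts.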
if (quality <= 2) {
basesExcludedByBaseq++;
} else {
if (unfilteredDepthSize.get(index) < coverageCap) {
unfilteredBaseQHistogramArray[quality]++;
unfilteredDepthSize.increment(index);
}
if (quality < collectWgsMetrics.MINIMUM_BASE_QUALITY || SequenceUtil.isNoCall(bases[i + record.getOffset()])){
basesExcludedByBaseq++;
} else {
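                    // If any previously seen record for this read still covers this position with acceptable
                    // quality, count the base as overlap instead of adding it to the pileup.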
final int bsq = excludeByQuality(recordsAndOffsetsForName, index);
if (recordsAndOffsetsForName.size() - bsq > 0) {
basesExcludedByOverlap++;
} else {
pileupSize.increment(index);
}
}
}
if (isTimeToStop(++processedLoci)) {
break;
}
}
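        // Remember this BEGIN record so that later blocks of the same read can detect overlap at shared positions.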
recordsAndOffsetsForName.add(record);
}
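    /**
     * Handles an {@code END} record: if this is the last outstanding alignment block for the read name, the read
     * is dropped from {@link #readsNames}; otherwise only the start record of this block is removed from the set.
     *
     * @param record                   the {@code END} record being processed
     * @param recordsAndOffsetsForName records currently tracked for this read name
     */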
    private void removeRecordFromMap(EdgingRecordAndOffset record, Set<EdgingRecordAndOffset> recordsAndOffsetsForName) {
if (recordsAndOffsetsForName.size() == 1) {
readsNames.remove(record.getReadName());
} else {
recordsAndOffsetsForName.remove(record.getStart());
}
}
/**
* Prepares the accumulator objects to process a new {@link htsjdk.samtools.util.AbstractLocusInfo}.
* If we switch to a new sequence, all accumulators are cleared.
*
* @param info the next {@link htsjdk.samtools.util.AbstractLocusInfo} to process
*/
    private void prepareCollector(AbstractLocusInfo<EdgingRecordAndOffset> info) {
if (readsNames == null) {
readsNames = new HashMap<>();
}
if (previousSequenceIndex != info.getSequenceIndex()) {
readsNames.clear();
counterManager.clear();
previousSequenceIndex = info.getSequenceIndex();
}
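        // Make sure the counters can address the current position; CounterManager rebases its arrays if needed.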
counterManager.checkOutOfBounds(info.getPosition());
}
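    /**
     * Counts how many of the records already seen for a read name do not contribute a usable base at the given
     * position, either because the position lies past the record's alignment block or because its base quality is
     * below {@code MINIMUM_BASE_QUALITY}. The caller treats the remaining records as true overlaps.
     *
     * @param setForName records already seen for the read name being processed
     * @param position   reference position being evaluated
     * @return the number of records excluded at this position
     */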
    private int excludeByQuality(final Set<EdgingRecordAndOffset> setForName, int position) {
int bsq = 0;
for (EdgingRecordAndOffset recordAndOffset : setForName) {
if (position - recordAndOffset.getRefPos() >= recordAndOffset.getLength()
|| recordAndOffset.getBaseQuality(position) < collectWgsMetrics.MINIMUM_BASE_QUALITY) {
bsq++;
}
}
return bsq;
}
}