
org.broadinstitute.hellbender.utils.downsampling.MutectDownsampler Maven / Gradle / Ivy
The newest version!
package org.broadinstitute.hellbender.utils.downsampling;
import org.apache.commons.lang3.mutable.MutableInt;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.param.ParamUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import java.util.*;
public final class MutectDownsampler extends ReadsDownsampler {
private static final int SUSPICIOUS_MAPPING_QUALITY = 50;
private final MutableInt suspiciousReadCount;
private final int maxSuspiciousReadsPerStride;
private boolean rejectAllReadsInStride;
private final int stride;
private final int maxCoverage;
private final List pendingReads;
private List finalizedReads;
private GATKRead firstReadInStride;
/**
* @param maxReadsPerAlignmentStart Maximum number of reads per alignment start position. Must be > 0
* @param stride Length in bases constituting a single pool of reads to downsample
*/
public MutectDownsampler(final int maxReadsPerAlignmentStart,
final int maxSuspiciousReadsPerAlignmentStart,
final int stride) {
// convert coverage per base to coverage per stride
maxCoverage = maxReadsPerAlignmentStart <= 0 ? Integer.MAX_VALUE : (maxReadsPerAlignmentStart * stride);
this.stride = ParamUtils.isPositive(stride, "stride must be > 0");
maxSuspiciousReadsPerStride = maxSuspiciousReadsPerAlignmentStart <= 0 ? Integer.MAX_VALUE : stride * ParamUtils.isPositive(maxSuspiciousReadsPerAlignmentStart, "maxSuspiciousReadsPerAlignmentStart must be > 0");
pendingReads = new ArrayList<>();
finalizedReads = new ArrayList<>();
rejectAllReadsInStride = false;
suspiciousReadCount = new MutableInt(0);
clearItems();
resetStats();
}
@Override
public void submit( final GATKRead newRead ) {
Utils.nonNull(newRead);
if (ReadUtils.readHasNoAssignedPosition(newRead)) {
finalizedReads.add(newRead);
return;
} else {
handlePositionalChange(newRead);
if (rejectAllReadsInStride) {
return;
}
if (newRead.getMappingQuality() <= SUSPICIOUS_MAPPING_QUALITY) {
suspiciousReadCount.increment();
}
rejectAllReadsInStride |= suspiciousReadCount.intValue() >= maxSuspiciousReadsPerStride;
pendingReads.add(newRead);
}
}
private void handlePositionalChange( final GATKRead newRead ) {
if (ReadUtils.readHasNoAssignedPosition(newRead)) {
return;
} else if (firstReadInStride == null) {
firstReadInStride = newRead;
} else {
final boolean newContig = !newRead.getAssignedContig().equals(firstReadInStride.getAssignedContig());
final boolean newStride = newContig || newRead.getAssignedStart() >= firstReadInStride.getAssignedStart() + stride;
if (newStride) {
finalizePendingReads();
firstReadInStride = newRead;
rejectAllReadsInStride = false;
suspiciousReadCount.setValue(0);
}
}
}
private void finalizePendingReads() {
if (!rejectAllReadsInStride) {
// most common case: no downsampling necessary
if (pendingReads.size() <= maxCoverage) {
finalizedReads.addAll(pendingReads);
} else {
// if we exceed the max coverage, just use well-mapped reads. Maybe the number of such reads won't reach
// the desired coverage, but if the region is decently mappable the shortfall will be minor.
final ReservoirDownsampler wellMappedDownsampler = new ReservoirDownsampler(maxCoverage, false);
pendingReads.stream().filter(read -> read.getMappingQuality() > SUSPICIOUS_MAPPING_QUALITY).forEach(wellMappedDownsampler::submit);
wellMappedDownsampler.signalEndOfInput();
final List readsToFinalize = wellMappedDownsampler.consumeFinalizedItems();
if (stride > 1) {
Collections.sort(readsToFinalize, Comparator.comparingInt(GATKRead::getAssignedStart));
}
finalizedReads.addAll(readsToFinalize);
}
}
pendingReads.clear();
}
@Override
public boolean hasFinalizedItems() {
return ! finalizedReads.isEmpty();
}
@Override
public boolean hasPendingItems() { return !pendingReads.isEmpty(); }
@Override
public GATKRead peekFinalized() { return finalizedReads.isEmpty() ? null : finalizedReads.get(0); }
@Override
public GATKRead peekPending() { return pendingReads.isEmpty() ? null : pendingReads.get(0); }
@Override
public List consumeFinalizedItems() {
Collections.sort(finalizedReads, Comparator.comparingInt(GATKRead::getAssignedStart));
final List toReturn = finalizedReads;
finalizedReads = new ArrayList<>();
return toReturn;
}
@Override
public int size() { return finalizedReads.size() + pendingReads.size(); }
@Override
public void signalEndOfInput() {
finalizePendingReads();
}
@Override
public void clearItems() {
pendingReads.clear();
finalizedReads.clear();
firstReadInStride = null;
rejectAllReadsInStride = false;
suspiciousReadCount.setValue(0);
}
@Override
public boolean requiresCoordinateSortOrder() {
return true;
}
@Override
public void signalNoMoreReadsBefore( final GATKRead read ) {
handlePositionalChange(read);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy