org.broadinstitute.hellbender.tools.sv.cluster.BinnedCNVLinkage Maven / Gradle / Ivy
The newest version!
package org.broadinstitute.hellbender.tools.sv.cluster;
import htsjdk.samtools.SAMSequenceDictionary;
import org.broadinstitute.hellbender.tools.sv.SVCallRecord;
import org.broadinstitute.hellbender.utils.*;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
* CNV defragmenter for when the intervals used for coverage collection are available. The intervals are used to adjust
* variant padding by rounding to the nearest bin boundary. This is particularly important for exomes, which typically
* have a wide range of bin sizes.
*/
public class BinnedCNVLinkage extends CNVLinkage {
protected final TreeMap genomicCoordinatesToBinIndexMap;
protected final List coverageIntervals;
final GenomeLocParser parser;
public BinnedCNVLinkage(final SAMSequenceDictionary dictionary, final double paddingFraction,
final double minSampleOverlap, final List coverageIntervals) {
super(dictionary, paddingFraction, minSampleOverlap);
this.coverageIntervals = Utils.nonNull(coverageIntervals);
genomicCoordinatesToBinIndexMap = new TreeMap<>();
for (int i = 0; i < coverageIntervals.size(); i++) {
genomicCoordinatesToBinIndexMap.put(coverageIntervals.get(i),i);
}
parser = new GenomeLocParser(this.dictionary);
}
// TODO: try to calculate this by sweeping through the bin intervals
@Override
public int getMaxClusterableStartingPosition(final SVCallRecord call) {
Utils.nonNull(call);
return dictionary.getSequence(call.getContigA()).getSequenceLength();
}
@Override
protected SimpleInterval getPaddedRecordInterval(final String contig, final int start, final int end) {
Utils.nonNull(contig);
final GenomeLoc callStart = parser.createGenomeLoc(contig, start, start);
final GenomeLoc callEnd = parser.createGenomeLoc(contig, end, end);
//first interval that is equal to or "greater than" the call start, such that the start of the bin should match the call start, with a little wiggle room
final Map.Entry startBin = genomicCoordinatesToBinIndexMap.ceilingEntry(callStart);
Utils.validate(startBin != null, "Call start " + callStart + " for call at " + contig + ":" + start + "-" + end + " not found in model call intervals.");
final int callStartIndex = startBin.getValue();
//last interval that is equal to or "less than" the call end, such that the end of the bin should match the call end
final Map.Entry endBin = genomicCoordinatesToBinIndexMap.floorEntry(callEnd);
Utils.validate(endBin != null, "Call end " + callEnd + " for call at " + contig + ":" + start + "-" + end + " not found in model call intervals.");
final int callEndIndex = endBin.getValue();
final int callLengthInBins = callEndIndex - callStartIndex + 1;
Utils.validate(callLengthInBins > 0, "Copy number call at " + contig + ":" + start + "-"
+ end + " does not align with supplied model calling intervals. Use the filtered intervals input " +
"from GermlineCNVCaller for this cohort/model.");
final int paddedStartIndex = Math.max(callStartIndex - (int)Math.round(callLengthInBins * paddingFraction), 0);
final int paddedCallStart;
if (coverageIntervals.get(paddedStartIndex).getContig().equals(callStart.getContig())) {
paddedCallStart = coverageIntervals.get(paddedStartIndex).getStart();
} else {
paddedCallStart = callStart.getStart();
}
final int paddedEndIndex = Math.min(callEndIndex + (int)Math.round(callLengthInBins * paddingFraction), genomicCoordinatesToBinIndexMap.size() - 1);
final int paddedCallEnd;
if (coverageIntervals.get(paddedEndIndex).getContig().equals(callEnd.getContig())) {
paddedCallEnd = coverageIntervals.get(paddedEndIndex).getEnd();
} else {
paddedCallEnd = callEnd.getEnd();
}
final int contigLength = dictionary.getSequence(contig).getSequenceLength();
return IntervalUtils.trimIntervalToContig(contig, paddedCallStart, paddedCallEnd, contigLength);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy