org.broadinstitute.hellbender.tools.copynumber.models.CopyRatioSegmentedData Maven / Gradle / Ivy
The newest version!
package org.broadinstitute.hellbender.tools.copynumber.models;
import com.google.common.primitives.Doubles;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.OverlapDetector;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.moment.Variance;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleIntervalCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CopyRatio;
import org.broadinstitute.hellbender.utils.IndexRange;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.mcmc.DataCollection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
* {@link DataCollection} for the copy-ratio model containing the copy-ratio data grouped by segment.
*
* @author Samuel Lee <[email protected]>
*/
final class CopyRatioSegmentedData implements DataCollection {
private final CopyRatioCollection copyRatios;
private final SimpleIntervalCollection segments;
private final double minLog2CopyRatioValue;
private final double maxLog2CopyRatioValue;
private final List indexedCopyRatios;
private final List indexRangesPerSegment;
CopyRatioSegmentedData(final CopyRatioCollection copyRatios,
final SimpleIntervalCollection segments) {
this.copyRatios = Utils.nonNull(copyRatios);
this.segments = Utils.nonNull(segments);
final List log2CopyRatioValues = copyRatios.getLog2CopyRatioValues();
minLog2CopyRatioValue = log2CopyRatioValues.stream().min(Double::compareTo).orElse(Double.NaN);
maxLog2CopyRatioValue = log2CopyRatioValues.stream().max(Double::compareTo).orElse(Double.NaN);
final List indexedCopyRatios = new ArrayList<>(copyRatios.size());
final List indexRangesPerSegment = new ArrayList<>(segments.size());
//construct list of lists of copy ratios with an index in order corresponding to that of segments;
//segment assignment is based on midpoint of copy-ratio interval
final OverlapDetector copyRatioMidpointOverlapDetector = copyRatios.getMidpointOverlapDetector();
final Comparator comparator = copyRatios.getComparator();
int index = 0;
for (int segmentIndex = 0; segmentIndex < segments.size(); segmentIndex++) {
final SimpleInterval segment = segments.getRecords().get(segmentIndex);
final List copyRatiosInSegment = copyRatioMidpointOverlapDetector.getOverlaps(segment).stream()
.sorted(comparator)
.collect(Collectors.toList());
final int segmentStartIndex = index;
final int si = segmentIndex;
IntStream.range(0, copyRatiosInSegment.size()).boxed()
.map(i -> new IndexedCopyRatio(copyRatiosInSegment.get(i), segmentStartIndex + i, si))
.forEach(indexedCopyRatios::add);
indexRangesPerSegment.add(new IndexRange(segmentStartIndex, segmentStartIndex + copyRatiosInSegment.size()));
index += copyRatiosInSegment.size();
}
this.indexedCopyRatios = Collections.unmodifiableList(indexedCopyRatios);
this.indexRangesPerSegment = Collections.unmodifiableList(indexRangesPerSegment);
}
CopyRatioCollection getCopyRatios() {
return copyRatios;
}
SimpleIntervalCollection getSegments() {
return segments;
}
int getNumSegments() {
return segments.size();
}
int getNumPoints() {
return copyRatios.size();
}
double getMinLog2CopyRatioValue() {
return minLog2CopyRatioValue;
}
double getMaxLog2CopyRatioValue() {
return maxLog2CopyRatioValue;
}
List getIndexedCopyRatios() {
return indexedCopyRatios;
}
List getIndexedCopyRatiosInSegment(final int segmentIndex) {
return indexedCopyRatios.subList(
indexRangesPerSegment.get(segmentIndex).getStart(), indexRangesPerSegment.get(segmentIndex).getEnd());
}
//estimate global variance empirically by taking average of all per-segment variances
double estimateVariance() {
return IntStream.range(0, segments.size())
.mapToDouble(s -> new Variance().evaluate(Doubles.toArray(
getIndexedCopyRatiosInSegment(s).stream()
.map(IndexedCopyRatio::getLog2CopyRatioValue)
.collect(Collectors.toList()))))
.filter(v -> !Double.isNaN(v))
.average().orElse(Double.NaN);
}
//estimate segment means empirically by taking averages of log2 copy ratios in each segment
CopyRatioState.SegmentMeans estimateSegmentMeans() {
final List means = IntStream.range(0, segments.size()).boxed()
.map(s -> new Mean().evaluate(Doubles.toArray(
getIndexedCopyRatiosInSegment(s).stream()
.map(IndexedCopyRatio::getLog2CopyRatioValue)
.collect(Collectors.toList()))))
.collect(Collectors.toList());
return new CopyRatioState.SegmentMeans(means);
}
static final class IndexedCopyRatio extends CopyRatio {
private final int index;
private final int segmentIndex;
private IndexedCopyRatio(final CopyRatio copyRatio,
final int index,
final int segmentIndex) {
super(copyRatio.getInterval(), copyRatio.getLog2CopyRatioValue());
this.index = index;
this.segmentIndex = segmentIndex;
}
int getIndex() {
return index;
}
int getSegmentIndex() {
return segmentIndex;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy