
picard.analysis.HistogramGenerator Maven / Gradle / Ivy
The newest version!
/*
* The MIT License
*
* Copyright (c) 2022 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.analysis;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.QualityUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
final class HistogramGenerator {
final boolean useOriginalQualities;
int maxLengthSoFar = 0;
double[] firstReadTotalsByCycle = new double[maxLengthSoFar];
double[] firstReadTotalProbsByCycle = new double[maxLengthSoFar];
long[] firstReadCountsByCycle = new long[maxLengthSoFar];
double[] secondReadTotalsByCycle = new double[maxLengthSoFar];
double[] secondReadTotalProbsByCycle = new double[maxLengthSoFar];
long[] secondReadCountsByCycle = new long[maxLengthSoFar];
int recordsCount = 0;
final public int skipBases = 10; //qualities of the first bases are ignored (inference issues)
final public int minimalCount = 25;
public HistogramGenerator(final boolean useOriginalQualities) {
this.useOriginalQualities = useOriginalQualities;
}
public HistogramGenerator(final double[] firstReadTotalsByCycle, final double[] firstReadTotalProbsByCycle,
final long[] firstReadCountsByCycle, final double[] secondReadTotalsByCycle,
final double[] secondReadTotalProbsByCycle, final long[] secondReadCountsByCycle, int nRecords){
this.firstReadCountsByCycle = firstReadCountsByCycle.clone();
this.firstReadTotalsByCycle = firstReadTotalsByCycle.clone();
this.firstReadTotalProbsByCycle = firstReadTotalProbsByCycle.clone();
this.secondReadCountsByCycle = secondReadCountsByCycle.clone();
this.secondReadTotalsByCycle = secondReadTotalsByCycle.clone();
this.secondReadTotalProbsByCycle = secondReadTotalProbsByCycle.clone();
this.useOriginalQualities = false;
this.recordsCount = nRecords;
}
public void addRecord(final SAMRecord rec) {
final byte[] quals = (useOriginalQualities ? rec.getOriginalBaseQualities() : rec.getBaseQualities());
if (quals == null) return;
recordsCount++;
final int length = quals.length;
final boolean rc = rec.getReadNegativeStrandFlag();
ensureArraysBigEnough(length+1);
for (int i=0; i result = new ArrayList<>();
List weights = new ArrayList<>();
double[] accumulator;
long[] counts;
if (readInPair == 1){
accumulator = firstReadTotalProbsByCycle;
counts = firstReadCountsByCycle;
} else {
accumulator = secondReadTotalProbsByCycle;
counts = secondReadCountsByCycle;
}
for (int i = skipBases; i < accumulator.length; i++ ){
if (counts[i] < minimalCount){
break;
}
result.add(accumulator[i]/counts[i]);
weights.add(counts[i]);
}
applySpanningWindowMean(result, weights, spanningWindowLength);
return longestHighQuality(result,errorProbThreshold);
}
private void applySpanningWindowMean(List vector, List weights, final int spanLength){
List tmp = new ArrayList<>(vector);
for (int i = 0; i < vector.size(); i++){
double tmpEr = 0;
long tmpWeight = 0;
for (int j = Math.max(i-spanLength,0); j < Math.min(i+spanLength+1, vector.size()); j++){
tmpEr += tmp.get(j)*weights.get(j);
tmpWeight += weights.get(j);
}
vector.set(i, tmpEr/tmpWeight);
}
}
private int longestHighQuality(List averageErrorProbabilities, double errorProbThreshold){
int curStart = 0;
int curEnd = 0;
int curBestIntervalLength = 0;
while ( curEnd < averageErrorProbabilities.size() ) {
if (averageErrorProbabilities.get(curEnd) <= errorProbThreshold) {
curEnd++;
} else {
if ((curEnd - curStart) > curBestIntervalLength) {
curBestIntervalLength = curEnd - curStart;
}
curStart = curEnd + 1;
curEnd = curStart;
}
}
if ((curEnd - curStart) > curBestIntervalLength) {
curBestIntervalLength = curEnd - curStart;
}
return curBestIntervalLength;
}
boolean isEmpty() {
return maxLengthSoFar == 0;
}
private void ensureArraysBigEnough(final int length) {
if (length > maxLengthSoFar) {
firstReadTotalsByCycle = Arrays.copyOf(firstReadTotalsByCycle, length);
firstReadTotalProbsByCycle = Arrays.copyOf(firstReadTotalProbsByCycle, length);
firstReadCountsByCycle = Arrays.copyOf(firstReadCountsByCycle, length);
secondReadTotalsByCycle = Arrays.copyOf(secondReadTotalsByCycle , length);
secondReadTotalProbsByCycle = Arrays.copyOf(secondReadTotalProbsByCycle , length);
secondReadCountsByCycle = Arrays.copyOf(secondReadCountsByCycle, length);
maxLengthSoFar = length;
}
}
Histogram getMeanQualityHistogram() {
final String label = useOriginalQualities ? "MEAN_ORIGINAL_QUALITY" : "MEAN_QUALITY";
final Histogram meanQualities = new Histogram("CYCLE", label);
int firstReadLength = 0;
for (int cycle=0; cycle < firstReadTotalsByCycle.length; ++cycle) {
if (firstReadTotalsByCycle[cycle] > 0) {
meanQualities.increment(cycle, firstReadTotalsByCycle[cycle] / firstReadCountsByCycle[cycle]);
firstReadLength = cycle;
}
}
for (int i=0; i< secondReadTotalsByCycle.length; ++i) {
if (secondReadCountsByCycle[i] > 0) {
final int cycle = firstReadLength + i;
meanQualities.increment(cycle, secondReadTotalsByCycle[i] / secondReadCountsByCycle[i]);
}
}
return meanQualities;
}
Histogram getMeanErrorProbHistogram() {
final String label = useOriginalQualities ? "MEAN_ORIGINAL_ERROR_PROB" : "MEAN_ERROR_PROB";
final Histogram meanQualities = new Histogram("CYCLE", label);
int firstReadLength = 0;
for (int cycle=0; cycle < firstReadTotalsByCycle.length; ++cycle) {
if (firstReadTotalsByCycle[cycle] > 0) {
meanQualities.increment(cycle, firstReadTotalProbsByCycle[cycle] / firstReadCountsByCycle[cycle]);
firstReadLength = cycle;
}
}
for (int i=0; i< secondReadTotalsByCycle.length; ++i) {
if (secondReadCountsByCycle[i] > 0) {
final int cycle = firstReadLength + i;
meanQualities.increment(cycle, secondReadTotalProbsByCycle[i] / secondReadCountsByCycle[i]);
}
}
return meanQualities;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy