/*
 * The MIT License
 *
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package picard.sam.markduplicates;

import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.PeekableIterator;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.Metrics;
import picard.sam.DuplicationMetrics;
import picard.sam.markduplicates.util.AbstractOpticalDuplicateFinderCommandLineProgram;
import picard.sam.util.PhysicalLocationShort;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.*;

import static java.lang.Math.pow;

/**
 * Attempts to estimate library complexity from sequence alone. Does so by sorting all reads
 * by the first N bases (5 by default) of each read and then comparing reads with the first
 * N bases identical to each other for duplicates. Reads are considered to be duplicates if
 * they match each other with no gaps and an overall mismatch rate less than or equal to
 * MAX_DIFF_RATE (0.03 by default).
 *
 * <p>Reads of poor quality are filtered out so as to provide a more accurate estimate. The filtering
 * removes reads with any no-calls in the first N bases, or with a mean base quality lower than
 * MIN_MEAN_QUALITY across either the first or second read.</p>
 *
 * <p>The algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes
 * these in the calculation of library size. Also, since there is no alignment to screen out technical
 * reads, one further filter is applied to the data. After examining all reads, a Histogram is built of
 * [#reads in duplicate set -> #of duplicate sets]; all bins that contain exactly one duplicate set are
 * then removed from the Histogram as outliers before library size is estimated.</p>
 *
 * @author Tim Fennell
 */
@CommandLineProgramProperties(
        usage = EstimateLibraryComplexity.USAGE_SUMMARY + EstimateLibraryComplexity.USAGE_DETAILS,
        usageShort = EstimateLibraryComplexity.USAGE_SUMMARY,
        programGroup = Metrics.class
)
public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCommandLineProgram {
    static final String USAGE_SUMMARY = "Estimates the number of unique molecules in a sequencing library. ";
    static final String USAGE_DETAILS = "<p>This tool outputs quality metrics for a sequencing library preparation. " +
            "Library complexity refers to the number of unique DNA fragments present in a given library. Reductions in complexity " +
            "resulting from PCR amplification during library preparation will ultimately compromise downstream analyses " +
            "via an elevation in the number of duplicate reads. PCR-associated duplication artifacts can result from: inadequate amounts " +
            "of starting material (genomic DNA, cDNA, etc.), losses during cleanups, and size selection issues. " +
            "Duplicate reads can also arise from optical duplicates resulting from sequencing-machine optical sensor artifacts.</p>" +

            "<p>This tool attempts to estimate library complexity from the sequence of read pairs alone. Reads are sorted by the first N bases " +
            "(5 by default) of the first read and then the first N bases of the second read of a pair. Read pairs are considered to " +
            "be duplicates if they match each other with no gaps and an overall mismatch rate less than or equal to MAX_DIFF_RATE " +
            "(0.03 by default). Reads of poor quality are filtered out to provide a more accurate estimate. The filtering removes reads " +
            "with a mean base quality below MIN_MEAN_QUALITY (20 by default) across either the first or " +
            "second read. Unpaired reads are ignored in this computation.</p>" +

            "<p>The algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes these in the calculation " +
            "of library size. Also, since there is no alignment information used in this algorithm, an additional filter is applied to " +
            "the data as follows. After examining all reads, a histogram is built in which the number of reads in a duplicate set is " +
            "compared with the number of duplicate sets. All bins that contain exactly one duplicate set are then removed from the " +
            "histogram as outliers prior to the library size estimation.</p>" +

            "<h4>Usage example:</h4>" +
            "<pre>" +
            "java -jar picard.jar EstimateLibraryComplexity \\<br />" +
            "     I=input.bam \\<br />" +
            "     O=est_lib_complex_metrics.txt" +
            "</pre>" +
            "Please see the documentation for the companion MarkDuplicates tool." +
            "<hr />";
"; @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "One or more files to combine and " + "estimate library complexity from. Reads can be mapped or unmapped.") public List INPUT; @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Output file to writes per-library metrics to.") public File OUTPUT; @Option(doc = "The minimum number of bases at the starts of reads that must be identical for reads to " + "be grouped together for duplicate detection. In effect total_reads / 4^max_id_bases reads will " + "be compared at a time, so lower numbers will produce more accurate results but consume " + "exponentially more memory and CPU.") public int MIN_IDENTICAL_BASES = 5; @Option(doc = "The maximum rate of differences between two reads to call them identical.") public double MAX_DIFF_RATE = 0.03; @Option(doc = "The minimum mean quality of the bases in a read pair for the read to be analyzed. Reads with " + "lower average quality are filtered out and not considered in any calculations.") public int MIN_MEAN_QUALITY = 20; @Option(doc = "Do not process self-similar groups that are this many times over the mean expected group size. " + "I.e. if the input contains 10m read pairs and MIN_IDENTICAL_BASES is set to 5, then the mean expected " + "group size would be approximately 10 reads.") public int MAX_GROUP_RATIO = 500; @Option(doc = "Barcode SAM tag (ex. BC for 10X Genomics)", optional = true) public String BARCODE_TAG = null; @Option(doc = "Read one barcode SAM tag (ex. BX for 10X Genomics)", optional = true) public String READ_ONE_BARCODE_TAG = null; @Option(doc = "Read two barcode SAM tag (ex. BX for 10X Genomics)", optional = true) public String READ_TWO_BARCODE_TAG = null; @Option(doc = "The maximum number of bases to consider when comparing reads (0 means no maximum).", optional = true) public int MAX_READ_LENGTH = 0; @Option(doc = "Minimum number group count. On a per-library basis, we count the number of groups of duplicates " + "that have a particular size. Omit from consideration any count that is less than this value. For " + "example, if we see only one group of duplicates with size 500, we omit it from the metric calculations if " + "MIN_GROUP_COUNT is set to two. Setting this to two may help remove technical artifacts from the library " + "size calculation, for example, adapter dimers.", optional = true) public int MIN_GROUP_COUNT = 2; private final Log log = Log.getInstance(EstimateLibraryComplexity.class); @Override protected String[] customCommandLineValidation() { final List errorMsgs = new ArrayList(); if (0 < MAX_READ_LENGTH && MAX_READ_LENGTH < MIN_IDENTICAL_BASES) { errorMsgs.add("MAX_READ_LENGTH must be greater than MIN_IDENTICAL_BASES"); } if (MIN_IDENTICAL_BASES <= 0) { errorMsgs.add("MIN_IDENTICAL_BASES must be greater than 0"); } return errorMsgs.isEmpty() ? super.customCommandLineValidation() : errorMsgs.toArray(new String[errorMsgs.size()]); } /** * Little class to hold the sequence of a pair of reads and tile location information. 
    /** Little class to hold the sequence of a pair of reads and tile location information. */
    static class PairedReadSequence extends PhysicalLocationShort {
        // Rough estimate of read size only; does not affect the fundamental operation of the algorithm.
        static final int NUMBER_BASES_IN_READ = 150;

        short readGroup = -1;
        boolean qualityOk = true;
        byte[] read1;
        byte[] read2;
        short libraryId;

        // Hashes corresponding to read1 and read2
        int[] hashes1;
        int[] hashes2;

        public static int getSizeInBytes() {
            // rough guess at memory footprint: the summed size of all fields
            return 16 + 4 + (2 * 4) + 1 + 2 * (24 + 8 + NUMBER_BASES_IN_READ) + 2 + (2 * (24 + 8)) + 8 + 4;
        }

        public short getReadGroup() { return this.readGroup; }

        public void setReadGroup(final short readGroup) { this.readGroup = readGroup; }

        public short getLibraryId() { return this.libraryId; }

        public void setLibraryId(final short libraryId) { this.libraryId = libraryId; }

        public static SortingCollection.Codec<PairedReadSequence> getCodec() {
            return new PairedReadCodec();
        }

        void initHashes(int numberOfHashes, int skippedBases, int minReadLength) {
            hashes1 = getHashes(read1, numberOfHashes, skippedBases, minReadLength);
            hashes2 = getHashes(read2, numberOfHashes, skippedBases, minReadLength);
        }

        // Splits the read into numberOfHashes interleaved parts and hashes each part.
        // For instance:
        //                    0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
        //             read = A C G T A C G T A C G  T  A  C  G  T  A  C  G  T
        //   numberOfHashes = 5
        //     skippedBases = 1
        //    minReadLength = 15
        // The method then returns five hash values:
        //   the first value is calculated from read[1], read[6], read[11];
        //   the second value is calculated from read[2], read[7], read[12];
        //   etc.
        // Bases at positions 15 to 19 form a tail; see compareTails() in ElcHashBasedDuplicatesFinder.
        private int[] getHashes(byte[] read, int numberOfHashes, int skippedBases, int minReadLength) {
            final int[] hashValues = new int[numberOfHashes];
            for (int i = 0; i < numberOfHashes; ++i) {
                hashValues[i] = 1;
                int position = skippedBases + i;
                while (position < minReadLength) {
                    hashValues[i] = 31 * hashValues[i] + read[position];
                    position += numberOfHashes;
                }
            }
            return hashValues;
        }
    }

    static class PairedReadSequenceWithBarcodes extends PairedReadSequence {
        int barcode;        // primary barcode for this read (and pair)
        int readOneBarcode; // read one barcode, 0 if not present
        int readTwoBarcode; // read two barcode, 0 if not present or not paired

        public PairedReadSequenceWithBarcodes() {
            super();
        }

        public PairedReadSequenceWithBarcodes(final PairedReadSequence val) {
            if (null == val) throw new PicardException("val was null");
            this.readGroup = val.getReadGroup();
            this.tile = val.getTile();
            this.x = val.getX();
            this.y = val.getY();
            this.qualityOk = val.qualityOk;
            this.read1 = val.read1.clone();
            this.read2 = val.read2.clone();
            this.libraryId = val.getLibraryId();
        }

        public static int getSizeInBytes() {
            return PairedReadSequence.getSizeInBytes() + (3 * 4); // rough guess at memory footprint
        }
    }
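
    // Back-of-envelope memory budget (assumed numbers, for illustration): with
    // NUMBER_BASES_IN_READ = 150, getSizeInBytes() above estimates roughly 470 bytes per
    // PairedReadSequence. The constructor divides half the heap by this figure, so a 4 GB
    // heap yields MAX_RECORDS_IN_RAM of about (4e9 / 470) / 2, i.e. around 4 million read
    // pairs held in memory before the sort spills to disk.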
    /** Codec class for writing and reading PairedReadSequence objects. */
    static class PairedReadCodec implements SortingCollection.Codec<PairedReadSequence> {
        protected DataOutputStream out;
        protected DataInputStream in;

        public void setOutputStream(final OutputStream out) {
            this.out = new DataOutputStream(out);
        }

        public void setInputStream(final InputStream in) {
            this.in = new DataInputStream(in);
        }

        public void encode(final PairedReadSequence val) {
            try {
                this.out.writeShort(val.readGroup);
                this.out.writeShort(val.tile);
                this.out.writeShort(val.x);
                this.out.writeShort(val.y);
                this.out.writeInt(val.read1.length);
                this.out.write(val.read1);
                this.out.writeInt(val.read2.length);
                this.out.write(val.read2);
            } catch (final IOException ioe) {
                throw new PicardException("Error writing out read pair.", ioe);
            }
        }

        public PairedReadSequence decode() {
            try {
                final PairedReadSequence val = new PairedReadSequence();
                try {
                    val.readGroup = this.in.readShort();
                } catch (final EOFException eof) {
                    return null;
                }

                val.tile = this.in.readShort();
                val.x = this.in.readShort();
                val.y = this.in.readShort();

                int length = this.in.readInt();
                val.read1 = new byte[length];
                if (this.in.read(val.read1) != length) {
                    throw new PicardException("Could not read " + length + " bytes from temporary file.");
                }

                length = this.in.readInt();
                val.read2 = new byte[length];
                if (this.in.read(val.read2) != length) {
                    throw new PicardException("Could not read " + length + " bytes from temporary file.");
                }

                return val;
            } catch (final IOException ioe) {
                throw new PicardException("Exception reading read pair.", ioe);
            }
        }

        @Override
        public SortingCollection.Codec<PairedReadSequence> clone() {
            return new PairedReadCodec();
        }
    }

    /** Codec class for writing and reading PairedReadSequenceWithBarcodes objects. */
    static class PairedReadWithBarcodesCodec extends PairedReadCodec {
        @Override
        public void encode(final PairedReadSequence val) {
            if (!(val instanceof PairedReadSequenceWithBarcodes)) {
                throw new PicardException("Val was not a PairedReadSequenceWithBarcodes");
            }
            final PairedReadSequenceWithBarcodes data = (PairedReadSequenceWithBarcodes) val;
            super.encode(val);
            try {
                this.out.writeInt(data.barcode);
                this.out.writeInt(data.readOneBarcode);
                this.out.writeInt(data.readTwoBarcode);
            } catch (final IOException ioe) {
                throw new PicardException("Error writing out read pair.", ioe);
            }
        }

        @Override
        public PairedReadSequence decode() {
            try {
                final PairedReadSequence parentVal = super.decode();
                if (null == parentVal) return null; // EOF

                final PairedReadSequenceWithBarcodes val = new PairedReadSequenceWithBarcodes(parentVal);
                val.barcode = this.in.readInt();
                val.readOneBarcode = this.in.readInt();
                val.readTwoBarcode = this.in.readInt();
                return val;
            } catch (final IOException ioe) {
                throw new PicardException("Exception reading read pair.", ioe);
            }
        }

        @Override
        public SortingCollection.Codec<PairedReadSequence> clone() {
            return new PairedReadWithBarcodesCodec();
        }
    }
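
    // Illustrative round trip (an added sketch, not used by the tool): this is the contract
    // SortingCollection relies on when spilling records to temporary files. Assumes
    // prs.read1 and prs.read2 are both non-null, as encode() requires.
    static PairedReadSequence sketchCodecRoundTrip(final PairedReadSequence prs) {
        final PairedReadCodec codec = new PairedReadCodec();
        final java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
        codec.setOutputStream(buffer);
        codec.encode(prs); // fixed-width location fields, then length-prefixed read bytes
        codec.setInputStream(new java.io.ByteArrayInputStream(buffer.toByteArray()));
        return codec.decode(); // a field-for-field copy of prs; would return null at EOF
    }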
    /**
     * Comparator that orders read pairs on the first N bases of both reads.
     * There is no tie-breaking, so the induced ordering is not total: pairs that share
     * both prefixes compare as equal.
     */
    private class PairedReadComparator implements Comparator<PairedReadSequence> {
        final int BASES = EstimateLibraryComplexity.this.MIN_IDENTICAL_BASES;

        public int compare(final PairedReadSequence lhs, final PairedReadSequence rhs) {
            // First compare the first N bases of the first read
            for (int i = 0; i < BASES; ++i) {
                final int retval = lhs.read1[i] - rhs.read1[i];
                if (retval != 0) return retval;
            }

            // Then compare the first N bases of the second read
            for (int i = 0; i < BASES; ++i) {
                final int retval = lhs.read2[i] - rhs.read2[i];
                if (retval != 0) return retval;
            }

            return 0;
        }
    }

    public int getBarcodeValue(final SAMRecord record) {
        return getReadBarcodeValue(record, BARCODE_TAG);
    }

    public static int getReadBarcodeValue(final SAMRecord record, final String tag) {
        if (null == tag) return 0;
        final String attr = record.getStringAttribute(tag);
        if (null == attr) return 0;
        else return attr.hashCode();
    }

    private int getReadOneBarcodeValue(final SAMRecord record) {
        return getReadBarcodeValue(record, READ_ONE_BARCODE_TAG);
    }

    private int getReadTwoBarcodeValue(final SAMRecord record) {
        return getReadBarcodeValue(record, READ_TWO_BARCODE_TAG);
    }

    /** Stock main method. */
    public static void main(final String[] args) {
        new EstimateLibraryComplexity().instanceMainWithExit(args);
    }

    public EstimateLibraryComplexity() {
        // Size the in-memory portion of the sort from a rough per-record footprint.
        final int sizeInBytes;
        if (null != BARCODE_TAG || null != READ_ONE_BARCODE_TAG || null != READ_TWO_BARCODE_TAG) {
            sizeInBytes = PairedReadSequenceWithBarcodes.getSizeInBytes();
        } else {
            sizeInBytes = PairedReadSequence.getSizeInBytes();
        }
        MAX_RECORDS_IN_RAM = (int) (Runtime.getRuntime().maxMemory() / sizeInBytes) / 2;
    }

    /**
     * Method that does most of the work. Reads through the input BAM file and extracts the
     * read sequences of each read pair, sorting them via a SortingCollection. Then traverses
     * the sorted reads, looking at small groups at a time to find duplicates.
     */
    @Override
    protected int doWork() {
        for (final File f : INPUT) IOUtil.assertFileIsReadable(f);

        log.info("Will store " + MAX_RECORDS_IN_RAM + " read pairs in memory before sorting.");

        final List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
        final SortingCollection<PairedReadSequence> sorter;
        final boolean useBarcodes = (null != BARCODE_TAG || null != READ_ONE_BARCODE_TAG || null != READ_TWO_BARCODE_TAG);

        if (!useBarcodes) {
            sorter = SortingCollection.newInstance(PairedReadSequence.class,
                    new PairedReadCodec(),
                    new PairedReadComparator(),
                    MAX_RECORDS_IN_RAM,
                    TMP_DIR);
        } else {
            sorter = SortingCollection.newInstance(PairedReadSequence.class,
                    new PairedReadWithBarcodesCodec(),
                    new PairedReadComparator(),
                    MAX_RECORDS_IN_RAM,
                    TMP_DIR);
        }

        // Loop through the input files and pick out the read sequences etc.
        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Read");
        for (final File f : INPUT) {
            final Map<String, PairedReadSequence> pendingByName = new HashMap<String, PairedReadSequence>();
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(f);
            readGroups.addAll(in.getFileHeader().getReadGroups());

            for (final SAMRecord rec : in) {
                if (!rec.getReadPairedFlag()) continue;
                if (!rec.getFirstOfPairFlag() && !rec.getSecondOfPairFlag()) continue;
                if (rec.isSecondaryOrSupplementary()) continue;

                PairedReadSequence prs = pendingByName.remove(rec.getReadName());
                if (prs == null) {
                    // Make a new paired read object and add RG and physical location information to it
                    prs = useBarcodes ? new PairedReadSequenceWithBarcodes() : new PairedReadSequence();
                    if (opticalDuplicateFinder.addLocationInformation(rec.getReadName(), prs)) {
                        final SAMReadGroupRecord rg = rec.getReadGroup();
                        if (rg != null) prs.setReadGroup((short) readGroups.indexOf(rg));
                    }
                    pendingByName.put(rec.getReadName(), prs);
                }

                // Read passes the quality check if both ends meet the mean quality criteria
                final boolean passesQualityCheck = passesQualityCheck(rec.getReadBases(),
                        rec.getBaseQualities(),
                        MIN_IDENTICAL_BASES,
                        MIN_MEAN_QUALITY);
                prs.qualityOk = prs.qualityOk && passesQualityCheck;

                // Get the bases and restore them to their original orientation if necessary
                final byte[] bases = rec.getReadBases();
                if (rec.getReadNegativeStrandFlag()) SequenceUtil.reverseComplement(bases);

                final PairedReadSequenceWithBarcodes prsWithBarcodes = (useBarcodes) ? (PairedReadSequenceWithBarcodes) prs : null;

                if (rec.getFirstOfPairFlag()) {
                    prs.read1 = bases;
                    if (useBarcodes) {
                        prsWithBarcodes.barcode = getBarcodeValue(rec);
                        prsWithBarcodes.readOneBarcode = getReadOneBarcodeValue(rec);
                    }
                } else {
                    prs.read2 = bases;
                    if (useBarcodes) {
                        prsWithBarcodes.readTwoBarcode = getReadTwoBarcodeValue(rec);
                    }
                }

                if (prs.read1 != null && prs.read2 != null && prs.qualityOk) {
                    sorter.add(prs);
                }
                progress.record(rec);
            }
            CloserUtil.close(in);
        }

        log.info(String.format("Finished reading - read %d records - moving on to scanning for duplicates.", progress.getCount()));

        // Now go through the sorted reads and attempt to find duplicates
        final PeekableIterator<PairedReadSequence> iterator = new PeekableIterator<PairedReadSequence>(sorter.iterator());

        final Map<String, Histogram<Integer>> duplicationHistosByLibrary = new HashMap<String, Histogram<Integer>>();
        final Map<String, Histogram<Integer>> opticalHistosByLibrary = new HashMap<String, Histogram<Integer>>();

        int groupsProcessed = 0;
        long lastLogTime = System.currentTimeMillis();
        final int meanGroupSize = (int) (Math.max(1, (progress.getCount() / 2) / (int) pow(4, MIN_IDENTICAL_BASES * 2)));

        final ElcDuplicatesFinderResolver algorithmResolver = new ElcDuplicatesFinderResolver(
                MAX_DIFF_RATE,
                MAX_READ_LENGTH,
                MIN_IDENTICAL_BASES,
                useBarcodes,
                opticalDuplicateFinder
        );

        while (iterator.hasNext()) {
            // Get the next group and split it apart by library
            final List<PairedReadSequence> group = getNextGroup(iterator);

            if (group.size() > meanGroupSize * MAX_GROUP_RATIO) {
                final PairedReadSequence prs = group.get(0);
                log.warn("Omitting group with over " + MAX_GROUP_RATIO + " times the expected mean number of read pairs. " +
                        "Mean=" + meanGroupSize + ", Actual=" + group.size() + ". Prefixes: " +
                        StringUtil.bytesToString(prs.read1, 0, MIN_IDENTICAL_BASES) +
                        " / " +
                        StringUtil.bytesToString(prs.read2, 0, MIN_IDENTICAL_BASES));
            } else {
                final Map<String, List<PairedReadSequence>> sequencesByLibrary = splitByLibrary(group, readGroups);

                // Now process the reads by library
                for (final Map.Entry<String, List<PairedReadSequence>> entry : sequencesByLibrary.entrySet()) {
                    final String library = entry.getKey();
                    final List<PairedReadSequence> seqs = entry.getValue();

                    Histogram<Integer> duplicationHisto = duplicationHistosByLibrary.get(library);
                    Histogram<Integer> opticalHisto = opticalHistosByLibrary.get(library);
                    if (duplicationHisto == null) {
                        duplicationHisto = new Histogram<>("duplication_group_count", library);
                        opticalHisto = new Histogram<>("duplication_group_count", "optical_duplicates");
                        duplicationHistosByLibrary.put(library, duplicationHisto);
                        opticalHistosByLibrary.put(library, opticalHisto);
                    }
                    algorithmResolver.resolveAndSearch(seqs, duplicationHisto, opticalHisto);
                }
                ++groupsProcessed;
                if (lastLogTime < System.currentTimeMillis() - 60000) {
                    log.info("Processed " + groupsProcessed + " groups.");
                    lastLogTime = System.currentTimeMillis();
                }
            }
        }

        iterator.close();
        sorter.cleanup();

        final MetricsFile<DuplicationMetrics, Integer> file = getMetricsFile();
        for (final String library : duplicationHistosByLibrary.keySet()) {
            final Histogram<Integer> duplicationHisto = duplicationHistosByLibrary.get(library);
            final Histogram<Integer> opticalHisto = opticalHistosByLibrary.get(library);
            final DuplicationMetrics metrics = new DuplicationMetrics();
            metrics.LIBRARY = library;

            // Filter out any bins that have fewer than MIN_GROUP_COUNT entries in them and calculate derived metrics
            for (final Integer bin : duplicationHisto.keySet()) {
                final double duplicateGroups = duplicationHisto.get(bin).getValue();
                final double opticalDuplicates = opticalHisto.get(bin) == null ? 0 : opticalHisto.get(bin).getValue();

                if (duplicateGroups >= MIN_GROUP_COUNT) {
                    metrics.READ_PAIRS_EXAMINED += (bin * duplicateGroups);
                    metrics.READ_PAIR_DUPLICATES += ((bin - 1) * duplicateGroups);
                    metrics.READ_PAIR_OPTICAL_DUPLICATES += opticalDuplicates;
                }
            }

            metrics.calculateDerivedFields();
            file.addMetric(metrics);
            file.addHistogram(duplicationHisto);
        }

        file.write(OUTPUT);
        return 0;
    }

    /**
     * Pulls out of the iterator the next group of reads that can be compared to each other to
     * identify duplicates.
     */
    List<PairedReadSequence> getNextGroup(final PeekableIterator<PairedReadSequence> iterator) {
        final List<PairedReadSequence> group = new ArrayList<PairedReadSequence>();
        final PairedReadSequence first = iterator.next();
        group.add(first);

        outer:
        while (iterator.hasNext()) {
            final PairedReadSequence next = iterator.peek();
            for (int i = 0; i < MIN_IDENTICAL_BASES; ++i) {
                if (first.read1[i] != next.read1[i] || first.read2[i] != next.read2[i]) break outer;
            }
            group.add(iterator.next());
        }
        return group;
    }

    /** Takes a list of PairedReadSequence objects and splits them into lists by library. */
    Map<String, List<PairedReadSequence>> splitByLibrary(final List<PairedReadSequence> input,
                                                         final List<SAMReadGroupRecord> rgs) {
        final Map<String, List<PairedReadSequence>> out = new HashMap<>();

        for (final PairedReadSequence seq : input) {
            String library;
            if (seq.getReadGroup() != -1) {
                library = rgs.get(seq.getReadGroup()).getLibrary();
                if (library == null) library = "Unknown";
            } else {
                library = "Unknown";
            }

            List<PairedReadSequence> librarySeqs = out.get(library);
            if (librarySeqs == null) {
                librarySeqs = new ArrayList<>();
                out.put(library, librarySeqs);
            }
            librarySeqs.add(seq);
        }
        return out;
    }

    /**
     * Checks that the average quality over the entire read is >= min, and that the first N bases do
     * not contain any no-calls.
     */
    boolean passesQualityCheck(final byte[] bases, final byte[] quals, final int seedLength, final int minQuality) {
        if (bases.length < seedLength) return false;

        for (int i = 0; i < seedLength; ++i) {
            if (SequenceUtil.isNoCall(bases[i])) return false;
        }

        final int maxReadLength = (MAX_READ_LENGTH <= 0) ? Integer.MAX_VALUE : MAX_READ_LENGTH;
        final int readLength = Math.min(bases.length, maxReadLength);
        int total = 0;
        for (int i = 0; i < readLength; i++) total += quals[i];
        return total / readLength >= minQuality;
    }
}
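
// Worked example of the MIN_GROUP_COUNT filter in doWork() above (illustrative numbers
// only): suppose a library's duplication histogram is
//   { 1 -> 9,000,000 ; 2 -> 40,000 ; 3 -> 1,200 ; 500 -> 1 }
// mapping duplicate-set size to the number of sets of that size. With the default
// MIN_GROUP_COUNT = 2, the lone bin of size 500 (a likely technical artifact such as an
// adapter dimer) is skipped, so it contributes nothing to READ_PAIRS_EXAMINED or
// READ_PAIR_DUPLICATES before calculateDerivedFields() estimates the library size.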