All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.validation.CompareBaseQualities Maven / Gradle / Ivy

There is a newer version: 4.6.0.0
Show newest version
package org.broadinstitute.hellbender.tools.validation;

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SecondaryOrSupplementarySkippingIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.PositionalArguments;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.*;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import org.broadinstitute.hellbender.engine.ProgressMeter;
import org.broadinstitute.hellbender.exceptions.UserException;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

import static org.broadinstitute.hellbender.transformers.BQSRReadTransformer.constructStaticQuantizedMapping;

/**
 * Compares the base qualities of two SAM/BAM/CRAM files. The reads in the two files must have exactly the same
 * names and appear in the same order. The two files are each specified without a flag.
 * The tool summarizes the results in a text file.
 *
 * 

Usage example

*
 * gatk CompareBaseQualities \
 *   input_1.bam \
 *   input_2.bam \
 *   -O diff.txt
 * 
* *

An example result with identical base qualities between two inputs

*
 * -----------CompareMatrix summary------------
 * all 10000 quality scores are the same
 *
 * ---------CompareMatrix full matrix (non-zero entries) ----------
 * QRead1	QRead2	diff	count
 * 40	40	0	10000
 * -----------CompareMatrix-binned summary------------
 * all 10000 quality scores are the same
 *
 * ---------CompareMatrix-binned full matrix (non-zero entries) ----------
 * QRead1	QRead2	diff	count
 * 40	40	0	10000
 * 
* */ @DocumentedFeature @CommandLineProgramProperties( summary = "Compares the base qualities of two SAM/BAM/CRAM files. The reads in the two files must have " + "exactly the same names and appear in the same order.", oneLineSummary = "Compares the base qualities of two SAM/BAM/CRAM files", programGroup = DiagnosticsAndQCProgramGroup.class ) public final class CompareBaseQualities extends PicardCommandLineProgram { @PositionalArguments(minElements = 2, maxElements = 2) public List samFiles; public static final String THROW_ON_DIFF_LONG_NAME = "throw-on-diff"; public static final String STATIC_QUANTIZED_QUALS_LONG_NAME = "static-quantized-quals"; public static final String ROUND_DOWN_QUANTIZED_LONG_NAME = "round-down-quantized"; @Argument( doc = "Summary output file.", shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, optional = true ) protected String outputFilename = null; @Argument( doc = "Throw exception on difference.", fullName = THROW_ON_DIFF_LONG_NAME, optional = true ) protected boolean throwOnDiff = false; /** * Return value is 0 if the two files have identical base qualities and non-zero otherwise. * Use static quantized quality scores to a given number of levels. */ @Advanced @Argument( doc = "Use static quantized quality scores to a given number of levels " + "(with --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME+ ")", fullName = STATIC_QUANTIZED_QUALS_LONG_NAME, optional = true ) public List staticQuantizationQuals = new ArrayList<>(); /** * Round down quantized only works with the {@link #STATIC_QUANTIZED_QUALS_LONG_NAME} option. If enabled, * rounding is done in probability space to the nearest bin. Otherwise, the value is rounded to the * nearest bin that is smaller than the current bin. * * Note: this option only works when {@link #STATIC_QUANTIZED_QUALS_LONG_NAME} is set. */ @Advanced @Argument( doc = "Round down quality scores to nearest quantized value.", fullName = ROUND_DOWN_QUANTIZED_LONG_NAME, optional = true ) public boolean roundDown = false; private byte[] staticQuantizedMapping; @Override protected Object doWork() { if (roundDown && (staticQuantizationQuals == null || staticQuantizationQuals.isEmpty())){ throw new CommandLineException.BadArgumentValue( ROUND_DOWN_QUANTIZED_LONG_NAME, "true", "This option can only be used if " + STATIC_QUANTIZED_QUALS_LONG_NAME + " is also used."); } staticQuantizedMapping = constructStaticQuantizedMapping(staticQuantizationQuals, roundDown); IOUtil.assertFileIsReadable(samFiles.get(0)); IOUtil.assertFileIsReadable(samFiles.get(1)); final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY); final SamReader reader1 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(0)); final SamReader reader2 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(1)); final SecondaryOrSupplementarySkippingIterator it1 = new SecondaryOrSupplementarySkippingIterator(reader1.iterator()); final SecondaryOrSupplementarySkippingIterator it2 = new SecondaryOrSupplementarySkippingIterator(reader2.iterator()); final CompareMatrix finalMatrix = new CompareMatrix(staticQuantizedMapping); final ProgressMeter progressMeter = new ProgressMeter(1.0); progressMeter.start(); while (it1.hasCurrent() && it2.hasCurrent()) { final SAMRecord read1= it1.getCurrent(); final SAMRecord read2= it2.getCurrent(); progressMeter.update(read1); if (!read1.getReadName().equals(read2.getReadName())){ throw new UserException.BadInput("files do not have the same exact order of reads:" + read1.getReadName() + " vs " + read2.getReadName()); } finalMatrix.add(read1.getBaseQualities(), read2.getBaseQualities()); it1.advance(); it2.advance(); } progressMeter.stop(); if (it1.hasCurrent() || it2.hasCurrent()){ throw new UserException.BadInput("files do not have the same exact number of reads"); } CloserUtil.close(reader1); CloserUtil.close(reader2); finalMatrix.printOutResults(outputFilename); if (throwOnDiff && finalMatrix.hasNonDiagonalElements()) { throw new UserException("Quality scores from the two BAMs do not match"); } return finalMatrix.hasNonDiagonalElements() ? 1 : 0; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy