All downloads are free. Search and download functionality uses the official Maven repository.

org.broadinstitute.hellbender.tools.CRAMIssue8768Detector Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools;

import org.broadinstitute.barclay.argparser.*;
import org.broadinstitute.hellbender.tools.filediagnostics.CRAMIssue8768Analyzer;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.GATKPath;
import picard.cmdline.programgroups.OtherProgramGroup;

/**
 * A diagnostic tool that analyzes a CRAM file to look for possible base corruption caused by
 * GATK issue 8768.
 *
 * 

This issue affects GATK versions 4.3.0.0 through 4.5.0.0, and is fixed in GATK 4.6.0.0.

* *

This issue also affects Picard versions 2.27.3 through 3.1.1, and is fixed in Picard 3.2.0.

* *

The bug is triggered when writing a CRAM file using one of the affected GATK/Picard versions, * and both of the following conditions are met:

* *
    *
  • At least one read is mapped to the very first base of a reference contig
  • *
  • The file contains more than one CRAM container (10,000 reads) with reads mapped to that same reference contig
  • *
* *

When both of these conditions are met, the resulting CRAM file may have corrupt containers containing reads * with an incorrect sequence.

* *

This tool writes a report to an output text file indicating whether the CRAM file appears to have read base corruption caused by issue 8768, * and listing the affected containers. By default, the output report will have a summary of the average mismatch rate for all suspected bad containers * and a few presumed good containers in order to determine if there is a large difference in the base mismatch rate.

* *

Optionally, a TSV file with the same information as the textual report, but in tabular form, can be written * using the "--output-tsv" argument.

* *

To analyze the base mismatch rate for ALL containers, use the "verbose" option.

* *

Works on files ending in .cram.

*
* *

Sample Usage:

*
 * gatk CRAMIssue8768Detector \
 *     -I input.cram \
 *     -O output_report.txt \
 *     -R reference.fasta
 * 
*
 * gatk CRAMIssue8768Detector \
 *     -I input.cram \
 *     -O output_report.txt \
 *     -R reference.fasta \
 *     --output-tsv output_report_as_table.tsv
 * 
*/ @ExperimentalFeature @WorkflowProperties @CommandLineProgramProperties( summary = "Analyze a CRAM file to check for base corruption caused by GATK issue 8768", oneLineSummary = "Analyze a CRAM file to check for base corruption caused by GATK issue 8768", programGroup = OtherProgramGroup.class ) public class CRAMIssue8768Detector extends CommandLineProgram { // default average mismatch rate threshold above which we consider the file to be corrupt private static final double DEFAULT_MISMATCH_RATE_THRESHOLD = 0.05; @Argument(fullName = StandardArgumentDefinitions.INPUT_LONG_NAME, shortName = StandardArgumentDefinitions.INPUT_SHORT_NAME, doc = "Input path of CRAM file to analyze", common = true) @WorkflowInput public GATKPath inputPath; @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "Output diagnostics text file", common = true) @WorkflowOutput public GATKPath textOutputPath; public static final String OUTPUT_TSV__ARG_NAME = "output-tsv"; @Argument(fullName = OUTPUT_TSV__ARG_NAME, shortName = OUTPUT_TSV__ARG_NAME, doc = "Output diagnostics tsv file", optional = true) @WorkflowOutput public GATKPath tsvOutputPath; @Argument(fullName = StandardArgumentDefinitions.REFERENCE_LONG_NAME, shortName = StandardArgumentDefinitions.REFERENCE_SHORT_NAME, doc = "Reference for the CRAM file", common = true) @WorkflowOutput public GATKPath referencePath; public static final String MISMATCH_RATE_THRESHOLD_ARG_NAME = "mismatch-rate-threshold"; @Argument(fullName = MISMATCH_RATE_THRESHOLD_ARG_NAME, shortName = MISMATCH_RATE_THRESHOLD_ARG_NAME, doc = "Mismatch rate threshold above which we consider the file to be corrupt", optional = true) public double mismatchRateThreshold = DEFAULT_MISMATCH_RATE_THRESHOLD; public static final String VERBOSE_ARG_NAME = "verbose"; @Argument(fullName = VERBOSE_ARG_NAME, shortName= VERBOSE_ARG_NAME, doc="Calculate and print the mismatch rate for all containers", 
optional=true) public boolean verbose = false; public static final String ECHO_ARG_NAME = "echo-to-stdout"; @Argument(fullName = ECHO_ARG_NAME, shortName= ECHO_ARG_NAME, doc="Echo text output to stdout", optional=true) public boolean echoToStdout = false; private CRAMIssue8768Analyzer cramAnalyzer; @Override protected Object doWork() { cramAnalyzer = new CRAMIssue8768Analyzer( inputPath, textOutputPath, tsvOutputPath, referencePath, mismatchRateThreshold, verbose, echoToStdout); cramAnalyzer.doAnalysis(); return cramAnalyzer.getRetCode(); } @Override protected void onShutdown() { if ( cramAnalyzer != null ) { try { cramAnalyzer.close(); } catch (Exception e) { throw new RuntimeException(e); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy