All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.spark.ApplyBQSRSpark Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.spark;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
import org.broadinstitute.hellbender.tools.ApplyBQSRArgumentCollection;
import org.broadinstitute.hellbender.tools.spark.transforms.ApplyBQSRSparkFn;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.recalibration.RecalibrationReport;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;

/**
 * Apply base quality score recalibration with Spark.
 * 

*

This tool performs the second pass in a two-stage process called Base Quality Score Recalibration (BQSR). * Specifically, it recalibrates the base qualities of the input reads based on the recalibration table produced by * the BaseRecalibrator tool, and outputs a recalibrated BAM or CRAM file.

*

See Tutorial#10060 * for an example of how to set up and run a Spark tool on a cloud Spark cluster. *

*

Usage examples

*
 * gatk ApplyBQSRSpark \
 * -I gs://my-gcs-bucket/input.bam \
 * -bqsr gs://my-gcs-bucket/recalibration.table \
 * -O gs://my-gcs-bucket/output.bam \
 * -- \
 * --sparkRunner GCS \
 * --cluster my-dataproc-cluster
 * 
*

* To additionally bin base qualities: *

 * gatk ApplyBQSRSpark \
 * -I gs://my-gcs-bucket/input.bam \
 * -bqsr gs://my-gcs-bucket/recalibration.table \
 * --static-quantized-quals 10 --static-quantized-quals 20 \
 * --static-quantized-quals 30 --static-quantized-quals 40 \
 * -O gs://my-gcs-bucket/output.bam \
 * -- \
 * --sparkRunner GCS \
 * --cluster my-dataproc-cluster
 * 
*/ @CommandLineProgramProperties( summary=ApplyBQSRSpark.USAGE_SUMMARY, oneLineSummary=ApplyBQSRSpark.USAGE_ONE_LINE_SUMMARY, programGroup = ReadDataManipulationProgramGroup.class ) @DocumentedFeature @BetaFeature public final class ApplyBQSRSpark extends GATKSparkTool { private static final long serialVersionUID = 0l; static final String USAGE_ONE_LINE_SUMMARY = "Apply base quality score recalibration on Spark"; static final String USAGE_SUMMARY = "Apply a linear base quality recalibration model trained with the BaseRecalibrator tool on Spark."; @Override public boolean requiresReads() { return true; } @Argument(doc = "the output bam", shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, optional = false) private String output; /** * Enables recalibration of base qualities. * The covariates tables are produced by the BaseRecalibrator tool. * Please be aware that you should only run recalibration with the covariates file created on the same input bam(s). */ @Argument(fullName= StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME, shortName= StandardArgumentDefinitions.BQSR_TABLE_SHORT_NAME, doc="Input covariates table file for base quality score recalibration") private String bqsrRecalFile; @ArgumentCollection private ApplyBQSRArgumentCollection applyBQSRArgs = new ApplyBQSRArgumentCollection(); @Override protected void runTool(JavaSparkContext ctx) { JavaRDD initialReads = getReads(); Broadcast recalibrationReportBroadCast = ctx.broadcast(new RecalibrationReport(BucketUtils.openFile(bqsrRecalFile))); final JavaRDD recalibratedReads = ApplyBQSRSparkFn.apply(initialReads, recalibrationReportBroadCast, getHeaderForReads(), applyBQSRArgs); writeReads(ctx, output, recalibratedReads); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy