org.broadinstitute.hellbender.tools.spark.pipelines.FlagStatSpark
package org.broadinstitute.hellbender.tools.spark.pipelines;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
import org.broadinstitute.hellbender.tools.FlagStat.FlagStatus;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;

import java.io.PrintStream;
/**
 * Spark tool to accumulate flag statistics given a BAM file, e.g. total number of reads with QC failure flag set,
 * number of duplicates, percentage mapped, etc.
 *
 * <p>
 * This tool can be run without explicitly specifying Spark options; the example command below, which passes no
 * Spark options, will run locally. See Tutorial#10060 for an example of how to set up and run a Spark tool on
 * a cloud Spark cluster (a cluster invocation sketch also appears at the end of this file).
 * </p>
 *
 * <h3>Input</h3>
 * <ul>
 *     <li>A BAM file containing aligned read data</li>
 * </ul>
 *
 * <h3>Output</h3>
 * <ul>
 *     <li>Accumulated flag statistics</li>
 * </ul>
 *
 * <h3>Example Usage</h3>
 * <pre>
 * gatk FlagStatSpark \
 *     -I input.bam \
 *     -O statistics.txt
 * </pre>
 */
@DocumentedFeature
@CommandLineProgramProperties(
        summary = "Spark tool to accumulate flag statistics given a BAM file, e.g. total number of reads with QC failure flag set, " +
                "number of duplicates, percentage mapped, etc.",
        oneLineSummary = "Spark tool to accumulate flag statistics",
        programGroup = DiagnosticsAndQCProgramGroup.class
)
public final class FlagStatSpark extends GATKSparkTool {
    private static final long serialVersionUID = 1L;

    @Override
    public boolean requiresReads() { return true; }

    @Argument(
            doc = "URI for the output file: a local file path",
            shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
            fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
            optional = true
    )
    public String out;
    @Override
    protected void runTool(final JavaSparkContext ctx) {
        // Parameterize the RDD with GATKRead so the FlagStatus method references type-check.
        final JavaRDD<GATKRead> reads = getReads();

        // Fold each read into a partition-local FlagStatus, then merge the per-partition results.
        final FlagStatus result = reads.aggregate(new FlagStatus(), FlagStatus::add, FlagStatus::merge);

        System.out.println(result);

        if (out != null) {
            try (final PrintStream ps = new PrintStream(BucketUtils.createFile(out))) {
                ps.print(result);
            }
        }
    }
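
    /*
     * Aggregation sketch: JavaRDD#aggregate takes a zero value, a per-element fold
     * (seqOp), and a combiner (combOp). In runTool above, FlagStatus plays both the
     * accumulator and result roles. The Long-based example below is a hypothetical
     * illustration of the same pattern (not part of this tool), counting elements:
     *
     *     // zero = 0L: one fresh count per partition
     *     // seqOp: fold one element into the partition-local count
     *     // combOp: sum the per-partition counts into the final result
     *     final long count = reads.aggregate(0L, (acc, read) -> acc + 1L, Long::sum);
     */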
}
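
/*
 * Invocation sketch for the cluster case referenced in the class javadoc. This
 * assumes the standard GATK launcher convention that arguments after a "--"
 * separator go to the Spark runner rather than to the tool; the master URL is
 * illustrative, not taken from this file:
 *
 *     gatk FlagStatSpark \
 *         -I input.bam \
 *         -O statistics.txt \
 *         -- \
 *         --spark-master spark://host:7077
 */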