// org.broadinstitute.hellbender.tools.spark.pipelines.CountReadsSpark (Maven / Gradle / Ivy — newest version)
package org.broadinstitute.hellbender.tools.spark.pipelines;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.CoverageAnalysisProgramGroup;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import java.io.PrintStream;
import java.util.Collections;
import java.util.List;
/**
* Calculate the overall number of reads in a SAM/BAM file
*
* Input
*
* - A single SAM/BAM file
*
*
* Output
*
* - A text file containing number of reads
*
*
* Example
*
* Output number of reads to file
*
* gatk CountReadsSpark \
* -I input_reads.bam \
* -O read_count.txt
*
*
* Print read count
*
* gatk CountReadsSpark \
* -I input_reads.bam
*
*/
@DocumentedFeature
@CommandLineProgramProperties(
        summary = "Counts reads in the input SAM/BAM",
        oneLineSummary = "Counts reads in the input SAM/BAM",
        programGroup = CoverageAnalysisProgramGroup.class
)
public final class CountReadsSpark extends GATKSparkTool {
    private static final long serialVersionUID = 1L;

    /** This tool operates on reads, so an input SAM/BAM is mandatory. */
    @Override
    public boolean requiresReads() { return true; }

    /**
     * Optional output destination for the read count. When unset, the count is
     * only printed to stdout.
     */
    @Argument(
            doc = "uri for the output file: a local file path",
            shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
            fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
            optional = true
    )
    public String out;

    /**
     * Count every read: install only the pass-through filter so nothing is
     * excluded by default.
     *
     * @return a single-element list containing {@code ALLOW_ALL_READS}
     */
    @Override
    public List<ReadFilter> getDefaultReadFilters() {
        // Fixed raw type: upstream signature is List<ReadFilter>; ReadFilter is
        // already imported and erasure-compatible with the raw form.
        return Collections.singletonList(ReadFilterLibrary.ALLOW_ALL_READS);
    }

    /**
     * Counts the reads in the input, prints the total to stdout, and, when
     * {@code out} is set, also writes the total to that destination.
     *
     * @param ctx the Spark context for this tool (unused directly; reads come
     *            from {@link #getReads()})
     */
    @Override
    protected void runTool(final JavaSparkContext ctx) {
        // Fixed raw type: getReads() yields JavaRDD<GATKRead> (imported above).
        final JavaRDD<GATKRead> reads = getReads();
        final long count = reads.count();
        System.out.println(count);
        if (out != null) {
            // try-with-resources guarantees the stream is closed even on failure
            try (final PrintStream ps = new PrintStream(BucketUtils.createFile(out))) {
                ps.print(count);
            }
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy