package org.broadinstitute.hellbender.tools.spark.pipelines;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.CoverageAnalysisProgramGroup;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.io.PrintStream;
import java.util.Collections;
import java.util.List;

/**
 * Calculate the overall number of reads in a SAM/BAM file
 *
 * <h3>Input</h3>
 * <ul>
 *     <li>A single BAM file</li>
 * </ul>
 *
 * <h3>Output</h3>
 * <ul>
 *     <li>A text file containing the number of reads</li>
 * </ul>
 *
 * <h3>Examples</h3>
 *
 * <h4>Output number of reads to file</h4>
 * <pre>
 *   gatk CountReadsSpark \
 *     -I input_reads.bam \
 *     -O read_count.txt
 * </pre>
 *
 * <h4>Print read count</h4>
 * <pre>
 *   gatk CountReadsSpark \
 *     -I input_reads.bam
 * </pre>
 */
@DocumentedFeature
@CommandLineProgramProperties(
        summary = "Counts reads in the input SAM/BAM",
        oneLineSummary = "Counts reads in the input SAM/BAM",
        programGroup = CoverageAnalysisProgramGroup.class
)
public final class CountReadsSpark extends GATKSparkTool {
    private static final long serialVersionUID = 1L;

    @Override
    public boolean requiresReads() {
        return true;
    }

    @Argument(doc = "uri for the output file: a local file path",
            shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
            fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
            optional = true)
    public String out;

    @Override
    public List<ReadFilter> getDefaultReadFilters() {
        // By default every read is counted; no reads are filtered out.
        return Collections.singletonList(ReadFilterLibrary.ALLOW_ALL_READS);
    }

    @Override
    protected void runTool(final JavaSparkContext ctx) {
        // Count all reads in the input, print the total, and optionally write it to the output file.
        final JavaRDD<GATKRead> reads = getReads();
        final long count = reads.count();
        System.out.println(count);
        if (out != null) {
            try (final PrintStream ps = new PrintStream(BucketUtils.createFile(out))) {
                ps.print(count);
            }
        }
    }
}
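
For readers who want to experiment with the counting pattern outside of GATK, the following is a minimal, self-contained sketch of the same count-then-write logic used in runTool() above, built on plain Spark only. The class name CountRecordsSketch, its string records, and the local-file output are illustrative assumptions, not part of GATK; GATKSparkTool's getReads() and BucketUtils.createFile() are replaced here with parallelize() and an ordinary PrintStream.

// Standalone sketch (assumed names; not part of GATK): count records in an RDD,
// print the total, and optionally write it to a file, mirroring CountReadsSpark.runTool().
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Arrays;

final class CountRecordsSketch {
    public static void main(final String[] args) throws FileNotFoundException {
        final SparkConf conf = new SparkConf()
                .setAppName("CountRecordsSketch")
                .setMaster("local[*]");
        try (final JavaSparkContext ctx = new JavaSparkContext(conf)) {
            // Stand-in for getReads(): any RDD of records exercises the same counting pattern.
            final JavaRDD<String> records = ctx.parallelize(Arrays.asList("read1", "read2", "read3"));

            // count() runs a Spark job and returns the total number of elements in the RDD.
            final long count = records.count();
            System.out.println(count);

            // Optional output file, analogous to the tool's -O argument.
            final String out = args.length > 0 ? args[0] : null;
            if (out != null) {
                try (final PrintStream ps = new PrintStream(out)) {
                    ps.print(count);
                }
            }
        }
    }
}

As in the tool itself, the count is always printed to stdout, and a file is written only when an output path is supplied.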



