org.broadinstitute.hellbender.tools.ConvertHeaderlessHadoopBamShardToBam Maven / Gradle / Ivy
The newest version!
package org.broadinstitute.hellbender.tools;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.spark.SparkUtils;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import java.io.File;
import java.io.IOException;
/**
* This is a troubleshooting utility that converts a headerless BAM shard (e.g., a part-r-00000.bam, part-r-00001.bam, etc.),
* produced by a Spark tool with --sharded-output set to true, into a readable BAM file by adding a header and a BGZF terminator.
*
* This tool is not intended for use with BAM shards that already have a header -- these shards are
* already readable using samtools.
*
* Input
*
* - A headerless BAM shard
* - A well-formed BAM whose header will be used for the converted fragment
*
*
* Output
*
* - The converted BAM shard
*
*
* Usage example
*
* gatk ConvertHeaderlessHadoopBamShardToBam \
* --bam-shard part-r-00000.bam \
* --bam-with-header input.bam \
* -O output.bam
*
*/
@DocumentedFeature
@CommandLineProgramProperties(
summary = "This is a troubleshooting utility that converts a headerless BAM shard (e.g., a part-r-00000.bam, part-r-00001.bam, etc.)," +
" produced by a Spark tool with --sharded-output set to true, into a readable BAM file by adding a header and a BGZF terminator.",
oneLineSummary = "Convert a headerless BAM shard into a readable BAM",
programGroup = ReadDataManipulationProgramGroup.class
)
@BetaFeature
public final class ConvertHeaderlessHadoopBamShardToBam extends CommandLineProgram {
public static final String BAM_SHARD_LONG_NAME = "bam-shard";
public static final String BAM_WITH_HEADER_LONG_NAME = "bam-with-header";
public static final String OUTPUT_LONG_NAME = StandardArgumentDefinitions.OUTPUT_LONG_NAME;
public static final String OUTPUT_SHORT_NAME = StandardArgumentDefinitions.OUTPUT_SHORT_NAME;
@Argument(fullName = BAM_SHARD_LONG_NAME, doc = "Headerless Hadoop BAM shard to be converted into a readable BAM", optional = false)
private File bamShard = null;
@Argument(fullName = BAM_WITH_HEADER_LONG_NAME, doc = "Well-formed BAM whose header to use for the converted fragment", optional = false)
private File bamWithHeader = null;
@Argument(shortName = OUTPUT_SHORT_NAME, fullName = OUTPUT_LONG_NAME, doc = "Location to write the converted BAM shard", optional = false)
private File outputBam = null;
@Override
protected Object doWork(){
SAMFileHeader header = null;
try ( final SamReader headerReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamWithHeader) ) {
header = headerReader.getFileHeader();
}
catch ( IOException e ) {
throw new UserException("Error reading header from " + bamWithHeader.getAbsolutePath(), e);
}
SparkUtils.convertHeaderlessHadoopBamShardToBam(bamShard, header, outputBam);
return null;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy