All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.spark.CreateHadoopBamSplittingIndex Maven / Gradle / Ivy

There is a newer version: 4.6.0.0
Show newest version
package org.broadinstitute.hellbender.tools.spark;

import htsjdk.samtools.*;
import htsjdk.samtools.BAMSBIIndexer;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;
import htsjdk.samtools.util.FileExtensions;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.read.ReadConstants;
import org.codehaus.plexus.util.FileUtils;
import picard.cmdline.programgroups.OtherProgramGroup;

import java.io.*;

/**
 * Create a Hadoop BAM splitting index and optionally a BAM index from a BAM file.
 *
 * 

Input

* *
    *
  • A BAM file
  • *
* *

Output

* *
    *
  • BAM splitting index file
  • *
  • BAM bai index (optional)
  • *
* *

Usage example

*
 *     gatk CreateHadoopBamSplittingIndex \
 *         -I input_reads.bam \
 *         -O input_reads.bam.sbi
 * 
* or if one wants to generate bai as well *
 *     gatk CreateHadoopBamSplittingIndex \
 *         -I input_reads.bam \
 *         -O input_reads.bam.sbi \
 *         --create-bai
 * 
*/ @CommandLineProgramProperties(summary = "Create a Hadoop BAM splitting index and optionally a BAM index from a BAM file", oneLineSummary = "Create a Hadoop BAM splitting index" , programGroup = OtherProgramGroup.class) @DocumentedFeature @BetaFeature public final class CreateHadoopBamSplittingIndex extends CommandLineProgram { private static final Logger logger = LogManager.getLogger(CreateHadoopBamSplittingIndex.class); public static final String SPLITTING_INDEX_GRANULARITY_LONG_NAME = "splitting-index-granularity"; public static final String CREATE_BAI_LONG_NAME = "create-bai"; @Argument(fullName = StandardArgumentDefinitions.INPUT_LONG_NAME, shortName = StandardArgumentDefinitions.INPUT_SHORT_NAME, doc = "BAM file to create a HadoopBam splitting index for", optional = false ) public File inputBam; @Argument(fullName = StandardArgumentDefinitions.READ_VALIDATION_STRINGENCY_LONG_NAME, shortName = StandardArgumentDefinitions.READ_VALIDATION_STRINGENCY_SHORT_NAME, doc = "Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The default stringency value SILENT " + "can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) " + "do not otherwise need to be decoded.", common=true, optional=true) public ValidationStringency readValidationStringency = ReadConstants.DEFAULT_READ_VALIDATION_STRINGENCY; @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "The splitting index (SBI) file. If this is unspecified an index will be created with the same name as " + "the input file but with the additional extension " + FileExtensions.SBI, optional = true) public File output; @Argument(fullName = SPLITTING_INDEX_GRANULARITY_LONG_NAME, doc = "Splitting index granularity, an entry is created in the index every this many reads.", optional = true) public long granularity = SBIIndexWriter.DEFAULT_GRANULARITY; @Argument(fullName = CREATE_BAI_LONG_NAME, doc = "Set this to create a bai index at the same time as creating a splitting index", optional = true) public boolean createBai = false; @Override public Object doWork() { if( granularity <= 0) { throw new CommandLineException.BadArgumentValue(SPLITTING_INDEX_GRANULARITY_LONG_NAME, Long.toString(granularity), "Granularity must be > 0"); } final File index = getOutputFile(output, inputBam); if(createBai){ createBaiAndSplittingIndex(inputBam, index, granularity, readValidationStringency); } else { createOnlySplittingIndex(inputBam, index, granularity); } return 0; } private static void createOnlySplittingIndex(final File inputBam, final File index, final long granularity) { assertIsBam(inputBam); try(SeekableStream in = new SeekableFileStream(inputBam); BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(index))) { BAMSBIIndexer.createIndex(in, out, granularity); } catch (final IOException e) { throw new UserException("Couldn't create splitting index", e); } } private static void createBaiAndSplittingIndex(final File inputBam, final File index, final long granularity, final ValidationStringency readValidationStringency) { assertIsBam(inputBam); try(SamReader reader = SamReaderFactory.makeDefault() .validationStringency(readValidationStringency) .setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, true) .open(inputBam); BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(index))) { final SAMFileHeader header = reader.getFileHeader(); assertBamIsCoordinateSorted(header); final SBIIndexWriter indexer = new SBIIndexWriter(out, granularity); final BAMIndexer bamIndexer = new BAMIndexer(IOUtils.replaceExtension(index, FileExtensions.BAI_INDEX), header); BAMFileSpan lastFilePointer = null; for(final SAMRecord read : reader){ BAMFileSpan filePointer = (BAMFileSpan) read.getFileSource().getFilePointer(); indexer.processRecord(filePointer.getFirstOffset()); bamIndexer.processAlignment(read); lastFilePointer = filePointer; } long nextStart = 0; if (lastFilePointer != null && !lastFilePointer.getChunks().isEmpty()) { nextStart = lastFilePointer.getChunks().get(0).getChunkEnd(); } if (nextStart == 0) { nextStart = BlockCompressedFilePointerUtil.makeFilePointer(inputBam.length()); // default to file length (in case of no reads) } indexer.finish(nextStart, inputBam.length()); // nextStart is start of next record that would be added bamIndexer.finish(); } catch (final IOException e) { throw new UserException("Couldn't create splitting index", e); } } private static void assertBamIsCoordinateSorted(final SAMFileHeader header) { if( header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { throw new UserException.BadInput("Cannot create a " + FileExtensions.BAI_INDEX + " index for a file " + "that isn't coordinate sorted."); } } private static void assertIsBam(final File inputBam) { if(!BamFileIoUtils.isBamFile(inputBam)) { throw new UserException.BadInput("A splitting index is only relevant for a bam file, but a " + "file with extension "+ FileUtils.getExtension(inputBam.getName()) + " was specified."); } } private static File getOutputFile(final File suggestedOutput, final File input) { if(suggestedOutput == null){ return new File(input.getPath() + FileExtensions.SBI); } else { if (!suggestedOutput.getAbsolutePath().endsWith("bam" + FileExtensions.SBI)){ logger.warn("Creating a splitting index (SBI) with an extension that doesn't match " + "bam"+ FileExtensions.SBI + ". Output file: "+suggestedOutput); } return suggestedOutput; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy