All Downloads are FREE. Search and download functionalities are using the official Maven repository.

picard.sam.SetNmMdAndUqTags Maven / Gradle / Ivy

/*
 * The MIT License
 *
 * Copyright (c) 2016 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package picard.sam;

import htsjdk.samtools.*;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.cmdline.CommandLineProgram;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;

import java.io.File;
import java.util.stream.StreamSupport;

/**
 * Fixes the NM, MD, and UQ tags in a SAM or BAM file.
 *
 * 

This tool takes in a coordinate-sorted SAM or BAM file and calculates the NM, MD, and UQ * tags by comparing with the reference.

* *

This may be needed when MergeBamAlignment was run with SORT_ORDER other than 'coordinate' * and thus could not fix these tags then. The input must be coordinate sorted in order to run. * If specified, the MD and NM tags can be ignored and only the UQ tag be set.

* *

Inputs

*

*

  • The BAM or SAM file to fix
  • *
  • A reference sequence
  • *

    * *

    Output

    *

    * A BAM or SAM output file with recalculated NM, MD, and UQ tags *

    * *

    *

    Usage example:

    *

    Fix the tags in a BAM file:

    *
     *     java -jar picard.jar SetNmMdAndUqTags \
     *          R=reference_sequence.fasta \
     *          I=sorted.bam \
     *          O=fixed.bam
     * 
    *

    * * @author Yossi Farjoun */ @CommandLineProgramProperties( summary = SetNmMdAndUqTags.USAGE_DETAILS, oneLineSummary = SetNmMdAndUqTags.USAGE_SUMMARY, programGroup = ReadDataManipulationProgramGroup.class) @DocumentedFeature public class SetNmMdAndUqTags extends CommandLineProgram { static final String USAGE_SUMMARY = "Fixes the NM, MD, and UQ tags in a SAM file "; static final String USAGE_DETAILS = "This tool takes in a coordinate-sorted SAM or BAM and calculates"+ "the NM, MD, and UQ tags by comparing with the reference."+ "
    " + "This may be needed when MergeBamAlignment was run with SORT_ORDER other than 'coordinate' and thus"+ "could not fix these tags then. The input must be coordinate sorted in order to run. If specified,"+ "the MD and NM tags can be ignored and only the UQ tag be set."+ "
    "+ "

    Usage example:

    " + "
    " +
                "java -jar picard.jar SetNmMdAndUqTags 
    " + " R=reference_sequence.fasta
    " + " I=sorted.bam
    " + " O=fixed.bam
    "+ "
    "; @Argument(doc = "The BAM or SAM file to fix. ", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME) public File INPUT; @Argument(doc = "The fixed BAM or SAM output file. ", shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME) public File OUTPUT; @Argument(doc = "Whether the file contains bisulfite sequence (used when calculating the NM tag).") public boolean IS_BISULFITE_SEQUENCE = false; @Argument(doc = "Only set the UQ tag, ignore MD and NM.") public boolean SET_ONLY_UQ = false; @Override protected boolean requiresReference() { return true; } private final Log log = Log.getInstance(SetNmMdAndUqTags.class); public static void main(final String[] argv) { new SetNmMdAndUqTags().instanceMainWithExit(argv); } protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT); if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { throw new SAMException("Input must be coordinate-sorted for this program to run. Found: " + reader.getFileHeader().getSortOrder()); } final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT); writer.setProgressLogger( new ProgressLogger(log, (int) 1e7, "Wrote", "records")); final ReferenceSequenceFileWalker refSeqWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE); StreamSupport.stream(reader.spliterator(), false) .peek(rec -> fixRecord(rec, refSeqWalker)) .forEach(writer::addAlignment); CloserUtil.close(reader); writer.close(); return 0; } private void fixRecord(SAMRecord record, ReferenceSequenceFileWalker refSeqWalker){ if (!record.getReadUnmappedFlag()) { if (SET_ONLY_UQ) { AbstractAlignmentMerger.fixUq(record, refSeqWalker, IS_BISULFITE_SEQUENCE); } else { AbstractAlignmentMerger.fixNmMdAndUq(record, refSeqWalker, IS_BISULFITE_SEQUENCE); } } } }




    © 2015 - 2024 Weber Informatics LLC | Privacy Policy