
org.broadinstitute.hellbender.tools.sv.PrintReadCounts

package org.broadinstitute.hellbender.tools.sv;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.tribble.Feature;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleLocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.SimpleCount;
import org.broadinstitute.hellbender.utils.codecs.copynumber.SimpleCountCodec;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Collections;
import java.util.List;

/**
 * Prints (and optionally subsets) an rd (DepthEvidence) file or a counts file as one or more
 * (for multi-sample DepthEvidence files) counts files for CNV determination.
 *
 * <h3>Inputs</h3>
 * <ul>
 *     <li>
 *         A locus-sorted DepthEvidence file (name ends with ".rd.txt", ".rd.txt.gz", or ".rd.bci"),
 *         or a counts file (name ends with ".counts.tsv" or ".counts.tsv.gz").
 *     </li>
 * </ul>
 *
 * <h3>Output</h3>
 * <ul>
 *     <li>
 *         A counts file (or files) for use by one of the CNV-calling tools.
 *         The output files are named "{output-prefix}{sample-name}.counts.tsv".
 *     </li>
 * </ul>
 *
 * <h3>Usage example</h3>
 * <pre>
 *     gatk PrintReadCounts \
 *       -F input.rd.txt.gz \
 *       -L chr1
 * </pre>
 *
 * @author Ted Sharpe <[email protected]>
 */
@CommandLineProgramProperties(
        summary = "Prints count files for CNV determination.",
        oneLineSummary = "Prints count files for CNV determination.",
        programGroup = StructuralVariantDiscoveryProgramGroup.class )
@ExperimentalFeature
@DocumentedFeature
public class PrintReadCounts extends FeatureWalker<Feature> {
    public static final String INPUT_ARGNAME = "input-counts";
    public static final String OUTPUT_PREFIX_ARGNAME = "output-prefix";
    public static final String OUTPUT_FILES_ARGNAME = "output-file-list";

    @Argument(
            doc = "Input file of counts",
            fullName = INPUT_ARGNAME,
            shortName = StandardArgumentDefinitions.FEATURE_SHORT_NAME )
    private GATKPath inputPath;

    @Argument(
            doc = "Output file path prefix. Paths have the form \"{output-prefix}{sample-name}.counts.tsv\". " +
                    "Default is the current working directory.",
            fullName = OUTPUT_PREFIX_ARGNAME,
            optional = true )
    private String outputPrefix = "";

    @Argument(
            doc = "Two-column list of outputs in the form {sample-name}\\t{file-name}",
            fullName = OUTPUT_FILES_ARGNAME,
            shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
            optional = true )
    private String outputFilesFilename;

    private BufferedWriter[] writers;
    private List<String> sampleNames;

    @Override
    public GATKPath getDrivingFeaturePath() {
        return inputPath;
    }

    @Override
    protected boolean isAcceptableFeatureType( final Class<? extends Feature> featureType ) {
        return featureType.equals(DepthEvidence.class) || featureType.equals(SimpleCount.class);
    }

    @Override
    public void onTraversalStart() {
        // Counts-file inputs carry SampleLocatableMetadata; multi-sample DepthEvidence
        // inputs carry an SVFeaturesHeader.
        final Object header = getDrivingFeaturesHeader();
        if ( header instanceof SampleLocatableMetadata ) {
            initializeWriter((SampleLocatableMetadata)header);
        } else if ( header instanceof SVFeaturesHeader ) {
            initializeWriters((SVFeaturesHeader)header);
        } else {
            throw new UserException("Input file has no header.");
        }
    }

    @Override
    public void apply( final Feature feature,
                       final ReadsContext readsContext,
                       final ReferenceContext referenceContext,
                       final FeatureContext featureContext ) {
        if ( feature instanceof DepthEvidence ) {
            apply((DepthEvidence)feature);
        } else {
            apply((SimpleCount)feature);
        }
    }

    @Override
    public Object onTraversalSuccess() {
        final int nSamples = sampleNames.size();
        for ( int idx = 0; idx != nSamples; ++idx ) {
            try {
                writers[idx].close();
            } catch ( final IOException ioe ) {
                throw new UserException("Failed to close output file for sample " + sampleNames.get(idx), ioe);
            }
        }
        return null;
    }

    private void initializeWriter( final SampleLocatableMetadata metadata ) {
        final String sampleName = metadata.getSampleName();
        sampleNames = Collections.singletonList(sampleName);
        writers = new BufferedWriter[1];
        try ( final BufferedWriter outputFilesWriter = outputFilesFilename == null ?
                null : new BufferedWriter(new FileWriter(outputFilesFilename)) ) {
            writers[0] = createWriter(sampleName, metadata.toHeader(), outputFilesWriter);
        } catch ( final IOException ioe ) {
            throw new UserException("Can't open list of output files " + outputFilesFilename, ioe);
        }
    }

    private void initializeWriters( final SVFeaturesHeader featuresHeader ) {
        sampleNames = featuresHeader.getSampleNames();
        SAMSequenceDictionary dictionary = featuresHeader.getDictionary();
        if ( dictionary == null ) {
            dictionary = getMasterSequenceDictionary();
            if ( dictionary == null ) {
                throw new UserException("No dictionary available. Supply one with --sequence-dictionary.");
            }
        }
        final int nSampleNames = sampleNames.size();
        writers = new BufferedWriter[nSampleNames];
        try ( final BufferedWriter outputFilesWriter = outputFilesFilename == null ?
                null : new BufferedWriter(new FileWriter(outputFilesFilename)) ) {
            // Synthesize a minimal SAM header per sample: the dictionary plus a read group
            // carrying the sample name.
            for ( int idx = 0; idx != nSampleNames; ++idx ) {
                final SAMFileHeader samFileHeader = new SAMFileHeader(dictionary);
                final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord("GATKCopyNumber");
                final String sampleName = sampleNames.get(idx);
                readGroupRecord.setSample(sampleName);
                samFileHeader.addReadGroup(readGroupRecord);
                writers[idx] = createWriter(sampleName, samFileHeader, outputFilesWriter);
            }
        } catch ( final IOException ioe ) {
            throw new UserException("Can't open list of output files " + outputFilesFilename, ioe);
        }
    }

    private BufferedWriter createWriter( final String sampleName,
                                         final SAMFileHeader header,
                                         final BufferedWriter outputFilesWriter ) {
        final String filename = outputPrefix + sampleName + ".counts.tsv";
        if ( outputFilesWriter != null ) {
            try {
                outputFilesWriter.write(sampleName + "\t" + filename);
                outputFilesWriter.newLine();
            } catch ( final IOException ioe ) {
                throw new UserException("Can't write to list of output files " + outputFilesFilename, ioe);
            }
        }
        final BufferedWriter writer;
        try {
            writer = new BufferedWriter(new FileWriter(filename));
        } catch ( final IOException ioe ) {
            throw new UserException("Can't open " + filename + " for output.", ioe);
        }
        // Write the SAM-style text header, then the counts column header.
        try {
            new SAMTextHeaderCodec().encode(writer, header, true);
        } catch ( final RuntimeIOException ioe ) {
            throw new UserException("Can't write header to " + filename, ioe);
        }
        try {
            writer.write("CONTIG\tSTART\tEND\tCOUNT");
            writer.newLine();
        } catch ( final IOException ioe ) {
            throw new UserException("Can't write column header to " + filename, ioe);
        }
        return writer;
    }

    private void apply( final SimpleCount simpleCount ) {
        final BufferedWriter writer = writers[0];
        try {
            writer.write(SimpleCountCodec.encode(simpleCount));
            writer.newLine();
        } catch ( final IOException ioe ) {
            throw new UserException("Can't write record to output.", ioe);
        }
    }

    private void apply( final DepthEvidence depthEvidence ) {
        // don't use DepthEvidence codec to encode--we want 1-based coordinates
        final String intervalFields = depthEvidence.getContig() + "\t" +
                depthEvidence.getStart() + "\t" +
                depthEvidence.getEnd() + "\t";
        final int[] counts = depthEvidence.getCounts();
        for ( int idx = 0; idx != writers.length; ++idx ) {
            try {
                writers[idx].write(intervalFields + counts[idx]);
                writers[idx].newLine();
            } catch ( final IOException ioe ) {
                throw new UserException("Can't write to output file for sample " + sampleNames.get(idx), ioe);
            }
        }
    }
}
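For a multi-sample DepthEvidence input, the tool writes one counts file per sample named in the input's header. A sketch of such an invocation, using the --output-prefix and --output-file-list arguments declared above (the input file and output paths here are hypothetical):

    gatk PrintReadCounts \
      -F multisample.rd.txt.gz \
      --output-prefix /tmp/cnv/ \
      --output-file-list /tmp/cnv/outputs.tsv

This would write /tmp/cnv/{sample-name}.counts.tsv for each sample, and outputs.tsv would hold one tab-separated line per sample mapping the sample name to its counts file path.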




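Each output counts file begins with a SAM-style text header written by SAMTextHeaderCodec (for DepthEvidence inputs the tool synthesizes one per sample, with a "GATKCopyNumber" read group naming the sample; for counts-file inputs the header comes from the input's metadata), followed by the column header and one tab-separated, 1-based interval per record. A sketch of the expected layout, with hypothetical dictionary, sample, and count values:

    @HD	VN:1.6
    @SQ	SN:chr1	LN:248956422
    @RG	ID:GATKCopyNumber	SM:sample1
    CONTIG	START	END	COUNT
    chr1	1	100	23
    chr1	101	200	31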