package org.broadinstitute.hellbender.tools.sv;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.tribble.Feature;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.codecs.*;

import java.util.*;
/**
 * <p>Merges locus-sorted files of evidence for structural variation into a single output file.</p>
 *
 * <p>The tool can also subset the inputs to specified genomic intervals (see the usage examples
 * below), or to a specified list of samples.</p>
 *
 * <p>The evidence types and their file extensions are:</p>
 * <dl>
 *     <dt>BafEvidence</dt>
 *     <dd>The biallelic frequency of a SNP in some sample at some locus.
 *         File extensions are *.baf.txt, *.baf.txt.gz, or *.baf.bci.</dd>
 *     <dt>DepthEvidence</dt>
 *     <dd>The read counts of any number of samples on some interval.
 *         File extensions are *.rd.txt, *.rd.txt.gz, or *.rd.bci.</dd>
 *     <dt>DiscordantPairEvidence</dt>
 *     <dd>Evidence of a read pair that spans a genomic distance that's too large or too small.
 *         File extensions are *.pe.txt, *.pe.txt.gz, or *.pe.bci.</dd>
 *     <dt>SiteDepth</dt>
 *     <dd>The read counts of each base call for some sample at some locus.
 *         File extensions are *.sd.txt, *.sd.txt.gz, or *.sd.bci.</dd>
 *     <dt>SplitReadEvidence</dt>
 *     <dd>The number of chimeric reads in some sample at some locus.
 *         File extensions are *.sr.txt, *.sr.txt.gz, or *.sr.bci.</dd>
 * </dl>
 *
 * <h3>Inputs</h3>
 * <ul>
 *     <li>One or more evidence files. These must be locus-sorted, and must all contain the
 *         same type of evidence. Alternatively, a file containing a list of evidence files,
 *         one per line (see the example following this list).</li>
 *     <li>Optional: A list of sample names to extract.</li>
 * </ul>
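 *
 * <p>A hypothetical invocation using a file of evidence file names, one per line. This assumes
 * Barclay's standard expansion of collection arguments from files with a .list or .args
 * extension; the file names here are illustrative:</p>
 * <pre>
 *     gatk PrintSVEvidence \
 *       -F evidence_files.list \
 *       -O merged.sr.txt.gz
 * </pre>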
 *
 * <h3>Output</h3>
 * <ul>
 *     <li>An output file containing merged evidence from the inputs.</li>
 * </ul>
 *
 * <h3>Usage example</h3>
 * <pre>
 *     gatk PrintSVEvidence \
 *       -F file1.baf.txt.gz [-F file2.baf.txt.gz ...] \
 *       -O merged.baf.bci \
 *       --sample-names sample1 [--sample-names sample2 ...]
 * </pre>
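 *
 * <p>A hypothetical example of subsetting to a genomic interval, assuming the standard GATK
 * {@code -L}/{@code --intervals} argument; the interval and file names are illustrative:</p>
 * <pre>
 *     gatk PrintSVEvidence \
 *       -F file1.pe.txt.gz \
 *       -L chr21:25000000-26000000 \
 *       -O subset.pe.txt.gz
 * </pre>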
 *
 * @author Ted Sharpe <[email protected]>
 */
@CommandLineProgramProperties(
        summary = "Merges multiple sources of SV evidence records of some particular feature type" +
                " into a single output file. Inputs must be locus-sorted." +
                " Can also subset by regions or samples.",
        oneLineSummary = "Merges SV evidence records.",
        programGroup = StructuralVariantDiscoveryProgramGroup.class
)
@ExperimentalFeature
public class PrintSVEvidence extends MultiFeatureWalker<SVFeature> {
    public static final String EVIDENCE_FILE_NAME = "evidence-file";
    public static final String SAMPLE_NAMES_NAME = "sample-names";
    public static final String COMPRESSION_LEVEL_NAME = "compression-level";

    @Argument(
            doc = "Input feature file URI(s) with extension '"
                    + SplitReadEvidenceCodec.FORMAT_SUFFIX + "', '"
                    + DiscordantPairEvidenceCodec.FORMAT_SUFFIX + "', '"
                    + SiteDepthCodec.FORMAT_SUFFIX + "', '"
                    + BafEvidenceCodec.FORMAT_SUFFIX + "', or '"
                    + DepthEvidenceCodec.FORMAT_SUFFIX + "' (may be gzipped). "
                    + "Can also handle bci rather than txt files.",
            fullName = EVIDENCE_FILE_NAME,
            shortName = StandardArgumentDefinitions.FEATURE_SHORT_NAME )
    private List<FeatureInput<SVFeature>> inputPaths;

    @Argument(doc = "List of sample names to extract from the sources (either as a .list file or " +
            "as repeated arguments). If not specified, all samples will be merged.",
            fullName = SAMPLE_NAMES_NAME, optional = true)
    @VisibleForTesting
    Set<String> sampleNames = new LinkedHashSet<>();

    @Argument(
            doc = "Output file for features of a type matching the input. Will be indexed if it " +
                    "has a block-compressed extension (e.g. '.gz' or '.bci').",
            fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
            shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME )
    private GATKPath outputFilePath;

    @Argument(
            doc = "Output compression level",
            fullName = COMPRESSION_LEVEL_NAME,
            minValue = 0, maxValue = 9, optional = true )
    private int compressionLevel = 4;

    private boolean noSampleFiltering = false;
    private FeatureSink<SVFeature> outputSink;

    @Override
    @SuppressWarnings("unchecked")
    public void onTraversalStart() {
        super.onTraversalStart();

        // The output codec (chosen by the output file's extension) determines the feature type to write.
        final FeatureOutputCodec<? extends Feature, ?> codec = FeatureOutputCodecFinder.find(outputFilePath);
        final Class<? extends Feature> outputClass = codec.getFeatureType();
        if ( !SVFeature.class.isAssignableFrom(outputClass) ) {
            throw new UserException("Output file " + outputFilePath + " implies Feature subtype " +
                    outputClass.getSimpleName() + " but this tool requires an SVFeature subtype.");
        }

        // Every input must produce features assignable to the output type.
        for ( final FeatureInput<SVFeature> input : inputPaths ) {
            try {
                final Class<? extends Feature> inputClass =
                        input.getFeatureCodecClass().getDeclaredConstructor().newInstance().getFeatureType();
                if ( !outputClass.isAssignableFrom(inputClass) ) {
                    throw new UserException("Incompatible feature input " + input.getFeaturePath() +
                            " produces features of type " + inputClass.getSimpleName() +
                            " rather than features of type " + outputClass.getSimpleName() +
                            " as dictated by the output path " + outputFilePath);
                }
            } catch ( final ReflectiveOperationException roe ) {
                throw new GATKException("Failed to instantiate codec " +
                        input.getFeatureCodecClass().getSimpleName());
            }
        }

        if ( sampleNames.isEmpty() ) {
            // use the complete set of sample names we found in the headers of the feature files
            sampleNames.addAll(getSampleNames());
            if ( sampleNames.isEmpty() ) {
                noSampleFiltering = true;
            }
        }

        // the validity of this cast was checked at the beginning of this method
        outputSink = (FeatureSink<SVFeature>)codec.makeSortMerger(outputFilePath,
                getDictionary(), new ArrayList<>(sampleNames), compressionLevel);
    }

    @Override
    public void apply( final SVFeature featureArg, final Object header,
                       final ReadsContext readsContext, final ReferenceContext referenceContext ) {
        final SVFeature feature;
        if ( noSampleFiltering ) {
            feature = featureArg;
        } else {
            // Drop features that carry no data for the requested samples.
            feature = featureArg.extractSamples(sampleNames, header);
            if ( feature == null ) {
                return;
            }
        }
        outputSink.write(feature);
    }

    @Override
    public Object onTraversalSuccess() {
        super.onTraversalSuccess();
        outputSink.close();
        return null;
    }
}