All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.walkers.validation.AnnotateVcfWithBamDepth Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.walkers.validation;

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.apache.commons.lang3.mutable.MutableInt;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.VariantWalker;
import picard.cmdline.programgroups.VariantEvaluationProgramGroup;

import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.util.HashSet;
import java.util.Set;

/**
 * Annotate every variant in a VCF with the depth at that locus in a bam.  Note that this bam is *not* the bam
 * from which the vcf was derived, otherwise we would simply use the DP INFO field.
 *
 * 

* In the CRSP sensitivity validation, we have a bam derived from a pool of 5, 10, or 20 samples * and a vcf of all known variants in those samples. The pooled bam is a simulated tumor and * the vcf of individual variants is our truth data. We annotate the truth data with the depth * in the pooled bam in order to bin the results of our sensitivity analysis by depth. *

* *

Example

* *
 * gatk --java-options "-Xmx4g" AnnotateVcfWithBamDepth \
 *   -V input.vcf \
 *   -I reads.bam \
 *   -O output.vcf
 * 
* * Created by David Benjamin on 1/30/17. */ @CommandLineProgramProperties( summary = "Annotate a vcf with a bam's read depth at each variant locus", oneLineSummary = "(Internal) Annotate a vcf with a bam's read depth at each variant locus", programGroup = VariantEvaluationProgramGroup.class ) @DocumentedFeature public class AnnotateVcfWithBamDepth extends VariantWalker { @Argument(fullName= StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName=StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc="The output filtered VCF file", optional=false) private final GATKPath outputVcf = null; private VariantContextWriter vcfWriter; public static final String POOLED_BAM_DEPTH_ANNOTATION_NAME = "BAM_DEPTH"; @Override public void onTraversalStart() { final VCFHeader inputHeader = getHeaderForVariants(); final Set headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder()); headerLines.add(new VCFInfoHeaderLine(POOLED_BAM_DEPTH_ANNOTATION_NAME, 1, VCFHeaderLineType.Integer, "pooled bam depth")); headerLines.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); vcfWriter = createVCFWriter(outputVcf); vcfWriter.writeHeader(vcfHeader); } @Override public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) { final MutableInt depth = new MutableInt(0); for (final GATKRead read : readsContext) { if (!read.failsVendorQualityCheck() && !read.isDuplicate() && !read.isUnmapped() && read.getEnd() > read.getStart() && new SimpleInterval(read).contains(vc) ) { depth.increment(); } } vcfWriter.add(new VariantContextBuilder(vc).attribute(POOLED_BAM_DEPTH_ANNOTATION_NAME, depth.intValue()).make()); } @Override public void closeTool() { if ( vcfWriter != null ) { vcfWriter.close(); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy