 
                        
        
                        
        org.broadinstitute.hellbender.tools.walkers.validation.AnnotateVcfWithBamDepth Maven / Gradle / Ivy
 The newest version!
        
        package org.broadinstitute.hellbender.tools.walkers.validation;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.apache.commons.lang3.mutable.MutableInt;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.VariantWalker;
import picard.cmdline.programgroups.VariantEvaluationProgramGroup;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import java.util.HashSet;
import java.util.Set;
/**
 * Annotate every variant in a VCF with the depth at that locus in a bam.  Note that this bam is *not* the bam
 * from which the vcf was derived, otherwise we would simply use the DP INFO field.
 *
 * 
 * In the CRSP sensitivity validation, we have a bam derived from a pool of 5, 10, or 20 samples
 * and a vcf of all known variants in those samples.  The pooled bam is a simulated tumor and
 * the vcf of individual variants is our truth data.  We annotate the truth data with the depth
 * in the pooled bam in order to bin the results of our sensitivity analysis by depth.
 * 
 *
 * Example
 *
 * 
 * gatk --java-options "-Xmx4g" AnnotateVcfWithBamDepth \
 *   -V input.vcf \
 *   -I reads.bam \
 *   -O output.vcf
 * 
 *
 * Created by David Benjamin on 1/30/17.
 */
@CommandLineProgramProperties(
        summary = "Annotate a vcf with a bam's read depth at each variant locus",
        oneLineSummary = "(Internal) Annotate a vcf with a bam's read depth at each variant locus",
        programGroup = VariantEvaluationProgramGroup.class
)
@DocumentedFeature
public class AnnotateVcfWithBamDepth extends VariantWalker {
    @Argument(fullName= StandardArgumentDefinitions.OUTPUT_LONG_NAME,
            shortName=StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
            doc="The output filtered VCF file",
            optional=false)
    private final GATKPath outputVcf = null;
    private VariantContextWriter vcfWriter;
    public static final String POOLED_BAM_DEPTH_ANNOTATION_NAME = "BAM_DEPTH";
    @Override
    public void onTraversalStart() {
        final VCFHeader inputHeader = getHeaderForVariants();
        final Set headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
        headerLines.add(new VCFInfoHeaderLine(POOLED_BAM_DEPTH_ANNOTATION_NAME, 1, VCFHeaderLineType.Integer, "pooled bam depth"));
        headerLines.addAll(getDefaultToolVCFHeaderLines());
        final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
        vcfWriter = createVCFWriter(outputVcf);
        vcfWriter.writeHeader(vcfHeader);
    }
    @Override
    public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) {
        final MutableInt depth = new MutableInt(0);
        for (final GATKRead read : readsContext) {
            if (!read.failsVendorQualityCheck() && !read.isDuplicate() && !read.isUnmapped() && read.getEnd() > read.getStart() && new SimpleInterval(read).contains(vc) ) {
                depth.increment();
            }
        }
        vcfWriter.add(new VariantContextBuilder(vc).attribute(POOLED_BAM_DEPTH_ANNOTATION_NAME, depth.intValue()).make());
    }
    @Override
    public void closeTool() {
        if ( vcfWriter != null ) {
            vcfWriter.close();
        }
    }
}
 © 2015 - 2025 Weber Informatics LLC | Privacy Policy