All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.walkers.validation.RemoveNearbyIndels Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.walkers.validation;

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.VariantWalker;
import picard.cmdline.programgroups.VariantManipulationProgramGroup;

import java.util.ArrayDeque;

/**
 * Remove indels that are close to another indel from a vcf file.
 *
 * 

* This is a preprocessing step for the CRSP sensitivity validation truth data. For any pair of indels that are within * a minimum allowed distance (given by --min-indel-spacing), both indels are removed, regardless of any intervening * non-indel variants. *

* *

Usage example

* *
 * gatk RemoveNearbyIndels \
 *   -V input.vcf \
 *   -O output.vcf \
 *   --min-indel-spacing 20
 * 
* * Created by David Benjamin on 1/30/17. */ @CommandLineProgramProperties( summary = "Remove indels that are close to each other from a vcf. For any pair of indels that are within" + "some minimum allowed distance, both indels are removed, regardless of any intervening non-indel variants.", oneLineSummary = "(Internal) Remove indels from the VCF file that are close to each other.", programGroup = VariantManipulationProgramGroup.class ) @DocumentedFeature public class RemoveNearbyIndels extends VariantWalker { public static final String MIN_INDEL_SPACING_NAME = "min-indel-spacing"; @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "The output filtered VCF file", optional = false) private final GATKPath outputVcf = null; @Argument(fullName = MIN_INDEL_SPACING_NAME, shortName = MIN_INDEL_SPACING_NAME, doc = "Minimum spacing between neighboring indels to be emitted", optional = false) private int minIndelSpacing = 1; private VariantContextWriter vcfWriter; private VariantBuffer variantBuffer = new VariantBuffer(); @Override public void onTraversalStart() { final VCFHeader inputHeader = getHeaderForVariants(); final VCFHeader vcfHeader = new VCFHeader(inputHeader.getMetaDataInSortedOrder(), inputHeader.getGenotypeSamples()); getDefaultToolVCFHeaderLines().forEach(vcfHeader::addMetaDataLine); vcfWriter = createVCFWriter(outputVcf); vcfWriter.writeHeader(vcfHeader); } @Override public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) { variantBuffer.add(vc); } @Override public Object onTraversalSuccess() { variantBuffer.emitRemaining(); return "SUCCESS"; } @Override public void closeTool() { if ( vcfWriter != null ) { vcfWriter.close(); } } private final class VariantBuffer { private VariantContext lastIndel = null; // INVARIANT: this buffer will contain at most one indel private final ArrayDeque buffer = new ArrayDeque<>(); public VariantBuffer() { } public void add(final VariantContext vc) { if (lastIndel == null && vc.isIndel()) { buffer.add(vc); lastIndel = vc; } else if (nearby(lastIndel, vc)) { if (vc.isIndel()) { emitAllNonIndels(); // throw out {@code lastIndel} and {@code vc} } else { buffer.add(vc); } } else { emitAllVariants(); buffer.add(vc); } lastIndel = vc.isIndel() ? vc : lastIndel; } private boolean nearby(final VariantContext left, final VariantContext right) { return left != null && left.getContig().equals(right.getContig()) && (right.getStart() - left.getEnd() < minIndelSpacing); } private void emitAllNonIndels() { buffer.stream().filter(vc -> !vc.isIndel()).forEach(vcfWriter::add); buffer.clear(); } private void emitAllVariants() { buffer.forEach(vcfWriter::add); buffer.clear(); } public void emitRemaining() { buffer.stream().filter(vc -> !(vc.isIndel() && nearby(lastIndel, vc)) || vc == lastIndel).forEach(vcfWriter::add); buffer.clear(); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy