All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.engine.MultiVariantWalkerGroupedOnStart Maven / Gradle / Ivy

There is a newer version: 4.6.0.0
Show newest version
package org.broadinstitute.hellbender.engine;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.OverlapDetector;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;


/**
 * A MultiVariantWalker that walks over multiple variant context sources in reference order and emits to client tools
 * groups of all input variant contexts by their start position. This is intended to mimic GATK3 traversal behavior for
 * some tools.
 *
 * As such, the argument '-ignore-variants-starting-outside-interval' has been provided to mimic GATK3's behavior
 * only presenting variants that start inside the requested interval regardless of whether there is a spanning variant.
 *
 * Client tools must implement apply(List variantContexts, ReferenceContext referenceContext)
 */
public abstract class MultiVariantWalkerGroupedOnStart extends MultiVariantWalker {
    private List currentVariants = new ArrayList<>();
    private int firstCurrentVariantStart;
    private int lastCurrentVariantStart;
    private List currentReadsContexts = new ArrayList<>();
    private OverlapDetector overlapDetector;

    public static final String IGNORE_VARIANTS_THAT_START_OUTSIDE_INTERVAL = "ignore-variants-starting-outside-interval";

    public static final String COMBINE_VARIANTS_DISTANCE = "combine-variants-distance";

    public static final String MAX_COMBINED_DISTANCE = "max-distance";

    public static final String REFERENCE_WINDOW_PADDING = "ref-padding";

    /**
     * this option has no effect unless intervals are specified.
     * 

* This exists to mimic GATK3 interval traversal patterns */ @Advanced @Argument(fullName = IGNORE_VARIANTS_THAT_START_OUTSIDE_INTERVAL, doc = "Restrict variant output to sites that start within provided intervals (only applies when an interval is specified)", optional = true) private boolean ignoreIntervalsOutsideStart = false; @Advanced @Argument(fullName = COMBINE_VARIANTS_DISTANCE, doc = "Maximum distance for variants to be grouped together", optional = true) protected int distanceToCombineVariants = defaultDistanceToGroupVariants(); @Advanced @Argument(fullName = MAX_COMBINED_DISTANCE, doc = "Maximum distance for variants to be grouped together", optional = true) protected int maxCombinedDistance = defaultMaxGroupedSpan(); @Advanced @Argument(fullName = REFERENCE_WINDOW_PADDING, doc = "Number of bases on either side to expand spanning reference window", optional = true) protected int referenceWindowPadding = defaultReferenceWindowPadding(); // override to group variants that start nearby but not at the same locus protected int defaultDistanceToGroupVariants() { return 0; } // override to change reference padding protected int defaultReferenceWindowPadding() { return 1; } // override to cap the size span of combined variants protected int defaultMaxGroupedSpan() { return Integer.MAX_VALUE; } @Override public boolean requiresReference() { return true; } /** * This method keeps track of all the variants it is passed and will feed all the variants that start at the same * site to the reduce method. * * {@inheritDoc} */ @Override public final void apply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { // Filtering out variants that start outside of the specified intervals if (ignoreIntervalsOutsideStart && !isWithinInterval(new SimpleInterval(variant.getContig(), variant.getStart(), variant.getStart()))) { return; } // Collecting all the reads that start at a particular base into one. if (currentVariants.isEmpty()) { firstCurrentVariantStart = variant.getStart(); } else if (!currentVariants.get(0).contigsMatch(variant) || lastCurrentVariantStart < variant.getStart() - distanceToCombineVariants || firstCurrentVariantStart < variant.getStart() - maxCombinedDistance) { // Emptying any sites which should emit a new VC since the last one apply(new ArrayList<>(currentVariants), currentReadsContexts); currentVariants.clear(); currentReadsContexts.clear(); firstCurrentVariantStart = variant.getStart(); } currentVariants.add(variant); currentReadsContexts.add(readsContext); lastCurrentVariantStart = variant.getStart(); } /** * This method must be implemented by tool authors. * * This is the primary traversal for any MultiVariantWalkerGroupedOnStart walkers. Will traverse over input variant contexts * and call #apply() exactly once for each unique reference start position. All variants starting at each locus * across source files will be grouped and passed as a list of VariantContext objects. * @param variantContexts VariantContexts from driving variants with matching start positon * NOTE: This will never be empty * @param referenceContext ReferenceContext object covering the reference of the longest spanning VariantContext * @param readsContexts */ public abstract void apply(List variantContexts, ReferenceContext referenceContext, final List readsContexts); public void apply(List variantContexts, final List readsContexts) { apply(variantContexts, makeSpanningReferenceContext(variantContexts, referenceWindowPadding), readsContexts); } /** * Helper method that ensures the reference context it returns is adequate to span the length of all the accumulated * VariantContexts. It assumes that all variant contexts in currentVariants have the same contig. */ private ReferenceContext makeSpanningReferenceContext(final List variantContexts, final int referenceWindowPadding) { Utils.nonEmpty(variantContexts, "Must have at least one current variant context"); final List contigs = variantContexts.stream().map(VariantContext::getContig).distinct().collect(Collectors.toList()); Utils.validate(contigs.size() == 1, "variant contexts should all have the same contig"); final int minStart = variantContexts.stream().mapToInt(VariantContext::getStart).min().getAsInt(); final int maxEnd = variantContexts.stream().mapToInt(VariantContext::getEnd).max().getAsInt(); final SimpleInterval combinedInterval = new SimpleInterval(contigs.get(0), minStart, maxEnd); final ReferenceContext combinedReferenceContext = new ReferenceContext(reference, combinedInterval); combinedReferenceContext.setWindow(referenceWindowPadding,referenceWindowPadding); return combinedReferenceContext; } /** * {@inheritDoc} * * Implementation of multi-variant grouped on start traversal. * * NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the * engine package that extends this class. It is not meant to be overridden by tools outside of the * engine package. */ @Override public void traverse() { beforeTraverse(); super.traverse(); afterTraverse(); } /** * Marked final so that tool authors don't override it. Tool authors should override {@link #onTraversalStart} instead. */ private void beforeTraverse() { overlapDetector = hasUserSuppliedIntervals() ? OverlapDetector.create(intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary())) : null; } /** * @param loc locatable to query * @return true if the query loc is entirely contained by the interval, true if no interval */ protected final boolean isWithinInterval(Locatable loc) { return (overlapDetector==null || overlapDetector.overlapsAny(loc)); } /** * Clear accumulated reads before {@link #onTraversalSuccess()} is accessed */ private void afterTraverse() { // Clearing the accumulator if (currentVariants.isEmpty()) { logger.warn("Error: The requested interval contained no data in source VCF files"); } else { apply(currentVariants, currentReadsContexts); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy