All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.engine.VariantLocusWalker Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.engine;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.filters.CountingReadFilter;
import org.broadinstitute.hellbender.engine.filters.CountingVariantFilter;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.transformers.VariantTransformer;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.iterators.IntervalLocusIterator;
import org.broadinstitute.hellbender.utils.iterators.ShardedIntervalIterator;

import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * VariantLocusWalker processes variants from a single source, grouped by locus overlap, or optionally one
 * at a time in order, with optional contextual information from a reference, sets of reads, and/or supplementary sources
 * of Features. By-variant traversal is opt in, via {@link #changeTraversalModeToByVariant()}, otherwise only loci with
 * overlapping variants are traversed.
 *
 * VariantLocusWalker authors must implement the {@link #apply} method to process each variant, and may optionally implement
 * {@link #onTraversalStart}, {@link #onTraversalSuccess} and/or {@link #closeTool}.
 */
public abstract class VariantLocusWalker extends VariantWalkerBase {

    // NOTE: using String rather than FeatureInput here so that we can keep this driving source
    //       of variants separate from any other potential sources of Features
    @Argument(fullName = StandardArgumentDefinitions.VARIANT_LONG_NAME, shortName = StandardArgumentDefinitions.VARIANT_SHORT_NAME, doc = "A VCF file containing variants", common = false, optional = false)
    public String drivingVariantFile;

    // NOTE: keeping the driving source of variants separate from other, supplementary FeatureInputs in our FeatureManager in GATKTool
    //we do add the driving source to the Feature manager but we do need to treat it differently and thus this field.
    private FeatureDataSource drivingVariants;
    private FeatureInput drivingVariantsFeatureInput;

    private boolean traverseByVariant = false;

    /**
     * Traverse by variant instead of by grouped by locus. When true, the {@link #apply} method will be called for each
     * individual variant in the driving variants input, instead of by locus, with all variants overlapping that locus.
     * This mehtod must be called in OnTraversalStart();
     */
    protected void changeTraversalModeToByVariant() { traverseByVariant = true; }

    @Override
    protected SAMSequenceDictionary getSequenceDictionaryForDrivingVariants() { return drivingVariants.getSequenceDictionary(); }

    @Override
    protected Spliterator getSpliteratorForDrivingVariants() { return drivingVariants.spliterator(); }

    /**
     * Marked final so that tool authors don't override it. Tool authors should override {@link #onTraversalStart} instead.
     */
    @Override
    protected final void onStartup() {
        super.onStartup();
        if ( hasUserSuppliedIntervals() ) {
            drivingVariants.setIntervalsForTraversal(userIntervals);
        }
    }

    @Override
    protected void initializeDrivingVariants() {
        drivingVariantsFeatureInput = new FeatureInput<>(drivingVariantFile, "drivingVariantFile");

        // This is the data source for the driving source of variants,
        // which uses a cache lookahead of getDrivingVariantCacheLookAheadBases()
        drivingVariants = new FeatureDataSource<>(drivingVariantsFeatureInput, getDrivingVariantCacheLookAheadBases(), VariantContext.class, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
                getGenomicsDBOptions());

        // Also add the driving datasource to the feature manager so that it can be queried. Setting cache lookahead
        // to 0 to avoid caching. Note: we are disabling lookahead here because of windowed queries that need to "look behind" as well.
        features.addToFeatureSources(0, drivingVariantsFeatureInput, VariantContext.class, cloudPrefetchBuffer, cloudIndexPrefetchBuffer,
                getGenomicsDBOptions());

        // Note: the intervals for the driving variants are set in onStartup()
    }

    /**
     * Returns the feature input for the driving variants file.
     */
    protected final FeatureInput getDrivingVariantsFeatureInput() {
        return drivingVariantsFeatureInput;
    }

    /**
     * Gets the header associated with our driving source of variants as a VCFHeader.
     *
     * @return VCFHeader for our driving source of variants
     */
    public final VCFHeader getHeaderForVariants() {
        final Object header = drivingVariants.getHeader();

        if ( ! (header instanceof VCFHeader) ) {
            throw new GATKException("Header for " + drivingVariantFile + " is not in VCF header format");
        }

        return (VCFHeader)header;
    }

    @Override
    public boolean requiresReference() {
        return true;
    }

    /**
     * {@inheritDoc}
     *
     * Implementation of variant-locus traversal.
     *
     * NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
     * engine package that extends this class. It is not meant to be overridden by tools outside of the engine
     * package.
     */
    @Override
    public void traverse() {
        final CountingReadFilter readFilter = makeReadFilter();
        final CountingVariantFilter variantFilter = makeVariantFilter();
        final VariantTransformer preTransformer  = makePreVariantFilterTransformer();
        final VariantTransformer postTransformer = makePostVariantFilterTransformer();

        if (traverseByVariant) {
            // Process each variant in the input stream, one at a time.
            getTransformedVariantStream( getSpliteratorForDrivingVariants(), preTransformer, variantFilter, postTransformer )
                    .forEachOrdered(variant -> {

                        final SimpleInterval variantInterval = new SimpleInterval(variant);

                        try{
                            apply(variant,
                                    Collections.singletonList(variant),
                                    new ReadsContext(reads, variantInterval, readFilter),
                                    new ReferenceContext(reference, variantInterval),
                                    new FeatureContext(features, variantInterval));
                        } catch (final IllegalStateException e) {
                            throw new GATKException("Exception thrown at " + variant.getContig() + ":" + variant.getStart()
                                    + " " + variant.toString(), e);
                        }

                        progressMeter.update(variantInterval);
                    });
        } else {
            // Traverse loci in shards. For any shard with overlapping variants, drop down to per-locus iteration,
            // calling apply for a single locus, only if there are overlapping variants, passing all such variants
            // as a group.
            Utils.stream(new ShardedIntervalIterator(getTraversalIntervals().iterator(), getDrivingVariantCacheLookAheadBases()))
                    .forEachOrdered (shard -> {
                        if (drivingVariants.query(shard).hasNext()) {
                            getLocusStream(new SimpleInterval(shard))
                                    .forEachOrdered(locus -> {
                                        final Iterator overlappingVariants = drivingVariants.query(locus);
                                        if (overlappingVariants.hasNext()) {
                                            final List filteredVariants = getTransformedVariantStream(
                                                    Spliterators.spliteratorUnknownSize(overlappingVariants, 0),
                                                    preTransformer,
                                                    variantFilter,
                                                    postTransformer)
                                                    .collect(Collectors.toList());
                                            if (!filteredVariants.isEmpty()) {
                                                try {
                                                    apply(locus,
                                                            filteredVariants,
                                                            new ReadsContext(reads, locus, readFilter),
                                                            new ReferenceContext(reference, locus),
                                                            new FeatureContext(features, locus));
                                                } catch (final IllegalStateException e) {
                                                    throw new GATKException("Exception thrown at first variant start " + filteredVariants.get(0).getContig() + ":" + filteredVariants.get(0).getStart()
                                                            + " " + filteredVariants.get(0).toString(), e);
                                                }

                                                progressMeter.update(locus);
                                            }
                                        }
                                    });
                        }
                    });
        }
    }

    // Return a Stream of SimpleInterval covering the entire territory sketched out by requestedInterval
    private Stream getLocusStream(final SimpleInterval requestedInterval) {
        return Utils.stream(new IntervalLocusIterator(Collections.singletonList(requestedInterval).iterator()));
    }

    /**
     * Process by locus, with all variants overlapping the current locus, or by individual variant when in by-variants
     * traversal mode. Must be implemented by tool authors. In general, tool authors should simply stream their output
     * from apply(), and maintain as little internal state as possible.
     *
     * @param loc the current locus being traversed (in group by locus), or the span of the current variant (in by-variants
     *            traversal)
     * @param variants The current variant(s) being processed. NOTE: When {@link #changeTraversalModeToByVariant()}
     *                 has been called, the {@link #apply} method will be called for each individual variant in the
     *                 driving variants input. Otherwise, this method is called once for every locus that has
     *                 overlapping variants, with all variants overlapping that locus.
     *
     * @param readsContext Reads overlapping the current variant. Will be an empty, but non-null, context object
     *                     if there is no backing source of reads data (in which case all queries on it will return
     *                     an empty array/iterator)
     * @param referenceContext Reference bases spanning the current variant. Will be an empty, but non-null, context object
     *                         if there is no backing source of reference data (in which case all queries on it will return
     *                         an empty array/iterator). Can request extra bases of context around the current variant's interval
     *                         by invoking {@link ReferenceContext#setWindow}
     *                         on this object before calling {@link ReferenceContext#getBases}
     * @param featureContext Features spanning the current variant. Will be an empty, but non-null, context object
     *                       if there is no backing source of Feature data (in which case all queries on it will return an
     *                       empty List).
     */
    public abstract void apply(Locatable loc, List variants, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext );

    /**
     * Close all data sources.
     *
     * Marked final so that tool authors don't override it. Tool authors should override {@link #onTraversalSuccess} and/or
     * {@link #closeTool} instead.
     */
    @Override
    protected final void onShutdown() {
        super.onShutdown();

        if ( drivingVariants != null )
            drivingVariants.close();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy