All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.examples.ExampleReadWalkerWithVariantsSpark Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.examples;

import htsjdk.variant.variantcontext.VariantContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.ExampleProgramGroup;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.spark.ReadWalkerContext;
import org.broadinstitute.hellbender.engine.spark.ReadWalkerSpark;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.util.List;

/**
 * Example/toy program that prints reads from the provided file or files along with overlapping variants
 * (if a source of variants is provided). Intended to show how to implement the ReadWalker interface.
 */
@CommandLineProgramProperties(
        summary = "Prints reads from the provided file(s) along with overlapping variants (if a source of variants is provided) to the specified output file (or STDOUT if none specified)",
        oneLineSummary = "Print reads with overlapping variants",
        programGroup = ExampleProgramGroup.class,
        omitFromCommandLine = true
)
public final class ExampleReadWalkerWithVariantsSpark extends ReadWalkerSpark {
    private static final long serialVersionUID = 1L;

    @Argument(fullName = StandardArgumentDefinitions.VARIANT_LONG_NAME, shortName = StandardArgumentDefinitions.VARIANT_SHORT_NAME, doc = "One or more VCF files", optional = true)
    private List> variants;

    @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "Output file (if not provided, defaults to STDOUT)", common = false, optional = true)
    private String outputFile = null;

    @Argument(fullName = "groupVariantsBySource", doc = "If true, group overlapping variants by their source when outputting them", optional = true)
    private boolean groupVariantsBySource = false;

    @Override
    protected void processReads(JavaRDD rdd, JavaSparkContext ctx) {
        rdd.map(readFunction(variants, groupVariantsBySource)).saveAsTextFile(outputFile);
    }

    private static Function readFunction(List> variants, boolean groupVariantsBySource) {
        return (Function) context -> {
            GATKRead read = context.getRead();
            ReferenceContext referenceContext = context.getReferenceContext();
            FeatureContext featureContext = context.getFeatureContext();

            StringBuilder sb = new StringBuilder();

            sb.append(String.format("Read at %s:%d-%d:\n%s\n", read.getContig(), read.getStart(), read.getEnd(), read.getBasesString()));

            if ( groupVariantsBySource ) {
                // We can keep the variants from each source separate by passing in the FeatureInputs
                // individually to featureContext.getValues()
                for ( FeatureInput featureSource : variants ) {
                    sb.append("From source " + featureSource.getName());
                    sb.append("\n");

                    for ( VariantContext variant : featureContext.getValues(featureSource) ) {
                        sb.append(String.format("\t"));
                        printOverlappingVariant(sb, variant);
                    }
                }
            }
            else {
                // Passing in all FeatureInputs at once to featureContext.getValues() lets us get
                // all overlapping variants without regard to source
                for ( VariantContext variant : featureContext.getValues(variants) ) {
                    printOverlappingVariant(sb, variant);
                }
            }

            sb.append("\n");

            return sb.toString();
        };
    }

    private static void printOverlappingVariant(StringBuilder sb, final VariantContext variant) {
        sb.append(String.format("Overlapping variant at %s:%d-%d: Ref: %s Alt(s): %s\n",
                variant.getContig(), variant.getStart(), variant.getEnd(),
                variant.getReference(), variant.getAlternateAlleles()));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy