All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.examples.ExampleVariantWalkerSpark Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.examples;

import htsjdk.variant.variantcontext.VariantContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.ExampleProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.engine.spark.VariantWalkerContext;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.engine.spark.VariantWalkerSpark;

import java.io.PrintStream;

/**
 * Example/toy program that shows how to implement the VariantWalker interface. Prints supplied variants
 * along with overlapping reads/reference bases/variants (if present).
 */
@CommandLineProgramProperties(
        summary = "Example tool that prints variants supplied to the specified output file (stdout if none provided), along with overlapping reads/reference bases/variants (if provided)",
        oneLineSummary = "Example tool that prints variants with optional contextual data",
        programGroup = ExampleProgramGroup.class,
        omitFromCommandLine = true
)
public final class ExampleVariantWalkerSpark extends VariantWalkerSpark {
    private static final long serialVersionUID = 1L;

    @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "Output file (if not provided, defaults to STDOUT)", common = false, optional = true)
    private GATKPath outputFile = null;

    @Argument(fullName="auxiliaryVariants", shortName="av", doc="Auxiliary set of variants", optional=true)
    private FeatureInput auxiliaryVariants;

    @Override
    protected void processVariants(JavaRDD rdd, JavaSparkContext ctx) {
        rdd.map(variantFunction(auxiliaryVariants)).saveAsTextFile(outputFile.toPath().toString());
    }

    private static Function variantFunction(FeatureInput auxiliaryVariants) {
        return (Function) context -> {
            VariantContext variant = context.getVariant();
            ReadsContext readsContext = context.getReadsContext();
            ReferenceContext referenceContext = context.getReferenceContext();
            FeatureContext featureContext = context.getFeatureContext();

            StringBuilder sb = new StringBuilder();
            sb.append("Current variant: " + variant);
            sb.append("\n");

            if ( referenceContext.hasBackingDataSource() ) {
                sb.append(String.format("\tOverlapping reference bases: %s\n\n", new String(referenceContext.getBases())));
            }

            if ( readsContext.hasBackingDataSource() ) {
                for ( final GATKRead read : readsContext) {
                    sb.append(String.format("\tOverlapping read at %s:%d-%d\n", read.getContig(), read.getStart(), read.getEnd()));
                }
                sb.append("\n");
            }

            if ( featureContext.hasBackingDataSource() ) {
                for ( final VariantContext variant1 : featureContext.getValues(auxiliaryVariants) ) {
                    sb.append(String.format("\tOverlapping variant at %s:%d-%d. Ref: %s Alt(s): %s\n",
                            variant1.getContig(), variant1.getStart(), variant1.getEnd(), variant1.getReference(), variant1.getAlternateAlleles()));
                }
                sb.append("\n");
            }

            return sb.toString();
        };
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy