
org.broadinstitute.hellbender.tools.examples.ExampleAssemblyRegionWalkerSpark Maven / Gradle / Ivy
The newest version!
package org.broadinstitute.hellbender.tools.examples;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.ExampleProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.engine.spark.AssemblyRegionArgumentCollection;
import org.broadinstitute.hellbender.engine.spark.AssemblyRegionWalkerContext;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState;
import org.broadinstitute.hellbender.engine.spark.AssemblyRegionWalkerSpark;
import java.io.Serializable;
/**
* Example/toy program that shows how to implement the AssemblyRegionWalker interface.
*
* Prints out the bounds of each assembly region with and without padding, as well as the number of reads in each region.
* Also prints overlapping reference bases / variants for each region, if provided.
*/
@CommandLineProgramProperties(
summary = "Example AssemblyRegionWalker that prints out the bounds of each assembly region with and without padding, as well as the number of reads in each region",
oneLineSummary = "Example AssemblyRegionWalker",
programGroup = ExampleProgramGroup.class,
omitFromCommandLine = true
)
public final class ExampleAssemblyRegionWalkerSpark extends AssemblyRegionWalkerSpark {
private static final long serialVersionUID = 1L;
@Argument(doc="the output file path", shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, optional = false)
protected String outputFile;
@Argument(fullName="knownVariants", shortName="knownVariants", doc="Known set of variants", optional=true)
private FeatureInput knownVariants;
@Override
public AssemblyRegionEvaluator assemblyRegionEvaluator() {
// This example AssemblyRegionEvaluator considers all loci to be active
// (ie., it assigns an isActive probability of 1.0 to all loci):
return new ARE();
}
static class ARE implements AssemblyRegionEvaluator, Serializable {
private static final long serialVersionUID = 1L;
@Override
public ActivityProfileState isActive(AlignmentContext locusPileup, ReferenceContext referenceContext, FeatureContext featureContext) {
return new ActivityProfileState(new SimpleInterval(locusPileup), 1.0);
}
}
@Override
protected void processAssemblyRegions(JavaRDD rdd, JavaSparkContext ctx) {
rdd.map(assemblyFunction(knownVariants)).saveAsTextFile(outputFile);
}
private static Function assemblyFunction(FeatureInput knownVariants) {
return (Function) context -> {
AssemblyRegion region = context.getAssemblyRegion();
ReferenceContext referenceContext = context.getReferenceContext();
FeatureContext featureContext = context.getFeatureContext();
StringBuilder sb = new StringBuilder();
sb.append(String.format("%s assembly region at %s (%s with padding), containing %d reads.\n\n",
region.isActive() ? "ACTIVE" : "INACTIVE", region.getSpan(), region.getPaddedSpan(), region.getReads().size()));
sb.append(String.format("\tOverlapping reference bases: %s\n\n", new String(referenceContext.getBases())));
if ( featureContext.hasBackingDataSource() ) {
for ( final VariantContext variant : featureContext.getValues(knownVariants) ) {
sb.append(String.format("\tOverlapping variant at %s:%d-%d. Ref: %s Alt(s): %s\n\n",
variant.getContig(), variant.getStart(), variant.getEnd(), variant.getReference(), variant.getAlternateAlleles()));
}
}
return sb.toString();
};
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy