All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.walkers.validation.AnnotateVcfWithExpectedAlleleFraction Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.walkers.validation;

import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.commons.math3.util.MathArrays;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.VariantWalker;
import picard.cmdline.programgroups.VariantEvaluationProgramGroup;
import org.broadinstitute.hellbender.utils.MathUtils;

import java.io.File;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Given mixing weights of different samples in a pooled bam, annotate a corresponding
 * vcf containing individual sample genotypes.
 *
 * 

The formula is:

* *

* Expected allele fraction = SUM_samples {mixing_fraction(sample) * [0 if hom ref, 0.5 is het, 1.0 if hom var]} *

* *

Example

* *
 * gatk --java-options "-Xmx4g" AnnotateVcfWithExpectedAlleleFraction \
 *   -V input.vcf \
 *   -O output.vcf \
 *   --mixing-fractions mixingFractions.table
 * 
* * Created by David Benjamin on 1/31/17. */ @CommandLineProgramProperties( summary = "Annotate a multi-sample vcf with expected allele fractions in pooled sequencing given mixing" + " fractions of the different samples in the pool via the formula" + " Expected allele fraction = SUM_samples {mixing_fraction(sample) * [0 if hom ref, 0.5 is het, 1.0 if hom var]}", oneLineSummary = "(Internal) Annotate a vcf with expected allele fractions in pooled sequencing", programGroup = VariantEvaluationProgramGroup.class ) @DocumentedFeature public class AnnotateVcfWithExpectedAlleleFraction extends VariantWalker { public static final String MIXING_FRACTIONS_TABLE_NAME = "mixing-fractions"; @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "The output annotated VCF file", optional = false) private final GATKPath outputVcf = null; @Argument(fullName = MIXING_FRACTIONS_TABLE_NAME, shortName = MIXING_FRACTIONS_TABLE_NAME, doc = "The input mixing fractions table", optional = false) private final File inputMixingFractions = null; private VariantContextWriter vcfWriter; public static final String EXPECTED_ALLELE_FRACTION_NAME = "AF_EXP"; double[] mixingFractionsInSampleOrder; @Override public void onTraversalStart() { final VCFHeader inputHeader = getHeaderForVariants(); final Set headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder()); headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam")); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); headerLines.addAll(getDefaultToolVCFHeaderLines()); vcfWriter = createVCFWriter(outputVcf); vcfWriter.writeHeader(vcfHeader); final List mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions); final Map mixingfractionsMap = mixingFractionsList.stream() .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction)); mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream() .mapToDouble(mixingfractionsMap::get).toArray(); } @Override public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) { final double[] weights = vc.getGenotypes().stream().mapToDouble(g -> weight(g)).toArray(); final double expectedAlleleFraction = MathUtils.sum(MathArrays.ebeMultiply(weights, mixingFractionsInSampleOrder)); vcfWriter.add(new VariantContextBuilder(vc).attribute(EXPECTED_ALLELE_FRACTION_NAME, expectedAlleleFraction).make()); } @Override public void closeTool() { if ( vcfWriter != null ) { vcfWriter.close(); } } private static double weight(final Genotype genotype) { if (genotype.isHomVar()) { return 1.0; } else if (genotype.isHet()) { return 0.5; } else { return 0.0; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy