// NOTE(review): Maven-repository page header retained from the original extraction:
// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
// org.broadinstitute.hellbender.tools.walkers.featuremapping.FlowFeatureMapper Maven / Gradle / Ivy
// The newest version!
package org.broadinstitute.hellbender.tools.walkers.featuremapping;

import htsjdk.samtools.*;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.util.Precision;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.*;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.FlowBasedProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
import org.broadinstitute.hellbender.utils.read.FlowBasedReadUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriter;
import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype;
import org.broadinstitute.hellbender.utils.read.FlowBasedRead;

import java.util.*;


/**
 * Finds specific features in reads, scores the confidence of each feature relative to the
 * reference in each read and writes them into a VCF file.
 *
 * The sense of what a 'feature' is is left somewhat open. In the most general sense, it is a haplotype
 * located in a specific location on the read. It is not necessarily defined as a deviation from the reference.
 *
 * A feature is indeed scored against the reference (in terms of its deviation).
 *
 * The current version implements a single type of feature: a SNP (aka SNV).
 * At this point, this tool finds SNVs only.
 *
 * <h3>Input</h3>
 * <ul>
 *     <li>Coordinate-sorted and indexed SAM/BAM/CRAM</li>
 * </ul>
 *
 * <h3>Output</h3>
 * <ul>
 *     <li>Coordinate-sorted and indexed VCF</li>
 * </ul>
 *
 * <h3>Usage examples</h3>
 * <p>Find SNVs in chromosome 20:</p>
 * <pre>
 * gatk FlowFeatureMapper \
 *   -I input.bam \
 *   -L 20 \
 *   -O chr20_snv.vcf
 * </pre>
 *
* * {@GATK.walkertype ReadWalker} */ @CommandLineProgramProperties( summary = "Mapping features (flow space processing)", oneLineSummary = "Map/find features in BAM file, output VCF. Initially mapping SNVs", programGroup = FlowBasedProgramGroup.class ) @DocumentedFeature @ExperimentalFeature public final class FlowFeatureMapper extends ReadWalker { static class CopyAttrInfo { public final String name; public final VCFHeaderLineType type; public final String desc; public CopyAttrInfo(final String spec) { final String[] toks = spec.split(","); name = toks[0]; type = toks.length > 1 ? VCFHeaderLineType.valueOf(toks[1]) : VCFHeaderLineType.String; desc = toks.length > 2 ? StringUtils.join(Arrays.copyOfRange(toks, 2, toks.length), ",") : ("copy-attr: " + name); } } private static final Logger logger = LogManager.getLogger(FlowFeatureMapper.class); private static final String VCB_SOURCE = "fm"; private static final String VCF_READ_NAME = "X_RN"; private static final String VCF_SCORE = "X_SCORE"; private static final String VCF_FLAGS = "X_FLAGS"; private static final String VCF_MAPQ = "X_MAPQ"; private static final String VCF_CIGAR = "X_CIGAR"; private static final String VCF_READ_COUNT = "X_READ_COUNT"; private static final String VCF_FILTERED_COUNT = "X_FILTERED_COUNT"; private static final String VCF_FC1 = "X_FC1"; private static final String VCF_FC2 = "X_FC2"; private static final String VCF_LENGTH = "X_LENGTH"; private static final String VCF_EDIST = "X_EDIST"; private static final String VCF_INDEX = "X_INDEX"; private static final String VCF_SMQ_LEFT = "X_SMQ_LEFT"; private static final String VCF_SMQ_RIGHT = "X_SMQ_RIGHT"; private static final String VCF_SMQ_LEFT_MEAN = "X_SMQ_LEFT_MEAN"; private static final String VCF_SMQ_RIGHT_MEAN = "X_SMQ_RIGHT_MEAN"; private static final String VCF_ADJACENT_REF_DIFF = "X_ADJACENT_REF_DIFF"; private static final Double LOWEST_PROB = 0.0001; private static final int VENDOR_QUALITY_CHECK_FLAG = 0x200; private static final String 
INCLUDE_QC_FAILED_READS_FULL_NAME = "include-qc-failed-reads"; final private List copyAttrInfo = new LinkedList<>(); // order here is according to SequenceUtil.VALID_BASES_UPPER final private String scoreForBaseNames[] = new String[SequenceUtil.VALID_BASES_UPPER.length]; @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "File to which variants should be written") public GATKPath outputVCF = null; @ArgumentCollection private FlowFeatureMapperArgumentCollection fmArgs = new FlowFeatureMapperArgumentCollection(); @Advanced @Argument(fullName= AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, shortName= AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_SHORT_NAME, doc="Mode for emitting reference confidence scores (For Mutect2, this is a BETA feature)", optional = true) public ReferenceConfidenceMode emitReferenceConfidence = ReferenceConfidenceMode.NONE; @Advanced @Argument(fullName = HaplotypeCallerArgumentCollection.GQ_BAND_LONG_NAME, shortName = HaplotypeCallerArgumentCollection.GQ_BAND_SHORT_NAME, doc= "Exclusive upper bounds for reference confidence GQ bands " + "(must be in [1, 100] and specified in increasing order)", optional = true) public List GVCFGQBands = new ArrayList<>(70); { for (int i=1; i<=60; ++i) { GVCFGQBands.add(i); } GVCFGQBands.add(70); GVCFGQBands.add(80); GVCFGQBands.add(90); GVCFGQBands.add(99); }; @Advanced @Argument(fullName=HaplotypeCallerArgumentCollection.OUTPUT_BLOCK_LOWER_BOUNDS, doc = "Output the band lower bound for each GQ block regardless of the data it represents", optional = true) public boolean floorBlocks = false; @Advanced @Argument(fullName=INCLUDE_QC_FAILED_READS_FULL_NAME, doc = "include reads with QC failed flag", optional = true) public boolean includeQcFailedReads = false; @ArgumentCollection public FlowBasedArgumentCollection fbargs = new FlowBasedArgumentCollection(); protected static class ReadContext 
implements Comparable { final GATKRead read; final ReferenceContext referenceContext; ReadContext(final GATKRead read, final ReferenceContext referenceContext) { this.read = read; this.referenceContext = referenceContext; } @Override public int compareTo(ReadContext o) { int delta = read.getContig().compareTo(o.read.getContig()); delta = (delta != 0) ? delta : Integer.compare(read.getStart(), o.read.getStart()); delta = (delta != 0) ? delta : Integer.compare(read.getEnd(), o.read.getEnd()); return delta; } } protected static class MappedFeature implements Comparable { GATKRead read; FlowFeatureMapperArgumentCollection.MappingFeatureEnum type; byte[] readBases; byte[] refBases; int readBasesOffset; // offset of read bases array int start; // location (on reference) int offsetDelta; double score; int readCount; int filteredCount; int nonIdentMBasesOnRead; int featuresOnRead; int refEditDistance; int index; int smqLeft; int smqRight; int smqLeftMean; int smqRightMean; double scoreForBase[]; boolean adjacentRefDiff; public MappedFeature(GATKRead read, FlowFeatureMapperArgumentCollection.MappingFeatureEnum type, byte[] readBases, byte[] refBases, int readBasesOffset, int start, int offsetDelta) { this.read = read; this.type = type; this.readBases = readBases; this.refBases = refBases; this.readBasesOffset = readBasesOffset; this.start = start; this.offsetDelta = offsetDelta; } static MappedFeature makeSNV(GATKRead read, int offset, byte refBase, int start, int offsetDelta) { byte[] readBases = {read.getBasesNoCopy()[offset]}; byte[] refBases = {refBase}; return new MappedFeature( read, FlowFeatureMapperArgumentCollection.MappingFeatureEnum.SNV, readBases, refBases, offset, start, offsetDelta); } @Override public String toString() { return "Feature{" + "read=" + read + ", type=" + type + ", readBases=" + Arrays.toString(readBases) + ", refBases=" + Arrays.toString(refBases) + ", readBasesOffset=" + readBasesOffset + ", start=" + start + '}'; } @Override public int 
compareTo(MappedFeature o) { int delta = this.read.getContig().compareTo(o.read.getContig()); if ( delta != 0 ) { return delta; } else { return this.start - o.start; } } } // locals private VariantContextWriter vcfWriter; final private PriorityQueue featureQueue = new PriorityQueue<>(); final private PriorityQueue readQueue = new PriorityQueue<>(); private FeatureMapper mapper; @Override public void onTraversalStart() { super.onTraversalStart(); mapper = buildMapper(); // enforce requirement for sorted input if ( getHeaderForReads().getSortOrder() != SAMFileHeader.SortOrder.coordinate ) { throw new IllegalArgumentException("input file must be coordinated sorted"); } // open output vcf // The HC engine will make the right kind (VCF or GVCF) of writer for us final SAMSequenceDictionary sequenceDictionary = getHeaderForReads().getSequenceDictionary(); vcfWriter = makeVCFWriter(outputVCF, sequenceDictionary, createOutputVariantIndex, createOutputVariantMD5, outputSitesOnlyVCFs); vcfWriter.writeHeader(makeVCFHeader(sequenceDictionary, getDefaultToolVCFHeaderLines())); } @Override public void closeTool() { flushQueue(null, null); super.closeTool(); if ( vcfWriter != null ) { vcfWriter.close(); } } public VariantContextWriter makeVCFWriter( final GATKPath outputVCF, final SAMSequenceDictionary readsDictionary, final boolean createOutputVariantIndex, final boolean createOutputVariantMD5, final boolean sitesOnlyMode ) { Utils.nonNull(outputVCF); Utils.nonNull(readsDictionary); final List options = new ArrayList<>(2); if (createOutputVariantIndex) {options.add(Options.INDEX_ON_THE_FLY);} if (sitesOnlyMode) {options.add(Options.DO_NOT_WRITE_GENOTYPES);} VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter( outputVCF.toPath(), readsDictionary, createOutputVariantMD5, options.toArray(new Options[options.size()]) ); if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) { try { writer = new GVCFWriter(writer, new ArrayList(GVCFGQBands), floorBlocks); } 
catch ( IllegalArgumentException e ) { throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage()); } } return writer; } public VCFHeader makeVCFHeader(final SAMSequenceDictionary sequenceDictionary, final Set defaultToolHeaderLines ) { final Set headerInfo = new HashSet<>(); headerInfo.addAll(defaultToolHeaderLines); // all callers need to add these standard annotation header lines headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY)); headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY)); // all callers need to add these standard FORMAT field header lines VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_PL_KEY); // add our own headers headerInfo.add(new VCFInfoHeaderLine(VCF_READ_NAME, 1, VCFHeaderLineType.String, "Read name")); headerInfo.add(new VCFInfoHeaderLine(VCF_SCORE, 1, VCFHeaderLineType.Float, "Mapping score")); headerInfo.add(new VCFInfoHeaderLine(VCF_FLAGS, 1, VCFHeaderLineType.Integer, "Read flags")); headerInfo.add(new VCFInfoHeaderLine(VCF_MAPQ, 1, VCFHeaderLineType.Integer, "Read mapqe")); headerInfo.add(new VCFInfoHeaderLine(VCF_CIGAR, 1, VCFHeaderLineType.String, "Read CIGAR")); headerInfo.add(new VCFInfoHeaderLine(VCF_READ_COUNT, 1, VCFHeaderLineType.Integer, "Number of reads containing this location")); headerInfo.add(new VCFInfoHeaderLine(VCF_FILTERED_COUNT, 1, VCFHeaderLineType.Integer, "Number of reads containing this location that pass the adjacent base filter")); headerInfo.add(new VCFInfoHeaderLine(VCF_FC1, 1, VCFHeaderLineType.Integer, "Number of M bases different on read from references")); headerInfo.add(new VCFInfoHeaderLine(VCF_FC2, 1, VCFHeaderLineType.Integer, "Number of features before score threshold filter")); headerInfo.add(new VCFInfoHeaderLine(VCF_LENGTH, 1, VCFHeaderLineType.Integer, "Read length")); 
headerInfo.add(new VCFInfoHeaderLine(VCF_EDIST, 1, VCFHeaderLineType.Integer, "Read Levenshtein edit distance from reference")); headerInfo.add(new VCFInfoHeaderLine(VCF_INDEX, 1, VCFHeaderLineType.Integer, "Ordinal index, from start of the read, where the feature was found")); headerInfo.add(new VCFInfoHeaderLine(VCF_SMQ_LEFT, 1, VCFHeaderLineType.Integer, "Ordinal Median quality of N bases to the left of the feature")); headerInfo.add(new VCFInfoHeaderLine(VCF_SMQ_RIGHT, 1, VCFHeaderLineType.Integer, "Ordinal Median quality of N bases to the right of the feature")); headerInfo.add(new VCFInfoHeaderLine(VCF_SMQ_LEFT_MEAN, 1, VCFHeaderLineType.Integer, "Ordinal Mean quality of N bases to the left of the feature")); headerInfo.add(new VCFInfoHeaderLine(VCF_SMQ_RIGHT_MEAN, 1, VCFHeaderLineType.Integer, "Ordinal Mean quality of N bases to the right of the feature")); for ( String spec : fmArgs.copyAttr ) { final CopyAttrInfo info = new CopyAttrInfo(spec); headerInfo.add(new VCFInfoHeaderLine(fmArgs.copyAttrPrefix + info.name, 1, info.type, info.desc)); copyAttrInfo.add(info); } // validation mode? 
if ( fmArgs.reportAllAlts ) { for ( int baseIndex = 0 ; baseIndex < SequenceUtil.VALID_BASES_UPPER.length ; baseIndex++ ) { headerInfo.add(new VCFInfoHeaderLine(scoreNameForBase(baseIndex), 1, VCFHeaderLineType.Float, "Base specific mapping score")); } } if ( fmArgs.reportAllAlts || fmArgs.tagBasesWithAdjacentRefDiff ) { headerInfo.add(new VCFInfoHeaderLine(VCF_ADJACENT_REF_DIFF, 1, VCFHeaderLineType.Flag, "Adjacent base filter indication: indel in the adjacent 5 bases to the considered base on the read")); } final VCFHeader vcfHeader = new VCFHeader(headerInfo); vcfHeader.setSequenceDictionary(sequenceDictionary); return vcfHeader; } private String scoreNameForBase(int baseIndex) { if ( scoreForBaseNames[baseIndex] == null ) { scoreForBaseNames[baseIndex] = VCF_SCORE + "_" + new String(new byte[]{SequenceUtil.VALID_BASES_UPPER[baseIndex]}); } return scoreForBaseNames[baseIndex]; } @Override public void apply(final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext) { // include dups? if ( read.isDuplicate() && !fmArgs.includeDupReads ) { return; } // include supplementary alignments? if ( read.isSupplementaryAlignment() && !fmArgs.keepSupplementaryAlignments ) { return; } // include qc-failed reads? if ( ((read.getFlags() & VENDOR_QUALITY_CHECK_FLAG) != 0) && !includeQcFailedReads ) { return; } // flush qeues up to this read flushQueue(read, referenceContext); // find features in read mapper.forEachOnRead(read, referenceContext, fr -> { if ( logger.isDebugEnabled() ) { logger.debug("fr: " + fr); } // score the feature fr.score = scoreFeature(fr); // score for validation mode? 
if ( fmArgs.reportAllAlts) { fr.scoreForBase = new double[SequenceUtil.VALID_BASES_UPPER.length]; for ( int baseIndex = 0 ; baseIndex < fr.scoreForBase.length ; baseIndex++ ) { final byte base = SequenceUtil.VALID_BASES_UPPER[baseIndex]; boolean incl = base != fr.readBases[0]; if ( incl ) { fr.scoreForBase[baseIndex] = scoreFeature(fr, base); } else { fr.scoreForBase[baseIndex] = Double.NaN; } } } // emit feature if filters in if ( filterFeature(fr) ) { featureQueue.add(fr); } }); } private void flushQueue(final GATKRead read, final ReferenceContext referenceContext) { // emit all? if ( read == null ) { while ( featureQueue.size() != 0 ) { final MappedFeature fr = featureQueue.poll(); enrichFeature(fr); emitFeature(fr); } } else { // enter read into the queue readQueue.add(new ReadContext(read, referenceContext)); // emit all features that start before this read while ( featureQueue.size() != 0 ) { MappedFeature fr = featureQueue.peek(); if ( !fr.read.getContig().equals(read.getContig()) || (fr.start < read.getStart()) ) { fr = featureQueue.poll(); enrichFeature(fr); emitFeature(fr); } else { break; } } // remove all reads that start before this read while ( readQueue.size() != 0 ) { ReadContext rc = readQueue.peek(); if ( !rc.read.getContig().equals(read.getContig()) || (rc.read.getEnd() < read.getStart()) ) { rc = readQueue.poll(); } else { break; } } } } private void enrichFeature(final MappedFeature fr) { // loop on queued reads, count and check if should be counted as filtered final Locatable loc = new SimpleInterval(fr.read.getContig(), fr.start, fr.start); for ( ReadContext rc : readQueue ) { if ( rc.read.contains(loc) ) { fr.readCount++; FeatureMapper.FilterStatus fs = mapper.noFeatureButFilterAt(rc.read, rc.referenceContext, fr.start); if ( (fs == FeatureMapper.FilterStatus.Filtered) || (fs == FeatureMapper.FilterStatus.NoFeatureAndFiltered) ) { fr.filteredCount++; } } } } private double scoreFeature(final MappedFeature fr) { return scoreFeature(fr, 
(byte)0); } private double scoreFeature(final MappedFeature fr, byte altBase) { // build haplotypes final FlowBasedReadUtils.ReadGroupInfo rgInfo = FlowBasedReadUtils.getReadGroupInfo(getHeaderForReads(), fr.read); final FlowBasedHaplotype[] haplotypes = buildHaplotypes(fr, rgInfo.flowOrder, altBase); // create flow read final FlowBasedRead flowRead = new FlowBasedRead(fr.read, rgInfo.flowOrder, rgInfo.maxClass, fbargs); final int diffLeft = haplotypes[0].getStart() - flowRead.getStart() + fr.offsetDelta; final int diffRight = flowRead.getEnd() - haplotypes[0].getEnd(); flowRead.applyBaseClipping(Math.max(0, diffLeft), Math.max(diffRight, 0), false); if ( !flowRead.isValid() ) { return -1; } // compute alternative score final int hapKeyLength = Math.min(haplotypes[0].getKeyLength(), haplotypes[1].getKeyLength()); final double readScore = computeLikelihoodLocal(flowRead, haplotypes[0], hapKeyLength, false); final double refScore = computeLikelihoodLocal(flowRead, haplotypes[1], hapKeyLength, false); double score = readScore - refScore; if ( !Double.isNaN(fmArgs.limitScore) ) { score = Math.min(score, fmArgs.limitScore); } if ( ((Double.isNaN(score) || (score < 0)) && fmArgs.debugNegatives) || (fmArgs.debugReadName != null && fmArgs.debugReadName.contains(fr.read.getName())) ) { logger.info("**** debug read: " + fr.read); logger.info("readBases: " + fr.read.getBasesString()); logger.info("flowRead: " + flowRead); logger.info("flowBases: " + flowRead.getBasesString()); logger.info("flowOrder: " + flowRead.getFlowOrder()); logger.info("flowKey: " + flowRead.getKeyLength() + " " + Arrays.toString(flowRead.getKey())); logger.info("readHaplotype: " + haplotypes[0]); logger.info("readHapKey: " + haplotypes[0].getKeyLength() + " " + Arrays.toString(haplotypes[0].getKey())); computeLikelihoodLocal(flowRead, haplotypes[0], hapKeyLength, true); logger.info("refrHaplotype: " + haplotypes[1]); logger.info("refrHapKey: " + haplotypes[1].getKeyLength() + " " + 
Arrays.toString(haplotypes[1].getKey())); computeLikelihoodLocal(flowRead, haplotypes[1], hapKeyLength, true); logger.info("score: " + score); // analyze read final FlowBasedRead flowRead2 = new FlowBasedRead(fr.read, rgInfo.flowOrder, rgInfo.maxClass, fbargs); final int[] key2 = flowRead2.getKey(); for ( int i = 0 ; i < key2.length ; i++ ) { final double p1 = flowRead2.getProb(i, key2[i]); for ( int j = 0 ; j < rgInfo.maxClass ; j++ ) { final double p2 = flowRead2.getProb(i, j); if ( p2 > p1 ) logger.info(String.format("prob at %s key[%d]=%d, %f is lower than at %d which is %f", flowRead2.getName(), i, key2[i], p1, j, p2)); } } } if ( score < 0 && !fmArgs.keepNegatives && score != -1.0 ) { score = 0; } return score; } public static double computeLikelihoodLocal(final FlowBasedRead read, final FlowBasedHaplotype haplotype, final int hapKeyLength, final boolean debug) { final byte[] flowOrder = haplotype.getFlowOrderArray(); final byte readFlowOrder0 = read.getFlowOrderArray()[0]; int startingPoint = 0; for (int i = 0; i < flowOrder.length; i++) { if (flowOrder[i] == readFlowOrder0) { startingPoint = i; break; } } final int[] key = haplotype.getKey(); // debug support StringBuffer debugMessage = null; if ( debug ) debugMessage = new StringBuffer(Integer.toString(startingPoint) + " hmer prob |"); double result = 0 ; for (int i = 0; i < read.getKeyLength(); i++) { int index = i + startingPoint; double prob = 0; int locationToFetch = 0; if ( index < hapKeyLength ) { locationToFetch = Math.min(key[index] & 0xff, read.getMaxHmer() + 1); prob = read.getProb(i, locationToFetch); } else { if ( debug ) { debugMessage.append(" clip"); } break; } if ( Precision.equals(prob, 0.0) ) { prob = LOWEST_PROB; } result += Math.log10(prob); if ( debug ) { debugMessage.append(String.format(" %d %.4f", locationToFetch, prob)); } } if ( debug ) { debugMessage.append(" | " + result); logger.info("debugMessage: " + debugMessage); } return result; } private FlowBasedHaplotype[] 
buildHaplotypes(final MappedFeature fr, final String flowOrder, byte altBase) { // build bases for flow haplotypes // NOTE!!!: this code assumes length of feature on read and reference is the same // this is true for SNP but not for INDELs - it will have to be re-written! // TODO: write for INDEL byte[] bases = fr.read.getBasesNoCopy(); int offset = fr.readBasesOffset; int refStart = fr.start; int refModOfs = 0; // install alt base? byte orgBase = 0; if ( altBase != 0 ) { orgBase = fr.refBases[0]; fr.refBases[0] = altBase; } if ( offset > 0 ) { // reach into hmer before offset--; refModOfs++; refStart--; // extend until start of hmer final byte hmerBase = bases[offset]; while ( offset > 0 && bases[offset-1] == hmerBase ) { offset--; refModOfs++; refStart--; } } final byte[] sAltBases = Arrays.copyOfRange(bases, offset, bases.length); final byte[] sRefBases = Arrays.copyOf(sAltBases, sAltBases.length); System.arraycopy(fr.refBases, 0, sRefBases, refModOfs, fr.refBases.length); // construct haplotypes final SimpleInterval genomeLoc = new SimpleInterval(fr.read.getContig(), refStart, refStart + sAltBases.length - 1); final Cigar cigar = new Cigar(); cigar.add(new CigarElement(sAltBases.length, CigarOperator.M)); final Haplotype altHaplotype = new Haplotype(sAltBases, false); final Haplotype refHaplotype = new Haplotype(sRefBases, true); altHaplotype.setGenomeLocation(genomeLoc); refHaplotype.setGenomeLocation(genomeLoc); altHaplotype.setCigar(cigar); refHaplotype.setCigar(cigar); // prepare flow based haplotypes final FlowBasedHaplotype[] result = { new FlowBasedHaplotype(altHaplotype, flowOrder), new FlowBasedHaplotype(refHaplotype, flowOrder) }; // restore changes if ( altBase != 0 ) { fr.refBases[0] = orgBase; } // return return result; } private boolean filterFeature(final MappedFeature fr) { if ( fmArgs.excludeNaNScores && Double.isNaN(fr.score) ) { return false; } else if ( fr.score > fmArgs.maxScore ) { return false; } else if ( fr.score < fmArgs.minScore ) { 
return false; } return true; } private void emitFeature(final MappedFeature fr) { // create alleles final Collection alleles = new LinkedList<>(); if ( fmArgs.reportAllAlts && Arrays.equals(fr.readBases, fr.refBases) ) { alleles.add(Allele.create("*".getBytes(), false)); } else { alleles.add(Allele.create(fr.readBases, false)); } alleles.add(Allele.create(fr.refBases, true)); // create variant context builder final VariantContextBuilder vcb = new VariantContextBuilder( VCB_SOURCE, fr.read.getContig(), fr.start, fr.start + fr.refBases.length - 1, alleles); // copy attributes vcb.attribute(VCF_READ_NAME, fr.read.getName()); vcb.attribute(VCF_SCORE, String.format("%.5f", fr.score)); vcb.attribute(VCF_FLAGS, fr.read.getFlags()); vcb.attribute(VCF_MAPQ, fr.read.getMappingQuality()); vcb.attribute(VCF_CIGAR, fr.read.getCigar().toString()); vcb.attribute(VCF_READ_COUNT, fr.readCount); vcb.attribute(VCF_FILTERED_COUNT, fr.filteredCount); vcb.attribute(VCF_FC1, fr.nonIdentMBasesOnRead); vcb.attribute(VCF_FC2, fr.featuresOnRead); vcb.attribute(VCF_LENGTH, fr.read.getLength()); vcb.attribute(VCF_EDIST, fr.refEditDistance); vcb.attribute(VCF_INDEX, fr.index); // median/mean quality on? if ( fmArgs.surroundingMediaQualitySize != null ) { vcb.attribute(VCF_SMQ_LEFT, fr.smqLeft); vcb.attribute(VCF_SMQ_RIGHT, fr.smqRight); } if ( fmArgs.surroundingMeanQualitySize != null ) { vcb.attribute(VCF_SMQ_LEFT_MEAN, fr.smqLeftMean); vcb.attribute(VCF_SMQ_RIGHT_MEAN, fr.smqRightMean); } for ( CopyAttrInfo info : copyAttrInfo ) { if ( fr.read.hasAttribute(info.name) ) { final String attrName = fmArgs.copyAttrPrefix + info.name; if ( info.type == VCFHeaderLineType.Integer ) { vcb.attribute(attrName, fr.read.getAttributeAsInteger(info.name)); } else if ( info.type == VCFHeaderLineType.Float ) { vcb.attribute(attrName, fr.read.getAttributeAsFloat(info.name)); } else { vcb.attribute(attrName, fr.read.getAttributeAsString(info.name)); } } } // validation mode? 
if ( fmArgs.reportAllAlts ) { if ( fr.scoreForBase != null ) { for (int baseIndex = 0; baseIndex < SequenceUtil.VALID_BASES_UPPER.length; baseIndex++) { if (!Double.isNaN(fr.scoreForBase[baseIndex])) { vcb.attribute(scoreNameForBase(baseIndex), String.format("%.5f", fr.scoreForBase[baseIndex])); } } } } if ( fr.adjacentRefDiff ) { vcb.attribute(VCF_ADJACENT_REF_DIFF, true); } // build it! final VariantContext vc = vcb.make(); // write to file vcfWriter.add(vc); } private FeatureMapper buildMapper() { // build appropriate mapper if ( fmArgs.mappingFeature == FlowFeatureMapperArgumentCollection.MappingFeatureEnum.SNV ) { return new SNVMapper(fmArgs); } else { throw new GATKException("unsupported mappingFeature: " + fmArgs.mappingFeature); } } }
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy