org.broadinstitute.hellbender.tools.walkers.featuremapping.FlowFeatureMapper Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of gatk Show documentation
Development on GATK 4
The newest version!
package org.broadinstitute.hellbender.tools.walkers.featuremapping;

import htsjdk.samtools.*;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.util.Precision;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.*;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.FlowBasedProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
import org.broadinstitute.hellbender.utils.read.FlowBasedReadUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriter;
import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype;
import org.broadinstitute.hellbender.utils.read.FlowBasedRead;

import java.util.*;


/**
 * Finds specific features in reads, scores the confidence of each feature relative to the
 * reference in each read and writes them into a VCF file.
 *
 * The sense of what a 'feature' is left somewhat open. In the most general sense, it is a haplotype
 * located in a specific location on the read. It is not necessarily defined as a deviation from the reference.
 *
 * A feature is indeed scored against the reference (in terms of its deviation).
 *
 * The current version implements a single type of feature: a SNP (aka SNV).
 *
 * 
 * At this point, this tool finds SNVs
 * 
 *
 *  Input 
 * 
 *      Coordinate-sorted and indexed SAM/BAM/CRAM 
 * 
 *
 *  Output 
 * 
 *      Coordinate-sorted and indexed VCF 
 * 
 *
 * Usage examples
 * Find SNVs in chromosome 20.
 *  * gatk FlowFeatureMapper \
 *   -I input.bam \
 *   -L 20 \
 *   -O chr20_snv.vcf
 * 
 *
 * {@GATK.walkertype ReadWalker}
 */
@CommandLineProgramProperties(
        summary = "Mapping features (flow space processing)",
        oneLineSummary = "Map/find features in BAM file, output VCF. Initially mapping SNVs",
        programGroup = FlowBasedProgramGroup.class
)


@DocumentedFeature
@ExperimentalFeature
public final class FlowFeatureMapper extends ReadWalker {

    static class CopyAttrInfo {
        public final String name;
        public final VCFHeaderLineType type;
        public final String desc;

        public CopyAttrInfo(final String spec) {
            final String[] toks = spec.split(",");
            name = toks[0];
            type = toks.length > 1 ? VCFHeaderLineType.valueOf(toks[1]) : VCFHeaderLineType.String;
            desc = toks.length > 2 ? StringUtils.join(Arrays.copyOfRange(toks, 2, toks.length), ",") : ("copy-attr: " + name);
        }
    }

    private static final Logger     logger = LogManager.getLogger(FlowFeatureMapper.class);

    private static final String     VCB_SOURCE = "fm";

    private static final String     VCF_READ_NAME = "X_RN";
    private static final String     VCF_SCORE = "X_SCORE";
    private static final String     VCF_FLAGS = "X_FLAGS";
    private static final String     VCF_MAPQ = "X_MAPQ";
    private static final String     VCF_CIGAR = "X_CIGAR";
    private static final String     VCF_READ_COUNT = "X_READ_COUNT";
    private static final String     VCF_FILTERED_COUNT = "X_FILTERED_COUNT";
    private static final String     VCF_FC1 = "X_FC1";
    private static final String     VCF_FC2 = "X_FC2";
    private static final String     VCF_LENGTH = "X_LENGTH";
    private static final String     VCF_EDIST = "X_EDIST";
    private static final String     VCF_INDEX = "X_INDEX";
    private static final String     VCF_SMQ_LEFT = "X_SMQ_LEFT";
    private static final String     VCF_SMQ_RIGHT = "X_SMQ_RIGHT";
    private static final String     VCF_SMQ_LEFT_MEAN = "X_SMQ_LEFT_MEAN";
    private static final String     VCF_SMQ_RIGHT_MEAN = "X_SMQ_RIGHT_MEAN";
    private static final String     VCF_ADJACENT_REF_DIFF = "X_ADJACENT_REF_DIFF";


    private static final Double     LOWEST_PROB = 0.0001;
    private static final int        VENDOR_QUALITY_CHECK_FLAG = 0x200;

    private static final String     INCLUDE_QC_FAILED_READS_FULL_NAME = "include-qc-failed-reads";

    final private List copyAttrInfo = new LinkedList<>();

    // order here is according to SequenceUtil.VALID_BASES_UPPER
    final private String scoreForBaseNames[] = new String[SequenceUtil.VALID_BASES_UPPER.length];

    @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
            shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
            doc = "File to which variants should be written")
    public GATKPath outputVCF = null;

    @ArgumentCollection
    private FlowFeatureMapperArgumentCollection fmArgs = new FlowFeatureMapperArgumentCollection();

    @Advanced
    @Argument(fullName= AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, shortName= AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_SHORT_NAME, doc="Mode for emitting reference confidence scores (For Mutect2, this is a BETA feature)", optional = true)
    public ReferenceConfidenceMode emitReferenceConfidence = ReferenceConfidenceMode.NONE;

    @Advanced
    @Argument(fullName = HaplotypeCallerArgumentCollection.GQ_BAND_LONG_NAME, shortName = HaplotypeCallerArgumentCollection.GQ_BAND_SHORT_NAME, doc= "Exclusive upper bounds for reference confidence GQ bands " +
            "(must be in [1, 100] and specified in increasing order)", optional = true)
    public List GVCFGQBands = new ArrayList<>(70);
    {
        for (int i=1; i<=60; ++i) {
            GVCFGQBands.add(i);
        }
        GVCFGQBands.add(70); GVCFGQBands.add(80); GVCFGQBands.add(90); GVCFGQBands.add(99);
    };

    @Advanced
    @Argument(fullName=HaplotypeCallerArgumentCollection.OUTPUT_BLOCK_LOWER_BOUNDS, doc = "Output the band lower bound for each GQ block regardless of the data it represents", optional = true)
    public boolean floorBlocks = false;

    @Advanced
    @Argument(fullName=INCLUDE_QC_FAILED_READS_FULL_NAME, doc = "include reads with QC failed flag", optional = true)
    public boolean includeQcFailedReads = false;

    @ArgumentCollection
    public FlowBasedArgumentCollection fbargs = new FlowBasedArgumentCollection();

    protected static class ReadContext implements Comparable {
        final GATKRead         read;
        final ReferenceContext referenceContext;

        ReadContext(final GATKRead read, final ReferenceContext referenceContext) {
            this.read = read;
            this.referenceContext = referenceContext;
        }

        @Override
        public int compareTo(ReadContext o) {
            int     delta = read.getContig().compareTo(o.read.getContig());

            delta = (delta != 0) ? delta : Integer.compare(read.getStart(), o.read.getStart());
            delta = (delta != 0) ? delta : Integer.compare(read.getEnd(), o.read.getEnd());

            return delta;
        }
    }

    protected static class MappedFeature implements Comparable {

        GATKRead    read;
        FlowFeatureMapperArgumentCollection.MappingFeatureEnum  type;
        byte[]      readBases;
        byte[]      refBases;
        int         readBasesOffset; // offset of read bases array
        int         start;      // location (on reference)
        int         offsetDelta;
        double      score;
        int         readCount;
        int         filteredCount;
        int         nonIdentMBasesOnRead;
        int         featuresOnRead;
        int         refEditDistance;
        int         index;
        int         smqLeft;
        int         smqRight;
        int         smqLeftMean;
        int         smqRightMean;

        double      scoreForBase[];
        boolean     adjacentRefDiff;

        public MappedFeature(GATKRead read, FlowFeatureMapperArgumentCollection.MappingFeatureEnum  type, byte[] readBases,
                             byte[] refBases, int readBasesOffset, int start, int offsetDelta) {
            this.read = read;
            this.type = type;
            this.readBases = readBases;
            this.refBases = refBases;
            this.readBasesOffset = readBasesOffset;
            this.start = start;
            this.offsetDelta = offsetDelta;
        }

        static MappedFeature makeSNV(GATKRead read, int offset, byte refBase, int start, int offsetDelta) {
            byte[]      readBases = {read.getBasesNoCopy()[offset]};
            byte[]      refBases = {refBase};
            return new MappedFeature(
                    read,
                    FlowFeatureMapperArgumentCollection.MappingFeatureEnum.SNV,
                    readBases,
                    refBases,
                    offset,
                    start,
                    offsetDelta);
        }

        @Override
        public String toString() {
            return "Feature{" +
                    "read=" + read +
                    ", type=" + type +
                    ", readBases=" + Arrays.toString(readBases) +
                    ", refBases=" + Arrays.toString(refBases) +
                    ", readBasesOffset=" + readBasesOffset +
                    ", start=" + start +
                    '}';
        }

        @Override
        public int compareTo(MappedFeature o) {

            int     delta = this.read.getContig().compareTo(o.read.getContig());
            if ( delta != 0 ) {
                return delta;
            } else {
                return this.start - o.start;
            }
        }
    }

    // locals
    private VariantContextWriter                vcfWriter;
    final private PriorityQueue        featureQueue = new PriorityQueue<>();
    final private PriorityQueue    readQueue = new PriorityQueue<>();
    private FeatureMapper                       mapper;

    @Override
    public void onTraversalStart() {
        super.onTraversalStart();
        mapper = buildMapper();

        // enforce requirement for sorted input
        if ( getHeaderForReads().getSortOrder() != SAMFileHeader.SortOrder.coordinate ) {
            throw new IllegalArgumentException("input file must be coordinated sorted");
        }

        // open output vcf
        // The HC engine will make the right kind (VCF or GVCF) of writer for us
        final SAMSequenceDictionary sequenceDictionary = getHeaderForReads().getSequenceDictionary();
        vcfWriter = makeVCFWriter(outputVCF, sequenceDictionary, createOutputVariantIndex, createOutputVariantMD5, outputSitesOnlyVCFs);
        vcfWriter.writeHeader(makeVCFHeader(sequenceDictionary, getDefaultToolVCFHeaderLines()));
    }

    @Override
    public void closeTool() {
        flushQueue(null, null);
        super.closeTool();
        if ( vcfWriter != null ) {
            vcfWriter.close();
        }
    }

    public VariantContextWriter makeVCFWriter( final GATKPath outputVCF, final SAMSequenceDictionary readsDictionary,
                                               final boolean createOutputVariantIndex, final boolean  createOutputVariantMD5,
                                               final boolean sitesOnlyMode ) {
        Utils.nonNull(outputVCF);
        Utils.nonNull(readsDictionary);

        final List options = new ArrayList<>(2);
        if (createOutputVariantIndex) {options.add(Options.INDEX_ON_THE_FLY);}
        if (sitesOnlyMode) {options.add(Options.DO_NOT_WRITE_GENOTYPES);}

        VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(
                outputVCF.toPath(),
                readsDictionary,
                createOutputVariantMD5,
                options.toArray(new Options[options.size()])
        );

        if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
            try {
                writer = new GVCFWriter(writer, new ArrayList(GVCFGQBands), floorBlocks);
            } catch ( IllegalArgumentException e ) {
                throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
            }
        }

        return writer;
    }

    public VCFHeader makeVCFHeader(final SAMSequenceDictionary sequenceDictionary, final Set  defaultToolHeaderLines ) {
        final Set headerInfo = new HashSet<>();
        headerInfo.addAll(defaultToolHeaderLines);

        // all callers need to add these standard annotation header lines
        headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
        headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));

        // all callers need to add these standard FORMAT field header lines
        VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
                VCFConstants.GENOTYPE_KEY,
                VCFConstants.GENOTYPE_QUALITY_KEY,
                VCFConstants.DEPTH_KEY,
                VCFConstants.GENOTYPE_PL_KEY);

        // add our own headers
        headerInfo.add(new VCFInfoHeaderLine(VCF_READ_NAME, 1, VCFHeaderLineType.String, "Read name"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_SCORE, 1, VCFHeaderLineType.Float, "Mapping score"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_FLAGS, 1, VCFHeaderLineType.Integer, "Read flags"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_MAPQ, 1, VCFHeaderLineType.Integer, "Read mapqe"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_CIGAR, 1, VCFHeaderLineType.String, "Read CIGAR"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_READ_COUNT, 1, VCFHeaderLineType.Integer, "Number of reads containing this location"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_FILTERED_COUNT, 1, VCFHeaderLineType.Integer, "Number of reads containing this location that pass the adjacent base filter"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_FC1, 1, VCFHeaderLineType.Integer, "Number of M bases different on read from references"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_FC2, 1, VCFHeaderLineType.Integer, "Number of features before score threshold filter"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_LENGTH, 1, VCFHeaderLineType.Integer, "Read length"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_EDIST, 1, VCFHeaderLineType.Integer, "Read Levenshtein edit distance from reference"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_INDEX, 1, VCFHeaderLineType.Integer, "Ordinal index, from start of the read, where the feature was found"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_SMQ_LEFT, 1, VCFHeaderLineType.Integer, "Ordinal Median quality of N bases to the left of the feature"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_SMQ_RIGHT, 1, VCFHeaderLineType.Integer, "Ordinal Median quality of N bases to the right of the feature"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_SMQ_LEFT_MEAN, 1, VCFHeaderLineType.Integer, "Ordinal Mean quality of N bases to the left of the feature"));
        headerInfo.add(new VCFInfoHeaderLine(VCF_SMQ_RIGHT_MEAN, 1, VCFHeaderLineType.Integer, "Ordinal Mean quality of N bases to the right of the feature"));
        for ( String spec : fmArgs.copyAttr ) {
            final CopyAttrInfo info = new CopyAttrInfo(spec);
            headerInfo.add(new VCFInfoHeaderLine(fmArgs.copyAttrPrefix + info.name, 1, info.type, info.desc));
            copyAttrInfo.add(info);
        }

        // validation mode?
        if ( fmArgs.reportAllAlts ) {
            for ( int baseIndex = 0 ; baseIndex < SequenceUtil.VALID_BASES_UPPER.length ; baseIndex++ ) {
                headerInfo.add(new VCFInfoHeaderLine(scoreNameForBase(baseIndex), 1, VCFHeaderLineType.Float, "Base specific mapping score"));
            }
        }
        if ( fmArgs.reportAllAlts || fmArgs.tagBasesWithAdjacentRefDiff ) {
            headerInfo.add(new VCFInfoHeaderLine(VCF_ADJACENT_REF_DIFF, 1, VCFHeaderLineType.Flag, "Adjacent base filter indication: indel in the adjacent 5 bases to the considered base on the read"));
        }

        final VCFHeader vcfHeader = new VCFHeader(headerInfo);
        vcfHeader.setSequenceDictionary(sequenceDictionary);
        return vcfHeader;
    }

    private String scoreNameForBase(int baseIndex) {
        if ( scoreForBaseNames[baseIndex] == null ) {
            scoreForBaseNames[baseIndex] = VCF_SCORE + "_" + new String(new byte[]{SequenceUtil.VALID_BASES_UPPER[baseIndex]});
        }
        return scoreForBaseNames[baseIndex];
    }

    @Override
    public void apply(final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext) {

        // include dups?
        if ( read.isDuplicate() && !fmArgs.includeDupReads ) {
            return;
        }

        // include supplementary alignments?
        if ( read.isSupplementaryAlignment() && !fmArgs.keepSupplementaryAlignments ) {
            return;
        }

        // include qc-failed reads?
        if ( ((read.getFlags() & VENDOR_QUALITY_CHECK_FLAG) != 0) && !includeQcFailedReads ) {
            return;
        }

        // flush qeues up to this read
        flushQueue(read, referenceContext);

        // find features in read
        mapper.forEachOnRead(read, referenceContext, fr -> {
            if ( logger.isDebugEnabled() ) {
                logger.debug("fr: " + fr);
            }

            // score the feature
            fr.score = scoreFeature(fr);

            // score for validation mode?
            if ( fmArgs.reportAllAlts) {
                fr.scoreForBase = new double[SequenceUtil.VALID_BASES_UPPER.length];
                for ( int baseIndex = 0 ; baseIndex < fr.scoreForBase.length ; baseIndex++ ) {
                    final byte base = SequenceUtil.VALID_BASES_UPPER[baseIndex];
                    boolean incl = base != fr.readBases[0];
                    if ( incl ) {
                        fr.scoreForBase[baseIndex] = scoreFeature(fr, base);
                    } else {
                        fr.scoreForBase[baseIndex] = Double.NaN;
                    }
                }
            }

            // emit feature if filters in
            if ( filterFeature(fr) ) {
                featureQueue.add(fr);
            }
        });
    }

    private void flushQueue(final GATKRead read, final ReferenceContext referenceContext) {

        // emit all?
        if ( read == null ) {
            while ( featureQueue.size() != 0 ) {
                final MappedFeature fr = featureQueue.poll();
                enrichFeature(fr);
                emitFeature(fr);
            }
        } else {
            // enter read into the queue
            readQueue.add(new ReadContext(read, referenceContext));

            // emit all features that start before this read
            while ( featureQueue.size() != 0 ) {
                MappedFeature fr = featureQueue.peek();
                if ( !fr.read.getContig().equals(read.getContig())
                            || (fr.start < read.getStart()) ) {
                    fr = featureQueue.poll();
                    enrichFeature(fr);
                    emitFeature(fr);
                }
                else {
                    break;
                }
            }

            // remove all reads that start before this read
            while ( readQueue.size() != 0 ) {
                ReadContext rc = readQueue.peek();

                if ( !rc.read.getContig().equals(read.getContig())
                        || (rc.read.getEnd() < read.getStart()) ) {
                    rc = readQueue.poll();
                }
                else {
                    break;
                }
            }
        }
    }

    private void enrichFeature(final MappedFeature fr) {

        // loop on queued reads, count and check if should be counted as filtered
        final Locatable   loc = new SimpleInterval(fr.read.getContig(), fr.start, fr.start);
        for ( ReadContext rc : readQueue ) {
            if ( rc.read.contains(loc) ) {
                fr.readCount++;
                FeatureMapper.FilterStatus fs = mapper.noFeatureButFilterAt(rc.read, rc.referenceContext, fr.start);
                if ( (fs == FeatureMapper.FilterStatus.Filtered) || (fs == FeatureMapper.FilterStatus.NoFeatureAndFiltered) ) {
                    fr.filteredCount++;
                }
            }
        }
    }

    private double scoreFeature(final MappedFeature fr) {
        return scoreFeature(fr, (byte)0);
    }
    private double scoreFeature(final MappedFeature fr, byte altBase) {

        // build haplotypes
        final FlowBasedReadUtils.ReadGroupInfo rgInfo = FlowBasedReadUtils.getReadGroupInfo(getHeaderForReads(), fr.read);
        final FlowBasedHaplotype[]    haplotypes = buildHaplotypes(fr, rgInfo.flowOrder, altBase);

        // create flow read
        final FlowBasedRead flowRead = new FlowBasedRead(fr.read, rgInfo.flowOrder, rgInfo.maxClass, fbargs);

        final int diffLeft = haplotypes[0].getStart() - flowRead.getStart() + fr.offsetDelta;
        final int diffRight = flowRead.getEnd() - haplotypes[0].getEnd();
        flowRead.applyBaseClipping(Math.max(0, diffLeft), Math.max(diffRight, 0), false);

        if ( !flowRead.isValid() ) {
            return -1;
        }

        // compute alternative score
        final int         hapKeyLength = Math.min(haplotypes[0].getKeyLength(), haplotypes[1].getKeyLength());
        final double      readScore = computeLikelihoodLocal(flowRead, haplotypes[0], hapKeyLength, false);
        final double      refScore = computeLikelihoodLocal(flowRead, haplotypes[1], hapKeyLength, false);
        double            score = readScore - refScore;
        if ( !Double.isNaN(fmArgs.limitScore) ) {
            score = Math.min(score, fmArgs.limitScore);
        }

        if ( ((Double.isNaN(score) || (score < 0)) && fmArgs.debugNegatives)
             || (fmArgs.debugReadName != null && fmArgs.debugReadName.contains(fr.read.getName())) ) {
            logger.info("**** debug read: " + fr.read);
            logger.info("readBases: " + fr.read.getBasesString());
            logger.info("flowRead: " + flowRead);
            logger.info("flowBases: " + flowRead.getBasesString());
            logger.info("flowOrder: " + flowRead.getFlowOrder());
            logger.info("flowKey: " + flowRead.getKeyLength() + " " + Arrays.toString(flowRead.getKey()));
            logger.info("readHaplotype: " + haplotypes[0]);
            logger.info("readHapKey: " + haplotypes[0].getKeyLength() + " " + Arrays.toString(haplotypes[0].getKey()));
            computeLikelihoodLocal(flowRead, haplotypes[0], hapKeyLength, true);
            logger.info("refrHaplotype: " + haplotypes[1]);
            logger.info("refrHapKey: " + haplotypes[1].getKeyLength() + " " + Arrays.toString(haplotypes[1].getKey()));
            computeLikelihoodLocal(flowRead, haplotypes[1], hapKeyLength, true);
            logger.info("score: " + score);

            // analyze read
            final FlowBasedRead flowRead2 = new FlowBasedRead(fr.read, rgInfo.flowOrder, rgInfo.maxClass, fbargs);
            final int[]        key2 = flowRead2.getKey();
            for ( int i = 0 ; i < key2.length ; i++ ) {
                final double      p1 = flowRead2.getProb(i, key2[i]);
                for ( int j = 0 ; j < rgInfo.maxClass ; j++ ) {
                    final double      p2 = flowRead2.getProb(i, j);
                    if ( p2 > p1 )
                        logger.info(String.format("prob at %s key[%d]=%d, %f is lower than at %d which is %f",
                                                                flowRead2.getName(), i, key2[i], p1, j, p2));
                }
            }
        }

        if ( score < 0 && !fmArgs.keepNegatives && score != -1.0 ) {
            score = 0;
        }

        return score;
    }

    public static double computeLikelihoodLocal(final FlowBasedRead read, final FlowBasedHaplotype haplotype, final int hapKeyLength, final boolean debug) {

        final byte[] flowOrder = haplotype.getFlowOrderArray();
        final byte   readFlowOrder0 = read.getFlowOrderArray()[0];
        int startingPoint = 0;
        for (int i = 0; i < flowOrder.length; i++) {
            if (flowOrder[i] == readFlowOrder0) {
                startingPoint = i;
                break;
            }
        }
        final int[]         key = haplotype.getKey();

        // debug support
        StringBuffer        debugMessage = null;
        if ( debug )
            debugMessage = new StringBuffer(Integer.toString(startingPoint) + " hmer prob |");
        double              result = 0 ;
        for (int i = 0; i < read.getKeyLength(); i++) {
            int     index = i + startingPoint;
            double  prob = 0;
            int     locationToFetch = 0;
            if ( index < hapKeyLength ) {
                locationToFetch = Math.min(key[index] & 0xff, read.getMaxHmer() + 1);
                prob = read.getProb(i, locationToFetch);
            } else {
                if ( debug ) {
                    debugMessage.append(" clip");
                }
                break;
            }
            if ( Precision.equals(prob, 0.0) ) {
                prob = LOWEST_PROB;
            }
            result += Math.log10(prob);

            if ( debug ) {
                debugMessage.append(String.format(" %d %.4f", locationToFetch, prob));
            }
        }

        if ( debug ) {
            debugMessage.append(" | " + result);
            logger.info("debugMessage: " + debugMessage);
        }

        return result;
    }

    private FlowBasedHaplotype[] buildHaplotypes(final MappedFeature fr, final String flowOrder, byte altBase) {

        // build bases for flow haplotypes
        // NOTE!!!: this code assumes length of feature on read and reference is the same
        // this is true for SNP but not for INDELs - it will have to be re-written!
        // TODO: write for INDEL
        byte[] bases = fr.read.getBasesNoCopy();
        int         offset = fr.readBasesOffset;
        int         refStart = fr.start;
        int         refModOfs = 0;

        // install alt base?
        byte orgBase = 0;
        if ( altBase != 0 ) {
            orgBase = fr.refBases[0];
            fr.refBases[0] = altBase;
        }

        if ( offset > 0 ) {
            // reach into hmer before
            offset--;
            refModOfs++;
            refStart--;

            // extend until start of hmer
            final byte        hmerBase = bases[offset];
            while ( offset > 0 && bases[offset-1] == hmerBase ) {
                offset--;
                refModOfs++;
                refStart--;
            }
        }
        final byte[]      sAltBases = Arrays.copyOfRange(bases, offset, bases.length);
        final byte[]      sRefBases = Arrays.copyOf(sAltBases, sAltBases.length);
        System.arraycopy(fr.refBases, 0, sRefBases, refModOfs, fr.refBases.length);

        // construct haplotypes
        final SimpleInterval genomeLoc = new SimpleInterval(fr.read.getContig(), refStart, refStart + sAltBases.length - 1);
        final Cigar          cigar = new Cigar();
        cigar.add(new CigarElement(sAltBases.length, CigarOperator.M));
        final Haplotype      altHaplotype = new Haplotype(sAltBases, false);
        final Haplotype      refHaplotype = new Haplotype(sRefBases, true);
        altHaplotype.setGenomeLocation(genomeLoc);
        refHaplotype.setGenomeLocation(genomeLoc);
        altHaplotype.setCigar(cigar);
        refHaplotype.setCigar(cigar);

        // prepare flow based haplotypes
        final FlowBasedHaplotype[] result = {
                                new FlowBasedHaplotype(altHaplotype, flowOrder),
                                new FlowBasedHaplotype(refHaplotype, flowOrder)
                            };

        // restore changes
        if ( altBase != 0 ) {
            fr.refBases[0] = orgBase;
        }

        // return
        return result;
    }

    private boolean filterFeature(final MappedFeature fr) {

        if ( fmArgs.excludeNaNScores && Double.isNaN(fr.score) ) {
            return false;
        } else if ( fr.score > fmArgs.maxScore ) {
            return false;
        } else if ( fr.score < fmArgs.minScore ) {
            return false;
        }

        return true;
    }

    private void emitFeature(final MappedFeature fr) {

        // create alleles
        final Collection          alleles = new LinkedList<>();
        if ( fmArgs.reportAllAlts && Arrays.equals(fr.readBases, fr.refBases) ) {
            alleles.add(Allele.create("*".getBytes(), false));
        } else {
            alleles.add(Allele.create(fr.readBases, false));
        }
        alleles.add(Allele.create(fr.refBases, true));

        // create variant context builder
        final VariantContextBuilder       vcb = new VariantContextBuilder(
                                                    VCB_SOURCE,
                                                    fr.read.getContig(),
                                                    fr.start,
                                                    fr.start + fr.refBases.length - 1,
                                                    alleles);

        // copy attributes
        vcb.attribute(VCF_READ_NAME, fr.read.getName());
        vcb.attribute(VCF_SCORE, String.format("%.5f", fr.score));
        vcb.attribute(VCF_FLAGS, fr.read.getFlags());
        vcb.attribute(VCF_MAPQ, fr.read.getMappingQuality());
        vcb.attribute(VCF_CIGAR, fr.read.getCigar().toString());
        vcb.attribute(VCF_READ_COUNT, fr.readCount);
        vcb.attribute(VCF_FILTERED_COUNT, fr.filteredCount);
        vcb.attribute(VCF_FC1, fr.nonIdentMBasesOnRead);
        vcb.attribute(VCF_FC2, fr.featuresOnRead);
        vcb.attribute(VCF_LENGTH, fr.read.getLength());
        vcb.attribute(VCF_EDIST, fr.refEditDistance);
        vcb.attribute(VCF_INDEX, fr.index);

        // median/mean quality on?
        if ( fmArgs.surroundingMediaQualitySize != null ) {
            vcb.attribute(VCF_SMQ_LEFT, fr.smqLeft);
            vcb.attribute(VCF_SMQ_RIGHT, fr.smqRight);
        }
        if ( fmArgs.surroundingMeanQualitySize != null ) {
            vcb.attribute(VCF_SMQ_LEFT_MEAN, fr.smqLeftMean);
            vcb.attribute(VCF_SMQ_RIGHT_MEAN, fr.smqRightMean);
        }

        for ( CopyAttrInfo info : copyAttrInfo ) {
            if ( fr.read.hasAttribute(info.name) ) {
                final String attrName = fmArgs.copyAttrPrefix + info.name;
                if ( info.type == VCFHeaderLineType.Integer ) {
                    vcb.attribute(attrName, fr.read.getAttributeAsInteger(info.name));
                } else if ( info.type == VCFHeaderLineType.Float ) {
                    vcb.attribute(attrName, fr.read.getAttributeAsFloat(info.name));
                } else {
                    vcb.attribute(attrName, fr.read.getAttributeAsString(info.name));
                }
            }
        }

        // validation mode?
        if ( fmArgs.reportAllAlts ) {
            if ( fr.scoreForBase != null ) {
                for (int baseIndex = 0; baseIndex < SequenceUtil.VALID_BASES_UPPER.length; baseIndex++) {
                    if (!Double.isNaN(fr.scoreForBase[baseIndex])) {
                        vcb.attribute(scoreNameForBase(baseIndex), String.format("%.5f", fr.scoreForBase[baseIndex]));
                    }
                }
            }
        }
        if ( fr.adjacentRefDiff ) {
            vcb.attribute(VCF_ADJACENT_REF_DIFF, true);
        }

        // build it!
        final VariantContext      vc = vcb.make();

        // write to file
        vcfWriter.add(vc);
    }

    private FeatureMapper buildMapper() {

        // build appropriate mapper
        if ( fmArgs.mappingFeature == FlowFeatureMapperArgumentCollection.MappingFeatureEnum.SNV ) {
            return new SNVMapper(fmArgs);
        } else {
            throw new GATKException("unsupported mappingFeature: " + fmArgs.mappingFeature);
        }
    }
}