All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.pairhmm.LoglessPDPairHMM Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils.pairhmm;

import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.haplotype.PartiallyDeterminedHaplotype;

import java.util.Arrays;

import static org.broadinstitute.hellbender.utils.pairhmm.PairHMMModel.*;

public class LoglessPDPairHMM extends N2MemoryPDPairHMM {
    static final double INITIAL_CONDITION = Math.pow(2, 1020);
    static final double INITIAL_CONDITION_LOG10 = Math.log10(INITIAL_CONDITION);
    static final int OFF = 1;

    // we divide e by 3 because the observed base could have come from any of the non-observed alleles
    static final double TRISTATE_CORRECTION = 3.0;

    //Branch Stat Arrays
    protected double[][] branchMatchMatrix = null;
    protected double[][] branchInsertionMatrix = null;
    protected double[][] branchDeletionMatrix = null;

    enum HMMState {
        // The regular state
        NORMAL,

        // Idicating that we must be copying the array elements to the right
        INSIDE_DEL,

        // Indicating that we must handle the special case for merging events after the del
        AFTER_DEL,
    }

    public double subComputeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases,
        final byte[] haplotypePDBases,
        final byte[] readBases,
        final byte[] readQuals,
        final byte[] insertionGOP,
        final byte[] deletionGOP,
        final byte[] overallGCP,
        final int hapStartIndex,
        final boolean recacheReadValues,
        final int nextHapStartIndex) {

        if (previousHaplotypeBases == null || previousHaplotypeBases.length != haplotypeBases.length) {
            final double initialValue = INITIAL_CONDITION / haplotypeBases.length;
            // set the initial value (free deletions in the beginning) for the first row in the deletion matrix
            for( int j = 0; j < paddedHaplotypeLength; j++ ) {
                deletionMatrix[0][j] = initialValue;
            }
        }

        if ( ! constantsAreInitialized || recacheReadValues ) {
            initializeProbabilities(transition, insertionGOP, deletionGOP, overallGCP);

            // note that we initialized the constants
            constantsAreInitialized = true;
        }

        initializePriors(haplotypeBases, haplotypePDBases, readBases, readQuals, hapStartIndex);

        HMMState currentState = HMMState.NORMAL;
        for (int i = 1; i < paddedReadLength; i++) {
            for (int j = hapStartIndex+1; j < paddedHaplotypeLength; j++) {
                switch (currentState) {
                    case NORMAL:
                        // Pre-emptively copy over from the left
                        branchMatchMatrix[i][j] = matchMatrix[i][j - 1];
                        branchDeletionMatrix[i][j] = deletionMatrix[i][j - 1];
                        branchInsertionMatrix[i][j] = insertionMatrix[i][j - 1];

                        //Inlined the code from updateCell - helps JIT to detect hotspots and produce good native code
                        matchMatrix[i][j] = prior[i][j] * (matchMatrix[i - 1][j - 1] * transition[i][matchToMatch] +
                                insertionMatrix[i - 1][j - 1] * transition[i][indelToMatch] +
                                deletionMatrix[i - 1][j - 1] * transition[i][indelToMatch]);
                        deletionMatrix[i][j] = matchMatrix[i][j - 1] * transition[i][matchToDeletion] + deletionMatrix[i][j - 1] * transition[i][deletionToDeletion];

                        // Special case since we MIGHT be at the deletion end and have to be handling jump states from above
                        if ((haplotypePDBases[j-OFF] & PartiallyDeterminedHaplotype.DEL_END) == PartiallyDeterminedHaplotype.DEL_END) {
                            insertionMatrix[i][j] = Math.max(branchMatchMatrix[i - 1][j], matchMatrix[i - 1][j]) * transition[i][matchToInsertion]
                                    + Math.max(branchInsertionMatrix[i - 1][j], insertionMatrix[i - 1][j]) * transition[i][insertionToInsertion];
                        } else {
                            insertionMatrix[i][j] = matchMatrix[i - 1][j] * transition[i][matchToInsertion] + insertionMatrix[i - 1][j] * transition[i][insertionToInsertion];
                        }
                        break;

                    case INSIDE_DEL:
                        // If we are in a deletion copy from the left
                        branchMatchMatrix[i][j] = branchMatchMatrix[i][j - 1];
                        branchDeletionMatrix[i][j] = branchDeletionMatrix[i][j - 1];
                        branchInsertionMatrix[i][j] = branchInsertionMatrix[i][j - 1];


                        //Inlined the code from updateCell - helps JIT to detect hotspots and produce good native code
                        matchMatrix[i][j] = prior[i][j] * (matchMatrix[i - 1][j - 1] * transition[i][matchToMatch] +
                                insertionMatrix[i - 1][j - 1] * transition[i][indelToMatch] +
                                deletionMatrix[i - 1][j - 1] * transition[i][indelToMatch]);
                        deletionMatrix[i][j] = matchMatrix[i][j - 1] * transition[i][matchToDeletion] + deletionMatrix[i][j - 1] * transition[i][deletionToDeletion];

                        // Special case since we MIGHT be at the deletion end and have to be handling jump states from above
                        // TODO these indexes should probably be -1
                        if ((haplotypePDBases[j-OFF] & PartiallyDeterminedHaplotype.DEL_END) == PartiallyDeterminedHaplotype.DEL_END) {
                            insertionMatrix[i][j] = Math.max(branchMatchMatrix[i - 1][j], matchMatrix[i - 1][j]) * transition[i][matchToInsertion]
                                    + Math.max(branchInsertionMatrix[i - 1][j], insertionMatrix[i - 1][j]) * transition[i][insertionToInsertion];
                        } else {
                            insertionMatrix[i][j] = matchMatrix[i - 1][j] * transition[i][matchToInsertion] + insertionMatrix[i - 1][j] * transition[i][insertionToInsertion];
                        }
                        break;

                    case AFTER_DEL:
                        // Pre-emptively copy over from the left
                        branchMatchMatrix[i][j] = Math.max(branchMatchMatrix[i][j - 1], matchMatrix[i][j - 1]);
                        branchDeletionMatrix[i][j] = Math.max(branchDeletionMatrix[i][j - 1], deletionMatrix[i][j - 1]);
                        branchInsertionMatrix[i][j] = Math.max(branchInsertionMatrix[i][j - 1],  insertionMatrix[i][j - 1]);

                        //Inlined the code from updateCell - helps JIT to detect hotspots and produce good native code
                        matchMatrix[i][j] = prior[i][j] * (Math.max(branchMatchMatrix[i - 1][j - 1], matchMatrix[i - 1][j - 1]) * transition[i][matchToMatch] +
                                Math.max(branchInsertionMatrix[i - 1][j - 1], insertionMatrix[i - 1][j - 1]) * transition[i][indelToMatch] +
                                Math.max(branchDeletionMatrix[i - 1][j - 1], deletionMatrix[i - 1][j - 1]) * transition[i][indelToMatch]);
                        deletionMatrix[i][j] = Math.max(branchMatchMatrix[i][j - 1], matchMatrix[i][j - 1]) * transition[i][matchToDeletion]
                                + Math.max(branchDeletionMatrix[i][j - 1], deletionMatrix[i][j - 1]) * transition[i][deletionToDeletion];

                        // Special case since we MIGHT be at the deletion end and have to be handling jump states from above
                        if ((haplotypePDBases[j-OFF] & PartiallyDeterminedHaplotype.DEL_END) == PartiallyDeterminedHaplotype.DEL_END) {
                            insertionMatrix[i][j] = Math.max(branchMatchMatrix[i - 1][j], matchMatrix[i - 1][j]) * transition[i][matchToInsertion]
                                    + Math.max(branchInsertionMatrix[i - 1][j], insertionMatrix[i - 1][j]) * transition[i][insertionToInsertion];
                        } else {
                            insertionMatrix[i][j] = matchMatrix[i - 1][j] * transition[i][matchToInsertion] + insertionMatrix[i - 1][j] * transition[i][insertionToInsertion];
                        }
                        currentState = HMMState.NORMAL;
                        break;
                }
                // If we are at a deletion start base, start copying the branch states
                if ((haplotypePDBases[j-1] & PartiallyDeterminedHaplotype.DEL_START) == PartiallyDeterminedHaplotype.DEL_START) {
                    currentState = HMMState.INSIDE_DEL;
                }
                // Being after a deltion overrides being inside a deletion by virtue of the fact that we allow single element deletions
                if ((haplotypePDBases[j-1] & PartiallyDeterminedHaplotype.DEL_END) == PartiallyDeterminedHaplotype.DEL_END) {
                    currentState = HMMState.AFTER_DEL;
                }
            }
        }

        // final log probability is the log10 sum of the last element in the Match and Insertion state arrays
        // this way we ignore all paths that ended in deletions! (huge)
        // but we have to sum all the paths ending in the M and I matrices, because they're no longer extended.
        final int endI = paddedReadLength - 1;
        double finalSumProbabilities = 0.0;
        for (int j = 1; j < paddedHaplotypeLength; j++) {
            finalSumProbabilities += matchMatrix[endI][j] + insertionMatrix[endI][j];
        }
        return Math.log10(finalSumProbabilities) - INITIAL_CONDITION_LOG10;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void initialize(final int readMaxLength, final int haplotypeMaxLength ) {
        super.initialize(readMaxLength, haplotypeMaxLength);

        branchMatchMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength];
        branchInsertionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength];
        branchDeletionMatrix = new double[paddedMaxReadLength][paddedMaxHaplotypeLength];
    }


    /**
     * Initializes the matrix that holds all the constants related to the editing
     * distance between the read and the haplotype.
     *
     * @param haplotypeBases the bases of the haplotype
     * @param readBases      the bases of the read
     * @param readQuals      the base quality scores of the read
     * @param startIndex     where to start updating the distanceMatrix (in case this read is similar to the previous read)
     */
    void initializePriors(final byte[] haplotypeBases, final byte[] haplotypePDBases, final byte[] readBases, final byte[] readQuals, final int startIndex) {

        // initialize the prior matrix for all combinations of read x haplotype bases
        // Abusing the fact that java initializes arrays with 0.0, so no need to fill in rows and columns below 2.

        for (int i = 0; i < readBases.length; i++) {
            final byte x = readBases[i];
            final byte qual = readQuals[i];
            for (int j = startIndex; j < haplotypeBases.length; j++) {
                final byte y = haplotypeBases[j];
                final byte hapPDBase = haplotypePDBases[j];
                prior[i+1][j+1] = ( x == y || x == (byte) 'N' || y == (byte) 'N' || isBasePDMatching(x,hapPDBase) ?
                        QualityUtils.qualToProb(qual) : (QualityUtils.qualToErrorProb(qual) / (doNotUseTristateCorrection ? 1.0 : TRISTATE_CORRECTION)) );
            }
        }
    }
    //Helper method for handling if a pd base counts as matching
    static boolean isBasePDMatching(byte x, byte hapPDBases) {
        if ((hapPDBases & PartiallyDeterminedHaplotype.SNP) != 0) {
            switch (x) {
                case 'A':
                case 'a':
                    return ((hapPDBases & PartiallyDeterminedHaplotype.A) != 0);
                case 'C':
                case 'c':
                    return ((hapPDBases & PartiallyDeterminedHaplotype.C) != 0);
                case 'T':
                case 't':
                    return ((hapPDBases & PartiallyDeterminedHaplotype.T) != 0);
                case 'G':
                case 'g':
                    return ((hapPDBases & PartiallyDeterminedHaplotype.G) != 0);
                default:
                    throw new RuntimeException("Found unexpected base in alt alleles");
            }
        }
        return false;
    }


    /**
     * Initializes the matrix that holds all the constants related to quality scores.
     *
     * @param insertionGOP   insertion quality scores of the read
     * @param deletionGOP    deletion quality scores of the read
     * @param overallGCP     overall gap continuation penalty
     */
    static void initializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) {
        PairHMMModel.qualToTransProbs(transition,insertionGOP,deletionGOP,overallGCP);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy