All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.pairhmm.PairHMMModel Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils.pairhmm;

import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.Utils;

/**
 * Helper class that implement calculations required to implement the PairHMM Finite State Automation (FSA) model.
 */
public final class PairHMMModel {

    /**
     * Prevents instantiation of this class
     */
    private PairHMMModel() {
    }

    /**
     * Length of the standard transition probability array.
     */
    public static final int TRANS_PROB_ARRAY_LENGTH = 6;

    /**
     * Position in the transition probability array for the Match-to-Match transition.
     */
    public static final int matchToMatch = 0;

    /**
     * Position in the transition probability array for the Indel-to-Match transition.
     */
    public static final int indelToMatch = 1;

    /**
     * Position in the transition probability array for the Match-to-Insertion transition.
     */
    public static final int matchToInsertion = 2;

    /**
     * Position in the transition probability array for the Insertion-to-Insertion transition.
     */
    public static final int insertionToInsertion = 3;

    /**
     * Position in the transition probability array for the Match-to-Deletion transition.
     */
    public static final int matchToDeletion = 4;

    /**
     * Position in the transition probability array for the Deletion-to-Deletion transition.
     */
    public static final int deletionToDeletion = 5;

    /**
      * Convenient ln10 constant.
      */
    private static final double LN10 = Math.log(10);

    /**
      * Convenient (ln10)^-1 constant.
      */
     private static final double INV_LN10 = 1.0 / LN10;

    /**
     * Holds pre-calculated the matchToMath probability values in linear scale.
     *
     * 

* This is a triangular matrix stored in a unidimentional array like so: *

* (0,0), (0,1), (1,1), (0,2), (1,2), (2,2), (0,3) ... ({@link QualityUtils#MAX_QUAL},{@link QualityUtils#MAX_QUAL}) */ private static final double[] matchToMatchProb = new double[((QualityUtils.MAX_QUAL + 1) * (QualityUtils.MAX_QUAL + 2)) >> 1]; /** * Holds pre-calculated the matchToMatch log10-probabilities. * *

* This is a triangular matrix stored in a unidimentional array like so: *

* (0,0), (0,1), (1,1), (0,2), (1,2), (2,2), (0,3) ... ({@link QualityUtils#MAX_QUAL},{@link QualityUtils#MAX_QUAL}) */ private static final double[] matchToMatchLog10 = new double[((QualityUtils.MAX_QUAL + 1) * (QualityUtils.MAX_QUAL + 2)) >> 1]; /** * Initialize matchToMatch cache tables {@link #matchToMatch} and {@link #matchToMatchLog} */ static { for (int i = 0, offset = 0; i <= QualityUtils.MAX_QUAL; offset += ++i) for (int j = 0; j <= i; j++) { final double log10Sum = MathUtils.approximateLog10SumLog10(-0.1 * i, -0.1 * j); matchToMatchLog10[offset + j] = Math.log1p(-Math.min(1, Math.pow(10, log10Sum))) * INV_LN10; matchToMatchProb[offset + j] = Math.pow(10, matchToMatchLog10[offset + j]); } } /** * Fills a transition probability array given the different quality scores affecting a read site * * @param insQual the insertion quality score as a byte. * @param delQual the deletion quality score as a byte. * @param gcp the gap-continuation-penalty score as a byte. * * @throws NullPointerException if {@code dest} is {@code null}. * @throws ArrayIndexOutOfBoundsException if {@code dest} is not large enough. * @throws IllegalArgumentException if {@code insQual}, {@code delQual} or {@code gcp} is less than negative. */ public static void qualToTransProbs(final double[] dest, final byte insQual, final byte delQual, final byte gcp) { Utils.nonNull(dest, "dest array null"); if (insQual < 0) throw new IllegalArgumentException("insert quality cannot less than 0: " + insQual); if (delQual < 0) throw new IllegalArgumentException("deletion quality cannot be less than 0: " + delQual); if (gcp < 0) throw new IllegalArgumentException("gcp cannot be less than 0: " + gcp); dest[matchToMatch] = matchToMatchProb(insQual, delQual); dest[matchToInsertion] = QualityUtils.qualToErrorProb(insQual); dest[matchToDeletion] = QualityUtils.qualToErrorProb(delQual); dest[indelToMatch] = QualityUtils.qualToProb(gcp); dest[insertionToInsertion] = dest[deletionToDeletion] = QualityUtils.qualToErrorProb(gcp); } /** * Returns a transition probability array given the different quality scores affecting a read site. * * @param insQual the insertion quality score as a byte. * @param delQual the deletion quality score as a byte. * @param gcp the gap-continuation-penalty score as a byte. * * @throws NullPointerException if {@code dest} is {@code null}. * @throws ArrayIndexOutOfBoundsException if {@code dest} is not large enough. * @throws IllegalArgumentException if {@code insQual}, {@code delQual} or {@code gcp} is less than negative. * * @return never {@code null}. An array of length {@link #TRANS_PROB_ARRAY_LENGTH}. */ public static double[] qualToTransProbs(final byte insQual, final byte delQual, final byte gcp) { final double[] dest = new double[TRANS_PROB_ARRAY_LENGTH]; qualToTransProbs(dest,insQual,delQual,gcp); return dest; } /** * Fills ax matrix with the transition probabilities for a number of bases. * *

* The first dimension of the matrix correspond to the different bases where the first one is stored in position 1. * Thus the position 0 is left empty and the length of the resulting matrix is actually {@code insQual.length + 1}. *

* Each entry is the transition probability array for that base with a length of {@link #TRANS_PROB_ARRAY_LENGTH}. * * @param dest the matrix to update * @param insQuals insertion qualities. * @param delQuals deletion qualities. * @param gcps gap-continuation penalty qualities. * * @throws NullPointerException if any of the input arrays, matrices is {@code null} or any entry in {@code dest} is {@code null}. * @throws IllegalArgumentException if {@code IllegalArgumentException} * if the input array don't have the same length. * @throws ArrayIndexOutOfBoundsException if {@code dest} or any of its elements is not large enough to contain the * transition matrix. */ public static void qualToTransProbs(final double[][] dest, final byte[] insQuals, final byte[] delQuals, final byte[] gcps) { Utils.nonNull(dest, "dest array null"); Utils.nonNull(insQuals, "insQuals array null"); Utils.nonNull(delQuals, "delQuals array null"); Utils.nonNull(gcps, "gcps array null"); final int readLength = insQuals.length; if (delQuals.length != readLength) throw new IllegalArgumentException("deletion quality array length does not match insert quality array length: " + readLength + " != " + delQuals.length); if (gcps.length != readLength) throw new IllegalArgumentException("deletion quality array length does not match insert quality array length: " + readLength + " != " + gcps.length); if (dest.length < readLength + 1) throw new IllegalArgumentException("destination length is not enough for the read length: " + dest.length + " < " + readLength + " + 1"); for (int i = 0; i < readLength; i++) { qualToTransProbs(dest[i + 1], insQuals[i], delQuals[i], gcps[i]); } } /** * Returns a matrix with the transition probabilities for a number of bases. * *

* The first dimension of the matrix correspond to the different bases where the first one is stored in position 1. * Thus the position 0 is left empty and the length of the resulting matrix is actually {@code insQual.length + 1}. *

* Each entry is the transition probability array for that base with a length of {@link #TRANS_PROB_ARRAY_LENGTH}. * * @param insQuals insertion qualities. * @param delQuals deletion qualities. * @param gcps gap-continuation penalty qualities. * * @throws NullPointerException if any of the input arrays is {@code null}. * @throws IllegalArgumentException if {@code IllegalArgumentException} * if the input array don't have the same length. * * @return never {@code null}, an matrix of the dimensions explained above. */ public static double[][] qualToTransProbs(final byte[] insQuals, final byte[] delQuals, final byte[] gcps) { Utils.nonNull(insQuals, "insQuals array null"); Utils.nonNull(delQuals, "delQuals array null"); Utils.nonNull(gcps, "gcps array null"); final double[][] dest = createTransitionMatrix(insQuals.length); qualToTransProbs(dest,insQuals,delQuals,gcps); return dest; } /** * Fills a transition log10-probability array given the different quality scores affecting a read site. * * @param insQual the insertion quality score as a byte. * @param delQual the deletion quality score as a byte. * @param gcp the gap-continuation-penalty score as a byte. * * @throws NullPointerException if {@code dest} is {@code null}. * @throws ArrayIndexOutOfBoundsException if {@code dest} is not large enough. * @throws IllegalArgumentException if {@code insQual}, {@code delQual} or {@code gcp} is less than negative. */ public static void qualToTransProbsLog10(final double[] dest, final byte insQual, final byte delQual, final byte gcp) { Utils.nonNull(dest, "dest array null"); if (insQual < 0) throw new IllegalArgumentException("insert quality cannot less than 0: " + insQual); if (delQual < 0) throw new IllegalArgumentException("deletion quality cannot be less than 0: " + delQual); if (gcp < 0) throw new IllegalArgumentException("gcp cannot be less than 0: " + gcp); dest[matchToMatch] = matchToMatchProbLog10(insQual, delQual); dest[matchToInsertion] = QualityUtils.qualToErrorProbLog10(insQual); dest[matchToDeletion] = QualityUtils.qualToErrorProbLog10(delQual); dest[indelToMatch] = QualityUtils.qualToProbLog10(gcp); dest[insertionToInsertion] = QualityUtils.qualToErrorProbLog10(gcp); dest[deletionToDeletion] = QualityUtils.qualToErrorProbLog10(gcp); } /** * Returns a transition log10 probability array given the different quality scores affecting a read site. * * @param insQual the insertion quality score as a byte. * @param delQual the deletion quality score as a byte. * @param gcp the gap-continuation-penalty score as a byte. * * @throws NullPointerException if {@code dest} is {@code null}. * @throws ArrayIndexOutOfBoundsException if {@code dest} is not large enough. * @throws IllegalArgumentException if {@code insQual}, {@code delQual} or {@code gcp} is less than negative. * * @return never {@code null}. An array of length {@link #TRANS_PROB_ARRAY_LENGTH}. */ public static double[] qualToTransProbsLog10(final byte insQual, final byte delQual, final byte gcp) { final double[] dest = new double[TRANS_PROB_ARRAY_LENGTH]; qualToTransProbsLog10(dest, insQual, delQual, gcp); return dest; } /** * Fills a matrix with the log10 transition probabilities for a number of bases. * *

* The first dimension of the matrix correspond to the different bases where the first one is stored in position 1. * Thus the position 0 is left empty and the length of the resulting matrix is actually {@code insQual.length + 1}. *

* Each entry is the transition probability array for that base with a length of {@link #TRANS_PROB_ARRAY_LENGTH}. * * @param insQuals insertion qualities. * @param delQuals deletion qualities. * @param gcps gap-continuation penalty qualities. * * @throws NullPointerException if any of the input arrays, matrices is {@code null} or any entry in {@code dest} is {@code null}. * @throws IllegalArgumentException if {@code IllegalArgumentException} * if the input array don't have the same length. * @throws ArrayIndexOutOfBoundsException if {@code dest} or any of its elements is not large enough to contain the * transition matrix. */ public static void qualToTransProbsLog10(final double[][] dest, final byte[] insQuals, final byte[] delQuals, final byte[] gcps) { Utils.nonNull(dest, "dest array null"); Utils.nonNull(insQuals, "insQuals array null"); Utils.nonNull(delQuals, "delQuals array null"); Utils.nonNull(gcps, "gcps array null"); final int readLength = insQuals.length; if (delQuals.length != readLength) throw new IllegalArgumentException("deletion quality array length does not match insert quality array length: " + readLength + " != " + delQuals.length); if (gcps.length != readLength) throw new IllegalArgumentException("deletion quality array length does not match insert quality array length: " + readLength + " != " + gcps.length); if (dest.length < readLength + 1) throw new IllegalArgumentException("destination length is not enough for the read length: " + dest.length + " < " + readLength + " + 1"); for (int i = 0; i < readLength; i++) { qualToTransProbsLog10(dest[i + 1], insQuals[i], delQuals[i], gcps[i]); } } /** * Returns a matrix with the log10 transition probabilities for a number of bases. * *

* The first dimension of the matrix correspond to the different bases where the first one is stored in position 1. * Thus the position 0 is left empty and the length of the resulting matrix is actually {@code insQual.length + 1}. *

* Each entry is the transition probability array for that base with a length of {@link #TRANS_PROB_ARRAY_LENGTH}. * * @param insQuals insertion qualities. * @param delQuals deletion qualities. * @param gcps gap-continuation penalty qualities. * * @throws NullPointerException if any of the input arrays is {@code null}. * @throws IllegalArgumentException if {@code IllegalArgumentException} * if the input array don't have the same length. * * @return never {@code null}, an matrix of the dimensions explained above. */ public static double[][] qualToTransProbsLog10(final byte[] insQuals, final byte[] delQuals, final byte[] gcps) { Utils.nonNull(insQuals, "insQuals array null"); Utils.nonNull(delQuals, "delQuals array null"); Utils.nonNull(gcps, "gcps array null"); final double[][] dest = createTransitionMatrix(insQuals.length); qualToTransProbsLog10(dest,insQuals,delQuals,gcps); return dest; } /** * Creates a transition probability matrix large enough to work with sequences of a particular length. * * @param maxReadLength the maximum read length for the transition matrix. * * @return never {@code null}. A matrix of {@code maxReadLength + 1} by {@link #TRANS_PROB_ARRAY_LENGTH} positions. */ public static double[][] createTransitionMatrix(final int maxReadLength) { return new double[maxReadLength + 1][TRANS_PROB_ARRAY_LENGTH]; } /** * Returns the probability that neither of two event takes place. *

* * We assume that both event never occur together and that delQual is the conditional probability * (qual. encoded) of the second event, given the first event didn't took place. So that the * probability of no event is:
* * We assume that both event never occur together so that the probability of no event is:
* * 1 - ProbErr(insQual) - ProbErr(delQual)
* * @param insQual PhRED scaled quality/probability of the first event. * @param delQual PhRED scaled quality/probability of the second event. * * @return a value between 0 and 1. */ public static double matchToMatchProb(final byte insQual, final byte delQual) { return matchToMatchProb((insQual & 0xFF), (delQual & 0xFF)); } /** * Returns the log10 probability that neither of two events, insertion and deletion, takes place. *

* * We assume that both event never occur together so that the probability of no event is:
* * 1 - ProbErr(insQual) - ProbErr(delQual)
* * @param insQual PhRED scaled quality/probability of an insertion. * @param delQual PhRED scaled quality/probability of a deletion. * * @return a value between 0 and -Inf. */ public static double matchToMatchProbLog10(final byte insQual, final byte delQual) { return matchToMatchProbLog10((insQual & 0xFF), (delQual & 0xFF)); } /** * Returns the probability that neither of two events, insertion and deletion, takes place. *

* * We assume that both event never occur together and that delQual is the conditional probability * (qual. encoded) of the second event, given the first event didn't took place. So that the * probability of no event is:
* * 1 - ProbErr(insQual) - ProbErr(delQual)
* * @param insQual PhRED scaled quality/probability of an insertion. * @param delQual PhRED scaled quality/probability of a deletion. * @return a value between 0 and 1. */ public static double matchToMatchProb(final int insQual, final int delQual) { final int minQual; final int maxQual; if (insQual <= delQual) { minQual = insQual; maxQual = delQual; } else { minQual = delQual; maxQual = insQual; } if (minQual < 0) throw new IllegalArgumentException("quality cannot be negative: " + minQual + " and " + maxQual); return (QualityUtils.MAX_QUAL < maxQual) ? 1.0 - Math.pow(10, MathUtils.approximateLog10SumLog10(-0.1 * minQual, -0.1 * maxQual)) : matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual]; } /** * Returns the log-probability that neither of two events takes place. *

* * We assume that both events never occur together and that delQual is the conditional probability (qual. encoded) * of the second event, given the first event didn't took place. So that the probability of no event is:
* * 1 - ProbErr(insQual) - ProbErr(delQual)
* * @param insQual PhRED scaled quality/probability of an insertion. * @param delQual PhRED scaled quality/probability of a deletion. * * @return a value between 0 and -Inf. */ public static double matchToMatchProbLog10(final int insQual, final int delQual) { final int minQual; final int maxQual; if (insQual <= delQual) { minQual = insQual; maxQual = delQual; } else { minQual = delQual; maxQual = insQual; } return (QualityUtils.MAX_QUAL < maxQual) ? Math.log1p ( -Math.min(1, Math.pow(10, MathUtils.approximateLog10SumLog10(-.1 * minQual, -.1 * maxQual)))) * INV_LN10 : matchToMatchLog10[((maxQual * (maxQual + 1)) >> 1) + minQual]; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy