All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.NaturalLogUtils Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils;

import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.MathArrays;

import java.util.Collection;
import java.util.Collections;

public class NaturalLogUtils {
    public static final double LOG_ONE_HALF = Math.log(0.5);
    private static final double LOG1MEXP_THRESHOLD = Math.log(0.5);
    private static final double PHRED_TO_LOG_ERROR_PROB_FACTOR = -Math.log(10)/10;

    private static final double[] qualToLogProbCache = new IndexRange(0, QualityUtils.MAX_QUAL + 1)
            .mapToDouble(n -> log1mexp(qualToLogErrorProb((double)n)));

    private NaturalLogUtils() { }

    /**
     * normalizes the log-probability array in-place.
     *
     * @param array the array to be normalized
     * @return the normalized-in-place array
     */
    public static double[] normalizeFromLogToLinearSpace(final double[] array) {
        return normalizeLog(Utils.nonNull(array), false, true);
    }

    /**
     * normalizes the log-probability array in-place.
     *
     * @param array             the array to be normalized
     * @return the normalized-in-place array, maybe log transformed
     */
    public static double[] normalizeLog(final double[] array) {
        return normalizeLog(Utils.nonNull(array), true, true);
    }


    /**
     * See #normalizeFromLog but with the additional option to use an approximation that keeps the calculation always in log-space
     *
     * @param array
     * @param takeLogOfOutput
     * @param inPlace           if true, modify the input array in-place
     *
     * @return
     */
    public static double[] normalizeLog(final double[] array, final boolean takeLogOfOutput, final boolean inPlace) {
        final double logSum = logSumExp(Utils.nonNull(array));
        final double[] result = inPlace ? MathUtils.applyToArrayInPlace(array, x -> x - logSum) : MathUtils.applyToArray(array, x -> x - logSum);
        return takeLogOfOutput ? result : MathUtils.applyToArrayInPlace(result, Math::exp);
    }

    /**
     * Computes $\log(\sum_i e^{a_i})$ trying to avoid underflow issues by using the log-sum-exp trick.
     *
     * 

* This trick consists of shifting all the log values by the maximum so that exponent values are * much larger (close to 1) before they are summed. Then the result is shifted back down by * the same amount in order to obtain the correct value. *

* @return any double value. */ public static double logSumExp(final double... logValues) { Utils.nonNull(logValues); final int maxElementIndex = MathUtils.maxElementIndex(logValues); final double maxValue = logValues[maxElementIndex]; if(maxValue == Double.NEGATIVE_INFINITY) { return maxValue; } double sum = 1.0; for (int i = 0; i < logValues.length; i++) { final double curVal = logValues[i]; if (i == maxElementIndex || curVal == Double.NEGATIVE_INFINITY) { continue; } else { final double scaled_val = curVal - maxValue; sum += Math.exp(scaled_val); } } if ( Double.isNaN(sum) || sum == Double.POSITIVE_INFINITY ) { throw new IllegalArgumentException("logValues must be non-infinite and non-NAN"); } return maxValue + (sum != 1.0 ? Math.log(sum) : 0.0); } /** * Calculates {@code log(1-exp(a))} without losing precision. * *

* This is based on the approach described in: * *

*

* Maechler M, Accurately Computing log(1-exp(-|a|)) Assessed by the Rmpfr package, 2012
* Online document. * *

* * @param a the input exponent. * @return {@link Double#NaN NaN} if {@code a > 0}, otherwise the corresponding value. */ public static double log1mexp(final double a) { if (a > 0) return Double.NaN; if (a == 0) return Double.NEGATIVE_INFINITY; return (a < LOG1MEXP_THRESHOLD) ? Math.log1p(-Math.exp(a)) : Math.log(-Math.expm1(a)); } public static double[] posteriors(double[] logPriors, double[] logLikelihoods) { return normalizeFromLogToLinearSpace(MathArrays.ebeAdd(logPriors, logLikelihoods)); } /** * Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log(0.001)) * * WARNING -- because this function takes a byte for maxQual, you must be careful in converting * integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF)) * * @param qual a phred-scaled quality score encoded as a byte * @return a probability (0.0-1.0) */ public static double qualToLogErrorProb(final byte qual){ return qualToLogErrorProb((double)(qual & 0xFF)); } /** * Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log10(0.001)) * * This is the Phred-style conversion, *not* the Illumina-style conversion. * * The calculation is extremely efficient * * @param qual a phred-scaled quality score encoded as a double * @return log of probability (0.0-1.0) */ public static double qualToLogErrorProb(final double qual) { Utils.validateArg( qual >= 0.0, () -> "qual must be >= 0.0 but got " + qual); return qual * PHRED_TO_LOG_ERROR_PROB_FACTOR; } public static double qualToLogProb(final byte qual) { return qualToLogProbCache[(int) qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc. } public static double logSumLog(final double a, final double b) { return a > b ? a + Math.log(1 + FastMath.exp(b - a)) : b + Math.log(1 + FastMath.exp(a - b)); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy