org.broadinstitute.hellbender.tools.copynumber.models.AlleleFractionLikelihoods Maven / Gradle / Ivy
package org.broadinstitute.hellbender.tools.copynumber.models;
import org.apache.commons.math3.special.Gamma;
import org.apache.commons.math3.util.CombinatoricsUtils;
import org.apache.commons.math3.util.FastMath;
import org.broadinstitute.hellbender.utils.NaturalLogUtils;
import java.util.List;
import java.util.stream.IntStream;
import static org.apache.commons.math3.util.FastMath.sqrt;
/**
* Contains likelihood methods for the allele-fraction model.
* See docs/CNV/archived/archived-CNV-methods.pdf for a thorough description of the model.
*
* We can compute the log-likelihood of a alt reads and r ref reads given minor fraction f and gamma hyperparameters
* (specifying the distribution on allelic biases) mu (mean) and beta (rate = mean/variance) and given
* an alt minor, ref minor, or outlier indicator state. Note that this is a partially collapsed log-likelihood in that the
* latent variable corresponding to the allelic bias at this site has been marginalized out but the indicator
* variable has not been marginalized out.
*
* See docs/CNV/archived/archived-CNV-methods.pdf for derivation.
*
* Finally, note that this is a static method and does not get mu, beta, and minorFraction from an AlleleFractionState object
* We need such functionality because MCMC evaluates the likelihood under proposed parameter changes.
*
*
* if indicator == ALT_MINOR:
*
* log { [beta^alpha / Gamma(alpha)][(1 - pi) / 2] * int_{0 to infty} f^a * (1 - f)^r * lambda^(alpha + r - 1) * exp(-beta * lambda)/(f + (1 - f) * lambda)^n d lambda }
*
* if indicator == REF_MINOR, same as ALT_MINOR but with f <--> 1 - f
*
* if indicator == OUTLIER log {pi * a!r!/(n+1)!}
*
* where alpha = mu*beta and n = a + r.
*
* @author David Benjamin <[email protected]>
* @author Samuel Lee <[email protected]>
*/
final class AlleleFractionLikelihoods {
private static final double EPSILON = 1E-10;
private static final FunctionCache logGammaCache = new FunctionCache<>(Gamma::logGamma);
private static final FunctionCache logCache = new FunctionCache<>(AlleleFractionLikelihoods::log);
private AlleleFractionLikelihoods() {}
static double hetLogLikelihood(final AlleleFractionGlobalParameters parameters,
final double minorFraction,
final AlleleFractionSegmentedData.IndexedAllelicCount allelicCount) {
final double alpha = parameters.getAlpha();
final double beta = parameters.getBeta();
final double pi = parameters.getOutlierProbability();
//we cache some quantities that will be reused
final double logPi = logCache.computeIfAbsent(pi);
final double logNotPi = logCache.computeIfAbsent((1 - pi) / 2);
final double logcCommon = alpha * logCache.computeIfAbsent(beta) - logGammaCache.computeIfAbsent(alpha);
final double majorFraction = 1 - minorFraction;
final double logMinorFraction = logCache.computeIfAbsent(minorFraction);
final double logMajorFraction = logCache.computeIfAbsent(majorFraction);
final int a = allelicCount.getAltReadCount();
final int r = allelicCount.getRefReadCount();
final int n = a + r;
//alt-minor calculation
final double lambda0AltMinor = biasPosteriorMode(alpha, beta, minorFraction, a, r);
final double kappaAltMinor = biasPosteriorCurvature(alpha, minorFraction, r, n, lambda0AltMinor);
final double rhoAltMinor = biasPosteriorEffectiveAlpha(lambda0AltMinor, kappaAltMinor);
final double tauAltMinor = biasPosteriorEffectiveBeta(lambda0AltMinor, kappaAltMinor);
final double logcAltMinor = logcCommon + a * logMinorFraction + r * logMajorFraction
+ (r + alpha - rhoAltMinor) * log(lambda0AltMinor) + (tauAltMinor - beta) * lambda0AltMinor
- n * log(minorFraction + majorFraction * lambda0AltMinor);
final double altMinorLogLikelihood = logNotPi + logcAltMinor + Gamma.logGamma(rhoAltMinor) - rhoAltMinor * log(tauAltMinor);
//ref-minor calculation
final double lambda0RefMinor = biasPosteriorMode(alpha, beta, majorFraction, a, r);
final double kappaRefMinor = biasPosteriorCurvature(alpha, majorFraction, r, n, lambda0RefMinor);
final double rhoRefMinor = biasPosteriorEffectiveAlpha(lambda0RefMinor, kappaRefMinor);
final double tauRefMinor = biasPosteriorEffectiveBeta(lambda0RefMinor, kappaRefMinor);
final double logcRefMinor = logcCommon + a * logMajorFraction + r * logMinorFraction
+ (r + alpha - rhoRefMinor) * log(lambda0RefMinor) + (tauRefMinor - beta) * lambda0RefMinor
- n * log(majorFraction + minorFraction * lambda0RefMinor);
final double refMinorLogLikelihood = logNotPi + logcRefMinor + Gamma.logGamma(rhoRefMinor) - rhoRefMinor * log(tauRefMinor);
final double outlierLogLikelihood = logPi - Math.log(a + r + 1) - CombinatoricsUtils.binomialCoefficientLog(a+r,a);
return NaturalLogUtils.logSumExp(altMinorLogLikelihood, refMinorLogLikelihood, outlierLogLikelihood);
}
static double segmentLogLikelihood(final AlleleFractionGlobalParameters parameters,
final double minorFraction,
final List allelicCountsInSegment) {
return allelicCountsInSegment.stream()
.mapToDouble(allelicCount -> hetLogLikelihood(parameters, minorFraction, allelicCount))
.sum();
}
/**
* The total log likelihood of all segments.
*/
static double logLikelihood(final AlleleFractionGlobalParameters parameters,
final AlleleFractionState.MinorFractions minorFractions,
final AlleleFractionSegmentedData data) {
return IntStream.range(0, data.getNumSegments())
.mapToDouble(segment -> segmentLogLikelihood(parameters, minorFractions.get(segment), data.getIndexedAllelicCountsInSegment(segment)))
.sum();
}
/**
* Calculates the mode of the exact allelic-bias posterior at given values of the hyperparameters for the
* * allelic-bias Gamma-distribution prior, the minor-allele fraction parameter, and the observed
* counts at a site. See docs/CNV/archived/archived-CNV-methods.pdf (where this quantity is referred to as lambda_0) for details.
* @param alpha alpha hyperparameter for allelic-bias Gamma-distribution prior
* @param beta beta hyperparameter for allelic-bias Gamma-distribution prior
* @param f minor-allele fraction
* @param a alt counts
* @param r ref counts
*/
private static double biasPosteriorMode(final double alpha, final double beta, final double f, final int a, final int r) {
final double w = (1 - f) * (a - alpha + 1) + beta * f;
return Math.max((sqrt(w * w + 4 * beta * f * (1 - f) * (r + alpha - 1)) - w) / (2 * beta * (1 - f)), EPSILON);
}
/**
* Calculates the curvature (second derivative at the mode) of the exact allelic-bias log posterior
* at given values of the hyperparameters for the allelic-bias Gamma-distribution prior,
* the minor-allele fraction parameter, and the observed counts at a site.
* See docs/CNV/archived/archived-CNV-methods.pdf (where this quantity is referred to as kappa) for details.
* @param alpha alpha hyperparameter for allelic-bias Gamma-distribution prior
* @param f minor-allele fraction
* @param r ref counts
* @param n total counts
* @param lambda0 mode of allelic-bias posterior
*/
private static double biasPosteriorCurvature(final double alpha, final double f, final int r, final int n, final double lambda0) {
final double y = (1 - f) / (f + (1 - f) * lambda0);
return n * y * y - (r + alpha - 1) / (lambda0 * lambda0);
}
/**
* Calculates the effective alpha hyperparameter for the Gamma-distribution approximation of the exact allelic-bias posterior.
* See docs/CNV/archived/archived-CNV-methods.pdf (where this quantity is referred to as rho) for details.
* @param lambda0 mode of allelic-bias posterior
* @param kappa curvature of allelic-bias posterior
*/
private static double biasPosteriorEffectiveAlpha(final double lambda0, final double kappa) {
return Math.max(1 - kappa * lambda0 * lambda0, EPSILON);
}
/**
* Calculates the effective beta hyperparameter for the Gamma-distribution approximation of the exact allelic-bias posterior.
* See docs/CNV/archived/archived-CNV-methods.pdf (where this quantity is referred to as tau) for details.
* @param lambda0 mode of allelic-bias posterior
* @param kappa curvature of allelic-bias posterior
*/
private static double biasPosteriorEffectiveBeta(final double lambda0, final double kappa) {
return Math.max(-kappa * lambda0, EPSILON);
}
private static double log(final double x) {
return Math.log(Math.max(EPSILON, x));
}
}