picard.fingerprint.HaplotypeProbabilities Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of picard Show documentation
picard
There is a newer version: 3.3.0
/*
 * The MIT License
 *
 * Copyright (c) 2015 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package picard.fingerprint;

import java.util.Arrays;

import static java.lang.Math.log10;
import static picard.util.MathUtil.multiply;
import static picard.util.MathUtil.pNormalizeVector;

/**
 * Abstract class for storing and calculating various likelihoods and probabilities
 * for haplotype alleles given evidence.
 *
 * @author Tim Fennell
 */
public abstract class HaplotypeProbabilities {

    private final HaplotypeBlock haplotypeBlock;

    protected HaplotypeProbabilities(final HaplotypeBlock haplotypeBlock) {
        this.haplotypeBlock = haplotypeBlock;
    }

    /** Returns the haplotype for which the probabilities apply. */
    public HaplotypeBlock getHaplotype() {
        return this.haplotypeBlock;
    }

    public double [] getPriorProbablities(){
        return getHaplotype().getHaplotypeFrequencies();
    }

    /** Returns the probabilities, in order, of the AA, Aa and aa haplotypes.

     * Mathematically, this is P(H | D, F) where and H is the vector of possible haplotypes {AA,Aa,aa}.
     * D is the data seen by the class, and
     * F is the population frequency of each genotype.
     */
    /** Returns the posterior probabilities using the population frequency as a prior. */
    public double[] getPosteriorProbabilities() {
        return pNormalizeVector(multiply(getLikelihoods(), getPriorProbablities()));}

    /**
     * Returns the likelihoods, in order, of the AA, Aa and aa haplotypes given the evidence
     *
     * Mathematically this is P(evidence | haplotype) where haplotype={AA,Aa,aa}.
     */
    public abstract double[] getLikelihoods();

    public double[] getLogLikelihoods() {
        final double[] likelihoods = getLikelihoods();
        final double[] lLikelihoods = new double [likelihoods.length];
        for (int i = 0; i < likelihoods.length; ++i) {
            lLikelihoods[i] = Math.log10(likelihoods[i]);
        }
        return lLikelihoods;
    }

    /**
     * Returns a representative SNP for this haplotype. Different subclasses may implement this in
     * different ways, but should do so in a deterministic/repeatable fashion.
     */
    public abstract Snp getRepresentativeSnp();

    /**
     * Returns the number of observations of alleles supporting the first/major haplotype allele.
     * Strictly this doesn't make sense for all subclasses, but it's nice to have it part of the API so
     * a default implementation is provided here.
     * @return int
     */
    public int getObsAllele1() { return 0; }

    /**
     * Returns the number of observations of alleles supporting the second/minor haplotype allele.
     * Strictly this doesn't make sense for all subclasses, but it's nice to have it part of the API so
     * a default implementation is provided here.
     * @return int
     */
    public int getObsAllele2() { return 0; }

    /**
     * Returns the total number of observations of any allele.
     * Strictly this doesn't make sense for all subclasses, but it's nice to have it part of the API so
     * a default implementation is provided here.
     * @return int
     */
    public int getTotalObs() { return 0; }

    /** Returns true if evidence has been added, false if the probabilities are just the priors. */
    public boolean hasEvidence() {
        return true;
    }

	/** Merges in the likelihood information from the supplied haplotype probabilities object. */
	public abstract void merge(final HaplotypeProbabilities other);

    /**
     * Returns the index of the highest probability which can then be used to look up
     * DiploidHaplotypes or DiploidGenotypes as appropriate.
     */
    int getMostLikelyIndex() {
        final double[] probs = getPosteriorProbabilities();

        if (probs[0] > probs[1] && probs[0] > probs[2]) return 0;
        else if (probs[1] > probs[2]) return 1;
        else return 2;
    }

    /** Gets the most likely haplotype given the probabilities. */
    public DiploidHaplotype getMostLikelyHaplotype() {
        return DiploidHaplotype.values()[getMostLikelyIndex()];
    }

    /** Gets the genotype for this Snp given the most likely haplotype. */
    public DiploidGenotype getMostLikelyGenotype(final Snp snp) {
        assertSnpPartOfHaplotype(snp);
        return snp.getGenotype(getMostLikelyHaplotype());
    }

    /** Throws an exception if the passed SNP is not part of this haplotype. */
    void assertSnpPartOfHaplotype(final Snp snp) {
        if (!this.haplotypeBlock.getSnps().contains(snp)) {
            throw new IllegalArgumentException("Snp " + snp + " does not belong to haplotype " + this.haplotypeBlock);
        }
    }

    /** This function returns the scaled probability of the evidence collected
     * given a vector of priors on the haplotype using the internal likelihood, which may be
     * scaled by an unknown factor. This factor causes the result to be scaled, hence the name.
     *
     * Mathematically:
     *
     * P(Evidence| P(h_i)=F_i) = \sum_i P(Evidence | h_i) P(h_i)
     *                         = \sum_i P(Evidence | h_i) F_i
     *                         = c * \sum_i Likelihood_i * F_i
     *
     * Here, h_i are the three possible haplotypes, F_i are the given priors, and Likelihood_i
     * are the stored likelihoods which are scaled from the actually likelihoods by an unknown
     * factor, c. Note that the calculation ignores the internal haplotype probabilities (i.e. priors)
     *
     * @param genotypeFrequencies vector of (possibly scaled) probabilities of the three haplotypes
     * @return P(evidence | P_h)) / c
     */

    public double scaledEvidenceProbabilityUsingGenotypeFrequencies(final double[] genotypeFrequencies) {
        final double[] likelihoods = getLikelihoods();
        assert (genotypeFrequencies.length == likelihoods.length);

        double result = 0;
        for (int i = 0; i < likelihoods.length; ++i) {
            result += likelihoods[i] * genotypeFrequencies[i];
        }
        return result;
    }

    public double shiftedLogEvidenceProbabilityUsingGenotypeFrequencies(final double[] genotypeFrequencies) {
        return log10(scaledEvidenceProbabilityUsingGenotypeFrequencies(genotypeFrequencies));
    }

    /**
     * returns the log-probability the evidence, using as priors the posteriors of another object
     *
     * @param otherHp an additional HaplotypeProbabilities object representing the same underlying HaplotypeBlock
     * @return  log10( P(evidence| P(h_i)=P(h_i|otherHp) ) + c where c is an unknown constant
     */
    public double shiftedLogEvidenceProbabilityGivenOtherEvidence(final HaplotypeProbabilities otherHp) {
        if (!this.haplotypeBlock.equals(otherHp.getHaplotype())) {
            throw new IllegalArgumentException("Haplotypes are from different HaplotypeBlocks!");
        }
        /** Get the posterior from the other otherHp. Use this posterior as the prior to calculate probability.
         *
         *   P(hap|x,y) = P(x|hap,y) P(hap|y) / P(x|y)
         *              = P(x | hap) * P(hap | y) / P(x)
         *                likelihood * other.posterior
         *
         *              = P(x|hap) P(y|hap) P(hap)/P(x)P(y)
         *              = A P(x| hap) P(y| hap) P(hap)  # where A is an unknown scaling factor
         */
        return shiftedLogEvidenceProbabilityUsingGenotypeFrequencies(otherHp.getPosteriorProbabilities());
    }

    /**
     * Returns log (p(evidence)) + c assuming that the prior on haplotypes is given by
     * the internal haplotypeFrequencies
     */
    public double shiftedLogEvidenceProbability() {
        return shiftedLogEvidenceProbabilityUsingGenotypeFrequencies(getPriorProbablities());
    }

    /** Returns the LOD score between the most probable haplotype and the second most probable. */
    public double getLodMostProbableGenotype() {
        final double[] probs = getPosteriorProbabilities();
        final double[] logs = new double[probs.length];
        for (int i = 0; i < probs.length; ++i) {
            logs[i] = log10(probs[i]);
        }

        Arrays.sort(logs);
        return logs[2] - logs[1];
    }

    /** Log10(P(evidence| haplotype)) for the 3 different possible haplotypes
     * {aa, ab, bb}
     */

    //an enum whose only role in life is to help iterate over the 3 possible diploid genotypes
    protected enum Genotype {
        HOM_ALLELE1(0),
        HET_ALLELE12(1),
        HOM_ALLELE2(2);

        int v; //value is the number of chromosomes in the genotype that have ALLELE2.

        Genotype(final int v) {
            this.v = v;
        }
    }
}