All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.imputation.CrossProgenyEmissionMatrix Maven / Gradle / Ivy

package net.maizegenetics.analysis.imputation;

import java.util.Arrays;
import java.util.List;

import net.maizegenetics.analysis.imputation.EmissionProbability;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableUtils;

public class CrossProgenyEmissionMatrix extends EmissionProbability {
	/*
	 * p(obs|state) matrix for each site 
	 * use depth to calculate p(obs) for each site/taxon
	 * 
	 * 4 state model, states are:
	 *  0 = h00/h10 [parent 0, haplotype 0 / parent 1, haplotype 0]
	 *  1 = h00/h11
	 *  2 = h01/h10
	 *  3 = h01/h11
	 *  obs are nucleotide genotypes
	 */
	
	private static final byte N = GenotypeTable.UNKNOWN_ALLELE;
	private static final byte NN = GenotypeTable.UNKNOWN_DIPLOID_ALLELE;
	
	private byte[][][] parentHaplotypes; //indexes are parent, chromosome, site
	private GenotypeTable progenyGenotypes;  //the original genotypes for the progeny of interest, not all taxa
	private double[][] parentHaplotypeProbabilities; //first index is in order h00, h01, h10, h11
	//TODO replace obsGivenState with method that accounts for depth
	private double[][] obsGivenState = new double[3][3]; //rows are imputed (assumed true), columns are observed
	private int myTaxonIndex;
	private int[] mySiteIndices;
	private boolean useProbabilityTables;
	
	/**
	 * @param hap	parent haplotypes
	 */
	public CrossProgenyEmissionMatrix(byte[][][] hap, GenotypeTable originalGenotypes, int taxonIndex, int[] siteIndices) {
		parentHaplotypes = hap;
		progenyGenotypes = originalGenotypes;
		useProbabilityTables = false;
		myTaxonIndex = taxonIndex;
		mySiteIndices = siteIndices;
	}
	
	public CrossProgenyEmissionMatrix(double[][] hapProbs, GenotypeTable originalGenotypes, double[][] stateByObs, int taxonIndex, int[] siteIndices) {
		parentHaplotypeProbabilities = hapProbs;
		progenyGenotypes = originalGenotypes;
		obsGivenState = stateByObs;
		useProbabilityTables = true;
		myTaxonIndex = taxonIndex;
		mySiteIndices = siteIndices;
	}
	
	@Override
	public double getProbObsGivenState(int state, int obs) {
		return Double.NaN;
	}

	@Override
	public double getProbObsGivenState(int state, int obs, int site) {
		if (useProbabilityTables) return probObsStateFromPhasedProgeny(state, obs, mySiteIndices[site]);
		return probObsStateUsingParentHaps(state, obs, mySiteIndices[site]);
	}
	
	private double probObsStateUsingParentHaps(int state, int obs, int site) {
		//obs = byte representing a diploid genotype as encoded by NucleotideAlignmentConstants
		
		//are the parent haplotypes known at this site
		//if not then base p(obs) on originalGenotypes
		byte obsgeno = (byte) obs;
		byte h00 = parentHaplotypes[0][0][site];
		byte h01 = parentHaplotypes[0][1][site];
		byte h10 = parentHaplotypes[1][0][site];
		byte h11 = parentHaplotypes[1][1][site];
		
//		if (h00 == N || h01 == N || h10 == N || h11 == N) return probObsAtSite(site, obs);
		if (h00 == N || h01 == N || h10 == N || h11 == N) return 0.25;
		
		byte h00h10 = GenotypeTableUtils.getUnphasedDiploidValue(h00, h10);
		byte h00h11 = GenotypeTableUtils.getUnphasedDiploidValue(h00, h11);
		byte h01h10 = GenotypeTableUtils.getUnphasedDiploidValue(h01, h10);
		byte h01h11 = GenotypeTableUtils.getUnphasedDiploidValue(h01, h11);
		
		//all parent haplotypes are known
		switch (state) {
		case 0:
			//state = h00/h10
			if (GenotypeTableUtils.isHeterozygous(h00h10)) {
				if (GenotypeTableUtils.isEqual(h00h10, obsgeno)) return 1 - probHetAsHom(site);
				else if (!GenotypeTableUtils.isHeterozygous(obsgeno))return probHetAsHom(site);
				else return 0.0001;
			} else {
				if (obsgeno == h00h10) return 0.998;
				else if (GenotypeTableUtils.isHeterozygous(obsgeno)) return 0.001;
				else return 0.001;
			}
		case 1:
			//state = h00/h11
			if (GenotypeTableUtils.isHeterozygous(h00h11)) {
				if (GenotypeTableUtils.isEqual(h00h11, obsgeno)) return 1 - probHetAsHom(site);
				else if (!GenotypeTableUtils.isHeterozygous(obsgeno)) return probHetAsHom(site);
				else return 0.0001;
			} else {
				if (obsgeno == h00h11) return 0.998;
				else if (GenotypeTableUtils.isHeterozygous(obsgeno)) return 0.001;
				else return 0.001;
			}
		case 2:
			//state = h01/h10
			if (GenotypeTableUtils.isHeterozygous(h01h10)) {
				if (GenotypeTableUtils.isEqual(h01h10, obsgeno)) return 1 - probHetAsHom(site);
				else if (!GenotypeTableUtils.isHeterozygous(obsgeno))return probHetAsHom(site);
				else return 0.0001;
			} else {
				if (obsgeno == h01h10) return 0.998;
				else if (GenotypeTableUtils.isHeterozygous(obsgeno)) return 0.001;
				else return 0.001;
			}
		case 3:
			//state = h01/h11
			if (GenotypeTableUtils.isHeterozygous(h01h11)) {
				if (GenotypeTableUtils.isEqual(h01h11, obsgeno)) return 1 - probHetAsHom(site);
				else if (!GenotypeTableUtils.isHeterozygous(obsgeno))return probHetAsHom(site);
				else return 0.0001;
			} else {
				if (obsgeno == h01h11) return 0.998;
				else if (GenotypeTableUtils.isHeterozygous(obsgeno)) return 0.001;
				else return 0.001;
			}
		default:
			throw new IllegalArgumentException("Illegal state as input to probObsStateUsingParentHaps().");
		}
	}
	
	private double probObsStateFromPhasedProgeny(int state, int obs, int site) {
		//obs = byte representing a diploid genotype as endoded by NucleotideAlignmentConstants
		
		byte obsgeno = (byte) obs;
		byte[] obsAlleles = GenotypeTableUtils.getDiploidValues(obsgeno);
		byte major = progenyGenotypes.majorAllele(site);
		byte minor = progenyGenotypes.minorAllele(site);
		
		//otherwise
		//0 = h0/h0, 1 = h0/h1, 2 = h1/h1
		//note if the code reaches this point, h0 != h1
		
		//P(obs|state=0) = P(obs|state=a) * P(state=a|state=0) + P(obs|state=h) * P(state=h|state=0) + P(obs|state=b) * P(state=b|state=0)
		double pHomMajor;
		double pHet;
		double pHomMinor;
		double pMajorh00 = parentHaplotypeProbabilities[0][site];
		double pMajorh01 = parentHaplotypeProbabilities[1][site];
		double pMajorh10 = parentHaplotypeProbabilities[2][site];
		double pMajorh11 = parentHaplotypeProbabilities[3][site];
		switch (state) {
		case 0:
			//state = h00/h10
			//following are probabilities given the state
			pHomMajor = pMajorh00 * pMajorh10;
			pHet = pMajorh00 *(1 - pMajorh10) + ( 1 - pMajorh00) * pMajorh10;
			pHomMinor = (1 - pMajorh00) *(1 - pMajorh10);
			break;
		case 1:
			//state = h00/h11
			pHomMajor = pMajorh00 * pMajorh11;
			pHet = pMajorh00 *(1 - pMajorh11) + ( 1 - pMajorh00) * pMajorh11;
			pHomMinor = (1 - pMajorh00) *(1 - pMajorh11);
			break;
		case 2:
			//state = h01/h10
			pHomMajor = pMajorh01 * pMajorh10;
			pHet = pMajorh01 *(1 - pMajorh10) + ( 1 - pMajorh01) * pMajorh10;
			pHomMinor = (1 - pMajorh01) *(1 - pMajorh10);
			break;
		case 3:
			//state = h01/h11
			pHomMajor = pMajorh01 * pMajorh11;
			pHet = pMajorh01 *(1 - pMajorh11) + ( 1 - pMajorh01) * pMajorh11;
			pHomMinor = (1 - pMajorh01) *(1 - pMajorh11);
			break;
		default:
			throw new IllegalArgumentException("Illegal state as input to probObsStateFromPhasedProgeny().");
		}
		
		if (obsgeno == GenotypeTableUtils.getDiploidValue(major, major)) {
			return obsGivenState[0][0] * pHomMajor + obsGivenState[1][0] * pHet + obsGivenState[2][0] * pHomMinor;
		} else if (GenotypeTableUtils.isEqual(obsgeno, GenotypeTableUtils.getDiploidValue(major, minor))) {
			return obsGivenState[0][1] * pHomMajor + obsGivenState[1][1] * pHet + obsGivenState[2][1] * pHomMinor;
		} else if (obsgeno == GenotypeTableUtils.getDiploidValue(minor, minor)){
			return obsGivenState[0][2] * pHomMajor + obsGivenState[1][2] * pHet + obsGivenState[2][2] * pHomMinor;
		} return Double.NaN;

	}
	
	private double probObsAtSite(int site, int obs) {
		double dtotal = progenyGenotypes.totalNonMissingForSite(site);
		byte obsgeno = (byte) obs;
		double pval;
		if (GenotypeTableUtils.isHeterozygous(obsgeno)) {
			int obscount = 0;
			for (int t = 0; t < progenyGenotypes.numberOfTaxa(); t++) {
				if (GenotypeTableUtils.isEqual(obsgeno, progenyGenotypes.genotype(t,site))) obscount++;
			}
			pval = obscount / dtotal;
		} else {
			int obscount = 0;
			for (int t = 0; t < progenyGenotypes.numberOfTaxa(); t++) {
				if (progenyGenotypes.genotype(t,site) == obsgeno) obscount++;
			}
			pval = obscount / dtotal;
		}
		
		return pval;
	}
		
	private double probHetAsHom(int site) {
		int totalDepth = progenyGenotypes.depth().depth(myTaxonIndex, site);
		if (totalDepth == 1) return 0.99;
		return Math.pow(0.5, totalDepth - 1);
	}
	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy