net.maizegenetics.analysis.imputation.FILLINImputationUtils
TASSEL is a software package to evaluate trait associations, evolutionary patterns, and linkage disequilibrium.
package net.maizegenetics.analysis.imputation;
import com.google.common.collect.*;
import com.google.common.primitives.Ints;
import net.maizegenetics.analysis.popgen.DonorHypoth;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.util.BitSet;
import net.maizegenetics.util.BitUtil;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import static net.maizegenetics.dna.snp.GenotypeTable.UNKNOWN_DIPLOID_ALLELE;
import static net.maizegenetics.dna.WHICH_ALLELE.Major;
import static net.maizegenetics.dna.WHICH_ALLELE.Minor;
import static net.maizegenetics.dna.snp.GenotypeTableUtils.isHeterozygous;
/**
* Basic utility functions to support imputation by blocks.
*
* @author Ed Buckler
*/
public class FILLINImputationUtils {
/**
* Counts union and intersection of major and minor alleles between the target genotype and potential
* donor genotypes. The counts are done per 64-site block. These counts can be quickly used to estimate
* distance for the set of blocks.
* @param modBitsOfTarget major and minor presence bits for target genotype (must be aligned same as donor)
* @param donorAlign genotypeTable with potential donor genotypes
* @return array with [donor index][sites, same count, diff count, het count index][block index]
*/
public static byte[][][] calcAllelePresenceCountsBtwTargetAndDonors(BitSet[] modBitsOfTarget, GenotypeTable donorAlign) {
int blocks=modBitsOfTarget[0].getNumWords();
byte[][][] allDist=new byte[donorAlign.numberOfTaxa()][4][blocks];
long[] iMj=modBitsOfTarget[0].getBits();
long[] iMn=modBitsOfTarget[1].getBits();
for (int donor1 = 0; donor1 < allDist.length; donor1++) {
long[] jMj=donorAlign.allelePresenceForAllSites(donor1, Major).getBits();
long[] jMn=donorAlign.allelePresenceForAllSites(donor1, Minor).getBits();
for (int i = 0; i < blocks; i++) {
long same=(iMj[i]&jMj[i])|(iMn[i]&jMn[i]);  //sites where target and donor share an allele
long diff=(iMj[i]&jMn[i])|(iMn[i]&jMj[i]);  //sites where they carry opposite alleles
long hets=same&diff;  //sites counted in both, i.e. heterozygous comparisons
int sameCnt=Long.bitCount(same), diffCnt=Long.bitCount(diff), hetCnt=Long.bitCount(hets);
allDist[donor1][0][i]=(byte)(sameCnt+diffCnt-hetCnt);  //informative sites
allDist[donor1][1][i]=(byte)sameCnt;
allDist[donor1][2][i]=(byte)diffCnt;
allDist[donor1][3][i]=(byte)hetCnt;
}
}
return allDist;
}
/**
* Finds the best homozygous donor hypotheses for the target taxon over a range of 64-site blocks, using the
* precomputed target-to-donor allele presence counts.
* (NOTE: the block-count loop above and this declaration are reconstructed from context; names are inferred
* from the surviving code and call sites.)
*/
public static DonorHypoth[] findHomozygousDonorHypoth(int targetTaxon, int firstBlock, int lastBlock, int focusBlock, byte[][][] targetToDonorDistances, int[] donor1indices, int minTestSites, int maxDonorHypotheses) {
MinMaxPriorityQueue<DonorHypoth> bestDonors=MinMaxPriorityQueue.orderedBy(DonorHypoth.byErrorRateOrdering)
.maximumSize(maxDonorHypotheses).create();
for (int d1 : donor1indices) {
int sameCnt = 0, diffCnt = 0, hetCnt = 0;
for (int i = firstBlock; i <=lastBlock; i++) {
sameCnt+=targetToDonorDistances[d1][1][i];
diffCnt+=targetToDonorDistances[d1][2][i];
hetCnt+=targetToDonorDistances[d1][3][i];
}
int testSites= sameCnt + diffCnt - hetCnt;
if(testSites<minTestSites) continue;
int totalMendelianErrors=(int)(diffCnt-(0.5*hetCnt));  //heterozygous comparisons count as half mismatches
DonorHypoth theDH=new DonorHypoth(targetTaxon, d1, d1, firstBlock, focusBlock, lastBlock, testSites, totalMendelianErrors);
bestDonors.add(theDH);
}
DonorHypoth[] result=bestDonors.toArray(new DonorHypoth[0]);
Arrays.sort(result,DonorHypoth.byErrorRateOrdering); //Queues keep the top values, but not ordered
return result;
}
/**
* Ranks donors by how often, and how highly, they appear among the per-focus-block donor hypotheses and
* returns the indices of the most frequently chosen donors.
* (NOTE: the tail of the previous method and this declaration are reconstructed from context; the name and
* parameters are inferred from the surviving body.)
*/
public static int[] bestDonorsAcrossEntireRegion(DonorHypoth[][] allDH, int maxDonors) {
HashMap<Integer,Integer> bd=new HashMap<>();
for (int i = 0; i < allDH.length; i++) {
int rank=allDH[i].length;
for (DonorHypoth dh: allDH[i]) {
if (dh==null) continue;
if (bd.containsKey(dh.donor1Taxon)) { //counts the frequency of best hit
bd.put(dh.donor1Taxon, bd.get(dh.donor1Taxon)+ rank);
} else {
bd.put(dh.donor1Taxon, rank);
}
rank--;
//break;//this line allows only the best hypothesis to go into set (original method)
}
}
TreeMultimap<Integer,Integer> rankings=TreeMultimap.create(Ordering.natural().reverse(),Ordering.natural());
for (Map.Entry<Integer,Integer> dhs : bd.entrySet()) {
rankings.put(dhs.getValue(),dhs.getKey());
}
int resultSize=(rankings.size()<maxDonors)?rankings.size():maxDonors;
int[] result=new int[resultSize];
Iterator<Integer> iDH=rankings.values().iterator();
for (int i=0; i<resultSize; i++) {result[i]=iDH.next();}
return result;
}
/**
* Finds the best heterozygous donor hypotheses for the target taxon by pairing one fixed donor (d1) with each
* of a set of second donors and scoring the pairs by Mendelian error rate over the block range.
* (NOTE: this declaration is reconstructed; the signature is inferred from the call in the method below.)
*/
public static DonorHypoth[] findHeterozygousDonorHypoth(int targetTaxon, long[] mjT, long[] mnT, int firstBlock, int lastBlock, int focusBlock, GenotypeTable donorAlign, int d1, int[] donor2Indices, int maxDonorHypotheses, int minTestSites) {
MinMaxPriorityQueue<DonorHypoth> bestDonors=MinMaxPriorityQueue.orderedBy(DonorHypoth.byErrorRateOrdering)
.maximumSize(maxDonorHypotheses).create();
long[] mj1=donorAlign.allelePresenceForSitesBlock(d1, Major, firstBlock, lastBlock + 1);
long[] mn1=donorAlign.allelePresenceForSitesBlock(d1, Minor, firstBlock, lastBlock + 1);
for (int d2 : donor2Indices) {
long[] mj2=donorAlign.allelePresenceForSitesBlock(d2, Major, firstBlock, lastBlock + 1);
long[] mn2=donorAlign.allelePresenceForSitesBlock(d2, Minor, firstBlock, lastBlock + 1);
int[] mendErr=mendelErrorComparison(mjT, mnT, mj1, mn1, mj2, mn2);
if(mendErr[1]<minTestSites) continue;
DonorHypoth theDH=new DonorHypoth(targetTaxon, d1, d2, firstBlock, focusBlock, lastBlock, mendErr[1], mendErr[0]);
bestDonors.add(theDH);
}
DonorHypoth[] result=bestDonors.toArray(new DonorHypoth[0]);
Arrays.sort(result,DonorHypoth.byErrorRateOrdering); //Queues keep the top values, but not ordered
return result;
}
/**
* Finds the best heterozygous donor hypotheses for the target taxon by testing unordered pairs of donors drawn
* from two candidate sets; each pair is scored by the single-donor overload above.
* (NOTE: the tail of the previous method and this declaration are reconstructed from context.)
*/
public static DonorHypoth[] findHeterozygousDonorHypoth(int targetTaxon, long[] mjT, long[] mnT, int firstBlock, int lastBlock, int focusBlock, GenotypeTable donorAlign, int[] donor1Indices, int[] donor2Indices, int maxDonorHypotheses, int minTestSites) {
Multimap<Integer,Integer> tests=HashMultimap.create();
for (int d1 : donor1Indices) {
for (int d2 : donor2Indices) {
if(d1<d2) {tests.put(d1,d2);} else {tests.put(d2,d1);}  //key each unordered pair by the smaller donor index
}
}
MinMaxPriorityQueue<DonorHypoth> bestDonors=MinMaxPriorityQueue.orderedBy(DonorHypoth.byErrorRateOrdering)
.maximumSize(maxDonorHypotheses).create();
for (int d1 : tests.keySet()) {
int[] d2donors=Ints.toArray(tests.get(d1));
DonorHypoth[] oneDimenHypoth=findHeterozygousDonorHypoth(targetTaxon, mjT, mnT, firstBlock, lastBlock,
focusBlock, donorAlign, d1, d2donors, maxDonorHypotheses, minTestSites);
for (DonorHypoth donorHypoth : oneDimenHypoth) {bestDonors.add(donorHypoth);}
}
DonorHypoth[] result=bestDonors.toArray(new DonorHypoth[0]);
Arrays.sort(result,DonorHypoth.byErrorRateOrdering); //Queues keep the top values, but not ordered
return result;
}
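//Illustrative sketch, not part of TASSEL: shows how the same/different/heterozygous site counts used by
//calcAllelePresenceCountsBtwTargetAndDonors above fall out of the major/minor presence bits for one 64-site block.
//The bit patterns below are hypothetical.
private static int[] exampleBlockCounts() {
long tMj=0b0111L, tMn=0b0110L;  //target: site 0 homozygous major, sites 1-2 heterozygous
long dMj=0b0001L, dMn=0b0110L;  //donor: site 0 homozygous major, sites 1-2 homozygous minor
long same=(tMj&dMj)|(tMn&dMn);  //sites where target and donor share an allele
long diff=(tMj&dMn)|(tMn&dMj);  //sites where they carry opposite alleles
long hets=same&diff;  //sites counted in both, i.e. heterozygous comparisons
int sameCnt=Long.bitCount(same), diffCnt=Long.bitCount(diff), hetCnt=Long.bitCount(hets);
return new int[]{sameCnt+diffCnt-hetCnt, sameCnt, diffCnt, hetCnt};  //{sites, same, diff, het} = {3, 3, 2, 2}
}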
/**
* Combines arrays of donorHypoth, sorts them, and returns the best limited by maxDonorHypotheses
* @param maxDonorHypotheses maximum number of donor hypotheses to retain
* @param dhs arrays of DonorHypoth[]
* @return combined arrays of DonorHypoth (length<=maxDonorHypotheses) ordered by error rate
*/
public static DonorHypoth[] combineDonorHypothArrays(int maxDonorHypotheses, DonorHypoth[] ... dhs) {
MinMaxPriorityQueue<DonorHypoth> bestDonors=MinMaxPriorityQueue.orderedBy(DonorHypoth.byErrorRateOrdering)
.maximumSize(maxDonorHypotheses).create();
for (DonorHypoth[] aDHArray : dhs) {
for (DonorHypoth donorHypoth : aDHArray) {
bestDonors.add(donorHypoth);
}
}
DonorHypoth[] result=bestDonors.toArray(new DonorHypoth[0]);
Arrays.sort(result,DonorHypoth.byErrorRateOrdering); //Queues keep the top values, but not ordered
return result;
}
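//Illustrative sketch, not part of TASSEL: hypotheses from the homozygous and heterozygous searches above can be
//merged into a single best set for a focus block. The argument names here are hypothetical.
private static DonorHypoth[] exampleCombine(DonorHypoth[] homozygousHypoth, DonorHypoth[] heterozygousHypoth, int maxDonorHypotheses) {
//retains at most maxDonorHypotheses hypotheses, returned from lowest to highest error rate
return combineDonorHypothArrays(maxDonorHypotheses, homozygousHypoth, heterozygousHypoth);
}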
/**
* Given a starting 64-site focus block, expands the block window to the left and right until it reaches the
* minimum minor site count or the minimum major site count in the target taxon.
* @param mjT - major allele bit presence in a series of longs
* @param mnT - minor allele bit presence in a series of longs
* @param focusBlock index of the focus block
* @param minMinorCnt minimum count to stop expanding for minor allele sites
* @param minMajorCnt minimum count to stop expanding for major allele sites
* @return arrays of blocks {startBlock, focusBlock, endBlock}
*/
public static int[] getBlockWithMinMinorCount(long[] mjT, long[] mnT, int focusBlock, int minMinorCnt,
int minMajorCnt) {
int blocks=mjT.length;
int majorCnt=Long.bitCount(mjT[focusBlock]);
int minorCnt=Long.bitCount(mnT[focusBlock]);
int endBlock=focusBlock, startBlock=focusBlock;
//NOTE: the remainder of this method is reconstructed from the javadoc above; the original lines were truncated.
while((minorCnt<minMinorCnt)&&(majorCnt<minMajorCnt)) {
boolean expanded=false;
if(startBlock>0) {startBlock--; minorCnt+=Long.bitCount(mnT[startBlock]); majorCnt+=Long.bitCount(mjT[startBlock]); expanded=true;}
if(endBlock<blocks-1) {endBlock++; minorCnt+=Long.bitCount(mnT[endBlock]); majorCnt+=Long.bitCount(mjT[endBlock]); expanded=true;}
if(!expanded) break;  //cannot expand past the ends of the chromosome
}
int[] result={startBlock, focusBlock, endBlock};
return result;
}
}
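//Illustrative sketch, not part of TASSEL: expands the 64-site block window around a focus block until enough
//minor-allele sites are covered. All bit patterns and thresholds below are hypothetical.
final class BlockWindowExample {
public static void main(String[] args) {
long[] mjT=new long[5];  //major-allele presence, one 64-site word per block
long[] mnT=new long[5];  //minor-allele presence
mjT[1]=mjT[2]=mjT[3]=0xFFFFL;  //blocks 1-3 carry plenty of major-allele sites
mnT[2]=0b1L;  //the focus block covers only one minor-allele site
mnT[1]=0xFFL;  //a neighbouring block contributes eight more
int[] window=FILLINImputationUtils.getBlockWithMinMinorCount(mjT, mnT, 2, 5, 1000);
System.out.println(Arrays.toString(window));  //{startBlock, focusBlock, endBlock}
}
}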