net.maizegenetics.analysis.imputation.FILLINImputationAccuracy Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage
disequilibrium.
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package net.maizegenetics.analysis.imputation;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.InputMismatchException;
import java.util.Random;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.dna.map.PositionList;
import net.maizegenetics.dna.snp.FilterGenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTable;
import static net.maizegenetics.dna.snp.GenotypeTable.UNKNOWN_DIPLOID_ALLELE;
import net.maizegenetics.dna.snp.GenotypeTableBuilder;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import static net.maizegenetics.dna.snp.GenotypeTableUtils.isHeterozygous;
import net.maizegenetics.dna.snp.ImportUtils;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import static net.maizegenetics.dna.snp.NucleotideAlignmentConstants.GAP_DIPLOID_ALLELE;
import net.maizegenetics.dna.snp.genotypecall.GenotypeCallTableBuilder;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.ArrayUtils;
/**
* A class to mask input files, hold information related to accuracy calculation,
* and calculate accuracy for FILLINImputationPlugin
* @author kls283
*/
public class FILLINImputationAccuracy {
private GenotypeTable maskKey= null;
private GenotypeTable unimp= null;
private GenotypeTable imputed= null;
private GenotypeTable[] donor= null;
private String outFile= null;
private double propSitesMask= 0.01;
private int depthToMask= 7;
private double propDepthSitesToMask= 0.2;
public int[] MAF= null;
private double[][] all= null; //arrays held ("columns"): 0-maskedMinor, 1-maskedHet, 2-maskedMajor; each array ("rows"):0-to minor, 1-to het, 2-to major, 3-unimp, 4-total for known type
private double[][][] mafAll= null;//sam as all, but first array holds MAF category
private double[] MAFClass= null;
private boolean verboseOutput= true;
public FILLINImputationAccuracy(GenotypeTable unimp, GenotypeTable maskKey, GenotypeTable[] donors, double propSitesMask, int depthToMask, double propDepthSitesToMask, String outFileName, double[] MAFClass, boolean verbose) {
this.unimp= unimp;
this.maskKey= maskKey;
this.donor= donors;
this.propSitesMask= propSitesMask;
this.depthToMask= depthToMask;
this.propDepthSitesToMask= propDepthSitesToMask;
this.MAFClass= MAFClass;
this.verboseOutput= verbose;
this.outFile= outFileName;
}
public FILLINImputationAccuracy(GenotypeTable unimp, GenotypeTable maskKey, String outFileName, boolean verbose) {
this.unimp= unimp;
this.maskKey= maskKey;
this.verboseOutput= verbose;
this.outFile= outFileName;
}
public FILLINImputationAccuracy(GenotypeTable unimp, GenotypeTable maskKey, GenotypeTable[] donor, String outFileName, boolean verbose) {
this.unimp= unimp;
this.maskKey= maskKey;
this.donor= donor;
this.verboseOutput= verbose;
this.outFile= outFileName;
}
public FILLINImputationAccuracy(GenotypeTable unimp, GenotypeTable maskKey, GenotypeTable[] donor, double[] MAFClass, String outFileName, boolean verbose) {
this.unimp= unimp;
this.maskKey= maskKey;
this.donor= donor;
this.MAFClass= MAFClass;
this.verboseOutput= verbose;
this.outFile= outFileName;
}
public GenotypeTable initiateAccuracy() {
if (this.MAFClass!=null) {//if mafClass not null, assign MAF categories to sites in unimputed
generateMAF();
this.mafAll= new double[this.MAFClass.length][3][5];
if (this.verboseOutput) System.out.println("Calculating accuracy within supplied MAF categories.");
}
//mask file if not already masked (depth or proportion) and generate key
if (this.maskKey!=null) {
if (this.verboseOutput) System.out.println("File already masked. Use input key file for calculating accuracy");
boolean goodKey= true;
if (Arrays.equals(this.maskKey.physicalPositions(), this.unimp.physicalPositions())==false) goodKey= filterKeySites();
if (!goodKey && this.verboseOutput) System.out.println("Problem with input key file. Masking unimputed input");
else return this.unimp;
}
if (this.unimp.hasDepth() && this.depthToMask>0) {
boolean works= maskFileByDepth();
if (works) return this.unimp;
}
maskPropSites();
return this.unimp;
}
private boolean generateMAF() {
this.MAF= new int[this.unimp.numberOfSites()];
for (GenotypeTable don:this.donor) {
for (int site = 0; site < don.numberOfSites(); site++) {
int unimpSite= this.unimp.positions().indexOf(don.positions().get(site));
if (unimpSite < 0) {this.MAF[unimpSite]= -1; continue;}
int search= Arrays.binarySearch(this.MAFClass, don.minorAlleleFrequency(site));
this.MAF[unimpSite]= search<0?Math.abs(search)-1:search;
}
}
return true;
}
private boolean maskFileByDepth() {
if (verboseOutput) System.out.println("Masking file using depth\nSite depth to mask: "+this.depthToMask+"\nProportion of depth sites to be masked: "+this.propDepthSitesToMask);
GenotypeCallTableBuilder mask= GenotypeCallTableBuilder.getInstance(this.unimp.numberOfTaxa(), this.unimp.numberOfSites());
GenotypeCallTableBuilder key= GenotypeCallTableBuilder.getInstance(this.unimp.numberOfTaxa(), this.unimp.numberOfSites());
int cnt= 0;
for (int taxon = 0; taxon < this.unimp.numberOfTaxa(); taxon++) {
int taxaCnt= 0;
mask.setBaseRangeForTaxon(taxon, 0, this.unimp.genotypeAllSites(taxon));
for (int site = 0; site < this.unimp.numberOfSites(); site++) {
if (GenotypeTableUtils.isEqual(GenotypeTable.UNKNOWN_DIPLOID_ALLELE, this.unimp.genotype(taxon, site))) continue;
int[] currD= this.unimp.depthForAlleles(taxon, site);
if (currD[0]+currD[1]!=this.depthToMask) continue;
if (Math.random()>this.propDepthSitesToMask) continue;
else if ((this.unimp.isHeterozygous(taxon, site)==false) ||
(depthToMask > 3 && currD[0] > 1 && currD[1] > 1)||
(depthToMask < 4)) {
mask.setBase(taxon, site, GenotypeTable.UNKNOWN_DIPLOID_ALLELE); key.setBase(taxon, site, this.unimp.genotype(taxon, site)); taxaCnt++;
}
}
if (this.verboseOutput) System.out.println(taxaCnt+" sites masked for "+this.unimp.taxaName(taxon)); cnt+= taxaCnt;
}
if (cnt<2000 && this.verboseOutput) {System.out.println("Insufficient sites masked with depth. Calculate accuracy by proportion"); return false;}
if (this.verboseOutput) System.out.println(cnt+" sites masked at a depth of "+this.depthToMask);
this.maskKey= GenotypeTableBuilder.getInstance(key.build(), this.unimp.positions(), this.unimp.taxa());
this.unimp= GenotypeTableBuilder.getInstance(mask.build(), this.unimp.positions(), this.unimp.taxa());
return true;
}
private boolean maskPropSites() {
if (this.verboseOutput) System.out.println("Masking file without depth\nMasking "+this.propSitesMask+" proportion of sites");
GenotypeCallTableBuilder mask= GenotypeCallTableBuilder.getInstance(this.unimp.numberOfTaxa(), this.unimp.numberOfSites());
GenotypeCallTableBuilder key= GenotypeCallTableBuilder.getInstance(this.unimp.numberOfTaxa(), this.unimp.numberOfSites());
int presGenos= 0;
for (int taxon = 0; taxon < this.unimp.numberOfTaxa(); taxon++) {presGenos+= this.unimp.totalNonMissingForTaxon(taxon);}
int expected= (int)(this.propSitesMask*(double)presGenos);
int cnt= 0;
for (int taxon = 0; taxon < this.unimp.numberOfTaxa(); taxon++) {
int taxaCnt= 0;
mask.setBaseRangeForTaxon(taxon, 0, this.unimp.genotypeAllSites(taxon));
for (int site = 0; site < this.unimp.numberOfSites(); site++) {
if (Math.random() keepSites= new ArrayList<>();
boolean working= true;
if (Arrays.equals(this.unimp.chromosomes(), this.maskKey.chromosomes())==false) working= matchChromosomes();
if (!working) {System.out.println("No overlapping chromosomes"); return false;}
PositionList maskPos= this.maskKey.positions();
for (Position p:this.unimp.positions()) {
if (maskPos.contains(p)) keepSites.add(p.getSNPID());
else {
System.out.println("Not all sites in target imputation file contained in mask key. Not using mask");
return false;
}
}
GenotypeTable filter= FilterGenotypeTable.getInstance(this.maskKey, keepSites.toArray(new String[keepSites.size()]));
this.maskKey= filter;//GenotypeTableBuilder.getGenotypeCopyInstance(filter); //Change this back when GenotypeCopyInstance fixed
if (verboseOutput) System.out.println("Sites in new mask: "+this.maskKey.numberOfSites());
//check to see if all taxa in unimputed are in the mask file
if (!this.maskKey.taxa().equals(this.unimp.taxa())) {
if (this.maskKey.taxa().containsAll(this.unimp.taxa()))
this.maskKey= FilterGenotypeTable.getInstance(this.maskKey, this.unimp.taxa());
else {
System.out.println("Not all taxa in target imputation file contained in mask key. Not using mask");
return false;
}
}
System.out.println("Filtering key took "+(System.currentTimeMillis()-time)+" milliseconds");
return true;
}
private boolean matchChromosomes() {
Chromosome[] unimpChr= this.unimp.chromosomes();
Chromosome[] keyChr= this.maskKey.chromosomes();
ArrayList keepSites= new ArrayList<>();
for (Chromosome chr:unimpChr) { //if any of the chromosomes in input do not exist in key, return false (which then masks proportion)
if (Arrays.binarySearch(keyChr, chr)<0) return false;
}
for (Chromosome chr:keyChr) { //keep sites on key that are on matching chromosomes
if (Arrays.binarySearch(unimpChr, chr)>-1) {
int[] startEnd = this.maskKey.firstLastSiteOfChromosome(chr);
for (int site = startEnd[0]; site <= startEnd[1]; site++) {
keepSites.add(site);
}
}
}
GenotypeTable filter= FilterGenotypeTable.getInstance(this.maskKey, ArrayUtils.toPrimitive(keepSites.toArray(new Integer[keepSites.size()])));
this.maskKey= filter;//GenotypeTableBuilder.getGenotypeCopyInstance(filter);//Change this back when GenotypeCopyInstance fixed
if (verboseOutput) System.out.println(this.maskKey.numberOfSites()+" sites retained after chromsome filter");
return true;
}
//this is the sample multiple r2.
private double pearsonR2(double[][] all, boolean verbose) {
int size= 0;
for (int x = 0; x < 3; x++) {size+= (all[x][4]-all[x][3]);}
double[][] xy= new double[2][size]; //0 is x, 1 is y
int last= 0;//the last index filled
for (double x = 0; x < 3; x++) { for (double y = 0; y < 3; y++) {
for (int fill = last; fill < last+all[(int)x][(int)y]; fill++) {
xy[0][fill]= x;
xy[1][fill]= y;
}
last= last+(int)all[(int)x][(int)y];
}}
double meanX= 0; double meanY= 0; double varX= 0; double varY= 0; double covXY= 0; double r2= 0.0;
for (int i = 0; i < xy[0].length; i++) {meanX+=xy[0][i]; meanY+= xy[1][i];}
meanX= meanX/(xy[0].length-1); meanY= meanY/(xy[1].length-1);
double currX, currY;
for (int i = 0; i < xy[0].length; i++) {
currX= xy[0][i]-meanX; currY= xy[1][i]-meanY;
varX+= currX*currX; varY+= currY*currY;
covXY+= currX*currY;
}
r2= (covXY/(Math.sqrt(varX)*Math.sqrt(varY)))*(covXY/(Math.sqrt(varX)*Math.sqrt(varY)));
if (verbose) System.out.println("Unadjusted R2 value for "+size+" comparisons: "+r2);
return r2;
}
private void accuracyOut(double time) {
DecimalFormat df = new DecimalFormat("0.########");
double r2= pearsonR2(this.all, this.verboseOutput);
try {
String ext= FilenameUtils.getExtension(this.outFile);
File outputFile = new File(this.outFile.replace(ext,"Accuracy.txt"));
DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
outStream.writeBytes("##Taxon\tTotalSitesMasked\tTotalSitesCompared\tTotalPropUnimputed\tNumMinor\tCorrectMinor\tMinorToHet\tMinorToMajor\tUnimpMinor"
+ "\tNumHets\tHetToMinor\tCorrectHet\tHetToMajor\tUnimpHet\tNumMajor\tMajorToMinor\tMajorToHet\tCorrectMajor\tUnimpMajor\tR2\n");
outStream.writeBytes("##TotalByImputed\t"+(this.all[0][4]+this.all[1][4]+this.all[2][4])+"\t"+(this.all[0][4]+this.all[1][4]+this.all[2][4]-this.all[0][3]-this.all[1][3]-this.all[2][3])+"\t"+
((this.all[0][3]+this.all[1][3]+this.all[2][3])/(this.all[0][4]+this.all[1][4]+this.all[2][4]))+"\t"+this.all[0][4]+"\t"+this.all[0][0]+"\t"+this.all[0][1]+"\t"+this.all[0][2]+"\t"+this.all[0][3]+
"\t"+this.all[1][4]+"\t"+this.all[1][0]+"\t"+this.all[1][1]+"\t"+this.all[1][2]+"\t"+this.all[1][3]+"\t"+this.all[2][4]+"\t"+this.all[2][0]+"\t"+this.all[2][1]+"\t"+this.all[2][2]+
"\t"+this.all[2][3]+"\t"+r2+"\n");
outStream.writeBytes("#Minor=0,Het=1,Major=2;x is masked(known), y is predicted\nx\ty\tN\tprop\n"
+0+"\t"+0+"\t"+this.all[0][0]+"\t"+df.format((this.all[0][0])/(this.all[0][0]+this.all[0][1]+this.all[0][2]))+"\n"
+0+"\t"+.5+"\t"+this.all[0][1]+"\t"+df.format((this.all[0][1])/(this.all[0][0]+this.all[0][1]+this.all[0][2]))+"\n"
+0+"\t"+1+"\t"+this.all[0][2]+"\t"+df.format((this.all[0][2])/(this.all[0][0]+this.all[0][1]+this.all[0][2]))+"\n"
+.5+"\t"+0+"\t"+this.all[1][0]+"\t"+df.format((this.all[1][0])/(this.all[1][0]+this.all[1][1]+this.all[1][2]))+"\n"
+.5+"\t"+.5+"\t"+this.all[1][1]+"\t"+df.format((this.all[1][1])/(this.all[1][0]+this.all[1][1]+this.all[1][2]))+"\n"
+.5+"\t"+1+"\t"+this.all[1][2]+"\t"+df.format((this.all[1][2])/(this.all[1][0]+this.all[1][1]+this.all[1][2]))+"\n"
+1+"\t"+0+"\t"+this.all[2][0]+"\t"+df.format((this.all[2][0])/(this.all[2][0]+this.all[2][1]+this.all[2][2]))+"\n"
+1+"\t"+.5+"\t"+this.all[2][1]+"\t"+df.format((this.all[2][1])/(this.all[2][0]+this.all[2][1]+this.all[2][2]))+"\n"
+1+"\t"+1+"\t"+this.all[2][2]+"\t"+df.format((this.all[2][2])/(this.all[2][0]+this.all[2][1]+this.all[2][2]))+"\n");
outStream.writeBytes("#Proportion unimputed:\n#minor <- "+this.all[0][3]/this.all[0][4]+"\n#het<- "+this.all[1][3]/this.all[1][4]+"\n#major<- "+this.all[2][3]/this.all[2][4]+"\n");
outStream.writeBytes("#Time to impute and calculate accuracy: "+time+" seconds");
if (this.verboseOutput) System.out.println("##Taxon\tTotalSitesMasked\tTotalSitesCompared\tTotalPropUnimputed\tNumMinor\tCorrectMinor\tMinorToHet\tMinorToMajor\tUnimpMinor"
+ "\tNumHets\tHetToMinor\tCorrectHet\tHetToMajor\tUnimpHet\tNumMajor\tMajorToMinor\tMajorToHet\tCorrectMajor\tUnimpMajor\tR2");
if (this.verboseOutput) System.out.println("TotalByImputed\t"+(this.all[0][4]+this.all[1][4]+this.all[2][4])+"\t"+(this.all[0][4]+this.all[1][4]+this.all[2][4]-this.all[0][3]-this.all[1][3]-this.all[2][3])+"\t"+
((this.all[0][3]+this.all[1][3]+this.all[2][3])/(this.all[0][4]+this.all[1][4]+this.all[2][4]))+"\t"+this.all[0][4]+"\t"+this.all[0][0]+"\t"+this.all[0][1]+"\t"+this.all[0][2]+"\t"+this.all[0][3]+
"\t"+this.all[1][4]+"\t"+this.all[1][0]+"\t"+this.all[1][1]+"\t"+this.all[1][2]+"\t"+this.all[1][3]+"\t"+this.all[2][4]+"\t"+this.all[2][0]+"\t"+this.all[2][1]+"\t"+this.all[2][2]+
"\t"+this.all[2][3]+"\t"+r2);
if (this.verboseOutput) System.out.println("Proportion unimputed:\nminor: "+this.all[0][3]/this.all[0][4]+"\nhet: "+this.all[1][3]/this.all[1][4]+"\nmajor: "+this.all[2][3]/this.all[2][4]);
if (this.verboseOutput) System.out.println("#Minor=0,Het=1,Major=2;x is masked(known), y is predicted\nx\ty\tN\tprop\n"
+0+"\t"+0+"\t"+this.all[0][0]+"\t"+(this.all[0][0])/(this.all[0][0]+this.all[0][1]+this.all[0][2])+"\n"
+0+"\t"+.5+"\t"+this.all[0][1]+"\t"+(this.all[0][1])/(this.all[0][0]+this.all[0][1]+this.all[0][2])+"\n"
+0+"\t"+1+"\t"+this.all[0][2]+"\t"+(this.all[0][2])/(this.all[0][0]+this.all[0][1]+this.all[0][2])+"\n"
+.5+"\t"+0+"\t"+this.all[1][0]+"\t"+(this.all[1][0])/(this.all[1][0]+this.all[1][1]+this.all[1][2])+"\n"
+.5+"\t"+.5+"\t"+this.all[1][1]+"\t"+(this.all[1][1])/(this.all[1][0]+this.all[1][1]+this.all[1][2])+"\n"
+.5+"\t"+1+"\t"+this.all[1][2]+"\t"+(this.all[1][2])/(this.all[1][0]+this.all[1][1]+this.all[1][2])+"\n"
+1+"\t"+0+"\t"+this.all[2][0]+"\t"+(this.all[2][0])/(this.all[2][0]+this.all[2][1]+this.all[2][2])+"\n"
+1+"\t"+.5+"\t"+this.all[2][1]+"\t"+(this.all[2][1])/(this.all[2][0]+this.all[2][1]+this.all[2][2])+"\n"
+1+"\t"+1+"\t"+this.all[2][2]+"\t"+(this.all[2][2])/(this.all[2][0]+this.all[2][1]+this.all[2][2])+"\n");
outStream.close();
} catch (Exception e) {
if (this.verboseOutput) System.out.println(e);
}
}
private void accuracyMAFOut() {
DecimalFormat df = new DecimalFormat("0.########");
if (this.MAF!=null && this.MAFClass!=null) try {
String ext= FilenameUtils.getExtension(this.outFile);
File outputFile = new File(this.outFile.replace(ext,"MAFAccuracy.txt"));
DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
outStream.writeBytes("##\tMAFClass\tTotalSitesMasked\tTotalSitesCompared\tTotalPropUnimputed\tNumHets\tHetToMinor\tHetToMajor\tCorrectHet\tUnimpHet\tNumMinor\tMinorToMajor\tMinorToHet\tCorrectMinor\t"
+ "UnimpMinor\tNumMajor\tMajorToMinor\tMajorToHet\tCorrectMajor\tUnimputedMajor\tr2\n");
for (int i= 0; i -1)?true:false;
if (use) maf= this.MAF[site];
byte known = maskKey.genotype(keyTaxon, site);
if (known == diploidN) continue;
byte imp = this.imputed.genotype(taxon, site);
if (GenotypeTableUtils.isHeterozygous(known) == true) {
this.all[1][4]++; if (use) this.mafAll[maf][1][4]++;
if (imp == diploidN) {this.all[1][3]++; if (use) this.mafAll[maf][1][3]++;}
else if (GenotypeTableUtils.isEqual(imp, known) == true) {this.all[1][1]++; if (use) this.mafAll[maf][1][1]++;}
else if (GenotypeTableUtils.isHeterozygous(imp) == false && GenotypeTableUtils.isPartiallyEqual(imp, this.unimp.minorAllele(site)) == true) {this.all[1][0]++; if (use) this.mafAll[maf][1][0]++;}//to minor
else if (GenotypeTableUtils.isHeterozygous(imp) == false && GenotypeTableUtils.isPartiallyEqual(imp, this.unimp.majorAllele(site)) == true) {this.all[1][2]++; if (use) this.mafAll[maf][1][2]++;}
else {this.all[1][4]--; if (use) this.mafAll[maf][1][4]--;}//implies >2 allele states at given genotype
} else if (known == GenotypeTableUtils.getDiploidValue(this.unimp.minorAllele(site),this.unimp.minorAllele(site))) {
this.all[0][4]++; if (use) this.mafAll[maf][0][4]++;
if (imp == diploidN) {this.all[0][3]++; if (use) this.mafAll[maf][0][3]++;}
else if (GenotypeTableUtils.isEqual(imp, known) == true) {this.all[0][0]++; if (use) this.mafAll[maf][0][0]++;}
else if (GenotypeTableUtils.isHeterozygous(imp) == true && GenotypeTableUtils.isPartiallyEqual(imp, known) == true) {this.all[0][1]++; if (use) this.mafAll[maf][0][1]++;}
else {this.all[0][2]++; if (use) this.mafAll[maf][0][3]++;}
} else if (known == GenotypeTableUtils.getDiploidValue(this.unimp.majorAllele(site),this.unimp.majorAllele(site))) {
this.all[2][4]++; if (use) this.mafAll[maf][2][4]++;
if (imp == diploidN) {this.all[2][3]++; if (use) this.mafAll[maf][2][3]++;}
else if (GenotypeTableUtils.isEqual(imp, known) == true) {this.all[2][2]++; if (use) this.mafAll[maf][2][2]++;}
else if (GenotypeTableUtils.isHeterozygous(imp) == true && GenotypeTableUtils.isPartiallyEqual(imp, known) == true) {this.all[2][1]++; if (use) this.mafAll[maf][2][1]++;}
else {this.all[2][0]++; if (use) this.mafAll[maf][2][0]++;}
} else continue;
}
}
accuracyOut(runtime);
if (this.MAFClass!=null) accuracyMAFOut();
return pearsonR2(this.all,this.verboseOutput);
}
/**
* Legacy approach for measuring accuracy, but need to maintain some tests.
* @deprecated
*/
@Deprecated
public static int[] compareAlignment(String origFile, String maskFile, String impFile, boolean noMask) {
boolean taxaOut=false;
GenotypeTable oA=ImportUtils.readGuessFormat(origFile);
System.out.printf("Orig taxa:%d sites:%d %n",oA.numberOfTaxa(),oA.numberOfSites());
GenotypeTable mA=null;
if(noMask==false) {mA=ImportUtils.readGuessFormat(maskFile);
System.out.printf("Mask taxa:%d sites:%d %n",mA.numberOfTaxa(),mA.numberOfSites());
}
GenotypeTable iA=ImportUtils.readGuessFormat(impFile);
System.out.printf("Imp taxa:%d sites:%d %n",iA.numberOfTaxa(),iA.numberOfSites());
int correct=0;
int errors=0;
int unimp=0;
int hets=0;
int gaps=0;
for (int t = 0; t < iA.numberOfTaxa(); t++) {
int e=0,c=0,u=0,h=0;
int oATaxa=oA.taxa().indexOf(iA.taxaName(t));
for (int s = 0; s < iA.numberOfSites(); s++) {
if(noMask||(oA.genotype(oATaxa, s)!=mA.genotype(t, s))) {
byte ib=iA.genotype(t, s);
byte ob=oA.genotype(oATaxa, s);
if((ib==UNKNOWN_DIPLOID_ALLELE)||(ob==UNKNOWN_DIPLOID_ALLELE)) {unimp++; u++;}
else if(ib==GAP_DIPLOID_ALLELE) {gaps++;}
else if(ib==ob) {
correct++;
c++;
} else {
if(isHeterozygous(ob)||isHeterozygous(ib)) {hets++; h++;}
else {errors++;
e++;
// if(t==0) System.out.printf("%d %d %s %s %n",t,s,oA.getBaseAsString(oATaxa, s), iA.getBaseAsString(t, s));
}
}
}
}
if(taxaOut) System.out.printf("%s %d %d %d %d %n",iA.taxaName(t),u,h,c,e);
}
System.out.println("MFile\tIFile\tGap\tUnimp\tUnimpHets\tCorrect\tErrors");
System.out.printf("%s\t%s\t%d\t%d\t%d\t%d\t%d%n",maskFile, impFile, gaps, unimp,hets,correct,errors);
return new int[]{gaps, unimp,hets,correct,errors};
}
/**
* Calculates proportion imputed, homozygous proportion right, heterozygous proportion right
* @param impGT
* @param keyGT
* @return
* @deprecated a similar method is need in the core part of accuracy.
*/
@Deprecated
public static double[] compareAlignment(GenotypeTable impGT, GenotypeTable keyGT, String taxaPrefix) {
int hetKeys=0, hetCompared=0, hetRight=0;
int homoKeys=0, homoCompared=0, homoRight=0;
//if(impGT.numberOfTaxa()!=keyGT.numberOfTaxa()) throw new InputMismatchException("Number of Taxa do not match");
if(impGT.numberOfSites()!=keyGT.numberOfSites()) throw new InputMismatchException("Number of Sites do not match");
Random r=new Random();
for (int t=0; t
© 2015 - 2025 Weber Informatics LLC | Privacy Policy