All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.imputation.FILLINImputationAccuracy Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

There is a newer version: 5.2.94
Show newest version
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package net.maizegenetics.analysis.imputation;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.InputMismatchException;
import java.util.Random;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.dna.map.PositionList;
import net.maizegenetics.dna.snp.FilterGenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTable;
import static net.maizegenetics.dna.snp.GenotypeTable.UNKNOWN_DIPLOID_ALLELE;
import net.maizegenetics.dna.snp.GenotypeTableBuilder;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import static net.maizegenetics.dna.snp.GenotypeTableUtils.isHeterozygous;
import net.maizegenetics.dna.snp.ImportUtils;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import static net.maizegenetics.dna.snp.NucleotideAlignmentConstants.GAP_DIPLOID_ALLELE;
import net.maizegenetics.dna.snp.genotypecall.GenotypeCallTableBuilder;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.ArrayUtils;

/**
 * A class to mask input files, hold information related to accuracy calculation,
 * and calculate accuracy for FILLINImputationPlugin
 * @author kls283
 */
public class FILLINImputationAccuracy {
    private GenotypeTable maskKey= null;
    private GenotypeTable unimp= null;
    private GenotypeTable imputed= null;
    private GenotypeTable[] donor= null;
    private String outFile= null;
    private double propSitesMask= 0.01;
    private int depthToMask= 7;
    private double propDepthSitesToMask= 0.2;
    public int[] MAF= null;
    private double[][] all= null; //arrays held ("columns"): 0-maskedMinor, 1-maskedHet, 2-maskedMajor; each array ("rows"):0-to minor, 1-to het, 2-to major, 3-unimp, 4-total for known type
    private double[][][] mafAll= null;//sam as all, but first array holds MAF category
    private double[] MAFClass= null;
    private boolean verboseOutput= true;
    
    public FILLINImputationAccuracy(GenotypeTable unimp, GenotypeTable maskKey, GenotypeTable[] donors, double propSitesMask, int depthToMask, double propDepthSitesToMask, String outFileName, double[] MAFClass, boolean verbose) {
        this.unimp= unimp;
        this.maskKey= maskKey;
        this.donor= donors;
        this.propSitesMask= propSitesMask;
        this.depthToMask= depthToMask;
        this.propDepthSitesToMask= propDepthSitesToMask;
        this.MAFClass= MAFClass;
        this.verboseOutput= verbose;
        this.outFile= outFileName;
    }
    
    public FILLINImputationAccuracy(GenotypeTable unimp, GenotypeTable maskKey, String outFileName, boolean verbose) {
        this.unimp= unimp;
        this.maskKey= maskKey;
        this.verboseOutput= verbose;
        this.outFile= outFileName;
    }
    
    public FILLINImputationAccuracy(GenotypeTable unimp, GenotypeTable maskKey, GenotypeTable[] donor, String outFileName, boolean verbose) {
        this.unimp= unimp;
        this.maskKey= maskKey;
        this.donor= donor;
        this.verboseOutput= verbose;
        this.outFile= outFileName;
    }
    
    public FILLINImputationAccuracy(GenotypeTable unimp, GenotypeTable maskKey, GenotypeTable[] donor, double[] MAFClass, String outFileName, boolean verbose) {
        this.unimp= unimp;
        this.maskKey= maskKey;
        this.donor= donor;
        this.MAFClass= MAFClass;
        this.verboseOutput= verbose;
        this.outFile= outFileName;
    }
    
    public GenotypeTable initiateAccuracy() {
        if (this.MAFClass!=null) {//if mafClass not null, assign MAF categories to sites in unimputed
            generateMAF();
            this.mafAll= new double[this.MAFClass.length][3][5];
            if (this.verboseOutput) System.out.println("Calculating accuracy within supplied MAF categories.");
        }
        //mask file if not already masked (depth or proportion) and generate key
        if (this.maskKey!=null) {
            if (this.verboseOutput) System.out.println("File already masked. Use input key file for calculating accuracy");
            boolean goodKey= true;
            if (Arrays.equals(this.maskKey.physicalPositions(), this.unimp.physicalPositions())==false) goodKey= filterKeySites();
            if (!goodKey && this.verboseOutput) System.out.println("Problem with input key file. Masking unimputed input");
            else return this.unimp;
        }
        if (this.unimp.hasDepth() && this.depthToMask>0) {
            boolean works= maskFileByDepth();
            if (works) return this.unimp;
        }
        maskPropSites();
        return this.unimp;
    }

        private boolean generateMAF() {
            this.MAF= new int[this.unimp.numberOfSites()];
            for (GenotypeTable don:this.donor) {
                for (int site = 0; site < don.numberOfSites(); site++) {
                    int unimpSite= this.unimp.positions().indexOf(don.positions().get(site));
                    if (unimpSite < 0) {this.MAF[unimpSite]= -1; continue;}
                    int search= Arrays.binarySearch(this.MAFClass, don.minorAlleleFrequency(site));
                    this.MAF[unimpSite]= search<0?Math.abs(search)-1:search;
                }
            }
            return true;
        }

        private boolean maskFileByDepth() {
            if (verboseOutput) System.out.println("Masking file using depth\nSite depth to mask: "+this.depthToMask+"\nProportion of depth sites to be masked: "+this.propDepthSitesToMask);
            GenotypeCallTableBuilder mask= GenotypeCallTableBuilder.getInstance(this.unimp.numberOfTaxa(), this.unimp.numberOfSites());
            GenotypeCallTableBuilder key= GenotypeCallTableBuilder.getInstance(this.unimp.numberOfTaxa(), this.unimp.numberOfSites());

            int cnt= 0;
            for (int taxon = 0; taxon < this.unimp.numberOfTaxa(); taxon++) {
                int taxaCnt= 0;
                mask.setBaseRangeForTaxon(taxon, 0, this.unimp.genotypeAllSites(taxon));
                for (int site = 0; site < this.unimp.numberOfSites(); site++) {
                    if (GenotypeTableUtils.isEqual(GenotypeTable.UNKNOWN_DIPLOID_ALLELE, this.unimp.genotype(taxon, site))) continue;
                    int[] currD= this.unimp.depthForAlleles(taxon, site);
                    if (currD[0]+currD[1]!=this.depthToMask) continue;
                    if (Math.random()>this.propDepthSitesToMask) continue;
                    else if ((this.unimp.isHeterozygous(taxon, site)==false) ||
                                (depthToMask > 3 && currD[0] > 1 && currD[1] > 1)|| 
                                (depthToMask < 4)) {
                        mask.setBase(taxon, site, GenotypeTable.UNKNOWN_DIPLOID_ALLELE); key.setBase(taxon, site, this.unimp.genotype(taxon, site)); taxaCnt++;
                    }
                }
                if (this.verboseOutput) System.out.println(taxaCnt+" sites masked for "+this.unimp.taxaName(taxon)); cnt+= taxaCnt;
            }
            if (cnt<2000 && this.verboseOutput) {System.out.println("Insufficient sites masked with depth. Calculate accuracy by proportion"); return false;}
            if (this.verboseOutput) System.out.println(cnt+" sites masked at a depth of "+this.depthToMask);
            this.maskKey= GenotypeTableBuilder.getInstance(key.build(), this.unimp.positions(), this.unimp.taxa());
            this.unimp= GenotypeTableBuilder.getInstance(mask.build(), this.unimp.positions(), this.unimp.taxa());
            return true;
        }

        private boolean maskPropSites() {
            if (this.verboseOutput) System.out.println("Masking file without depth\nMasking "+this.propSitesMask+" proportion of sites");
            GenotypeCallTableBuilder mask= GenotypeCallTableBuilder.getInstance(this.unimp.numberOfTaxa(), this.unimp.numberOfSites());
            GenotypeCallTableBuilder key= GenotypeCallTableBuilder.getInstance(this.unimp.numberOfTaxa(), this.unimp.numberOfSites());

            int presGenos= 0;
            for (int taxon = 0; taxon < this.unimp.numberOfTaxa(); taxon++) {presGenos+= this.unimp.totalNonMissingForTaxon(taxon);}
            int expected= (int)(this.propSitesMask*(double)presGenos);
            int cnt= 0;
            for (int taxon = 0; taxon < this.unimp.numberOfTaxa(); taxon++) {
                int taxaCnt= 0;
                mask.setBaseRangeForTaxon(taxon, 0, this.unimp.genotypeAllSites(taxon));
                for (int site = 0; site < this.unimp.numberOfSites(); site++) {
                    if (Math.random() keepSites= new ArrayList<>();
            boolean working= true;
            if (Arrays.equals(this.unimp.chromosomes(), this.maskKey.chromosomes())==false) working= matchChromosomes();
            if (!working) {System.out.println("No overlapping chromosomes"); return false;}
            PositionList maskPos= this.maskKey.positions();
            for (Position p:this.unimp.positions()) {
                if (maskPos.contains(p)) keepSites.add(p.getSNPID());
                else {
                    System.out.println("Not all sites in target imputation file contained in mask key. Not using mask");
                    return false;
                }
            }
            GenotypeTable filter= FilterGenotypeTable.getInstance(this.maskKey, keepSites.toArray(new String[keepSites.size()]));
            this.maskKey= filter;//GenotypeTableBuilder.getGenotypeCopyInstance(filter); //Change this back when GenotypeCopyInstance fixed
            if (verboseOutput) System.out.println("Sites in new mask: "+this.maskKey.numberOfSites());
            
            //check to see if all taxa in unimputed are in the mask file
            if (!this.maskKey.taxa().equals(this.unimp.taxa())) {
                if (this.maskKey.taxa().containsAll(this.unimp.taxa())) 
                    this.maskKey= FilterGenotypeTable.getInstance(this.maskKey, this.unimp.taxa());
                else {
                    System.out.println("Not all taxa in target imputation file contained in mask key. Not using mask");
                    return false;
                }
            }
            System.out.println("Filtering key took "+(System.currentTimeMillis()-time)+" milliseconds");
            return true;
        }
        
        private boolean matchChromosomes() {
            Chromosome[] unimpChr= this.unimp.chromosomes();
            Chromosome[] keyChr= this.maskKey.chromosomes();
            ArrayList keepSites= new ArrayList<>();
            for (Chromosome chr:unimpChr) { //if any of the chromosomes in input do not exist in key, return false (which then masks proportion)
                if (Arrays.binarySearch(keyChr, chr)<0) return false;
            }
            for (Chromosome chr:keyChr) { //keep sites on key that are on matching chromosomes
                if (Arrays.binarySearch(unimpChr, chr)>-1) {
                    int[] startEnd = this.maskKey.firstLastSiteOfChromosome(chr);
                    for (int site = startEnd[0]; site <= startEnd[1]; site++) {
                        keepSites.add(site);
                    }
                }
            }
            GenotypeTable filter= FilterGenotypeTable.getInstance(this.maskKey, ArrayUtils.toPrimitive(keepSites.toArray(new Integer[keepSites.size()])));
            this.maskKey= filter;//GenotypeTableBuilder.getGenotypeCopyInstance(filter);//Change this back when GenotypeCopyInstance fixed
            if (verboseOutput) System.out.println(this.maskKey.numberOfSites()+" sites retained after chromsome filter");
            return true;
        }
        
            //this is the sample multiple r2.
        private double pearsonR2(double[][] all, boolean verbose) {
            int size= 0;
            for (int x = 0; x < 3; x++) {size+= (all[x][4]-all[x][3]);}
            double[][] xy= new double[2][size]; //0 is x, 1 is y
            int last= 0;//the last index filled
            for (double x = 0; x < 3; x++) { for (double y = 0; y < 3; y++) {
                    for (int fill = last; fill < last+all[(int)x][(int)y]; fill++) {
                        xy[0][fill]= x;
                        xy[1][fill]= y;
                    }
                    last= last+(int)all[(int)x][(int)y];
                }}
            double meanX= 0; double meanY= 0; double varX= 0; double varY= 0; double covXY= 0; double r2= 0.0;
            for (int i = 0; i < xy[0].length; i++) {meanX+=xy[0][i]; meanY+= xy[1][i];}
            meanX= meanX/(xy[0].length-1); meanY= meanY/(xy[1].length-1);
            double currX, currY;
            for (int i = 0; i < xy[0].length; i++) {
                currX= xy[0][i]-meanX; currY= xy[1][i]-meanY;
                varX+= currX*currX; varY+= currY*currY;
                covXY+= currX*currY;
            }
            r2= (covXY/(Math.sqrt(varX)*Math.sqrt(varY)))*(covXY/(Math.sqrt(varX)*Math.sqrt(varY)));
            if (verbose) System.out.println("Unadjusted R2 value for "+size+" comparisons: "+r2);
            return r2;
        }

        private void accuracyOut(double time) {
            DecimalFormat df = new DecimalFormat("0.########");
            double r2= pearsonR2(this.all, this.verboseOutput);
            try {
                String ext= FilenameUtils.getExtension(this.outFile);
                File outputFile = new File(this.outFile.replace(ext,"Accuracy.txt"));
                DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
                outStream.writeBytes("##Taxon\tTotalSitesMasked\tTotalSitesCompared\tTotalPropUnimputed\tNumMinor\tCorrectMinor\tMinorToHet\tMinorToMajor\tUnimpMinor"
                        + "\tNumHets\tHetToMinor\tCorrectHet\tHetToMajor\tUnimpHet\tNumMajor\tMajorToMinor\tMajorToHet\tCorrectMajor\tUnimpMajor\tR2\n");
                outStream.writeBytes("##TotalByImputed\t"+(this.all[0][4]+this.all[1][4]+this.all[2][4])+"\t"+(this.all[0][4]+this.all[1][4]+this.all[2][4]-this.all[0][3]-this.all[1][3]-this.all[2][3])+"\t"+
                        ((this.all[0][3]+this.all[1][3]+this.all[2][3])/(this.all[0][4]+this.all[1][4]+this.all[2][4]))+"\t"+this.all[0][4]+"\t"+this.all[0][0]+"\t"+this.all[0][1]+"\t"+this.all[0][2]+"\t"+this.all[0][3]+
                        "\t"+this.all[1][4]+"\t"+this.all[1][0]+"\t"+this.all[1][1]+"\t"+this.all[1][2]+"\t"+this.all[1][3]+"\t"+this.all[2][4]+"\t"+this.all[2][0]+"\t"+this.all[2][1]+"\t"+this.all[2][2]+
                        "\t"+this.all[2][3]+"\t"+r2+"\n");
                outStream.writeBytes("#Minor=0,Het=1,Major=2;x is masked(known), y is predicted\nx\ty\tN\tprop\n"
                        +0+"\t"+0+"\t"+this.all[0][0]+"\t"+df.format((this.all[0][0])/(this.all[0][0]+this.all[0][1]+this.all[0][2]))+"\n"
                        +0+"\t"+.5+"\t"+this.all[0][1]+"\t"+df.format((this.all[0][1])/(this.all[0][0]+this.all[0][1]+this.all[0][2]))+"\n"
                        +0+"\t"+1+"\t"+this.all[0][2]+"\t"+df.format((this.all[0][2])/(this.all[0][0]+this.all[0][1]+this.all[0][2]))+"\n"
                        +.5+"\t"+0+"\t"+this.all[1][0]+"\t"+df.format((this.all[1][0])/(this.all[1][0]+this.all[1][1]+this.all[1][2]))+"\n"
                        +.5+"\t"+.5+"\t"+this.all[1][1]+"\t"+df.format((this.all[1][1])/(this.all[1][0]+this.all[1][1]+this.all[1][2]))+"\n"
                        +.5+"\t"+1+"\t"+this.all[1][2]+"\t"+df.format((this.all[1][2])/(this.all[1][0]+this.all[1][1]+this.all[1][2]))+"\n"
                        +1+"\t"+0+"\t"+this.all[2][0]+"\t"+df.format((this.all[2][0])/(this.all[2][0]+this.all[2][1]+this.all[2][2]))+"\n"
                        +1+"\t"+.5+"\t"+this.all[2][1]+"\t"+df.format((this.all[2][1])/(this.all[2][0]+this.all[2][1]+this.all[2][2]))+"\n"
                        +1+"\t"+1+"\t"+this.all[2][2]+"\t"+df.format((this.all[2][2])/(this.all[2][0]+this.all[2][1]+this.all[2][2]))+"\n");
                outStream.writeBytes("#Proportion unimputed:\n#minor <- "+this.all[0][3]/this.all[0][4]+"\n#het<- "+this.all[1][3]/this.all[1][4]+"\n#major<- "+this.all[2][3]/this.all[2][4]+"\n");
                outStream.writeBytes("#Time to impute and calculate accuracy: "+time+" seconds");
                if (this.verboseOutput) System.out.println("##Taxon\tTotalSitesMasked\tTotalSitesCompared\tTotalPropUnimputed\tNumMinor\tCorrectMinor\tMinorToHet\tMinorToMajor\tUnimpMinor"
                        + "\tNumHets\tHetToMinor\tCorrectHet\tHetToMajor\tUnimpHet\tNumMajor\tMajorToMinor\tMajorToHet\tCorrectMajor\tUnimpMajor\tR2");
                if (this.verboseOutput) System.out.println("TotalByImputed\t"+(this.all[0][4]+this.all[1][4]+this.all[2][4])+"\t"+(this.all[0][4]+this.all[1][4]+this.all[2][4]-this.all[0][3]-this.all[1][3]-this.all[2][3])+"\t"+
                        ((this.all[0][3]+this.all[1][3]+this.all[2][3])/(this.all[0][4]+this.all[1][4]+this.all[2][4]))+"\t"+this.all[0][4]+"\t"+this.all[0][0]+"\t"+this.all[0][1]+"\t"+this.all[0][2]+"\t"+this.all[0][3]+
                        "\t"+this.all[1][4]+"\t"+this.all[1][0]+"\t"+this.all[1][1]+"\t"+this.all[1][2]+"\t"+this.all[1][3]+"\t"+this.all[2][4]+"\t"+this.all[2][0]+"\t"+this.all[2][1]+"\t"+this.all[2][2]+
                        "\t"+this.all[2][3]+"\t"+r2);
                if (this.verboseOutput) System.out.println("Proportion unimputed:\nminor: "+this.all[0][3]/this.all[0][4]+"\nhet: "+this.all[1][3]/this.all[1][4]+"\nmajor: "+this.all[2][3]/this.all[2][4]);
                if (this.verboseOutput) System.out.println("#Minor=0,Het=1,Major=2;x is masked(known), y is predicted\nx\ty\tN\tprop\n"
                        +0+"\t"+0+"\t"+this.all[0][0]+"\t"+(this.all[0][0])/(this.all[0][0]+this.all[0][1]+this.all[0][2])+"\n"
                        +0+"\t"+.5+"\t"+this.all[0][1]+"\t"+(this.all[0][1])/(this.all[0][0]+this.all[0][1]+this.all[0][2])+"\n"
                        +0+"\t"+1+"\t"+this.all[0][2]+"\t"+(this.all[0][2])/(this.all[0][0]+this.all[0][1]+this.all[0][2])+"\n"
                        +.5+"\t"+0+"\t"+this.all[1][0]+"\t"+(this.all[1][0])/(this.all[1][0]+this.all[1][1]+this.all[1][2])+"\n"
                        +.5+"\t"+.5+"\t"+this.all[1][1]+"\t"+(this.all[1][1])/(this.all[1][0]+this.all[1][1]+this.all[1][2])+"\n"
                        +.5+"\t"+1+"\t"+this.all[1][2]+"\t"+(this.all[1][2])/(this.all[1][0]+this.all[1][1]+this.all[1][2])+"\n"
                        +1+"\t"+0+"\t"+this.all[2][0]+"\t"+(this.all[2][0])/(this.all[2][0]+this.all[2][1]+this.all[2][2])+"\n"
                        +1+"\t"+.5+"\t"+this.all[2][1]+"\t"+(this.all[2][1])/(this.all[2][0]+this.all[2][1]+this.all[2][2])+"\n"
                        +1+"\t"+1+"\t"+this.all[2][2]+"\t"+(this.all[2][2])/(this.all[2][0]+this.all[2][1]+this.all[2][2])+"\n");
                outStream.close();
            } catch (Exception e) {
                if (this.verboseOutput) System.out.println(e);
            }
        }

        private void accuracyMAFOut() {
            DecimalFormat df = new DecimalFormat("0.########");
            if (this.MAF!=null && this.MAFClass!=null) try {
                String ext= FilenameUtils.getExtension(this.outFile);
                File outputFile = new File(this.outFile.replace(ext,"MAFAccuracy.txt"));
                DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
                outStream.writeBytes("##\tMAFClass\tTotalSitesMasked\tTotalSitesCompared\tTotalPropUnimputed\tNumHets\tHetToMinor\tHetToMajor\tCorrectHet\tUnimpHet\tNumMinor\tMinorToMajor\tMinorToHet\tCorrectMinor\t"
                        + "UnimpMinor\tNumMajor\tMajorToMinor\tMajorToHet\tCorrectMajor\tUnimputedMajor\tr2\n");
                for (int i= 0; i -1)?true:false;
                    if (use) maf= this.MAF[site];
                    byte known = maskKey.genotype(keyTaxon, site);
                    if (known == diploidN) continue;
                    byte imp = this.imputed.genotype(taxon, site);
                    if (GenotypeTableUtils.isHeterozygous(known) == true) {
                        this.all[1][4]++; if (use) this.mafAll[maf][1][4]++;
                        if (imp == diploidN) {this.all[1][3]++; if (use) this.mafAll[maf][1][3]++;}
                        else if (GenotypeTableUtils.isEqual(imp, known) == true) {this.all[1][1]++; if (use) this.mafAll[maf][1][1]++;}
                        else if (GenotypeTableUtils.isHeterozygous(imp) == false && GenotypeTableUtils.isPartiallyEqual(imp, this.unimp.minorAllele(site)) == true) {this.all[1][0]++; if (use) this.mafAll[maf][1][0]++;}//to minor 
                        else if (GenotypeTableUtils.isHeterozygous(imp) == false && GenotypeTableUtils.isPartiallyEqual(imp, this.unimp.majorAllele(site)) == true) {this.all[1][2]++; if (use) this.mafAll[maf][1][2]++;}
                        else {this.all[1][4]--;  if (use) this.mafAll[maf][1][4]--;}//implies >2 allele states at given genotype
                    } else if (known == GenotypeTableUtils.getDiploidValue(this.unimp.minorAllele(site),this.unimp.minorAllele(site))) {
                        this.all[0][4]++; if (use) this.mafAll[maf][0][4]++;
                        if (imp == diploidN) {this.all[0][3]++;  if (use) this.mafAll[maf][0][3]++;}
                        else if (GenotypeTableUtils.isEqual(imp, known) == true) {this.all[0][0]++;  if (use) this.mafAll[maf][0][0]++;}
                        else if (GenotypeTableUtils.isHeterozygous(imp) == true && GenotypeTableUtils.isPartiallyEqual(imp, known) == true) {this.all[0][1]++;  if (use) this.mafAll[maf][0][1]++;}
                        else {this.all[0][2]++;  if (use) this.mafAll[maf][0][3]++;}
                    } else if (known == GenotypeTableUtils.getDiploidValue(this.unimp.majorAllele(site),this.unimp.majorAllele(site))) {
                        this.all[2][4]++;  if (use) this.mafAll[maf][2][4]++;
                        if (imp == diploidN) {this.all[2][3]++;  if (use) this.mafAll[maf][2][3]++;}
                        else if (GenotypeTableUtils.isEqual(imp, known) == true) {this.all[2][2]++; if (use) this.mafAll[maf][2][2]++;}
                        else if (GenotypeTableUtils.isHeterozygous(imp) == true && GenotypeTableUtils.isPartiallyEqual(imp, known) == true) {this.all[2][1]++;  if (use) this.mafAll[maf][2][1]++;}
                        else {this.all[2][0]++; if (use) this.mafAll[maf][2][0]++;}
                    } else continue;
                }
            }
            accuracyOut(runtime);
            if (this.MAFClass!=null) accuracyMAFOut();
            return pearsonR2(this.all,this.verboseOutput);
        }
        
     /**
     * Legacy approach for measuring accuracy, but need to maintain some tests.
     * @deprecated
     */
    @Deprecated
    public static int[] compareAlignment(String origFile, String maskFile, String impFile, boolean noMask) {
        boolean taxaOut=false;
        GenotypeTable oA=ImportUtils.readGuessFormat(origFile);
        System.out.printf("Orig taxa:%d sites:%d %n",oA.numberOfTaxa(),oA.numberOfSites());
        GenotypeTable mA=null;
        if(noMask==false) {mA=ImportUtils.readGuessFormat(maskFile);
            System.out.printf("Mask taxa:%d sites:%d %n",mA.numberOfTaxa(),mA.numberOfSites());
        }
        GenotypeTable iA=ImportUtils.readGuessFormat(impFile);
        System.out.printf("Imp taxa:%d sites:%d %n",iA.numberOfTaxa(),iA.numberOfSites());
        int correct=0;
        int errors=0;
        int unimp=0;
        int hets=0;
        int gaps=0;
        for (int t = 0; t < iA.numberOfTaxa(); t++) {
            int e=0,c=0,u=0,h=0;
            int oATaxa=oA.taxa().indexOf(iA.taxaName(t));
            for (int s = 0; s < iA.numberOfSites(); s++) {
                if(noMask||(oA.genotype(oATaxa, s)!=mA.genotype(t, s))) {
                    byte ib=iA.genotype(t, s);
                    byte ob=oA.genotype(oATaxa, s);
                    if((ib==UNKNOWN_DIPLOID_ALLELE)||(ob==UNKNOWN_DIPLOID_ALLELE)) {unimp++; u++;}
                    else if(ib==GAP_DIPLOID_ALLELE) {gaps++;}
                    else if(ib==ob) {
                        correct++;
                        c++;
                    } else {
                        if(isHeterozygous(ob)||isHeterozygous(ib)) {hets++; h++;}
                        else {errors++;
                            e++;
//                            if(t==0) System.out.printf("%d %d %s %s %n",t,s,oA.getBaseAsString(oATaxa, s), iA.getBaseAsString(t, s));
                        }
                    }
                }
            }
            if(taxaOut) System.out.printf("%s %d %d %d %d %n",iA.taxaName(t),u,h,c,e);
        }
        System.out.println("MFile\tIFile\tGap\tUnimp\tUnimpHets\tCorrect\tErrors");
        System.out.printf("%s\t%s\t%d\t%d\t%d\t%d\t%d%n",maskFile, impFile, gaps, unimp,hets,correct,errors);
        return new int[]{gaps, unimp,hets,correct,errors};
    }

    /**
     * Calculates proportion imputed, homozygous proportion right, heterozygous proportion right
     * @param impGT
     * @param keyGT
     * @return
     * @deprecated a similar method is need in the core part of accuracy.
     */
    @Deprecated
    public static double[] compareAlignment(GenotypeTable impGT, GenotypeTable keyGT, String taxaPrefix) {
        int hetKeys=0, hetCompared=0, hetRight=0;
        int homoKeys=0, homoCompared=0, homoRight=0;
        //if(impGT.numberOfTaxa()!=keyGT.numberOfTaxa()) throw new InputMismatchException("Number of Taxa do not match");
        if(impGT.numberOfSites()!=keyGT.numberOfSites()) throw new InputMismatchException("Number of Sites do not match");
        Random r=new Random();
        for (int t=0; t




© 2015 - 2025 Weber Informatics LLC | Privacy Policy