All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.gbs.CompareGenosBetweenHapMapFilesPlugin Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

There is a newer version: 5.2.94
Show newest version
/*
 * CompareGenosBetweenHapMapFilesPlugin
 */
package net.maizegenetics.analysis.gbs;

import com.google.common.base.Joiner;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import net.maizegenetics.dna.snp.ImportUtils;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.ArgsEngine;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Logger;
import org.apache.log4j.SimpleLayout;

import javax.swing.*;
import java.awt.*;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

/**
 *
 * @author glaubitz
 */
public class CompareGenosBetweenHapMapFilesPlugin extends AbstractPlugin {

    private final Logger myLogger = Logger.getLogger(CompareGenosBetweenHapMapFilesPlugin.class);
    private ArgsEngine myArgsEngine = null;
    private String hmp1FileStr, hmp2FileStr;
    private int startChr, endChr, chr, position;
    private Multimap taxaSynonyms = HashMultimap.create();

    private HashMap> taxaRedirect = new HashMap>();
    private final String DELIMITER = "\t";

    public static enum SiteCompareType {

        SAME_STRAND, EITHER_STRAND, DIFF_STRAND, DIFFERENT
    };
    int nCompared = 0;
    int nSamePosNotComparable;
    // these are indices of the stat in the int[] array compareStats[]
    static final int NUM_TAXA_POSSIBLE_COMPARISONS = 0, NUM_TAXA_MISSING = 1, NUM_TAXA_COMPARED = 2, NUM_TAXA_DIFFERENT = 3, NUM_TAXA_HOMOZYGOUS_COMPARED = 4, NUM_TAXA_HOMOZYGOUS_DIFF = 5;
    //Taxa1:1	Taxa2:1	 NumberTested	Same	Different	ErrorRate	HomozygousTested	Same	Different	ErrorRate
    static final int COMPARE_STATS_LENGTH = 6; // this is the length of the int[] array compareStats[]
    static final int NUM_SITES_COMPARED = 0, NUM_SITES_DIFF = 1, NUM_SITES_HOMOZYGOUS_COMPARED = 2, NUM_SITES_HOMOZYGOUS_DIFF = 3;
    static final int COMPARE_TAXA_STATS_LENGTH = 4;
    static final int MINOR_ALLELE_FREQ1 = 0, MINOR_ALLELE_FREQ2 = 1, F_VALUE1 = 2, F_VALUE2 = 3; // these are the indices of the (double) summary stat in the double[] array summStats
    static final int summStatsLength = 4; //
    File outfile = null;
    DataOutputStream fw = null;
    private int myNumCalculations = 0;
    private List myComparisons = new ArrayList();
    private List myErrorRates = new ArrayList();
    private List myHomComparisons = new ArrayList();
    private List myHomDiff = new ArrayList();
    private List myHomError = new ArrayList();
    int[][][] myCompareStatsTaxa;

    public CompareGenosBetweenHapMapFilesPlugin() {
        super(null, false);
    }

    public CompareGenosBetweenHapMapFilesPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    private void printUsage() {
        myLogger.info(
                "\n\nThe options for CompareGenosBetweenHapMapFilesPlugin are:\n"
                + "    -hmp1  First hapmap format genotypic input file (use \"+\" as a wildcard character in place of the chromosome number)\n"
                + "    -hmp2  Second hapmap format genotypic input file to compare the first one to (use \"+\" as a wildcard character in place of the chromosome number)\n"
                + "    -sC    Start chromosome\n"
                + "    -eC    End chromosome\n"
                + "    -syn   Lookup table file of synonymous full taxon names in hmp1 and hmp2 (header line is ignored)\n"
                + "    -o     Output file for report (tab-delimited text format)\n\n\n");
    }

    @Override
    public void setParameters(String[] args) {
        myLogger.addAppender(new ConsoleAppender(new SimpleLayout()));
        if (args.length == 0) {
            printUsage();
            throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
        }
        if (myArgsEngine == null) {
            myArgsEngine = new ArgsEngine();
            myArgsEngine.add("-hmp1", "--hapmap-file1", true);
            myArgsEngine.add("-hmp2", "--hapmap-file2", true);
            myArgsEngine.add("-syn", "--synonym-file", true);
            myArgsEngine.add("-o", "--output-file", true);
        }
        myArgsEngine.parse(args);
        if (myArgsEngine.getBoolean("-syn")) {
            String synFileStr = myArgsEngine.getString("-syn");
            File synFile = new File(synFileStr);
            if (!synFile.exists() || !synFile.isFile()) {
                printUsage();
                throw new IllegalArgumentException("Can't find the file containing the synonym table for full taxon names (-syn option: " + synFileStr + ").");
            }
            if (!readTaxaSynonymsFromFile(synFile)) {
                throw new IllegalArgumentException("Problem reading the file containing the synonym table for full taxon names. Progam aborted.");
            }
        } else {
            createTaxaSynonymsFromAlignment(myArgsEngine.getString("-hmp1"),myArgsEngine.getString("-hmp2"));
          //  printUsage();
          //  throw new IllegalArgumentException("Please specify a file containing the synonym table for full taxon names (option -syn).");
        }
        if (myArgsEngine.getBoolean("-hmp1")) {
            hmp1FileStr = myArgsEngine.getString("-hmp1");
            File hmp1File = new File(hmp1FileStr);
            if (!hmp1File.exists() || !hmp1File.isFile()) {
                printUsage();
                throw new IllegalArgumentException("Can't find the first hapmap format genotype input file (-hmp1 option: " + hmp1FileStr + ").");
            }
        }
        if (myArgsEngine.getBoolean("-hmp2")) {
            hmp2FileStr = myArgsEngine.getString("-hmp2");
            File hmp1File = new File(hmp1FileStr);
            if (!hmp1File.exists() || !hmp1File.isFile()) {
                printUsage();
                throw new IllegalArgumentException("Can't find the first hapmap format genotype input file (-hmp1 option: " + hmp2FileStr + ").");
            }
        }
        if (myArgsEngine.getBoolean("-o")) {
            String outFileStr = myArgsEngine.getString("-o");
            outfile = new File(outFileStr);
            try {
                fw = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outfile), 65536));
                fw.writeBytes(
                        "Chr\tPosition\tAlleles1\tAlleles2\tCompareType\tMAF1\tMAF2\tf1\tf2\tSynonymousTaxaPairs\tMissing\tComparisons\tDiff\tErrorRate\tHomComparisons\tHomDiff\tHomErrorRate\n");
            } catch (Exception e) {
                throw new IllegalArgumentException("Unable to write to your output report file: " + e);
            }
        } else {
            printUsage();
            throw new IllegalArgumentException("Please specify an output report file (inside an existing directory) (option -o).");
        }
    }

    @Override
    public DataSet performFunction(DataSet input) {
        myLogger.info("Comparing: " + hmp1FileStr + " to " + hmp2FileStr);
        GenotypeTable a1, a2;
        try {
            a1 = ImportUtils.readGuessFormat(hmp1FileStr);
        } catch (Exception e) {
            myLogger.info("Could not read the first input hapmap file for chr" + chr + ":\n\t" + hmp2FileStr + "\n\tSkipping...");
            return null;
        }
        try {
            a2 = ImportUtils.readGuessFormat(hmp2FileStr);
        } catch (Exception e) {
            myLogger.info("Could not read the second input hapmap file for chr" + chr + ":\n\t" + hmp2FileStr + "\n\tSkipping...");
            return null;
        }
        populateTaxaRedirect(a1, a2);
        myCompareStatsTaxa = new int[taxaRedirect.size()][][];
        findCommonPositionsAndCompare(a1, a2);

        int[] comparisons = new int[myNumCalculations];
        double comparisonMean = 0.0;
        double[] errorRates = new double[myNumCalculations];
        double errorRateMean = 0.0;
        int[] homComparisons = new int[myNumCalculations];
        double homDiffSum = 0.0;
        double homComparisonSum = 0.0;
        double[] homErrors = new double[myNumCalculations];
        double homErrorSum = 0.0;
        for (int i = 0; i < myNumCalculations; i++) {
            comparisons[i] = myComparisons.get(i);
            comparisonMean = comparisonMean + comparisons[i];
            errorRates[i] = myErrorRates.get(i);
            errorRateMean = errorRateMean + errorRates[i];
            homComparisons[i] = myHomComparisons.get(i);
            homComparisonSum = homComparisonSum + homComparisons[i];
            homDiffSum = homDiffSum + myHomDiff.get(i);
            homErrors[i] = myHomError.get(i);
            homErrorSum = homErrorSum + homErrors[i];
        }

        double overAllHomoErrorRate = homDiffSum / homComparisonSum;

        comparisonMean = comparisonMean / myNumCalculations;
        double comparisonMedian = getMedian(comparisons);

        errorRateMean = errorRateMean / myNumCalculations;
        double errorRateMedian = getMedian(errorRates);

        double homComparisonMean = homComparisonSum / myNumCalculations;
        double homComparisonMedian = getMedian(homComparisons);

        double homErrorMean = homErrorSum / myNumCalculations;
        double homErrorMedian = getMedian(homErrors);

        myLogger.info("Comparison Mean\tComparison Median\tError Rate Mean\tError Rate Median\tHomozygous Comparison Mean\tHomozygous Comparison Median\tHomozygous Error Mean\tHomozygous Error Median");
        myLogger.info(comparisonMean + "\t" + comparisonMedian + "\t" + errorRateMean + "\t" + errorRateMedian + "\t" + homComparisonMean + "\t" + homComparisonMedian + "\t" + homErrorMean + "\t" + homErrorMedian);

        myLogger.info("Output Filename\tOver All Homo Error Rate\tCoverage");
        myLogger.info(outfile.getName() + "\t" + overAllHomoErrorRate + "\t" + (nSamePosNotComparable + nCompared));

        closeOutputFile();
        return null;
    }

    private static double getMedian(int[] values) {
        Arrays.sort(values);
        int middle = values.length / 2;
        if (values.length % 2 == 1) {
            return (values[middle - 1] + values[middle]) / 2.0;
        } else {
            return values[middle];
        }
    }

    private static double getMedian(double[] values) {
        Arrays.sort(values);
        int middle = values.length / 2;
        if (values.length % 2 == 1) {
            return (values[middle - 1] + values[middle]) / 2.0;
        } else {
            return values[middle];
        }
    }

    private boolean readTaxaSynonymsFromFile(File synFile) {
        taxaSynonyms.clear();
        String inputLine = "Nothing has been read from the taxon synonym input file yet";
        int nTaxa = 0, nCompareTaxa = 0;
        try {
            BufferedReader br = new BufferedReader(new FileReader(synFile), 65536);
            inputLine = br.readLine();  // header line (ignore)
            while ((inputLine = br.readLine()) != null) {
                String[] cells = inputLine.split("\t");
                if (!(cells[0].equals("NA") || cells[1].equals("NA"))) {
                    taxaSynonyms.put(cells[0],cells[1]);
                    ++nCompareTaxa;
                }
                ++nTaxa;
            }
        } catch (Exception e) {
            myLogger.error("Catch in reading taxon synonym input file e=" + e);
            e.printStackTrace();
            myLogger.info(inputLine);
            return false;
        }
        myLogger.info(nTaxa + " pairs of taxa full names read from the taxon synonym input file: " + nCompareTaxa + " of these will be compared (if found in the genotype files)");
        return true;
    }

    private boolean createTaxaSynonymsFromAlignment(String f1, String f2) {
        GenotypeTable a1=ImportUtils.readGuessFormat(f1);
        GenotypeTable a2=ImportUtils.readGuessFormat(f1);
        taxaSynonyms.clear();
        int nTaxa = 0, nCompareTaxa = 0;
        for (Taxon taxon : a1.taxa()) {
            int t2a=a2.taxa().indexOf(taxon);
            if(t2a>=0) {
                taxaSynonyms.put(taxon.getName(), a2.taxaName(t2a));
                ++nCompareTaxa;
            }
            ++nTaxa;
        }
        myLogger.info(nTaxa + " pairs of taxa full names read are identical between alignments: " + nCompareTaxa + " of these will be compared (if found in the genotype files)");
        return true;
    }

    private void populateTaxaRedirect(GenotypeTable a1, GenotypeTable a2) {
        taxaRedirect.clear();
        myLogger.info("\n\nMaking list of comparable taxa found in the two hapmap files...\n");
        System.out.println("Taxon1\t\tTaxon2");
        System.out.println("------\t\t------");
        int nTaxaPairs = 0;
        for (int taxon1Index = 0; taxon1Index < a1.numberOfTaxa(); taxon1Index++) {
            String taxon1 = a1.taxaName(taxon1Index);
            if (taxaSynonyms.containsKey(taxon1)) {
                for (String taxon2 : taxaSynonyms.get(taxon1)) {
                    for (int taxon2Index = 0; taxon2Index < a2.numberOfTaxa(); taxon2Index++) {
                        if (taxon2.equals(a2.taxaName(taxon2Index))) {
                            List synTaxaIndicesForTaxonIndex = taxaRedirect.get(taxon1Index);
                            if (synTaxaIndicesForTaxonIndex == null) {
                                taxaRedirect.put(taxon1Index, synTaxaIndicesForTaxonIndex = new ArrayList());
                            }
                            synTaxaIndicesForTaxonIndex.add(taxon2Index);
                            System.out.println(taxon1 + "\t" + taxon2);
                            ++nTaxaPairs;
                            break;
                        }
                    }
                }
            }
        }
        myLogger.info("\nHapMap format genotype file1 contains " + a1.numberOfTaxa() + " taxa in total\n");
        myLogger.info("\nHapMap format genotype file2 contains " + a2.numberOfTaxa() + " taxa in total\n");
        myLogger.info("\n" + nTaxaPairs + " pairs of comparable taxa found in the two hapmap files\n\n");
    }

    private void findCommonPositionsAndCompare(GenotypeTable a1, GenotypeTable a2) {
        nCompared = 0;
        nSamePosNotComparable = 0;
        for (int s1=0; s1=0) {
                position = a1.chromosomalPosition(s1);
                nCompared += getCompareTypeAndCompare(s1, a1, s2, a2);
                }
        }
        myLogger.info(nCompared + " sites compared on chromosome " + chr
                + "\nAn additional " + nSamePosNotComparable + " sites on chromosome " + chr + " had the same position but incomparable alleles\n");

        outputTaxaReport(a1, a2);
    }

    private void outputTaxaReport(GenotypeTable a1, GenotypeTable a2) {

        System.out.println("Taxon1\tTaxon2\tNum_Sites_Compared\tNum_Sites_Diff\tNum_Sites_Homo_Compared\tNum_Sites_Homo_Diff");
        int taxon1Count = 0;
        for (Integer taxon1Index : taxaRedirect.keySet()) {
            List synTaxaIndicesForTaxonIndex = taxaRedirect.get(taxon1Index);
            int taxon2Count = 0;
            for (Integer taxon2Index : synTaxaIndicesForTaxonIndex) {
                StringBuilder builder = new StringBuilder();
                builder.append(a1.taxaName(taxon1Index));
                builder.append(DELIMITER);
                builder.append(a2.taxaName(taxon2Index));
                builder.append(DELIMITER);
                builder.append(myCompareStatsTaxa[taxon1Count][taxon2Count][NUM_SITES_COMPARED]);
                builder.append(DELIMITER);
                builder.append(myCompareStatsTaxa[taxon1Count][taxon2Count][NUM_SITES_DIFF]);
                builder.append(DELIMITER);
                builder.append(myCompareStatsTaxa[taxon1Count][taxon2Count][NUM_SITES_HOMOZYGOUS_COMPARED]);
                builder.append(DELIMITER);
                builder.append(myCompareStatsTaxa[taxon1Count][taxon2Count][NUM_SITES_HOMOZYGOUS_DIFF]);
                System.out.println(builder.toString());
                taxon2Count++;
            }
            taxon1Count++;
        }

    }

    private int getCompareTypeAndCompare(int site1, GenotypeTable a1, int site2, GenotypeTable a2) {
        byte[] alleles1 = a1.alleles(site1);
        byte[] alleles2 = a2.alleles(site2);
        SiteCompareType compareType=(a1.positions().get(site1).getStrand()==a2.positions().get(site2).getStrand())?
                SiteCompareType.SAME_STRAND:SiteCompareType.DIFFERENT;
        if (compareType == SiteCompareType.DIFFERENT) {
            nSamePosNotComparable++;
            return 0;
        }

        double[] summStats = new double[summStatsLength];
        summStats[MINOR_ALLELE_FREQ1] = a1.minorAlleleFrequency(site1);
        summStats[MINOR_ALLELE_FREQ2] = a2.minorAlleleFrequency(site2);
        summStats[F_VALUE1] = calculateF(a1, site1);
        summStats[F_VALUE2] = calculateF(a2, site2);
        String alleleString1 = Joiner.on("/").join(GenotypeTableUtils.convertNucleotideGenotypesToStringList(alleles1));
        String alleleString2 = Joiner.on("/").join(GenotypeTableUtils.convertNucleotideGenotypesToStringList(alleles2));

        int[][][] compareTaxaStatsSame = new int[taxaRedirect.size()][][];


        int[] compareStatsSame = null;
        int taxon1Count = 0;
        for (Integer taxon1Index : taxaRedirect.keySet()) {
            List synTaxaIndicesForTaxonIndex = taxaRedirect.get(taxon1Index);
            if (myCompareStatsTaxa[taxon1Count] == null) {
                myCompareStatsTaxa[taxon1Count] = new int[synTaxaIndicesForTaxonIndex.size()][COMPARE_TAXA_STATS_LENGTH];
            }

            compareTaxaStatsSame[taxon1Count] = new int[synTaxaIndicesForTaxonIndex.size()][COMPARE_TAXA_STATS_LENGTH];

            int taxon2Count = 0;
            for (Integer taxon2Index : synTaxaIndicesForTaxonIndex) {
                if ((compareType == SiteCompareType.SAME_STRAND) || (compareType == SiteCompareType.EITHER_STRAND)) {
                    int[] tempStats = compareGenotypes(taxon1Index, site1, a1, taxon2Index, site2, a2, true);
                    if (compareStatsSame == null) {
                        compareStatsSame = new int[COMPARE_STATS_LENGTH];
                    }
                    for (int i = 0; i < COMPARE_STATS_LENGTH; i++) {
                        compareStatsSame[i] += tempStats[i];
                    }
                    compareTaxaStatsSame[taxon1Count][taxon2Count][NUM_SITES_COMPARED] = 1;
                    compareTaxaStatsSame[taxon1Count][taxon2Count][NUM_SITES_DIFF] = tempStats[NUM_TAXA_DIFFERENT];
                    compareTaxaStatsSame[taxon1Count][taxon2Count][NUM_SITES_HOMOZYGOUS_COMPARED] = tempStats[NUM_TAXA_HOMOZYGOUS_COMPARED];
                    compareTaxaStatsSame[taxon1Count][taxon2Count][NUM_SITES_HOMOZYGOUS_DIFF] = tempStats[NUM_TAXA_HOMOZYGOUS_DIFF];
                }
                taxon2Count++;
            }
            taxon1Count++;
        }

        int[] compareResults= compareStatsSame;
        addTaxaStats(myCompareStatsTaxa, compareTaxaStatsSame);
        writeCompareStats(compareResults, alleleString1, alleleString2, compareType, summStats);

        return 1;
    }

    private void addTaxaStats(int[][][] totals, int[][][] additions) {
        for (int i = 0; i < totals.length; i++) {
            for (int j = 0; j < totals[0].length; j++) {
                for (int k = 0; k < totals[0][0].length; k++) {
                    totals[i][j][k] += additions[i][j][k];
                }
            }
        }
    }

    private double calculateF(GenotypeTable a, int site) {
        byte majAllele = a.majorAllele(site);
        byte minAllele = a.minorAllele(site);
        int majGenoCnt = 0, minGenoCnt = 0, hetGenoCnt = 0;
        int nTaxa = a.numberOfTaxa();
        // TERRY - Does this make sense?  What if it's het but not major/minor?
        for (int taxon = 0; taxon < nTaxa; taxon++) {
            byte[] bases = a.genotypeArray(taxon, site);
            if ((bases[0] == majAllele) && bases[1] == majAllele) {
                majGenoCnt++;
            } else if ((bases[0] == minAllele) && bases[1] == minAllele) {
                minGenoCnt++;
            } else if (GenotypeTableUtils.isEqual(bases, new byte[]{majAllele, minAllele})) {
                hetGenoCnt++;
            }
        }
        int nGenos = hetGenoCnt + majGenoCnt + minGenoCnt;
        if (nGenos > 0) {
            double propHets = (double) hetGenoCnt / nGenos;
            double maf = (double) (2 * majGenoCnt + hetGenoCnt) / (2 * nGenos);
            double expHets = 2.0 * maf * (1.0 - maf);
            return 1.0 - (propHets / expHets);
        } else {
            return Double.NaN;
        }
    }

    private int[] compareGenotypes(int taxon1Index, int site1, GenotypeTable a1, int taxon2Index, int site2, GenotypeTable a2, boolean sameStrand) {
        int[] compareStats = new int[COMPARE_STATS_LENGTH];
        byte base1 = a1.genotype(taxon1Index, site1);
        byte base2;
        if (sameStrand) {
            base2 = a2.genotype(taxon2Index, site2);
        } else {
            base2 = NucleotideAlignmentConstants.getNucleotideDiploidComplement(a2.genotype(taxon2Index, site2));
        }
        if (base1 != GenotypeTable.UNKNOWN_DIPLOID_ALLELE && base2 != GenotypeTable.UNKNOWN_DIPLOID_ALLELE) {
            if (!(GenotypeTableUtils.isHeterozygous(base1) || GenotypeTableUtils.isHeterozygous(base2))) {
                if (base1 != base2) {
                    ++compareStats[NUM_TAXA_DIFFERENT];
                    ++compareStats[NUM_TAXA_HOMOZYGOUS_DIFF];
                }
                ++compareStats[NUM_TAXA_HOMOZYGOUS_COMPARED];
            } else {
                if (!GenotypeTableUtils.isEqual(base1, base2)) {
                    ++compareStats[NUM_TAXA_DIFFERENT];
                }
            }
            ++compareStats[NUM_TAXA_COMPARED];
        } else {
            ++compareStats[NUM_TAXA_MISSING];
        }
        ++compareStats[NUM_TAXA_POSSIBLE_COMPARISONS];
        return compareStats;
    }

    private void writeCompareStats(int[] compareStats, String alleles1, String alleles2, SiteCompareType sct, double[] summStats) {
        double errRate = compareStats[NUM_TAXA_COMPARED] > 0 ? (double) compareStats[NUM_TAXA_DIFFERENT] / compareStats[NUM_TAXA_COMPARED] : Double.NaN;
        double errRateHom = compareStats[NUM_TAXA_HOMOZYGOUS_COMPARED] > 0 ? (double) compareStats[NUM_TAXA_HOMOZYGOUS_DIFF] / compareStats[NUM_TAXA_HOMOZYGOUS_COMPARED] : Double.NaN;

        try {
            fw.writeBytes(String.valueOf(chr));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(position));
            fw.writeBytes(DELIMITER);
            //fw.writeBytes((char) alleles1[0] + "/" + (char) alleles1[1] + "\t");
            //fw.writeBytes((char) alleles2[0] + "/" + (char) alleles2[1] + "\t");
            fw.writeBytes(alleles1);
            fw.writeBytes(DELIMITER);
            fw.writeBytes(alleles2);
            fw.writeBytes(DELIMITER);
            fw.writeBytes(sct.toString());
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(summStats[MINOR_ALLELE_FREQ1]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(summStats[MINOR_ALLELE_FREQ2]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(summStats[F_VALUE1]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(summStats[F_VALUE2]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(compareStats[NUM_TAXA_POSSIBLE_COMPARISONS]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(compareStats[NUM_TAXA_MISSING]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(compareStats[NUM_TAXA_COMPARED]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(compareStats[NUM_TAXA_DIFFERENT]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(errRate));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(compareStats[NUM_TAXA_HOMOZYGOUS_COMPARED]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(compareStats[NUM_TAXA_HOMOZYGOUS_DIFF]));
            fw.writeBytes(DELIMITER);
            fw.writeBytes(String.valueOf(errRateHom));
            fw.writeBytes("\n");

            myNumCalculations++;
            myComparisons.add(compareStats[NUM_TAXA_COMPARED]);
            myErrorRates.add(errRate);
            myHomDiff.add(compareStats[NUM_TAXA_HOMOZYGOUS_DIFF]);
            myHomComparisons.add(compareStats[NUM_TAXA_HOMOZYGOUS_COMPARED]);
            myHomError.add(errRateHom);

        } catch (Exception e) {
            throw new IllegalArgumentException("Unable to write to your output report file: " + e);
        }
    }

    private void closeOutputFile() {
        try {
            fw.close();
        } catch (Exception e) {
            throw new IllegalArgumentException("Unable to close your output report file: " + e);
        }
    }

    public static SiteCompareType getSiteCompareType(byte[] alleles1, byte[] alleles2) {

        //if (alleles1.length != 2 || alleles2.length != 2) {
//        if (alleles1.length < 2 || alleles2.length < 2) {
//            return SiteCompareType.DIFFERENT; // both must be biallelic to be compared
//        }

        byte diploidValue1 = GenotypeTableUtils.getDiploidValue(alleles1[0], alleles1[1]);
        byte diploidValue2 = GenotypeTableUtils.getDiploidValue(alleles2[0], alleles2[1]);

        String iupac1 = NucleotideAlignmentConstants.getNucleotideIUPAC(diploidValue1);
        String iupac2 = NucleotideAlignmentConstants.getNucleotideIUPAC(diploidValue2);

        if ((iupac1 == null) || (iupac2 == null)) {
            return SiteCompareType.DIFFERENT;
        }

        //byte hetg1 = IUPACNucleotides.getDegerateSNPByteFromTwoSNPs(alleles1);
        //byte hetg2 = IUPACNucleotides.getDegerateSNPByteFromTwoSNPs(alleles2);

        char hetg1 = iupac1.charAt(0);
        char hetg2 = iupac2.charAt(0);

        if (hetg1 == '0' || hetg1 == '-' || hetg1 == '+' || hetg1 == GenotypeTable.UNKNOWN_ALLELE_CHAR
                || hetg2 == '0' || hetg2 == '-' || hetg2 == '+' || hetg2 == GenotypeTable.UNKNOWN_ALLELE_CHAR) {
            return SiteCompareType.DIFFERENT; // indel sites not compared
        }
        if (hetg1 == 'K' && hetg2 == 'K') {
            return SiteCompareType.SAME_STRAND;  // GT v GT
        }
        if (hetg1 == 'M' && hetg2 == 'M') {
            return SiteCompareType.SAME_STRAND;  // AC v AC
        }
        if (hetg1 == 'R' && hetg2 == 'R') {
            return SiteCompareType.SAME_STRAND;  // AG v AG
        }
        if (hetg1 == 'Y' && hetg2 == 'Y') {
            return SiteCompareType.SAME_STRAND;  // CT v CT
        }
        if (hetg1 == 'S' && hetg2 == 'S') {
            return SiteCompareType.EITHER_STRAND;  // CG v CG
        }
        if (hetg1 == 'W' && hetg2 == 'W') {
            return SiteCompareType.EITHER_STRAND;  // AT v AT
        }
        if (hetg1 == 'K' && hetg2 == 'M') {
            return SiteCompareType.DIFF_STRAND;  // GT v AC
        }
        if (hetg1 == 'M' && hetg2 == 'K') {
            return SiteCompareType.DIFF_STRAND;  // AC v GT
        }
        if (hetg1 == 'R' && hetg2 == 'Y') {
            return SiteCompareType.DIFF_STRAND;  // AG v CT
        }
        if (hetg1 == 'Y' && hetg2 == 'R') {
            return SiteCompareType.DIFF_STRAND;  // CT v AG
        }
        if (hetg1 == 'K' && hetg2 == 'R') {
            return SiteCompareType.DIFFERENT;  // GT v AG
        }
        if (hetg1 == 'K' && hetg2 == 'S') {
            return SiteCompareType.DIFFERENT;  // GT v CG
        }
        if (hetg1 == 'K' && hetg2 == 'W') {
            return SiteCompareType.DIFFERENT;  // GT v AT
        }
        if (hetg1 == 'K' && hetg2 == 'Y') {
            return SiteCompareType.DIFFERENT;  // GT v CT
        }
        if (hetg1 == 'M' && hetg2 == 'R') {
            return SiteCompareType.DIFFERENT;  // AC v AG
        }
        if (hetg1 == 'M' && hetg2 == 'S') {
            return SiteCompareType.DIFFERENT;  // AC v CG
        }
        if (hetg1 == 'M' && hetg2 == 'W') {
            return SiteCompareType.DIFFERENT;  // AC v AT
        }
        if (hetg1 == 'M' && hetg2 == 'Y') {
            return SiteCompareType.DIFFERENT;  // AC v CT
        }
        if (hetg1 == 'R' && hetg2 == 'K') {
            return SiteCompareType.DIFFERENT;  // AG v GT
        }
        if (hetg1 == 'R' && hetg2 == 'M') {
            return SiteCompareType.DIFFERENT;  // AG v AC
        }
        if (hetg1 == 'R' && hetg2 == 'S') {
            return SiteCompareType.DIFFERENT;  // AG v CG
        }
        if (hetg1 == 'R' && hetg2 == 'W') {
            return SiteCompareType.DIFFERENT;  // AG v AT
        }
        if (hetg1 == 'S' && hetg2 == 'K') {
            return SiteCompareType.DIFFERENT;  // CG v GT
        }
        if (hetg1 == 'S' && hetg2 == 'M') {
            return SiteCompareType.DIFFERENT;  // CG v AC
        }
        if (hetg1 == 'S' && hetg2 == 'R') {
            return SiteCompareType.DIFFERENT;  // CG v AG
        }
        if (hetg1 == 'S' && hetg2 == 'W') {
            return SiteCompareType.DIFFERENT;  // CG v AT
        }
        if (hetg1 == 'S' && hetg2 == 'Y') {
            return SiteCompareType.DIFFERENT;  // CG v CT
        }
        if (hetg1 == 'W' && hetg2 == 'K') {
            return SiteCompareType.DIFFERENT;  // AT v GT
        }
        if (hetg1 == 'W' && hetg2 == 'M') {
            return SiteCompareType.DIFFERENT;  // AT v AC
        }
        if (hetg1 == 'W' && hetg2 == 'R') {
            return SiteCompareType.DIFFERENT;  // AT v AG
        }
        if (hetg1 == 'W' && hetg2 == 'S') {
            return SiteCompareType.DIFFERENT;  // AT v CG
        }
        if (hetg1 == 'W' && hetg2 == 'Y') {
            return SiteCompareType.DIFFERENT;  // AT v CT
        }
        if (hetg1 == 'Y' && hetg2 == 'K') {
            return SiteCompareType.DIFFERENT;  // CT v GT
        }
        if (hetg1 == 'Y' && hetg2 == 'M') {
            return SiteCompareType.DIFFERENT;  // CT v AC
        }
        if (hetg1 == 'Y' && hetg2 == 'S') {
            return SiteCompareType.DIFFERENT;  // CT v CG
        }
        if (hetg1 == 'Y' && hetg2 == 'W') {
            return SiteCompareType.DIFFERENT;  // CT v AT
        }
        if (hetg1 == 'A' || hetg1 == 'C' || hetg1 == 'G' || hetg1 == 'T' || hetg2 == 'A' || hetg2 == 'C' || hetg2 == 'G' || hetg2 == 'T') {
            return SiteCompareType.DIFFERENT; // both must be variable to be compared (this is unlikely because then alleles.length==1)
        }

        return SiteCompareType.DIFFERENT; // default return
    }

    @Override
    public ImageIcon getIcon() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public String getButtonName() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public String getToolTipText() {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy