net.maizegenetics.analysis.imputation.ImputationUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage
disequilibrium.
package net.maizegenetics.analysis.imputation;
import net.maizegenetics.analysis.data.FileLoadPlugin;
import net.maizegenetics.analysis.data.FileLoadPlugin.TasselFileType;
import net.maizegenetics.dna.WHICH_ALLELE;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.FilterGenotypeTable;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.dna.snp.ImportUtils;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListBuilder;
import net.maizegenetics.util.BitSet;
import org.apache.log4j.Logger;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Pattern;
public class ImputationUtils {
private static final Logger myLogger = Logger.getLogger(ImputationUtils.class);
public static Pattern tab = Pattern.compile("\t");
public static void main(String[] args) {
if (args.length == 3 && args[0].equals("-xo")) {
//args[1] is the parentcall file, args[2] is the output file
exportCrossoverPositions(args[1], args[2]);
} else if (args.length == 4 && args[0].equals("-xo2")) {
exportCrossoverPositionsByParent(args[1], args[2], args[3]);
}
}
public static int[] order(int[] array) {
class SortElement implements Comparable {
int val;
int ndx;
SortElement(int x, int index) {
val = x;
ndx = index;
}
@Override
public int compareTo(SortElement se) {
return val - se.val;
}
}
int n = array.length;
SortElement[] sortArray = new SortElement[n];
for (int i = 0; i < n; i++) {
sortArray[i] = new SortElement(array[i], i);
}
Arrays.sort(sortArray);
int[] order = new int[n];
for (int i = 0; i < n; i++) {
order[i] = sortArray[i].ndx;
}
return order;
}
public static int[] reverseOrder(int[] array) {
class SortElement implements Comparable {
int val;
int ndx;
SortElement(int x, int index) {
val = x;
ndx = index;
}
@Override
public int compareTo(SortElement se) {
return se.val - val;
}
}
int n = array.length;
SortElement[] sortArray = new SortElement[n];
for (int i = 0; i < n; i++) {
sortArray[i] = new SortElement(array[i], i);
}
Arrays.sort(sortArray);
int[] order = new int[n];
for (int i = 0; i < n; i++) {
order[i] = sortArray[i].ndx;
}
return order;
}
public static GenotypeTable[] getTwoClusters(GenotypeTable gt, int[] parentIndex) {
int maxiter = 5;
//if the parents are in the data set use these as seeds
//if one parent is in the dataset pick the taxon farthest from it as the other seed
//if neither parent is in the dataset choose random seeds
int ntaxa = gt.numberOfTaxa();
int nsnps = gt.numberOfSites();
int seed1 = parentIndex[0];
int seed2 = parentIndex[1];
float[] loc1, loc2;
Random rand = new Random();
if (seed1 == -1) {
if (seed2 == -1) {
//both parents are not in the data set
seed1 = rand.nextInt(ntaxa);
loc1 = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed1, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed1, WHICH_ALLELE.Minor)}, nsnps);
while (seed2 == -1 || seed1 == seed2) {
seed2 = rand.nextInt(ntaxa);
}
loc2 = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed2, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed2, WHICH_ALLELE.Minor)}, nsnps);
} else {
//parent2 is in the data set
loc1 = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(0, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(0, WHICH_ALLELE.Minor)}, nsnps);
loc2 = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed2, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed2, WHICH_ALLELE.Minor)}, nsnps);
seed1 = 0;
float prevdist = getManhattanDistance(loc2, loc1, nsnps);
for (int t = 1; t < ntaxa; t++) {
float[] tloc = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(t, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(t, WHICH_ALLELE.Minor)}, nsnps);
float dist = getManhattanDistance(loc2, tloc, nsnps);
if (dist > prevdist) {
prevdist = dist;
loc1 = tloc;
seed1 = t;
}
}
}
} else if (seed2 == -1) {
//parent1 is in the data set
loc1 = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed1, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed1, WHICH_ALLELE.Minor)}, nsnps);
loc2 = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(0, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(0, WHICH_ALLELE.Minor)}, nsnps);
seed2 = 0;
float prevdist = getManhattanDistance(loc1, loc2, nsnps);
for (int t = 1; t < ntaxa; t++) {
float[] tloc = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(t, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(t, WHICH_ALLELE.Minor)}, nsnps);
float dist = getManhattanDistance(loc1, tloc, nsnps);
if (dist > prevdist) {
prevdist = dist;
loc2 = tloc;
seed2 = t;
}
}
} else {
//both parents are in the data set
loc1 = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed1, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed1, WHICH_ALLELE.Minor)}, nsnps);
loc2 = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed2, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed2, WHICH_ALLELE.Minor)}, nsnps);
}
int[] size1 = new int[nsnps];
int[] size2 = new int[nsnps];
for (int i = 0; i < nsnps; i++) {
if (loc1[i] >= 0) size1[i] = 1;
if (loc2[i] >= 0) size2[i] = 1;
}
boolean[] isInCluster1 = new boolean[ntaxa];
isInCluster1[seed1] = true;
isInCluster1[seed2] = false;
//do initial cluster assignment
for (int t = 0; t < ntaxa; t++) {
if (t != seed1 && t != seed2) {
float[] tloc = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(t, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(t, WHICH_ALLELE.Minor)}, nsnps);
float dist1 = getManhattanDistance(loc1, tloc, nsnps);
float dist2 = getManhattanDistance(loc2, tloc, nsnps);
if (dist1 <= dist2) {
isInCluster1[t] = true;
loc1 = getMeanLocation(loc1, size1, tloc, true, nsnps);
} else {
isInCluster1[t] = false;
loc2 = getMeanLocation(loc2, size2, tloc, true, nsnps);
}
}
}
//update cluster membership until there are no changes or for the maximum number of iterations
for (int iter = 0; iter < maxiter; iter++) {
boolean noChanges = true;
for (int t = 0; t < ntaxa; t++) {
float[] tloc = snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(t, WHICH_ALLELE.Major), gt.allelePresenceForAllSites(t, WHICH_ALLELE.Minor)}, nsnps);
float dist1 = getManhattanDistance(loc1, tloc, nsnps);
float dist2 = getManhattanDistance(loc2, tloc, nsnps);
if (dist1 <= dist2 && isInCluster1[t] == false) {
isInCluster1[t] = true;
loc1 = getMeanLocation(loc1, size1, tloc, true, nsnps);
loc2 = getMeanLocation(loc2, size2, tloc, false, nsnps);
noChanges = false;
} else if (dist1 > dist2 && isInCluster1[t] == true){
isInCluster1[t] = false;
loc1 = getMeanLocation(loc1, size1, tloc, false, nsnps);
loc2 = getMeanLocation(loc2, size2, tloc, true, nsnps);
noChanges = false;
}
}
if (noChanges) break;
}
System.out.println("distance between clusters = " + getManhattanDistance(loc1, loc2, nsnps));
//make alignments based on the clusters
TaxaListBuilder builder1 = new TaxaListBuilder();
TaxaListBuilder builder2 = new TaxaListBuilder();
for (int t = 0; t < ntaxa; t++) {
if (isInCluster1[t]) builder1.add(gt.taxa().get(t));
else builder2.add(gt.taxa().get(t));
}
GenotypeTable gt1 = FilterGenotypeTable.getInstance(gt, builder1.build());
GenotypeTable gt2 = FilterGenotypeTable.getInstance(gt, builder2.build());
return new GenotypeTable[]{gt1, gt2};
}
public static GenotypeTable[] getTwoClusters(GenotypeTable inputAlignment, int minGametesPerTaxon) {
//filter out low coverage taxa
int ntaxa = inputAlignment.numberOfTaxa();
TaxaListBuilder builder = new TaxaListBuilder();
for (int t = 0; t < ntaxa; t++) {
if (inputAlignment.totalNonMissingForTaxon(t) >= minGametesPerTaxon) builder.add(inputAlignment.taxa().get(t));
}
TaxaList adequatelyCoveredTaxa = builder.build();
GenotypeTable myGenotypes;
if (adequatelyCoveredTaxa.size() < 10) {
myLogger.info("Included lines less than 10 in getTwoClusters, poor coverage in interval starting at " + inputAlignment.siteName(0));
return null;
} else {
myGenotypes = FilterGenotypeTable.getInstance(inputAlignment, adequatelyCoveredTaxa);
}
int ntrials = 5;
int maxiter = 5;
//if the parents are in the data set use these as seeds
//if one parent is in the dataset pick the taxon farthest from it as the other seed
//if neither parent is in the dataset choose random seeds
ntaxa = myGenotypes.numberOfTaxa();
int nsnps = myGenotypes.numberOfSites();
boolean[][] isInCluster1 = new boolean[ntrials][ntaxa];
int bestTrial = -1;
float maxDistance = 0;
Random rand = new Random();
float[][] taxaLocs = new float[ntaxa][nsnps];
for (int t = 0; t < ntaxa; t++) {
taxaLocs[t] = snpsAsFloatVector(new BitSet[]{myGenotypes.allelePresenceForAllSites(t, WHICH_ALLELE.Major), myGenotypes.allelePresenceForAllSites(t, WHICH_ALLELE.Minor)}, nsnps);
}
for (int trial = 0; trial < ntrials; trial++) {
int seed1 = rand.nextInt(ntaxa);
int seed2 = -1;
while (seed2 == -1 || seed1 == seed2) {
seed2 = rand.nextInt(ntaxa);
}
isInCluster1[trial][seed1] = true;
isInCluster1[trial][seed2] = false;
//do initial cluster assignment
for (int t = 0; t < ntaxa; t++) {
if (t != seed1 && t != seed2) {
float dist1 = getManhattanDistance(taxaLocs[seed1], taxaLocs[t], nsnps);
float dist2 = getManhattanDistance(taxaLocs[seed2], taxaLocs[t], nsnps);
if (dist1 < dist2) {
isInCluster1[trial][t] = true;
} else if (dist1 > dist2){
isInCluster1[trial][t] = false;
} else if (rand.nextDouble() > 0.5) {
isInCluster1[trial][t] = true;
} else {
isInCluster1[trial][t] = false;
}
}
}
//update cluster membership until there are no changes or for the maximum number of iterations
float[][] meanLocs = new float[2][];
boolean badclusters = false;
for (int iter = 0; iter < maxiter; iter++) {
boolean noChanges = true;
int nCluster1 = 0;
int nCluster2 = 0;
for (int t = 0; t < ntaxa; t++) {
if (isInCluster1[trial][t]) nCluster1++;
else nCluster2++;
}
if (nCluster1 == 0 || nCluster2 == 0) {
badclusters = true;
break;
}
float[][] cluster1Locs = new float[nCluster1][];
float[][] cluster2Locs = new float[nCluster2][];
int countCluster1 = 0;
int countCluster2 = 0;
for (int t = 0; t < ntaxa; t++) {
if (isInCluster1[trial][t]) cluster1Locs[countCluster1++] = taxaLocs[t];
else cluster2Locs[countCluster2++] = taxaLocs[t];
}
meanLocs = new float[2][];
meanLocs[0] = getMeanLocation(cluster1Locs);
meanLocs[1] = getMeanLocation(cluster2Locs);
for (int t = 0; t < ntaxa; t++) {
float[] tloc = snpsAsFloatVector(new BitSet[]{myGenotypes.allelePresenceForAllSites(t, WHICH_ALLELE.Major), myGenotypes.allelePresenceForAllSites(t, WHICH_ALLELE.Minor)}, nsnps);
float dist1 = getManhattanDistance(meanLocs[0], tloc, nsnps);
float dist2 = getManhattanDistance(meanLocs[1], tloc, nsnps);
if (dist1 < dist2 && isInCluster1[trial][t] == false) {
isInCluster1[trial][t] = true;
noChanges = false;
} else if (dist1 > dist2 && isInCluster1[trial][t] == true){
isInCluster1[trial][t] = false;
noChanges = false;
}
}
if (noChanges) break;
}
if (badclusters == true) {
System.out.println("Trial " + trial + ": bad clustering, no distance could be calculated");
} else {
float distanceBetweenClusters = getManhattanDistance(meanLocs[0], meanLocs[1], nsnps);
if (distanceBetweenClusters > maxDistance) {
maxDistance = distanceBetweenClusters;
bestTrial = trial;
}
System.out.println("Trial " + trial + ": distance between clusters = " + distanceBetweenClusters);
}
}
//make genotype tables based on the clusters
TaxaListBuilder builder1 = new TaxaListBuilder();
TaxaListBuilder builder2 = new TaxaListBuilder();
for (int t = 0; t < ntaxa; t++) {
if (isInCluster1[bestTrial][t]) builder1.add(myGenotypes.taxa().get(t));
else builder2.add(myGenotypes.taxa().get(t));
}
GenotypeTable gt1 = FilterGenotypeTable.getInstance(myGenotypes, builder1.build());
GenotypeTable gt2 = FilterGenotypeTable.getInstance(myGenotypes, builder2.build());
return new GenotypeTable[]{gt1, gt2};
}
public static float[] snpsAsFloatVector(BitSet[] alleles, int nsnps) {
float[] result = new float[nsnps];
for (int s = 0; s < nsnps; s++) {
if (alleles[0].fastGet(s)) {
result[s] = 2;
if (alleles[1].fastGet(s)) result[s] = 1;
} else {
if (alleles[1].fastGet(s)) result[s] = 0;
// else result[s] = -1;
else result[s] = 1;
}
}
return result;
}
public static float getManhattanDistance(float[] loc, float[] t, int nsnps) {
float d = 0;
int nsites = 0;
for (int s = 0; s < nsnps; s++) {
if (loc[s] >= 0 && t[s] >= 0) {
d += Math.abs(loc[s] - t[s]);
nsites++;
}
}
return d / nsites;
}
public static float[] getMeanLocation(float[] loc, int[] size, float[] t, boolean add, int nsnps) {
float[] result = new float[nsnps];
if (add) {
for (int s = 0; s < nsnps; s++) {
if (t[s] >= 0) {
if (size[s] > 0) {
result[s] = (loc[s] * size[s] + t[s]) / ((float) (size[s] + 1));
size[s]++;
} else {
result[s] = t[s];
size[s] = 1;
}
} else {
result[s] = loc[s];
}
}
} else {
for (int s = 0; s < nsnps; s++) {
if (t[s] >= 0) {
if (size[s] > 1) {
result[s] = (loc[s] * size[s] - t[s]) / ((float) (size[s] - 1));
size[s]--;
} else if (size[s] == 1){
result[s] = 0;
size[s] = 0;
}
} else {
result[s] = loc[s];
}
}
}
return result;
}
public static float[] getMeanLocation(float[][] locs) {
int nsites = locs[0].length;
int ntaxa = locs.length;
float[] result = new float[nsites];
for (int s = 0; s < nsites; s++) {
int count = 0;
float sum = 0;
for (int t = 0; t < ntaxa; t++) {
if (!Float.isNaN(locs[t][s])) {
count++;
sum += locs[t][s];
}
if (count > 0) result[s] = sum / count;
else result[s] = Float.NaN;
}
}
return result;
}
public static void printAlleleStats(GenotypeTable gt, String name) {
int monoCount = 0;
int polyCount = 0;
int[] binCount = new int[21];
int nsites = gt.numberOfSites();
for (int s = 0; s < nsites; s++) {
if (gt.majorAlleleFrequency(s) > 0.75) monoCount++;
else {
polyCount++;
int bin = (int) Math.floor(20 * gt.majorAlleleFrequency(s));
binCount[bin]++;
}
}
System.out.println(name);
System.out.println("mono count = " + monoCount + ", poly count = " + polyCount);
System.out.print("bins: ");
for (int i = 0; i < 20; i++) System.out.print(" " + binCount[i]);
System.out.println();
System.out.println();
}
public static void mergeNonconsensusFiles(String dir, String match, String outfileName) {
File[] mergeFiles = filterFiles(dir, match);
int nfiles = mergeFiles.length;
String[] colLabel = new String[nfiles];
HashMap taxonMap = new HashMap();
String input;
String[] info;
for (int f = 0; f < nfiles; f++) {
System.out.println("processing" + mergeFiles[f].getName());
try {
BufferedReader br = new BufferedReader(new FileReader(mergeFiles[f]));
br.readLine();
input = br.readLine();
info = tab.split(input);
colLabel[f] = info[1];
while (input != null) {
info = tab.split(input);
String[] values = taxonMap.get(info[0]);
if (values == null) {
values = new String[nfiles];
for (int i = 0; i < nfiles; i++) values[i] = "";
taxonMap.put(info[0], values);
}
values[f] = info[2];
input = br.readLine();
}
br.close();
} catch (IOException e) {
e.printStackTrace();
}
}
File outfile = new File(dir, outfileName);
try {
BufferedWriter bw = new BufferedWriter(new FileWriter(outfile));
StringBuilder sb = new StringBuilder("Taxon");
for (int i = 0; i < nfiles; i++) {
sb.append("\t").append(colLabel[i]);
}
bw.write(sb.toString());
bw.newLine();
LinkedList taxaList = new LinkedList(taxonMap.keySet());
Collections.sort(taxaList);
for (String taxon : taxaList) {
sb = new StringBuilder(taxon);
String[] values = taxonMap.get(taxon);
for (int f = 0; f < nfiles; f++) sb.append("\t").append(values[f]);
bw.write(sb.toString());
bw.newLine();
}
bw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public static File[] filterFiles(String dir, String match) {
File matchdir = new File(dir);
final String pattern = new String(match);
File[] filteredFiles = matchdir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
if (name.matches(pattern)) return true;
return false;
}
});
return filteredFiles;
}
public static void mergeFiles(File[] mergeFiles, int idcol, int datacol, int[] colOrder, String outfile) {
int nfiles = mergeFiles.length;
String input;
String[] info;
if (colOrder == null) colOrder = new int[]{0,2,3,4,5,6,7,8,9,1};
HashMap taxonMap = new HashMap();
for (int f = 0; f < nfiles; f++) {
System.out.println("processing" + mergeFiles[f].getName());
try {
BufferedReader br = new BufferedReader(new FileReader(mergeFiles[f]));
br.readLine();
input = br.readLine();
info = tab.split(input);
while (input != null) {
info = tab.split(input);
String[] values = taxonMap.get(info[idcol]);
if (values == null) {
values = new String[nfiles];
for (int i = 0; i < nfiles; i++) values[i] = "";
taxonMap.put(info[idcol], values);
}
values[f] = info[datacol];
input = br.readLine();
}
br.close();
} catch (IOException e) {
e.printStackTrace();
}
}
try {
BufferedWriter bw = new BufferedWriter(new FileWriter(outfile));
StringBuilder sb = new StringBuilder("Taxon");
for (int i = 0; i < nfiles; i++) {
sb.append("\t").append(i + 1);
}
sb.append("\t").append("average");
bw.write(sb.toString());
bw.newLine();
LinkedList taxaList = new LinkedList(taxonMap.keySet());
Collections.sort(taxaList);
for (String taxon : taxaList) {
sb = new StringBuilder(taxon);
String[] values = taxonMap.get(taxon);
double sum = 0;
double count = 0;
for (int f = 0; f < nfiles; f++) {
sb.append("\t").append(values[f]);
try {
sum += Double.parseDouble(values[f]);
count++;
} catch (NumberFormatException e) {
//do nothing
}
}
sb.append("\t").append(sum/count);
bw.write(sb.toString());
bw.newLine();
}
bw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void imputeLinkageMarkers(double interval, boolean hapmapFormat, String origsnpFile, String snpfilePattern, String outfilePattern) {
String[] nuc = new String[]{"A","M","C"};
HashMap byteToNumberString = new HashMap();
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("A"), "0");
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("M"), "1");
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("C"), "2");
HashMap byteToNumber = new HashMap();
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("A"), 0.0);
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("M"), 1.0);
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("C"), 2.0);
Pattern tab = Pattern.compile("\t");
BufferedWriter bw = null;
byte missingByte = NucleotideAlignmentConstants.getNucleotideDiploidByte("NN");
int chromosome = 1;
String outFilename = String.format(outfilePattern, interval);
//String snpFilename = "/Volumes/Macintosh HD 2/data/namgbs/genos_1217/NAM282_20111217_scv10mF8maf002_mgs_E1pLD5_chr" + chromosome + ".hmp.abhv2.impute5to3stateHMM.txt";
String snpFilename = origsnpFile;
// if (hapmapFormat) outFilename = "/Volumes/Macintosh HD 2/data/namgbs/genos_1217/NAM282_20111217_scv10mF8maf002_mgs_E1pLD5_hmp_" + interval + "cmsnps.hmp.txt";
// else outFilename = "/Volumes/Macintosh HD 2/data/namgbs/genos_1217/NAM282_20111217_scv10mF8maf002_mgs_E1pLD5_hmp_" + interval + "cmsnps.txt";
AGPMap agpmap = new AGPMap();
int ntaxa = 0;
try {
BufferedReader br = new BufferedReader(new FileReader(snpFilename));
bw = new BufferedWriter(new FileWriter(outFilename));
String header = br.readLine();
String[] info = tab.split(header);
int ncol = info.length;
ntaxa = ncol - 11;
if (hapmapFormat) bw.write("rs#\talleles\tchrom\tpos\tstrand\tassembly#\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode");
else bw.write("Snp\tallele\tchr\tpos\tcm");
for (int t = 11; t < ncol; t++) {
bw.write("\t");
bw.write(info[t]);
}
bw.newLine();
br.close();
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
String[] family = new String[]{"Z001","Z002","Z003","Z004","Z005","Z006","Z007","Z008","Z009","Z010","Z011","Z012","Z013","Z014","Z015","Z016","Z018","Z019","Z020","Z021","Z022","Z023","Z024","Z025","Z026"};
//impute data for each chromosome
for (int chr = 1; chr <=10; chr++) {
String chrstr = Integer.toString(chr);
for (int fam = 0; fam < 25; fam++) {
System.out.println("Imputing data for chromosome " + chr + ", family " + family[fam] + ".");
// snpFilename = "/Volumes/Macintosh HD 2/results/recombination study/nam/final.Panzea/namibm.combined.hapmap.f.05r.5.chr" + chr + ".family."+ family[fam] + "parents.hmp.txt";
snpFilename = String.format(snpfilePattern, chr, family[fam]);
GenotypeTable a = ImportUtils.readFromHapmap(snpFilename);
int nsnps = a.numberOfSites();
double startgenpos = agpmap.getCmFromPosition(chr, a.positions().chromosomalPosition(0));
//round up to nearest interval
startgenpos = ((double) (Math.ceil(startgenpos / interval))) * interval;
double endgenpos = agpmap.getCmFromPosition(chr, a.positions().chromosomalPosition(nsnps - 1));
//round down to nearest interval
endgenpos = ((double)(Math.floor(endgenpos / interval))) * interval;
int leftflank = 0;
int rightflank = 0;
try {
for (double curpos = startgenpos; curpos <= endgenpos; curpos += interval) {
int physpos = agpmap.getPositionFromCm(chr, curpos);
String physposString = Integer.toString(physpos);
String genpos = Double.toString(curpos);
bw.write("S_");
bw.write(physposString);
bw.write("\timputed\t");
bw.write(chrstr);
bw.write("\t");
bw.write(physposString);
bw.write("\t");
bw.write(genpos);
if (hapmapFormat) bw.write("\tNA\tNA\tNA\tNA\tNA\tNA");
while (physpos > a.positions().chromosomalPosition(rightflank)) rightflank++;
leftflank = rightflank - 1;
if (hapmapFormat) {
for (int t = 0; t < ntaxa; t++) {
bw.write("\t");
int leftndx = leftflank;
int rightndx = rightflank;
while (a.genotype(t, leftndx) == missingByte && leftndx > 0) leftndx--;
while (a.genotype(t, rightndx) == missingByte && rightndx < nsnps - 1) rightndx++;
byte leftByte = a.genotype(t, leftndx);
byte rightByte = a.genotype(t, rightndx);
if (leftByte == missingByte) {
if (rightByte == missingByte) bw.write("N");
else bw.write(NucleotideAlignmentConstants.getNucleotideIUPAC(rightByte));
}
else if (rightByte == missingByte) bw.write(NucleotideAlignmentConstants.getNucleotideIUPAC(leftByte));
else if (a.genotype(t, leftndx) == a.genotype(t, rightndx)) bw.write(NucleotideAlignmentConstants.getNucleotideIUPAC(leftByte));
else bw.write("N");
}
} else {
for (int t = 0; t < ntaxa; t++) {
bw.write("\t");
int leftndx = leftflank;
int rightndx = rightflank;
while (a.genotype(t, leftndx) == missingByte && leftndx > 0) leftndx--;
while (a.genotype(t, rightndx) == missingByte && rightndx < nsnps - 1) rightndx++;
byte leftByte = a.genotype(t, leftndx);
byte rightByte = a.genotype(t, rightndx);
if (leftByte == missingByte) {
if (rightByte == missingByte) bw.write("-");
else bw.write(byteToNumberString.get(NucleotideAlignmentConstants.getNucleotideIUPAC(rightByte)));
}
else if (rightByte == missingByte) bw.write(byteToNumberString.get(NucleotideAlignmentConstants.getNucleotideIUPAC(leftByte)));
else if (a.genotype(t, leftndx) == a.genotype(t, rightndx)) bw.write(byteToNumberString.get(NucleotideAlignmentConstants.getNucleotideIUPAC(rightByte)));
else {
double leftval = byteToNumber.get(NucleotideAlignmentConstants.getNucleotideIUPAC(leftByte));
double rightval = byteToNumber.get(NucleotideAlignmentConstants.getNucleotideIUPAC(rightByte));
int leftpos = a.positions().chromosomalPosition(leftndx);
int rightpos = a.positions().chromosomalPosition(rightndx);
double pd = ((double) (physpos - leftpos)) / ((double) (rightpos - leftpos));
double thisval = leftval * (1 - pd) + rightval * pd;
bw.write(Double.toString(thisval));
};
}
}
bw.newLine();
}
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
}
}
try {
bw.close();
} catch(IOException e) {
e.printStackTrace();
}
System.out.println("Finished imputing markers.");
}
//use this for imputation of July final build results
//modified for later builds
public static void imputeLinkageMarkersAcrossFamilies(double interval, boolean hapmapFormat, boolean excludeTaxa) {
class ImputedSnp {
int physicalPos;
double geneticPos;
StringBuilder sb = new StringBuilder();
}
LinkedList excludeList = getListOfTaxa("/Volumes/Macintosh HD 2/results/recombination study/nam/final.Panzea.consolidated.B/Nam.exclude.release.1.txt");
String[] nuc = new String[]{"A","M","C"};
Pattern tab = Pattern.compile("\t");
byte missingByte = NucleotideAlignmentConstants.getNucleotideDiploidByte("NN");
AGPMap agpmap = new AGPMap();
//impute data for each chromosome
for (int chr = 1; chr <=10; chr++) {
// File snpfiledir = new File("/Volumes/Macintosh HD 2/results/recombination study/nam/final.Panzea.consolidated.B");
// File snpfiledir = new File("/Volumes/Macintosh HD 2/results/recombination study/nam/final.Panzea.consolidated");
File snpfiledir = new File("/Volumes/Macintosh HD 2/data/zea/build2.6/nam/imputed.var.plusfounders");
final String chrname = "chr" + chr + ".family";
File[] snpFiles = snpfiledir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
// if (name.startsWith("nam.consolidated") && name.contains(chrname)) return true;
if (name.startsWith("USNAM_build2.6_imputed_var") && name.contains(chrname)) return true;
return false;
}
});
String chrstr = Integer.toString(chr);
//Get the minimum and maximum physical positions for all families
int minpos = Integer.MAX_VALUE;
int maxpos = 0;
for (File snpfile : snpFiles) {
try {
BufferedReader br = new BufferedReader(new FileReader(snpfile));
br.readLine();
String input = br.readLine();
String[] info = tab.split(input, 5);
int firstpos = Integer.parseInt(info[3]);
String testInput;
while ((testInput = br.readLine()) != null) {
input = testInput;
}
info = tab.split(input, 5);
int lastpos = Integer.parseInt(info[3]);
minpos = Math.min(minpos, firstpos);
maxpos = Math.max(maxpos, lastpos);
br.close();
} catch(IOException e) {
e.printStackTrace();
System.exit(-1);
}
}
double startgenpos = agpmap.getCmFromPosition(chr, minpos);
//round up to nearest interval
startgenpos = ((double) (Math.ceil(startgenpos / interval))) * interval;
int nIntervals = (int) ((agpmap.getCmFromPosition(chr, maxpos) - startgenpos) / interval);
int nImputedSnps = nIntervals + 1;
LinkedList snpList = new LinkedList();
double curpos = startgenpos;
for (int i = 0; i < nImputedSnps; i++) {
ImputedSnp snp = new ImputedSnp();
snp.geneticPos = curpos;
curpos += interval;
snp.physicalPos = agpmap.getPositionFromCm(chr, curpos);
snpList.add(snp);
}
StringBuilder taxaHeader = new StringBuilder();
for (File snpfile : snpFiles) {
System.out.println("Imputing data for " + snpfile.getName() + ".");
GenotypeTable a = ImportUtils.readFromHapmap(snpfile.getPath());
boolean b73isA = isB73HaplotypeA(a);
HashMap byteToNumberString = new HashMap();
HashMap byteToNumber = new HashMap();
HashMap byteToNucleotide = new HashMap();
if (b73isA) {
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), "0");
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), "1");
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), "1");
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), "2");
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), 0.0);
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), 1.0);
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), 1.0);
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), 2.0);
byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), "A");
byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), "M");
byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), "M");
byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), "C");
} else {
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), "2");
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), "1");
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), "1");
byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), "0");
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), 2.0);
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), 1.0);
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), 1.0);
byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), 0.0);
byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), "C");
byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), "M");
byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), "M");
byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), "A");
}
int nsnps = a.numberOfSites();
int ntaxa = a.numberOfTaxa();
int leftflank = 0;
int rightflank = 0;
TaxaList myTaxa = a.taxa();
for (int t = 0; t < ntaxa; t++) {
// if (!a.getTaxaName(t).startsWith(excludeTaxon) && !excludeList.contains(a.getTaxaName(t))) taxaHeader.append("\t").append(a.getFullTaxaName(t));
// if (a.getTaxaName(t).startsWith("Z0")) taxaHeader.append("\t").append(a.getFullTaxaName(t));
if (useTaxon(myTaxa.taxaName(t), excludeList)) taxaHeader.append("\t").append(myTaxa.taxaName(t));
}
for (ImputedSnp isnp : snpList) {
while (rightflank < nsnps && isnp.physicalPos > a.positions().chromosomalPosition(rightflank)) rightflank++;
// System.out.println("rightflank= " + rightflank + ", snp physicalPos= " + isnp.physicalPos + ", position of rightflank= " + a.getPositionInLocus(rightflank)); //debug
leftflank = rightflank - 1;
// System.out.println("leftflank= " + leftflank + ", snp physicalPos= " + isnp.physicalPos + ", position of leftflank= " + a.getPositionInLocus(leftflank)); //debug
if (hapmapFormat) {
for (int t = 0; t < ntaxa; t++) {
// if (a.getTaxaName(t).startsWith(excludeTaxon)) continue;
// if (excludeList.contains(a.getTaxaName(t))) continue;
// if (!a.getTaxaName(t).startsWith("Z0")) continue;
if (!useTaxon(a.taxa().taxaName(t), excludeList)) continue;
isnp.sb.append("\t");
byte leftByte, rightByte;
if (leftflank < 0) leftByte = missingByte;
else {
int leftndx = leftflank;
while (a.genotype(t, leftndx) == missingByte && leftndx > 0) leftndx--;
leftByte = a.genotype(t, leftndx);
}
if (rightflank > nsnps - 1) rightByte = missingByte;
else {
int rightndx = rightflank;
while (a.genotype(t, rightndx) == missingByte && rightndx < nsnps - 1) rightndx++;
rightByte = a.genotype(t, rightndx);
}
if (leftByte == missingByte) {
if (rightByte == missingByte) isnp.sb.append("N");
else isnp.sb.append(byteToNucleotide.get(rightByte));
} else if (rightByte == missingByte) isnp.sb.append(byteToNucleotide.get(leftByte));
else if (leftByte == rightByte) isnp.sb.append(byteToNucleotide.get(leftByte));
else isnp.sb.append("N");
}
} else {
for (int t = 0; t < ntaxa; t++) {
// if (a.getTaxaName(t).startsWith(excludeTaxon)) continue;
// if (excludeList.contains(a.getTaxaName(t))) continue;
// if (!a.getTaxaName(t).startsWith("Z0")) continue;
if (!useTaxon(a.taxa().taxaName(t), excludeList)) continue;
isnp.sb.append("\t");
byte leftByte, rightByte;
int leftndx = leftflank;
if (leftflank < 0) leftByte = missingByte;
else {
while (a.genotype(t, leftndx) == missingByte && leftndx > 0) leftndx--;
leftByte = a.genotype(t, leftndx);
}
int rightndx = rightflank;
if (rightflank > nsnps - 1) rightByte = missingByte;
else {
while (a.genotype(t, rightndx) == missingByte && rightndx < nsnps - 1) rightndx++;
rightByte = a.genotype(t, rightndx);
}
if (leftByte == missingByte) {
if (rightByte == missingByte) isnp.sb.append("-");
else isnp.sb.append(byteToNumberString.get(rightByte));
}
else if (rightByte == missingByte) isnp.sb.append(byteToNumberString.get(leftByte));
else if (leftByte == rightByte) isnp.sb.append(byteToNumberString.get(rightByte));
else {
double leftval = byteToNumber.get(leftByte);
double rightval = byteToNumber.get(rightByte);
int leftpos = a.positions().chromosomalPosition(leftndx);
int rightpos = a.positions().chromosomalPosition(rightndx);
double pd = ((double) (isnp.physicalPos - leftpos)) / ((double) (rightpos - leftpos));
double thisval = leftval * (1 - pd) + rightval * pd;
isnp.sb.append(Double.toString(thisval));
}
// System.out.println("leftflank = " + leftflank + ", rightflank = " + rightflank + ", leftndx = " + leftndx + ", rightndx = " + rightndx + ", leftbyte = " + NucleotideAlignmentConstants.getNucleotideIUPAC(leftByte) + ", rightbyte = " + NucleotideAlignmentConstants.getNucleotideIUPAC(rightByte) + ", imputed value = " + isnp.sb.charAt(isnp.sb.length() - 1));
}
}
}
}
//write out the chromosome file here
File outfile;
// if (hapmapFormat) outfile = new File(snpfiledir, "imputedMarkers.release.1.chr" + chrstr +"." + interval +"cm.final.Panzea.consolidated.B.hmp.txt");
// else outfile = new File(snpfiledir, "imputedMarkers.release.1.chr" + chrstr +"." + interval +"cm.final.Panzea.consolidated.B.txt");
// if (hapmapFormat) outfile = new File(snpfiledir, "imputedMarkers.release.1.chr" + chrstr +"." + interval +"cm.final.Panzea.consolidated.hmp.txt");
// else outfile = new File(snpfiledir, "imputedMarkers.release.1.chr" + chrstr +"." + interval +"cm.final.Panzea.consolidated.txt");
// if (hapmapFormat) outfile = new File(snpfiledir, "imputed.Markers/imputedMarkers.chr" + chrstr +"." + interval +"cm.final.Panzea.consolidated.B.hmp.txt");
// else outfile = new File(snpfiledir, "imputed.Markers/imputedMarkers.chr" + chrstr +"." + interval +"cm.final.Panzea.consolidated.B.txt");
if (hapmapFormat) outfile = new File(snpfiledir, "imputed.Markers/imputedMarkers.chr" + chrstr +"." + interval +"cm.USNAM2.6.imputed.var.hmp.txt");
else outfile = new File(snpfiledir, "imputed.Markers/imputedMarkers.chr" + chrstr +"." + interval +"cm.USNAM2.6.imputed.var.txt");
try{
BufferedWriter bw = new BufferedWriter(new FileWriter(outfile));
if (hapmapFormat) {
bw.write("rs#\talleles\tchrom\tpos\tcm\tassembly#\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode");
} else {
bw.write("Snp\tallele\tchr\tpos\tcm");
}
bw.write(taxaHeader.toString());
bw.write("\n");
for (ImputedSnp isnp : snpList) {
bw.write(String.format("S%d_%d\tNA\t", chr, isnp.physicalPos));
bw.write(chrstr);
bw.write("\t");
bw.write(Integer.toString(isnp.physicalPos));
bw.write("\t");
bw.write(Double.toString(isnp.geneticPos));
if (hapmapFormat) bw.write("\tNA\tNA\tNA\tNA\tNA\tNA");
bw.write(isnp.sb.toString());
bw.write("\n");
}
bw.close();
} catch(IOException e) {
}
}
}
public static boolean useTaxon(String name, LinkedList excludelist){
if (excludelist != null) {
if (name.startsWith("Z0") & !excludelist.contains(name)) return true;
} else {
if (name.startsWith("Z0")) return true;
}
return false;
}
public static boolean isB73HaplotypeA(GenotypeTable a) {
TaxaList myTaxa = a.taxa();
int ndx = myTaxa.indexOf("B73(PI550473):MRG:2:250027110");
int nsnps = a.numberOfSites();
HashMap alleleCounts = new HashMap();
for (int s = 0; s < nsnps; s++) {
Byte allele = a.genotype(ndx, s);
Integer count = alleleCounts.get(allele);
if (count == null) alleleCounts.put(allele, 1);
else alleleCounts.put(allele, 1 + count);
}
// System.out.println("B73 allele counts:");
Byte bestAllele = -1;
int maxCount = 0;
for (Byte b:alleleCounts.keySet()) {
int count = alleleCounts.get(b);
// System.out.println(NucleotideAlignmentConstants.getNucleotideIUPAC(b) + ", " + count);
if (count > maxCount) {
maxCount = count;
bestAllele = b;
}
}
if (bestAllele.byteValue() == NucleotideAlignmentConstants.getNucleotideDiploidByte('A')) return true;
return false;
}
public static void imputeLinkageMarkersFrom1106(double interval) {
//the input data
String snpFilename = "/Volumes/Macintosh HD 2/data/namgbs/ImputedMarkerGenotypes_flowering_traits_092909.txt";
String outFilename = "/Volumes/Macintosh HD 2/results/namgbs/jointlinkage/array1106/imputedMarkers.1106.allchr.txt";
ArrayList genotypes = new ArrayList();
ArrayList taxanames = new ArrayList();
int nmarkers = 1106;
int ntaxa;
AGPMap agpmap = new AGPMap();
BufferedWriter bw = null;
try {
BufferedReader br = new BufferedReader(new FileReader(snpFilename));
br.readLine();
String input;
while ((input = br.readLine()) != null) {
String[] info = tab.split(input);
float[] geno = new float[nmarkers];
for (int i = 0; i < nmarkers; i++) geno[i] = Float.parseFloat(info[i+5]);
genotypes.add(geno);
taxanames.add(info[0]);
}
br.close();
bw = new BufferedWriter(new FileWriter(outFilename));
bw.write("Snp\tallele\tchr\tpos\tcm");
for (String taxon:taxanames) {
bw.write("\t");
bw.write(taxon);
}
bw.write("\n");
} catch(IOException e) {
e.printStackTrace();
System.exit(-1);
}
//impute data for each chromosome
ntaxa = taxanames.size();
for (int chr = 1; chr <=10; chr++) {
String chrstr = Integer.toString(chr);
double startgenpos = agpmap.getFirstGeneticPosition(chr);
//round down to nearest interval
startgenpos = ((double) (Math.floor(startgenpos / interval))) * interval;
double endgenpos = agpmap.getLastGeneticPosition(chr);
//round up to nearest interval
endgenpos = ((double)(Math.ceil(endgenpos / interval))) * interval;
try {
for (double curpos = startgenpos; curpos <= endgenpos; curpos += interval) {
int physpos = agpmap.getPositionFromCm(chr, curpos);
String physposString = Integer.toString(physpos);
String genpos = Double.toString(curpos);
bw.write("S");
bw.write(chrstr);
bw.write("_");
bw.write(physposString);
bw.write("\timputed\t");
bw.write(chrstr);
bw.write("\t");
bw.write(physposString);
bw.write("\t");
bw.write(genpos);
int[] flanks = agpmap.getFlankingMarkerIndices(chr, curpos);
for (int t = 0; t < ntaxa; t++) {
bw.write("\t");
double val;
if (flanks[0] < 0) val = genotypes.get(t)[flanks[1]];
else if(flanks[1] >= nmarkers) val = genotypes.get(t)[flanks[0]];
else if (flanks[0] == flanks[1]) val = genotypes.get(t)[flanks[0]];
else {
double pd = (curpos - agpmap.getGeneticPos(flanks[0])) / (agpmap.getGeneticPos(flanks[1]) - agpmap.getGeneticPos(flanks[0]));
val = genotypes.get(t)[flanks[0]] * (1 - pd) + genotypes.get(t)[flanks[1]] * pd;
}
bw.write(Double.toString(val));
}
bw.newLine();
}
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
}
try {
bw.close();
} catch(IOException e) {
e.printStackTrace();
}
System.out.println("Finished imputing markers.");
}
public static LinkedList getListOfTaxa(String filename) {
LinkedList taxaList = new LinkedList();
try {
BufferedReader br = new BufferedReader(new FileReader(filename));
String taxon;
while((taxon = br.readLine()) != null) taxaList.add(taxon);
br.close();
} catch(IOException e){
e.printStackTrace();
System.exit(-1);
}
return taxaList;
}
public static void serializePhasedHaplotypes(Map phasedHaps, String filename) {
try {
FileOutputStream fos = new FileOutputStream(new File(filename));
ObjectOutputStream oos = new ObjectOutputStream(fos);
oos.writeObject(phasedHaps);
oos.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public static Map restorePhasedHaplotypes(Path restorePath) {
try {
FileInputStream fis = new FileInputStream(restorePath.toFile());
ObjectInputStream ois = new ObjectInputStream(fis);
Map phasedHaps = (Map) ois.readObject();
ois.close();
return phasedHaps;
} catch (IOException | ClassNotFoundException e) {
e.printStackTrace();
}
return null;
}
public static void exportCrossoverPositions(String parentcallFilename, String outputFilename) {
byte NN = GenotypeTable.UNKNOWN_DIPLOID_ALLELE;
System.out.println("Starting exportCrossoverPositions.");
File genoFile = new File(parentcallFilename);
FileLoadPlugin flp = new FileLoadPlugin(null, false);
flp.setTheFileType(TasselFileType.Unknown);
flp.setOpenFiles(new File[]{genoFile});
GenotypeTable myGeno = (GenotypeTable) flp.performFunction(null).getData(0).getData();
int ntaxa = myGeno.numberOfTaxa();
int nsites = myGeno.numberOfSites();
byte[] prevGeno = myGeno.genotypeAllTaxa(0);
Chromosome currChrom = myGeno.chromosome(0);
int[] startpos = new int[nsites];
try (BufferedWriter bw = Files.newBufferedWriter(Paths.get(outputFilename))) {
bw.write("taxon\tchr\tstart\tend\n");
for (int s = 1; s < nsites; s++) {
if (currChrom != myGeno.chromosome(s)) {
currChrom = myGeno.chromosome(s);
prevGeno = myGeno.genotypeAllTaxa(s);
} else {
byte[] siteGeno = myGeno.genotypeAllTaxa(s);
for (int t = 0; t < ntaxa; t++) {
if (prevGeno[t] == NN) {
prevGeno[t] = siteGeno[t];
startpos[t] = myGeno.chromosomalPosition(s);
}
else if (siteGeno[t] == NN) {
//do nothing
} else {
if (siteGeno[t] != prevGeno[t]) {
//record a crossover
bw.write(String.format("%s\t%s\t%d\t%d\n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s)));
prevGeno[t] = siteGeno[t];
}
startpos[t] = myGeno.chromosomalPosition(s);
}
}
}
}
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
System.out.println("Finished exporting crossover positions.");
}
public static void exportCrossoverPositionsByParent(String parentcallFilename, String outputMomFilename, String outputDadFilename) {
byte AA = NucleotideAlignmentConstants.getNucleotideDiploidByte("AA");
byte CC = NucleotideAlignmentConstants.getNucleotideDiploidByte("CC");
byte GG = NucleotideAlignmentConstants.getNucleotideDiploidByte("GG");
byte TT = NucleotideAlignmentConstants.getNucleotideDiploidByte("TT");
byte NN = GenotypeTable.UNKNOWN_DIPLOID_ALLELE;
System.out.println("Starting exportCrossoverPositions.");
File genoFile = new File(parentcallFilename);
FileLoadPlugin flp = new FileLoadPlugin(null, false);
flp.setTheFileType(TasselFileType.Unknown);
flp.setOpenFiles(new File[]{genoFile});
GenotypeTable myGeno = (GenotypeTable) flp.performFunction(null).getData(0).getData();
int ntaxa = myGeno.numberOfTaxa();
int nsites = myGeno.numberOfSites();
byte[] prevGeno = myGeno.genotypeAllTaxa(0);
Chromosome currChrom = myGeno.chromosome(0);
int[] startpos = new int[nsites];
try {
PrintWriter pwMom = new PrintWriter(outputMomFilename);
PrintWriter pwDad = new PrintWriter(outputDadFilename);
pwMom.println("taxon\tchr\tstart\tend");
pwDad.println("taxon\tchr\tstart\tend");
for (int s = 1; s < nsites; s++) {
if (currChrom != myGeno.chromosome(s)) {
currChrom = myGeno.chromosome(s);
prevGeno = myGeno.genotypeAllTaxa(s);
} else {
byte[] siteGeno = myGeno.genotypeAllTaxa(s);
for (int t = 0; t < ntaxa; t++) {
if (prevGeno[t] == NN) {
prevGeno[t] = siteGeno[t];
startpos[t] = myGeno.chromosomalPosition(s);
}
else if (siteGeno[t] == NN) {
//do nothing
} else {
if (siteGeno[t] != prevGeno[t]) {
//record a crossover
if (siteGeno[t] == AA) {
if (prevGeno[t] == CC) pwDad.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
else if (prevGeno[t] == GG) pwMom.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
else if (prevGeno[t] == TT) {
pwDad.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
pwMom.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
}
} else if (siteGeno[t] == CC) {
if (prevGeno[t] == AA) pwDad.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
else if (prevGeno[t] == TT) pwMom.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
else if (prevGeno[t] == GG) {
pwDad.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
pwMom.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
}
} else if (siteGeno[t] == GG) {
if (prevGeno[t] == TT) pwDad.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
else if (prevGeno[t] == AA) pwMom.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
else if (prevGeno[t] == CC) {
pwDad.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
pwMom.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
}
} else if (siteGeno[t] == TT) {
if (prevGeno[t] == GG) pwDad.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
else if (prevGeno[t] == CC) pwMom.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
else if (prevGeno[t] == AA) {
pwDad.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
pwMom.printf("%s\t%s\t%d\t%d%n", myGeno.taxaName(t), currChrom.getName(), startpos[t], myGeno.chromosomalPosition(s));
}
}
prevGeno[t] = siteGeno[t];
}
startpos[t] = myGeno.chromosomalPosition(s);
}
}
}
}
pwMom.close();
pwDad.close();
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
System.out.println("Finished exporting crossover positions.");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy