net.maizegenetics.dna.snp.io.ReadNumericMarkerUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package for evaluating trait associations, evolutionary patterns, and linkage
disequilibrium.
package net.maizegenetics.dna.snp.io;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Optional;
import java.util.regex.Pattern;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.map.GeneralPosition;
import net.maizegenetics.dna.map.PositionList;
import net.maizegenetics.dna.map.PositionListBuilder;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableBuilder;
import net.maizegenetics.dna.snp.score.ReferenceProbability;
import net.maizegenetics.dna.snp.score.ReferenceProbabilityBuilder;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListBuilder;
import net.maizegenetics.util.Utils;
public class ReadNumericMarkerUtils {
// Private no-argument constructor: prevents instantiation from outside this class.
// The visible entry point, readNumericMarkerFile, is static and needs no instance.
private ReadNumericMarkerUtils() {
}
/**
* @param inputFile the input file with TASSEL v3 annotations or with no
* input directives
* @return
* @throws IOException
*/
public static GenotypeTable readNumericMarkerFile(String inputFile) throws IOException {
BufferedReader br = Utils.getBufferedReader(inputFile);
String inputline = br.readLine();
Pattern sep = Pattern.compile("\\s+");
String[] markerName = null;
int numberOfColumns = 0;
//process header rows and count the non-blank rows
int numberOfDataLines = 0;
while (inputline != null) {
inputline = inputline.trim();
String[] parsedline = sep.split(inputline);
if (parsedline.length > 1 && !inputline.startsWith("<") && !inputline.startsWith("#")) {
numberOfDataLines++;
} else if (parsedline[0].toUpperCase().equals("")) {
markerName = processHeader(numberOfColumns, parsedline, inputFile);
numberOfColumns = markerName.length;
}
inputline = br.readLine();
}
br.close();
if (numberOfDataLines == 0) {
StringBuilder msg = new StringBuilder("Error in ");
msg.append(inputFile);
msg.append(": Missing taxa values.");
throw new IllegalArgumentException(msg.toString());
}
if (numberOfColumns == 0) {
StringBuilder msg = new StringBuilder("Error in ");
msg.append(inputFile);
msg.append(": Missing taxa data values.");
throw new IllegalArgumentException(msg.toString());
}
//process body of data: we needed numberOfColumns and numberOfDataLines from above
String[][] textdata = new String[numberOfColumns][numberOfDataLines];
String[] taxanames = new String[numberOfDataLines];
br = Utils.getBufferedReader(inputFile);
inputline = br.readLine();
int totallines = 0;
int linecount = 0;
while (inputline != null) {
totallines++;
inputline = inputline.trim();
String[] parsedline = sep.split(inputline);
if (parsedline.length > 1 && !inputline.startsWith("<") && !inputline.startsWith("#")) {
if (parsedline.length != numberOfColumns + 1) {
StringBuilder msg = new StringBuilder("Error in ");
msg.append(inputFile);
msg.append(" line ").append(totallines);
msg.append(": Incorrect number of data values.");
throw new IllegalArgumentException(msg.toString());
}
taxanames[linecount] = parsedline[0];
for (int c = 0; c < numberOfColumns; c++) {
textdata[c][linecount] = parsedline[c + 1];
}
linecount++;
}
inputline = br.readLine();
}
br.close();
TaxaList tL = new TaxaListBuilder().addAll(taxanames).build();
ReferenceProbabilityBuilder rpb = ReferenceProbabilityBuilder.getInstance(numberOfDataLines,
numberOfColumns, tL);
//Create a list of values for each taxon.
for (int indexR = 0; indexR < numberOfDataLines; indexR++) {
float[] fvalues = new float[numberOfColumns];
for (int indexC = 0; indexC < numberOfColumns; indexC++) {
// Create array of floats - these are values for each taxon
// Note: when the file was read one line at a time, the text array was filled as
// textdata[columns][rows], so it is indexed as [column][row] here.
if (textdata[indexC][indexR].equalsIgnoreCase("NaN")
|| textdata[indexC][indexR].equalsIgnoreCase("NA")
|| textdata[indexC][indexR].equals(".")) {
fvalues[indexC] = Float.NaN;
} else {
try {
fvalues[indexC] = Float.parseFloat(textdata[indexC][indexR]);
} catch (Exception e) {
throw new IllegalArgumentException("ReadNumericMarkerUtils: readNumericMarkerFile: Can't convert: " + textdata[indexC][indexR] + " to a number on data line: " + indexR);
}
}
}
rpb.addTaxon(indexR, fvalues); // taxon is the row.
}
ReferenceProbability rp = rpb.build(); // build does the "new"
// Build PositionList for GenotypeTable
PositionListBuilder posBuilder = new PositionListBuilder();
for (int mNum = 0; mNum < numberOfColumns; mNum++) {
String snpname = markerName[mNum];
Optional
© 2015 - 2025 Weber Informatics LLC | Privacy Policy