net.maizegenetics.pangenome.GVCFStats Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome;
import net.maizegenetics.util.TableReportBuilder;
import net.maizegenetics.util.TableReportUtils;
import net.maizegenetics.util.Utils;
import java.io.BufferedReader;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
* @author Terry Casstevens Created April 12, 2017
*/
public class GVCFStats {
public static void main(String[] args) {
String dir = args[0];
int minPos = 0;
int maxPos = Integer.MAX_VALUE;
List filenames = new ArrayList<>();
for (File file : new File(dir).listFiles()) {
String filename = file.getAbsolutePath();
if (filename.endsWith(".g.vcf")) {
filenames.add(filename);
}
}
int numColumns = filenames.size() + 1;
String[] columnNames = new String[numColumns];
columnNames[0] = "Stat";
for (int i = 0; i < filenames.size(); i++) {
columnNames[i + 1] = Utils.getFilename(filenames.get(i));
}
TableReportBuilder builder = TableReportBuilder.getInstance("GVCF Stats", columnNames);
String[] startPosRow = new String[numColumns];
startPosRow[0] = "Start Pos";
String[] endPosRow = new String[numColumns];
endPosRow[0] = "End Pos";
String[] aveDepthRow = new String[numColumns];
aveDepthRow[0] = "Ave Depth";
String[] bpNonZeroCoverage = new String[numColumns];
bpNonZeroCoverage[0] = "bp non zero coverage";
String[] numNonReferenceRow = new String[numColumns];
numNonReferenceRow[0] = "num non reference";
String[] numHomozygousRow = new String[numColumns];
numHomozygousRow[0] = "Num Homozygous Ref";
String[] numHomozygousAltRow = new String[numColumns];
numHomozygousAltRow[0] = "Num Homozygous Alt";
String[] numHomozygousAlt1Row = new String[numColumns];
numHomozygousAlt1Row[0] = "Num Homozygous Alt Depth 1";
String[] numHomozygousAlt2Row = new String[numColumns];
numHomozygousAlt2Row[0] = "Num Homozygous Alt Depth 2";
String[] numHomozygousAlt3Row = new String[numColumns];
numHomozygousAlt3Row[0] = "Num Homozygous Alt Depth 3+";
String[] numHeterozygousRow = new String[numColumns];
numHeterozygousRow[0] = "Num Heterozygous";
int numBins = 175;
String[][] depthBinRows = new String[numBins][numColumns];
for (int i = 0; i < numBins; i++) {
depthBinRows[i][0] = String.valueOf(i);
}
for (int f = 0; f < filenames.size(); f++) {
String filename = filenames.get(f);
if (filename.endsWith(".g.vcf")) {
System.out.println(Utils.getFilename(filename) + "...");
try (BufferedReader reader = Utils.getBufferedReader(filename)) {
String line = reader.readLine();
while (line != null && line.startsWith("#")) {
line = reader.readLine();
}
int startPos = Integer.MAX_VALUE;
int endPos = Integer.MIN_VALUE;
int totalBPs = 0;
int totalDP = 0;
int[] depthBins = new int[numBins];
int numHomozygous = 0;
int numHomozygousAlt = 0;
int numHomozygousAlt1 = 0;
int numHomozygousAlt2 = 0;
int numHomozygousAlt3 = 0;
int numHeterozygous = 0;
int numNotReference = 0;
while (line != null) {
// 0 1 2 3 4 5 6 7 8 9
// #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT W22
// 8 17639 . C . . END=17789 GT:DP:GQ:MIN_DP:PL 0:12:99:8:0,140
// 8 17790 . T A, 175.00 . DP=5;MLEAC=1,0;MLEAF=1.000,0.000;RAW_MQ=18000.00 GT:AD:DP:GQ:PL:SB 1:0,5,0:5:99:205,0,205:0,0,2,3
String[] tokens = line.split("\t");
String chr = tokens[0];
int start = Integer.parseInt(tokens[1]);
if (start < startPos) {
startPos = start;
}
int end = start;
String id = tokens[2];
String ref = tokens[3];
String alt = tokens[4];
String qual = tokens[5];
String filter = tokens[6];
String[] info = tokens[7].split(";");
for (String current : info) {
String[] keyValue = current.split("=");
if (keyValue[0].equals("END")) {
end = Integer.parseInt(keyValue[1]);
}
}
if (end > endPos) {
endPos = end;
}
int length = end - start + 1;
totalBPs += length;
if (!alt.equals("")) {
numNotReference += length;
}
String[] formats = tokens[8].split(":");
String[] values = tokens[9].split(":");
int numFormats = formats.length;
if (numFormats != values.length) {
throw new IllegalArgumentException("unequal formats");
}
boolean isHomo = true;
boolean isAlt = false;
int dpValue = 0;
for (int i = 0; i < numFormats; i++) {
if (formats[i].equals("DP")) {
dpValue = Integer.parseInt(values[i]);
totalDP += (dpValue * length);
depthBins[dpValue] += length;
} else if (formats[i].equals("AD")) {
String[] alleleDepths = values[i].split(",");
if (alleleDepths.length > 1 && !alleleDepths[0].equals("0") && !alleleDepths[1].equals("0")) {
isHomo = false;
}
} else if (formats[i].equals("GT")) {
if (!values[i].equals("0")) {
isAlt = true;
}
}
}
if (dpValue > 0) {
if (isHomo) {
if (isAlt) {
numHomozygousAlt += length;
if (dpValue == 1) {
numHomozygousAlt1 += length;
} else if (dpValue == 2) {
numHomozygousAlt2 += length;
} else {
numHomozygousAlt3 += length;
}
} else {
numHomozygous += length;
}
} else {
numHeterozygous += length;
}
}
line = reader.readLine();
}
startPosRow[f + 1] = String.valueOf(startPos);
endPosRow[f + 1] = String.valueOf(endPos);
double aveDP = (double) totalDP / (double) totalBPs;
aveDepthRow[f + 1] = String.valueOf(aveDP);
int totalBPNonZeroCoverage = 0;
for (int i = 0; i < numBins; i++) {
depthBinRows[i][f + 1] = String.valueOf(depthBins[i]);
if (i != 0) {
totalBPNonZeroCoverage += depthBins[i];
}
}
bpNonZeroCoverage[f + 1] = String.valueOf(totalBPNonZeroCoverage);
numNonReferenceRow[f + 1] = String.valueOf(numNotReference);
numHomozygousRow[f + 1] = String.valueOf(numHomozygous);
numHomozygousAltRow[f + 1] = String.valueOf(numHomozygousAlt);
numHomozygousAlt1Row[f + 1] = String.valueOf(numHomozygousAlt1);
numHomozygousAlt2Row[f + 1] = String.valueOf(numHomozygousAlt2);
numHomozygousAlt3Row[f + 1] = String.valueOf(numHomozygousAlt3);
numHeterozygousRow[f + 1] = String.valueOf(numHeterozygous);
} catch (Exception e) {
e.printStackTrace();
}
}
}
builder.add(startPosRow);
builder.add(endPosRow);
builder.add(aveDepthRow);
builder.add(bpNonZeroCoverage);
builder.add(numNonReferenceRow);
builder.add(numHomozygousRow);
builder.add(numHomozygousAltRow);
builder.add(numHomozygousAlt1Row);
builder.add(numHomozygousAlt2Row);
builder.add(numHomozygousAlt3Row);
builder.add(numHeterozygousRow);
for (int i = 0; i < numBins; i++) {
builder.add(depthBinRows[i]);
}
TableReportUtils.saveDelimitedTableReport(builder.build(), new File(Utils.getFilename(dir) + "_GVCFStats.txt"));
}
}