All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.GVCFStats Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome;

import net.maizegenetics.util.TableReportBuilder;
import net.maizegenetics.util.TableReportUtils;
import net.maizegenetics.util.Utils;

import java.io.BufferedReader;
import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * @author Terry Casstevens Created April 12, 2017
 */
public class GVCFStats {

    public static void main(String[] args) {

        String dir = args[0];

        int minPos = 0;
        int maxPos = Integer.MAX_VALUE;

        List filenames = new ArrayList<>();
        for (File file : new File(dir).listFiles()) {

            String filename = file.getAbsolutePath();

            if (filename.endsWith(".g.vcf")) {
                filenames.add(filename);
            }

        }

        int numColumns = filenames.size() + 1;
        String[] columnNames = new String[numColumns];
        columnNames[0] = "Stat";
        for (int i = 0; i < filenames.size(); i++) {
            columnNames[i + 1] = Utils.getFilename(filenames.get(i));
        }

        TableReportBuilder builder = TableReportBuilder.getInstance("GVCF Stats", columnNames);

        String[] startPosRow = new String[numColumns];
        startPosRow[0] = "Start Pos";

        String[] endPosRow = new String[numColumns];
        endPosRow[0] = "End Pos";

        String[] aveDepthRow = new String[numColumns];
        aveDepthRow[0] = "Ave Depth";

        String[] bpNonZeroCoverage = new String[numColumns];
        bpNonZeroCoverage[0] = "bp non zero coverage";

        String[] numNonReferenceRow = new String[numColumns];
        numNonReferenceRow[0] = "num non reference";

        String[] numHomozygousRow = new String[numColumns];
        numHomozygousRow[0] = "Num Homozygous Ref";

        String[] numHomozygousAltRow = new String[numColumns];
        numHomozygousAltRow[0] = "Num Homozygous Alt";

        String[] numHomozygousAlt1Row = new String[numColumns];
        numHomozygousAlt1Row[0] = "Num Homozygous Alt Depth 1";

        String[] numHomozygousAlt2Row = new String[numColumns];
        numHomozygousAlt2Row[0] = "Num Homozygous Alt Depth 2";

        String[] numHomozygousAlt3Row = new String[numColumns];
        numHomozygousAlt3Row[0] = "Num Homozygous Alt Depth 3+";

        String[] numHeterozygousRow = new String[numColumns];
        numHeterozygousRow[0] = "Num Heterozygous";

        int numBins = 175;
        String[][] depthBinRows = new String[numBins][numColumns];
        for (int i = 0; i < numBins; i++) {
            depthBinRows[i][0] = String.valueOf(i);
        }

        for (int f = 0; f < filenames.size(); f++) {

            String filename = filenames.get(f);

            if (filename.endsWith(".g.vcf")) {

                System.out.println(Utils.getFilename(filename) + "...");

                try (BufferedReader reader = Utils.getBufferedReader(filename)) {

                    String line = reader.readLine();
                    while (line != null && line.startsWith("#")) {
                        line = reader.readLine();
                    }

                    int startPos = Integer.MAX_VALUE;
                    int endPos = Integer.MIN_VALUE;
                    int totalBPs = 0;
                    int totalDP = 0;

                    int[] depthBins = new int[numBins];
                    int numHomozygous = 0;
                    int numHomozygousAlt = 0;
                    int numHomozygousAlt1 = 0;
                    int numHomozygousAlt2 = 0;
                    int numHomozygousAlt3 = 0;
                    int numHeterozygous = 0;
                    int numNotReference = 0;

                    while (line != null) {

                        // 0        1       2       3       4       5       6       7       8       9
                        // #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  W22
                        // 8       17639   .       C              .       .       END=17789       GT:DP:GQ:MIN_DP:PL      0:12:99:8:0,140
                        // 8       17790   .       T       A,     175.00  .       DP=5;MLEAC=1,0;MLEAF=1.000,0.000;RAW_MQ=18000.00        GT:AD:DP:GQ:PL:SB       1:0,5,0:5:99:205,0,205:0,0,2,3
                        String[] tokens = line.split("\t");

                        String chr = tokens[0];
                        int start = Integer.parseInt(tokens[1]);
                        if (start < startPos) {
                            startPos = start;
                        }
                        int end = start;

                        String id = tokens[2];
                        String ref = tokens[3];
                        String alt = tokens[4];
                        String qual = tokens[5];
                        String filter = tokens[6];

                        String[] info = tokens[7].split(";");
                        for (String current : info) {
                            String[] keyValue = current.split("=");
                            if (keyValue[0].equals("END")) {
                                end = Integer.parseInt(keyValue[1]);
                            }
                        }

                        if (end > endPos) {
                            endPos = end;
                        }
                        int length = end - start + 1;
                        totalBPs += length;

                        if (!alt.equals("")) {
                            numNotReference += length;
                        }

                        String[] formats = tokens[8].split(":");
                        String[] values = tokens[9].split(":");

                        int numFormats = formats.length;
                        if (numFormats != values.length) {
                            throw new IllegalArgumentException("unequal formats");
                        }

                        boolean isHomo = true;
                        boolean isAlt = false;
                        int dpValue = 0;
                        for (int i = 0; i < numFormats; i++) {
                            if (formats[i].equals("DP")) {
                                dpValue = Integer.parseInt(values[i]);
                                totalDP += (dpValue * length);
                                depthBins[dpValue] += length;
                            } else if (formats[i].equals("AD")) {
                                String[] alleleDepths = values[i].split(",");
                                if (alleleDepths.length > 1 && !alleleDepths[0].equals("0") && !alleleDepths[1].equals("0")) {
                                    isHomo = false;
                                }
                            } else if (formats[i].equals("GT")) {
                                if (!values[i].equals("0")) {
                                    isAlt = true;
                                }
                            }
                        }

                        if (dpValue > 0) {
                            if (isHomo) {
                                if (isAlt) {
                                    numHomozygousAlt += length;
                                    if (dpValue == 1) {
                                        numHomozygousAlt1 += length;
                                    } else if (dpValue == 2) {
                                        numHomozygousAlt2 += length;
                                    } else {
                                        numHomozygousAlt3 += length;
                                    }
                                } else {
                                    numHomozygous += length;
                                }
                            } else {
                                numHeterozygous += length;
                            }
                        }

                        line = reader.readLine();

                    }

                    startPosRow[f + 1] = String.valueOf(startPos);
                    endPosRow[f + 1] = String.valueOf(endPos);

                    double aveDP = (double) totalDP / (double) totalBPs;
                    aveDepthRow[f + 1] = String.valueOf(aveDP);

                    int totalBPNonZeroCoverage = 0;
                    for (int i = 0; i < numBins; i++) {
                        depthBinRows[i][f + 1] = String.valueOf(depthBins[i]);
                        if (i != 0) {
                            totalBPNonZeroCoverage += depthBins[i];
                        }
                    }
                    bpNonZeroCoverage[f + 1] = String.valueOf(totalBPNonZeroCoverage);

                    numNonReferenceRow[f + 1] = String.valueOf(numNotReference);
                    numHomozygousRow[f + 1] = String.valueOf(numHomozygous);
                    numHomozygousAltRow[f + 1] = String.valueOf(numHomozygousAlt);
                    numHomozygousAlt1Row[f + 1] = String.valueOf(numHomozygousAlt1);
                    numHomozygousAlt2Row[f + 1] = String.valueOf(numHomozygousAlt2);
                    numHomozygousAlt3Row[f + 1] = String.valueOf(numHomozygousAlt3);
                    numHeterozygousRow[f + 1] = String.valueOf(numHeterozygous);

                } catch (Exception e) {
                    e.printStackTrace();
                }

            }

        }

        builder.add(startPosRow);
        builder.add(endPosRow);
        builder.add(aveDepthRow);
        builder.add(bpNonZeroCoverage);
        builder.add(numNonReferenceRow);
        builder.add(numHomozygousRow);
        builder.add(numHomozygousAltRow);
        builder.add(numHomozygousAlt1Row);
        builder.add(numHomozygousAlt2Row);
        builder.add(numHomozygousAlt3Row);
        builder.add(numHeterozygousRow);
        for (int i = 0; i < numBins; i++) {
            builder.add(depthBinRows[i]);
        }

        TableReportUtils.saveDelimitedTableReport(builder.build(), new File(Utils.getFilename(dir) + "_GVCFStats.txt"));

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy