net.maizegenetics.dna.snp.io.BuilderFromVCF Maven / Gradle / Ivy

Go to download
package net.maizegenetics.dna.snp.io;

import com.google.common.base.Splitter;
import com.google.common.collect.SetMultimap;
import net.maizegenetics.dna.WHICH_ALLELE;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.map.GeneralPosition;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.dna.map.PositionListBuilder;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableBuilder;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.dna.snp.genotypecall.GenotypeCallTable;
import net.maizegenetics.dna.snp.genotypecall.GenotypeCallTableBuilder;
import net.maizegenetics.dna.snp.score.AlleleDepthBuilder;
import net.maizegenetics.dna.snp.score.AlleleDepthUtil;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListBuilder;
import net.maizegenetics.taxa.TaxaListIOUtils;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.ProgressListener;
import net.maizegenetics.util.Utils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;

/**
 * Create an alignment based on VCF format file (either .txt or compressed).  Alleles are set as global reference.
 * e.g. code 
 * {@code
 * Alignment a=BuilderFromVCF.getBuilder(infileName).build();
 * }
 * 
 * TODO:  Add filtering while reading, provide an option to define the alleles as reference and alternate
 *
 * @author Ed Buckler
 */
public class BuilderFromVCF {

    private static final Logger myLogger = LogManager.getLogger(BuilderFromVCF.class);
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("[\\s]+");
    private static final Pattern TAB_PATTERN = Pattern.compile("[\\t]+");
    private HeaderPositions hp = null;
    private final String infile;
    private boolean includeDepth = false;
    private final ProgressListener myProgressListener;

    private BuilderFromVCF(String infile, ProgressListener listener) {
        this.infile = infile;
        this.myProgressListener = listener;
    }


    /**
     * Create a builder for loading a VCF file into memory
     *
     * @param infile name of the VCF file to be load
     *
     * @return a builder
     */
    public static BuilderFromVCF getBuilder(String infile) {
        return new BuilderFromVCF(infile, null);
    }

    /**
     * Create a builder for loading a VCF file into memory
     *
     * @param infile name of the VCF file to be load
     * @param listener Progress listener for GUI
     *
     * @return a builder
     */
    public static BuilderFromVCF getBuilder(String infile, ProgressListener listener) {
        return new BuilderFromVCF(infile, listener);
    }

    public BuilderFromVCF keepDepth() {
        includeDepth = true;
        return this;
    }

    public GenotypeTable buildAndSortInMemory() {
        return buildEngine(true);
    }

    public GenotypeTable build() {
        return buildEngine(false);
    }

    //TODO provide options on caching to use, read only some sites, etc.
    private GenotypeTable buildEngine(boolean fullSort) {
        long time = System.nanoTime();
        GenotypeTable result = null;
        int totalSites = -1;//unknown
        GenotypeTableBuilder gtbDiskBuild = null;
        ExecutorService pool = null;
        try {

            int numThreads = Runtime.getRuntime().availableProcessors();
            pool = Executors.newFixedThreadPool(numThreads);

            BufferedReader r = Utils.getBufferedReader(infile, -1);
            //Read the ## annotation rows
            String currLine;
            Map infoMap = new HashMap<>();
            Map formatMap = new HashMap<>();
            Map> sampAnnoBuild = new TreeMap<>();
            currLine = parseVCFHeadersIntoMaps(infoMap, formatMap, sampAnnoBuild, r);

            TaxaList taxaList = processTaxa(currLine, sampAnnoBuild);
            int linesAtTime = 1 << 12;
            //  int linesAtTime=1<<8;  //better for with lots of taxa.
            ArrayList txtLines = new ArrayList<>(linesAtTime);
            ArrayList pbs = new ArrayList<>();
            List> futures = new ArrayList<>();
            int sitesRead = 0;
            while ((currLine = r.readLine()) != null) {
                if (currLine.startsWith("#")) continue;
                txtLines.add(currLine);
                sitesRead++;
                if (sitesRead % linesAtTime == 0) {
                    ProcessVCFBlock pb;
                    pb = ProcessVCFBlock.getInstance(taxaList.numberOfTaxa(), hp, txtLines, includeDepth);
                    //pbs.add(pb);
                    //     pb.run(); //used for testing
                    try {
                        futures.add(pool.submit(pb));
                    } catch (Exception e) {
                        myLogger.debug(e.getMessage(), e);
                        throw new IllegalStateException(e.getMessage());
                    }
                    txtLines = new ArrayList<>(linesAtTime);
                }
            }
            r.close();
            //Handle whatever is left over in the file
            if (txtLines.size() > 0) {
                ProcessVCFBlock pb;//=ProcessVCFBlock.getInstance(taxaList.numberOfTaxa(), hp, txtLines);
                pb = ProcessVCFBlock.getInstance(taxaList.numberOfTaxa(), hp, txtLines, includeDepth);
                //pbs.add(pb);
                //  pb.run(); //used for testing

                try {
                    futures.add(pool.submit(pb));
                } catch (Exception e) {
                    myLogger.debug(e.getMessage(), e);
                    throw new IllegalStateException(e.getMessage());
                }
            }
            int numFutures = futures.size();
            int count = 0;
            for (Future future : futures) {
                try {
                    pbs.add(future.get());
                } catch (Exception e) {
                    myLogger.debug(e.getMessage(), e);
                    throw new IllegalStateException(e.getMessage());
                }
                if (myProgressListener != null) {
                    count++;
                    myProgressListener.progress(count * 100 / numFutures, null);
                }
            }
            pool.shutdown();

            //result=completeInMemoryBuilding(futures, taxaList, sitesRead, includeDepth, fullSort);
            result = completeInMemoryBuilding(pbs, taxaList, sitesRead, includeDepth, fullSort);

//            int currentSite=0;
//            PositionListBuilder posBuild=new PositionListBuilder();
//            GenotypeCallTableBuilder gb=GenotypeCallTableBuilder.getUnphasedNucleotideGenotypeBuilder(taxaList.numberOfTaxa(), lines);
//            AlleleDepthBuilder db=null;
//            if(includeDepth) db=AlleleDepthBuilder.getInstance(taxaList.numberOfTaxa(),lines,6);
//            for (ProcessVCFBlock pb : pbs) {
//                posBuild.addAll(pb.getBlkPosList());
//                byte[][] bgTS=pb.getGenoTS();
//                for (int t=0; t pbs, TaxaList taxaList, int numberOfSites, boolean includeDepth, boolean fullSort) {
        int currentSite = 0;
        PositionListBuilder posBuild = new PositionListBuilder();
        GenotypeCallTableBuilder gb = GenotypeCallTableBuilder.getUnphasedNucleotideGenotypeBuilder(taxaList.numberOfTaxa(), numberOfSites);
        AlleleDepthBuilder db = null;

        if (includeDepth) db = AlleleDepthBuilder.getInstance(taxaList.numberOfTaxa(), numberOfSites, taxaList);


        for (ProcessVCFBlock pb : pbs) {
            posBuild.addAll(pb.getBlkPosList());
            byte[][] bgTS = pb.getGenoTS();
            for (int t = 0; t < bgTS.length; t++) {
                gb.setBaseRangeForTaxon(t, currentSite, bgTS[t]);
            }
            if (includeDepth) {
                byte[][][] bdTS = pb.getDepthTS();
                for (int t = 0; t < bgTS.length; t++) {
                    db.setDepthRangeForTaxon(t, currentSite, bdTS[t]);
                }

            }
            currentSite += pb.getSiteNumber();
        }


        //Check that result is in correct order. If not, either try to sort or just throw an error (determined by what was passed to fullSort)
        if (posBuild.validateOrdering() == false) {
            if (fullSort) {
                int[] siteRedirect = posBuild.sort();
                gb.reorderPositions(siteRedirect);
                if (includeDepth) {
                    db.reorderPositions(siteRedirect);
                }
                if (posBuild.validateOrdering() == false) {   //Double-check post-sort ordering. Should never happen, but just to be safe
                    throw new IllegalStateException("BuilderFromVCF: Ordering of VCF file held in memory failed.");
                }
            } else {
                throw new IllegalStateException("BuilderFromVCF: Ordering incorrect. VCF file must be ordered by position. Please first use SortGenotypeFilePlugin to correctly order the file.");
            }
        }
        GenotypeCallTable g = gb.build();
        if (includeDepth) {
            return GenotypeTableBuilder.getInstance(g, posBuild.build(), taxaList, db.build());
        } else {
            return GenotypeTableBuilder.getInstance(g, posBuild.build(), taxaList);
        }

    }

    private static String parseVCFHeadersIntoMaps(Map infoMap, Map formatMap,
                                                  Map> sampAnnoBuild, BufferedReader r) throws IOException {
        String currLine;
        while (((currLine = r.readLine()) != null) && (currLine.startsWith("##"))) {
            String[] cat = currLine.split("=", 2);
            if (cat.length < 2) continue;
            switch (cat[0]) {
//                    case "##INFO":
//                        infoMap.put(mapOfAnno.get("ID"), mapOfAnno.get("Description"));
//                        break;
//                    case "##FILTER":break;
//                    case "##FORMAT":
//                        formatMap.put(mapOfAnno.get("ID"),mapOfAnno.get("Description"));
//                        break;
                case "##SAMPLE":
                    SetMultimap mapOfAnno = TaxaListIOUtils.parseVCFHeadersIntoMap(cat[1]);
                    String taxaID = mapOfAnno.get("ID").iterator().next();
                    if (taxaID == null) break;
                    sampAnnoBuild.put(taxaID, mapOfAnno);
                    break;
                case "##PEDIGREE":
                    break;
                default:
                    break;
            }

//                System.out.println(currLine);
        }
        return currLine;
    }

    private static String getReplaceCommaWithinQuote(String s) {
        StringBuilder sb = new StringBuilder(s);
        boolean inQuote = false;
        for (int i = 0; i < sb.length(); i++) {
            if (sb.charAt(i) == '\"') inQuote = (!inQuote);
            if (inQuote && sb.charAt(i) == ',') sb.setCharAt(i, (char) ((int) ',' + 256));//(char)167);
            if (inQuote && sb.charAt(i) == '=') sb.setCharAt(i, (char) ((int) '=' + 256));//(char)167);
        }
        return sb.toString();
    }

    private TaxaList processTaxa(String readLn, Map> taxaAnnotation) {
        String[] header = TAB_PATTERN.split(readLn);
        hp = new HeaderPositions(header);
        int numTaxa = header.length - hp.NUM_HAPMAP_NON_TAXA_HEADERS;
        TaxaListBuilder tlb = new TaxaListBuilder();
        for (int i = 0; i < numTaxa; i++) {
            String taxonID = header[i + hp.NUM_HAPMAP_NON_TAXA_HEADERS];
            Taxon.Builder at = new Taxon.Builder(taxonID);
            SetMultimap taMap = taxaAnnotation.get(taxonID);
            if (taMap != null) {
                for (Map.Entry en : taMap.entries()) {
                    if (en.getKey().equals("ID")) continue; //skip the IDs as these became the name
                    String s = en.getValue().replace("\"", "");
                    at.addAnno(en.getKey(), s);
                }
            }
            tlb.add(at.build());
        }
        return tlb.build();
    }


}

class HeaderPositions {
    final int NUM_HAPMAP_NON_TAXA_HEADERS;
    final int GENOIDX;
    final int SNPID_INDEX;
    //  final int VARIANT_INDEX;
    final int FILTER_INDEX;
    final int QUAL_INDEX;
    final int CHROMOSOME_INDEX;
    final int POSITION_INDEX;
    final int REF_INDEX;
    final int ALT_INDEX;
    final int INFO_INDEX;
    final int FORMAT_INDEX;

    public HeaderPositions(String[] header) {
        int chrIdx = firstEqualIndex(header, "#CHROM");
        if (chrIdx < 0) chrIdx = firstEqualIndex(header, "#CHR");
        CHROMOSOME_INDEX = chrIdx;
        POSITION_INDEX = firstEqualIndex(header, "POS");
        SNPID_INDEX = firstEqualIndex(header, "ID");
        REF_INDEX = firstEqualIndex(header, "REF");
        ALT_INDEX = firstEqualIndex(header, "ALT");
        QUAL_INDEX = firstEqualIndex(header, "QUAL");
        FILTER_INDEX = firstEqualIndex(header, "FILTER");
        INFO_INDEX = firstEqualIndex(header, "INFO");
        FORMAT_INDEX = firstEqualIndex(header, "FORMAT");

        NUM_HAPMAP_NON_TAXA_HEADERS = Math.max(INFO_INDEX, FORMAT_INDEX) + 1;
        GENOIDX = NUM_HAPMAP_NON_TAXA_HEADERS;
    }

    private static int firstEqualIndex(String[] sa, String match) {
        for (int i = 0; i < sa.length; i++) {
            if (sa[i].equals(match)) return i;
        }
        return -1;
    }

}

//class ProcessVCFBlock implements Runnable {
class ProcessVCFBlock implements Callable {

    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s");
    private static final Pattern SLASH_PATTERN = Pattern.compile("/");
    private final HeaderPositions hp;
    private final int taxaN;
    private final int siteN;
    private final int startSite; //if unknown Int.Mini
    private ArrayList txtL;
    private byte[][] gTS;  //genotypes
    private byte[][][] dTS; //depth
    private final ArrayList blkPosList;
    private final boolean keepDepth;


    private ProcessVCFBlock(int taxaN, HeaderPositions hp, ArrayList txtL, int startSite,
                            boolean keepDepth) {
        this.taxaN = taxaN;
        this.siteN = txtL.size();
        this.txtL = txtL;
        this.hp = hp;
        blkPosList = new ArrayList<>(siteN);
        this.startSite = startSite;
        this.keepDepth = keepDepth;
    }

    /*Used to process VCF blocks and return the result for a in memory GenotypeTable*/
    static ProcessVCFBlock getInstance(int taxaN, HeaderPositions hp, ArrayList txtL, boolean keepDepth) {
        return new ProcessVCFBlock(taxaN, hp, txtL, Integer.MIN_VALUE, keepDepth);
    }

    @Override
    public ProcessVCFBlock call() throws Exception {
        Map chromosomeLookup = new HashMap<>();
        gTS = new byte[taxaN][siteN];
        if (keepDepth == true) dTS = new byte[taxaN][6][siteN];
        for (int s = 0; s < siteN; s++) {
            //really needs to use a Splitter iterator to make this cleaner if it is performant
            String input = txtL.get(s);
            try {
                int[] tabPos = new int[hp.NUM_HAPMAP_NON_TAXA_HEADERS + taxaN];
                int tabIndex = 0;
                int len = input.length();
                for (int i = 0; (tabIndex < hp.NUM_HAPMAP_NON_TAXA_HEADERS + taxaN) && (i < len); i++) {
                    if (input.charAt(i) == '\t') {
                        tabPos[tabIndex++] = i;
                    }
                }
                String chrName = input.substring(0, tabPos[hp.CHROMOSOME_INDEX]);
                Chromosome currChr = chromosomeLookup.get(chrName);
                if (currChr == null) {
                    currChr = new Chromosome(new String(chrName));
                    chromosomeLookup.put(chrName, currChr);
                }
                String snpID = null;
                if (hp.SNPID_INDEX > 0) snpID = input.substring(tabPos[hp.SNPID_INDEX - 1] + 1, tabPos[hp.SNPID_INDEX]);
                String refS = input.substring(tabPos[hp.REF_INDEX - 1] + 1, tabPos[hp.REF_INDEX]);
                String alt = input.substring(tabPos[hp.ALT_INDEX - 1] + 1, tabPos[hp.ALT_INDEX]);
                String variants;
                if (alt.equals(".")) {
                    variants = refS;
                } else {
                    variants = (refS + "/" + alt).replace(',', '/')
                            .replace("", "+").replace('I', '+')
                            .replace("", "-").replace('D', '-')
                            .replace("*", "N");
                }

                //GeneralPosition.Builder apb=new GeneralPosition.Builder(currChr, currentPosition)
                //                                               .knownVariants(variants); //TODO strand, variants,
                //ZRM 8_26
                GeneralPosition.Builder apb = new GeneralPosition.Builder(currChr, Integer.parseInt(input.substring(tabPos[hp.POSITION_INDEX - 1] + 1, tabPos[hp.POSITION_INDEX])))
                        .knownVariants(variants) //TODO strand, variants,
                        ;
                if (snpID != null && !snpID.equals(".")) {
                    apb.snpName(snpID);
                }
                //byte[] alleles=new byte[(variants.length()+1)/2];
                byte[] alleles = new byte[variants.split("/").length];
                for (int i = 0, varInd = 0; i < alleles.length; i++, varInd += 2) {
                    alleles[i] = NucleotideAlignmentConstants.getNucleotideAlleleByte(variants.charAt(varInd));
                }
                /***ZRM 8_27 New code ***/
                String[] variantList = variants.split("/");
                if (variantList[0].length() > 1) {
                    String[] parsedVariantList = new String[variantList.length];
                    //alt deletion
                    for (int i = 0; i < variantList.length; i++) {
                        //Pull off the first character if it exists
                        if (variantList[i].length() > 1) {
                            parsedVariantList[i] = variantList[i].substring(1);
                            if (parsedVariantList[i].length() == 0) {
                                parsedVariantList[i] = "-";
                            }
                        } else {
                            //Mark as deletion
                            parsedVariantList[i] = "-";
                        }
                    }
                    for (int i = 0; i < parsedVariantList.length; i++) {
                        alleles[i] = NucleotideAlignmentConstants.getNucleotideAlleleByte(parsedVariantList[i].charAt(0));
                    }
                } else {
                    //Check for reference deletion(insertion)
                    //Loop through all variants to see if one alt is longer than the ref
                    boolean isIndel = false;
                    for (int i = 1; i < variantList.length; i++) {
                        if (variantList[i].length() > variantList[0].length()) {
                            isIndel = true;
                            break;
                        }
                    }
                    if (isIndel) {
                        String[] parsedVariantList = new String[variantList.length];
                        //ref+alt deletion
                        for (int i = 0; i < variantList.length; i++) {
                            //Pull off the first character if it exists
                            if (variantList[i].length() > 1) {
                                parsedVariantList[i] = variantList[i].substring(1);
                                if (parsedVariantList[i].length() == 0) {
                                    parsedVariantList[i] = "-";
                                }
                            } else {
                                //Mark as deletion
                                parsedVariantList[i] = "-";
                            }
                        }
                        for (int i = 0; i < parsedVariantList.length; i++) {
                            alleles[i] = NucleotideAlignmentConstants.getNucleotideAlleleByte(parsedVariantList[i].charAt(0));
                        }
                    } else {
                        //if not just put it in the allele array
                        for (int i = 0; i < variantList.length; i++) {
                            alleles[i] = NucleotideAlignmentConstants.getNucleotideAlleleByte(variantList[i].charAt(0));
                        }
                    }
                }
                /***ZRM 8_27 New code end ***/
                apb.allele(WHICH_ALLELE.Reference, alleles[0]);
                if (alleles.length > 1) {
                    apb.allele(WHICH_ALLELE.Alternate, alleles[1]);
                }
                for (String annoS : Splitter.on(";").split(input.substring(tabPos[hp.INFO_INDEX - 1] + 1, tabPos[hp.INFO_INDEX]))) {
                    apb.addAnno(annoS);
                }
                blkPosList.add(apb.build());
                final int iGT = 0; //genotype index
                int iAD = -1, iDP = -1, iGQ = -1, iPL = -1;  //alleleDepth, overall depth, genotypeQuality, phredGenotypeLikelihoods
                if (hp.FORMAT_INDEX >= 0) {
                    //Check to see if FORMAT tag is missing. Only applicable for single taxa files
                    if (tabPos[hp.FORMAT_INDEX] == 0) {
                        throw new IllegalStateException("Error Processing VCF: Missing FORMAT tag.");
                    }
                    String unsplitInput = input.substring(tabPos[hp.FORMAT_INDEX - 1] + 1, tabPos[hp.FORMAT_INDEX]);
                    if (unsplitInput.length() == 0 || !unsplitInput.startsWith("GT")) {
                        //Check to see it has the GT field
                        if (unsplitInput.contains("GT")) {
                            throw new IllegalStateException("Error Processing VCF Block: GT field is not in first position of FORMAT.");
                        }
                        //If GT isnt in, we assume that it is missing FORMAT
                        else {
                            throw new IllegalStateException("Error Processing VCF Block: Missing FORMAT tag.");
                        }
                    }
                    String[] formatS = unsplitInput.split(":");

                    iAD = firstEqualIndex(formatS, "AD");
                }
                int t = 0;
                for (String taxaAllG : Splitter.on("\t").split(input.substring(tabPos[hp.NUM_HAPMAP_NON_TAXA_HEADERS - 1] + 1))) {
                    int f = 0;
                    //if(taxaAllG.equals(".")) {
                    if (taxaAllG.startsWith(".")) {
                        gTS[t][s] = GenotypeTable.UNKNOWN_GENOTYPE;
                        //need to still move up the taxa counter by one as we have covered this now.
                        t++;
                        continue;
                    }
                    for (String fieldS : Splitter.on(":").split(taxaAllG)) {
                        if (f == iGT) {
                            //String "[.0-9]\\/[.0-9]||[.0-9]\\|[.0-9]" will match a valid diploid
                            if (!fieldS.equals(".")) { //[TAS-509] Check to make sure we are using diploids in the form 0/1 or 0|0
                                int a1 = 0;
                                int a2 = 0;
                                //Should be this, but for speed we just check the character
                                //if(fieldS.split("|/").length ==1) {
                                if (fieldS.length() == 1) {
                                    a1 = fieldS.charAt(0) - '0';
                                    a2 = fieldS.charAt(0) - '0';
                                } else {
                                    //zrm22 Jul 31, 2017
                                    //int a1 = fieldS.charAt(0) - '0';
                                    //int a2 = fieldS.charAt(2) - '0';

                                    a1 = fieldS.charAt(0) - '0';
                                    a2 = fieldS.charAt(2) - '0';
                                }
                                if (a1 > alleles.length - 1 || a2 > alleles.length - 1) {
                                    Position pos = blkPosList.get(blkPosList.size() - 1);
                                    throw new IllegalStateException("\nError Processing VCF block: Mismatch of alleles.\n  At Chromosome " + pos.getChromosome().getName() + ", Position " + pos.getPosition() + ".\nAllele ID larger than number of alleles");
                                }
                                if (a1 < 0 || a2 < 0) {
                                    gTS[t][s] = GenotypeTable.UNKNOWN_GENOTYPE;
                                } else {
                                    gTS[t][s] =
                                            GenotypeTableUtils.getDiploidValue(alleles[a1], alleles[a2]);
                                }
                            } else {    //[TAS-509] if it isnt a diploid error out early
                                throw new IllegalStateException("Error Processing VCF block: Found haploid information for the element: "
                                        + taxaAllG + ".\nExpected a diploid entry.");
                            }
                        } else if ((f == iAD) && keepDepth) {
                            // System.out.println("GTS: "+gTS[t][s] + " "+fieldS);
                            if (gTS[t][s] != GenotypeTable.UNKNOWN_GENOTYPE) {
                                int i = 0;
                                for (String ad : Splitter.on(",").split(fieldS)) {
                                    if (i >= alleles.length) continue;
                                    if (alleles[i] == GenotypeTable.UNKNOWN_ALLELE || ad.equals(".") || alleles[i] == NucleotideAlignmentConstants.UNDEFINED_ALLELE ||
                                            alleles[i] == NucleotideAlignmentConstants.UNDEFINED_HOMOZYGOUS) {  //no position for depth of unknown alleles or depth is set to missing, so skip
                                        //Uncomment when converted
                                        //dTS[t][alleles[i++]][s] = AlleleDepthUtil.depthIntToByte(AlleleDepthUtil.DEPTH_MISSING);
                                        //Comment next two lines when converted
                                        i++;
                                        continue;
                                    }

                                    int adInt = Integer.parseInt(ad);
                                    dTS[t][alleles[i++]][s] = AlleleDepthUtil.depthIntToByte(adInt);
                                }
                            }
                        }
                        f++;
                    }
                    t++;
                }
            } catch (IllegalStateException e) {
                e.printStackTrace();
                throw e;
            } catch (Exception e) {
                System.err.println("Err Site Number:" + s);
                //System.err.println("Err Site Number:"+blkPosList.get(blkPosList.size()-1).toString());
                //System.err.println("Err:"+input);
                e.printStackTrace();
                throw e;
            }

        }
        txtL = null;
        return this;
    }

    int getSiteNumber() {
        return siteN;
    }

    byte[][] getGenoTS() {
        return gTS;
    }

    byte[][][] getDepthTS() {
        return dTS;
    }

    ArrayList getBlkPosList() {
        return blkPosList;
    }

    private static int firstEqualIndex(String[] sa, String match) {
        for (int i = 0; i < sa.length; i++) {
            if (sa[i].equals(match)) return i;
        }
        return -1;
    }
}