package net.maizegenetics.dna.map;
import com.google.common.base.Splitter;
import com.google.common.collect.*;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.util.*;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import net.maizegenetics.util.BitSet;
import net.maizegenetics.util.OpenBitSet;
/**
* Created by zrm22 on 3/27/17.
*/
public class GVCFGenomeSequenceBuilder extends GenomeSequenceBuilder {
private static final Pattern TAB_PATTERN = Pattern.compile("[\\t]+");
/**
* Builds a GenomeSequence from a fasta file and a GVCF file.
*
* @param fastaFileName full path to fasta file
* @param gvcfFileName full path to GVCF file
* @return GenomeSequence object
*/
public static GenomeSequence instance(String fastaFileName, String gvcfFileName) throws Exception{
Function<Character, Character> charConversion = (c) -> c;
return instance(fastaFileName, charConversion, gvcfFileName);
}
/**
* Builds a GenomeSequence from a fasta file and a GVCF file. The char conversion provides a mechanism to convert upper and lower case
* or to convert one case to N. This is useful if a case is used to define a certain class of bases.
*
* @param fastaFileName full path to fasta file
* @param charConversion lambda Function to convert characters
* @param gvcfFileName full path to GVCF file
* @return GenomeSequence object
*/
public static GenomeSequence instance(String fastaFileName, Function<Character, Character> charConversion, String gvcfFileName) throws Exception{
long chrPosMapStartTime = System.currentTimeMillis();
Map<Chromosome, byte[]> chromPositionMap = readReferenceGenomeChr(fastaFileName, charConversion);
System.out.println("Done Reading in Reference: Total Time Taken: "+(System.currentTimeMillis()-chrPosMapStartTime)+"ms");
//need to create a Map of per-chromosome RangeMaps holding the GVCF annotations and calls
// Map<Chromosome, RangeMap<...>> gvcfAnnotationsAndCalls = readGVCFFile(gvcfFileName);
System.out.println("Generating Position List");
long startTime = System.currentTimeMillis();
PositionList gvcfPositionsAndAnnotations = readGVCFFilePositionList(gvcfFileName);
System.out.println("Done Creating Pos list. Total Time Taken: "+(System.currentTimeMillis() - startTime)+"ms");
return new HalfByteGenomeSequenceGVCF(chromPositionMap,gvcfPositionsAndAnnotations);
}
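// Usage sketch (hypothetical file names): build the sequence once, then query per chromosome.
// Coordinates passed to chromosomeSequence are 1-based and inclusive.
//   GenomeSequence seq = GVCFGenomeSequenceBuilder.instance("ref.fa", "sample.g.vcf");
//   byte[] chr1Alleles = seq.chromosomeSequence(new Chromosome("1"), 1, 10000);
//   String chr1Bases = NucleotideAlignmentConstants.nucleotideBytetoString(chr1Alleles);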
public static GenomeSequence instance(GVCFGenomeSequence base, BitSet maskedBitSet, BitSet filteredBitSet) throws Exception{
return new HalfByteGenomeSequenceGVCF(base.getChrPosMap(), base.getGVCFPositions(), maskedBitSet, filteredBitSet);
// Map<Chromosome, byte[]> chromPositionMap = readReferenceGenomeChr(fastaFileName, charConversion);
// //need to create a Map of per-chromosome RangeMaps holding the GVCF annotations and calls
// Map<Chromosome, RangeMap<...>> gvcfAnnotationsAndCalls = readGVCFFile(gvcfFileName);
// PositionList gvcfPositionsAndAnnotations = readGVCFFilePositionList(gvcfFileName);
// return new HalfByteGenomeSequenceGVCF(chromPositionMap,gvcfPositionsAndAnnotations);
}
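// Sketch of the mask/filter overload (assumes an existing GVCFGenomeSequence named base): the
// two BitSets are sized to the GVCF PositionList, one bit per record; a set mask bit is meant
// to emit N for that record, a set filter bit to drop it from the exported sequence.
//   BitSet mask = new OpenBitSet(base.getGVCFPositions().size());
//   BitSet filter = new OpenBitSet(base.getGVCFPositions().size());
//   mask.fastFlip(42); // hypothetical record index to mask
//   GenomeSequence masked = GVCFGenomeSequenceBuilder.instance(base, mask, filter);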
private static Map<Chromosome, RangeMap<Integer, Position>> readGVCFFile(String gvcfFileName) {
HashMap<Chromosome, RangeMap<Integer, Position>> chromosomeRangeMapHashMap = new HashMap<>();
//TODO multithread using similar code to BuilderFromVCF
return chromosomeRangeMapHashMap;
}
private static PositionList readGVCFFilePositionList(String gvcfFileName) throws Exception{
ArrayList<Position> positionArrayList = new ArrayList<>();
// BufferedReader gvcfFileReader = new BufferedReader(new FileReader(gvcfFileName));
BufferedReader gvcfFileReader = Utils.getBufferedReader(gvcfFileName,-1);
//Loop through the headers
String currentLine = "";
while(!(currentLine = gvcfFileReader.readLine()).startsWith("#CHROM")) {
}
//parse the header
String[] header = TAB_PATTERN.split(currentLine);
HeaderPositions hp= new HeaderPositions(header);
GVCFPositionRecord gvcfPositionRecord = new GVCFPositionRecord(hp);
while((currentLine = gvcfFileReader.readLine())!=null) {
positionArrayList.add(gvcfPositionRecord.parseGVCFRecords(currentLine));
}
PositionList instance = new PositionArrayList(positionArrayList, "AGPv4");
return instance;
}
}
class HeaderPositions {
final int NUM_HAPMAP_NON_TAXA_HEADERS;
final int GENOIDX;
final int SNPID_INDEX;
// final int VARIANT_INDEX;
final int FILTER_INDEX;
final int QUAL_INDEX;
final int CHROMOSOME_INDEX;
final int POSITION_INDEX;
final int REF_INDEX;
final int ALT_INDEX;
final int INFO_INDEX;
final int FORMAT_INDEX;
public HeaderPositions(String[] header){
int chrIdx=firstEqualIndex(header,"#CHROM");
if(chrIdx<0) chrIdx=firstEqualIndex(header,"#CHR");
CHROMOSOME_INDEX=chrIdx;
POSITION_INDEX=firstEqualIndex(header,"POS");
SNPID_INDEX=firstEqualIndex(header,"ID");
REF_INDEX=firstEqualIndex(header,"REF");
ALT_INDEX=firstEqualIndex(header,"ALT");
QUAL_INDEX=firstEqualIndex(header,"QUAL");
FILTER_INDEX=firstEqualIndex(header,"FILTER");
INFO_INDEX=firstEqualIndex(header,"INFO");
FORMAT_INDEX=firstEqualIndex(header,"FORMAT");
NUM_HAPMAP_NON_TAXA_HEADERS=Math.max(INFO_INDEX,FORMAT_INDEX)+1;
GENOIDX=NUM_HAPMAP_NON_TAXA_HEADERS;
}
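// Worked example (hypothetical single-taxon GVCF header): for the line
//   #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B73
// the indices resolve to CHROMOSOME_INDEX=0, POSITION_INDEX=1, SNPID_INDEX=2, REF_INDEX=3,
// ALT_INDEX=4, QUAL_INDEX=5, FILTER_INDEX=6, INFO_INDEX=7 and FORMAT_INDEX=8, so
// NUM_HAPMAP_NON_TAXA_HEADERS=9 and GENOIDX=9 (the first genotype column).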
private static int firstEqualIndex(String[] sa, String match) {
for (int i=0; i<sa.length; i++) {
if(sa[i].equals(match)) return i;
}
return -1;
}
}
//Holder for the per-position GVCF annotations and calls (class name assumed)
class GVCFAnnotationsAndCalls {
//known variants parsed from the REF and ALT columns
ArrayList<String> knownVariants = new ArrayList<>();
//the depth
int depth = 0;
//GQ score
int gqScore = 0;
//GeneralAnnotations for any other in INFO tag
GeneralAnnotationStorage generalAnnotationStorage;
}
class GVCFPositionRecord {
GeneralPosition ap= new GeneralPosition.Builder(new Chromosome("1"),1232)
.maf(0.05f)
.build();
HeaderPositions hp;
public GVCFPositionRecord(HeaderPositions hp) {
this.hp = hp;
}
public Position parseGVCFRecords(String input) {
//Figure out the tab positioning for the header columns
int[] tabPos=new int[hp.NUM_HAPMAP_NON_TAXA_HEADERS+1];
int tabIndex=0;
int len=input.length();
for (int i=0; (tabIndex<hp.NUM_HAPMAP_NON_TAXA_HEADERS+1)&&(i<len); i++) {
if (input.charAt(i)=='\t') {
tabPos[tabIndex++]=i;
}
}
String chrName=input.substring(0, tabPos[hp.CHROMOSOME_INDEX]);
Chromosome currChr=new Chromosome(chrName);
String snpID=null;
if(hp.SNPID_INDEX>0) snpID=input.substring(tabPos[hp.SNPID_INDEX-1]+1, tabPos[hp.SNPID_INDEX]);
//create the General position
GeneralPosition.Builder apb=new GeneralPosition.Builder(currChr, Integer.parseInt(input.substring(tabPos[hp.POSITION_INDEX-1]+1, tabPos[hp.POSITION_INDEX])));
if(snpID!=null && !snpID.equals(".")) {
apb.snpName(snpID);
}
String refS=input.substring(tabPos[hp.REF_INDEX-1]+1, tabPos[hp.REF_INDEX]);
String alt=input.substring(tabPos[hp.ALT_INDEX-1]+1, tabPos[hp.ALT_INDEX]);
//create a String to hold the list of variants; we will have to parse this on export
String allAlleles = refS+"/"+alt.replace(",","/");
apb = apb.knownVariants(allAlleles);
//loop through the INFO tag and save off any of those values into positions
for(String annoS: Splitter.on(";").split(input.substring(tabPos[hp.INFO_INDEX-1]+1, tabPos[hp.INFO_INDEX]))) {
apb.addAnno(annoS);
}
final int iGT=0; //genotype index
int iAD=-1,iDP=-1,iGQ=-1, iPL=-1; //alleleDepth, overall depth, genotypeQuality, phredGenotypeLikelihoods
if(hp.FORMAT_INDEX>=0) {
//Check to see if FORMAT tag is missing. Only applicable for single taxa files
if(tabPos[hp.FORMAT_INDEX]==0) {
throw new IllegalStateException("Error Processing VCF: Missing FORMAT tag.");
}
String unsplitInput = input.substring(tabPos[hp.FORMAT_INDEX-1]+1, tabPos[hp.FORMAT_INDEX]);
if(unsplitInput.length()==0|| !unsplitInput.startsWith("GT")) {
//Check to see it has the GT field
if(unsplitInput.contains("GT")) {
throw new IllegalStateException("Error Processing VCF Block: GT field is not in first position of FORMAT.");
}
//If GT isn't present, we assume the FORMAT tag is missing
else {
throw new IllegalStateException("Error Processing VCF Block: Missing FORMAT tag.");
}
}
String[] formatS = unsplitInput.split(":");
iAD=firstEqualIndex(formatS,"AD");
iDP=firstEqualIndex(formatS,"DP");
iGQ=firstEqualIndex(formatS,"GQ");
}
//after info is recorded we need to record the call section
String taxaAllG = input.substring(tabPos[hp.NUM_HAPMAP_NON_TAXA_HEADERS-1]+1);
int f = 0;
for(String fieldS: Splitter.on(":").split(taxaAllG)) {
if (f == iGT) {
apb.addAnno("GT",fieldS);
}
if(f == iAD) {
apb.addAnno("AD",fieldS);
}
if(f == iDP) {
apb.addAnno("DP",fieldS);
}
if(f == iGQ) {
apb.addAnno("GQ",fieldS);
}
f++;
}
return apb.build();
}
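// Worked example (hypothetical record): a reference-block line such as
//   1  1001  .  A  <NON_REF>  .  .  END=1100  GT:DP:GQ  0/0:15:42
// yields a Position at 1:1001 with knownVariants "A/<NON_REF>" and annotations END=1100,
// GT=0/0, DP=15 and GQ=42; a SNP or indel line without END carries GT/AD/DP/GQ the same way.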
private static int firstEqualIndex(String[] sa, String match) {
for (int i=0; i<sa.length; i++) {
if(sa[i].equals(match)) return i;
}
return -1;
}
}
class HalfByteGenomeSequenceGVCF implements GVCFGenomeSequence {
private Map<Chromosome, byte[]> chromPositionMap;
private Map<Chromosome, Integer> chromLengthLookup=new HashMap<>();
private RangeMap<Long, Chromosome> wholeGenomeIndexMap= TreeRangeMap.create();
private PositionList gvcfAnnotationsAndCalls;
private final long genomeSize;
private BitSet maskBitSet;
private BitSet filterBitSet;
//Timing stuff
long totalQueryTime = 0;
HashMap<Chromosome, int[]> positionIndex = new HashMap<>();
int indexScaleFactor = 10000;
protected HalfByteGenomeSequenceGVCF(Map<Chromosome, byte[]> chromPositionMap, PositionList gvcfAnnotationsAndCalls) {
this.chromPositionMap = chromPositionMap;
this.gvcfAnnotationsAndCalls = gvcfAnnotationsAndCalls;
long initialSequenceSetupStart = System.currentTimeMillis();
chromPositionMap.entrySet().stream()
.forEach(e -> chromLengthLookup.put(e.getKey(),e.getKey().getLength()));
LongAdder genomeIndex=new LongAdder();
chromosomes().stream().sorted()
.forEach(chrom -> {
int length=chromLengthLookup.get(chrom);
wholeGenomeIndexMap.put(Range.closed(genomeIndex.longValue(),
genomeIndex.longValue()+length-1),chrom);
genomeIndex.add(length);
int[] currentIndexArray = new int[length/indexScaleFactor+2];
Arrays.fill(currentIndexArray,-1);
positionIndex.put(chrom,currentIndexArray);
}
);
genomeSize=genomeIndex.longValue();
maskBitSet = new OpenBitSet(gvcfAnnotationsAndCalls.size());
filterBitSet = new OpenBitSet(gvcfAnnotationsAndCalls.size());
System.out.println("Initial Startup time for GVCF Sequence: "+(System.currentTimeMillis() - initialSequenceSetupStart)+"ms");
long indexInitStart = System.currentTimeMillis();
for(int i = 0; i < gvcfAnnotationsAndCalls.size(); i++) {
int currentBin = gvcfAnnotationsAndCalls.get(i).getPosition()/indexScaleFactor;
//new record. add it to the index
if(positionIndex.get(gvcfAnnotationsAndCalls.get(i).getChromosome())[currentBin] == -1) {
positionIndex.get(gvcfAnnotationsAndCalls.get(i).getChromosome())[currentBin] = i;
}
}
System.out.println("Done Creating index: "+(System.currentTimeMillis() - indexInitStart)+"ms");
}
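// Index sketch: positionIndex bins GVCF records by physical position / indexScaleFactor (10 kb
// bins) and keeps the first PositionList index that falls in each bin, so a query starting at
// chromosome coordinate p can begin scanning at positionIndex.get(chrom)[p / indexScaleFactor]
// instead of at 0; bins with no records keep the sentinel value -1.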
protected HalfByteGenomeSequenceGVCF(Map<Chromosome, byte[]> chromPositionMap, PositionList gvcfAnnotationsAndCalls,BitSet maskBitSet, BitSet filterBitSet) {
this.chromPositionMap = chromPositionMap;
this.gvcfAnnotationsAndCalls = gvcfAnnotationsAndCalls;
chromPositionMap.entrySet().stream()
.forEach(e -> chromLengthLookup.put(e.getKey(),e.getKey().getLength()));
LongAdder genomeIndex=new LongAdder();
chromosomes().stream().sorted()
.forEach(chrom -> {
int length=chromLengthLookup.get(chrom);
wholeGenomeIndexMap.put(Range.closed(genomeIndex.longValue(),
genomeIndex.longValue()+length-1),chrom);
genomeIndex.add(length);
int[] currentIndexArray = new int[length/indexScaleFactor+2];
Arrays.fill(currentIndexArray,-1);
positionIndex.put(chrom,currentIndexArray);
}
);
genomeSize=genomeIndex.longValue();
this.maskBitSet = maskBitSet;
this.filterBitSet = filterBitSet;
for(int i = 0; i < gvcfAnnotationsAndCalls.size(); i++) {
int currentBin = gvcfAnnotationsAndCalls.get(i).getPosition()/indexScaleFactor;
//new record. add it to the index
if(positionIndex.get(gvcfAnnotationsAndCalls.get(i).getChromosome())[currentBin] == -1) {
positionIndex.get(gvcfAnnotationsAndCalls.get(i).getChromosome())[currentBin] = i;
}
}
}
@Override
public Set<Chromosome> chromosomes() {
return chromPositionMap.keySet();
}
@Override
public byte[] chromosomeSequence(Chromosome chrom) {
return chromosomeSequence(chrom,1,chromLengthLookup.get(chrom));
}
@Override
// Code assumes 1-based coordinates have been passed. It will catch and return
// null if the startSite is 0. Otherwise, the user is on their own to ensure
// input is 1-based.
//TODO add in functionality using GVCF annotations
public byte[] chromosomeSequence(Chromosome chrom, int startSite, int lastSite) {
final int startSiteFinal = startSite;
final int lastSiteFinal = lastSite;
//add one as we grab the last site as well.
// regionTotalReferenceBPCount = lastSite - startSite+1;
//zrm22 new for getting gvcf records.
//Basically the loop now changes to loop through the positionList to check if the allele should be the ref or not
gvcfAnnotationsAndCalls.chromosomeSiteCount(chrom);
//use the index to get the currentList of positions
int[] currentIndex = positionIndex.get(chrom);
int startPositionListIndex = currentIndex[startSite/indexScaleFactor];
int endPositionListIndex = currentIndex[lastSite/indexScaleFactor+1];
int startPositionListCounter = 1;
while(startPositionListIndex==-1) {
if (startSite / indexScaleFactor - startPositionListCounter >= 0) {
startPositionListIndex = currentIndex[startSite / indexScaleFactor - startPositionListCounter];
startPositionListCounter++;
}
else {
startPositionListIndex = 0;
break;
}
}
ArrayList<Position> listOfChrPositions = new ArrayList<>();
if(endPositionListIndex==-1) {
for(int i = startPositionListIndex; i < startPositionListIndex+indexScaleFactor && i < gvcfAnnotationsAndCalls.size(); i++) {
if(gvcfAnnotationsAndCalls.get(i).getPosition()>=startSiteFinal && gvcfAnnotationsAndCalls.get(i).getPosition()<=lastSiteFinal) {
listOfChrPositions.add(gvcfAnnotationsAndCalls.get(i));
}
}
}
else {
for (int i = startPositionListIndex; i <= endPositionListIndex; i++) {
if (gvcfAnnotationsAndCalls.get(i).getPosition() >= startSiteFinal && gvcfAnnotationsAndCalls.get(i).getPosition() <= lastSiteFinal) {
listOfChrPositions.add(gvcfAnnotationsAndCalls.get(i));
}
}
}
//sort things to make sure we can iterate properly
Collections.sort(listOfChrPositions);
int actualStartPos = 0;
int startIndex = 0;
int actualEndPos = 0;
int endIndex = 0;
boolean addingAtStart = false;
if(listOfChrPositions.size()>0) {
for (int i = 1; i < gvcfAnnotationsAndCalls.size(); i++) {
if (gvcfAnnotationsAndCalls.get(i).getPosition() == listOfChrPositions.get(0).getPosition()) {
if (checkPositionCoverage(gvcfAnnotationsAndCalls.get(i - 1), startSiteFinal)) {
startIndex = i-1;
listOfChrPositions.add(0,gvcfAnnotationsAndCalls.get(startIndex));
addingAtStart = true;
}
break;
}
}
}
//loop through from startSite till the first position in the GVCF export these alleles directly from the ref
int listOfChrPositionsCounter = 0;
ArrayList<Byte> byteList = new ArrayList<>();
if(listOfChrPositions.size()==0) {
//Export a single N for a missing sequence
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
else {
for (int siteCounter = startSite; siteCounter <= lastSite && listOfChrPositionsCounter < listOfChrPositions.size(); siteCounter++) {
GeneralAnnotation ga = listOfChrPositions.get(listOfChrPositionsCounter).getAnnotation();
Set<String> annotationKeys = ga.getAnnotationKeys();
if(addingAtStart) {
if (listOfChrPositionsCounter != 0) {
if (filterBitSet.fastGet(listOfChrPositionsCounter-1)) {
//get the size of the position
if (annotationKeys.contains("END")) {
int lengthOfPosBlock = Integer.parseInt((String) ga.getTextAnnotation("END")[0]) - listOfChrPositions.get(listOfChrPositionsCounter).getPosition();
siteCounter+=lengthOfPosBlock;
}
else{
//check insertion or deletion
String[] variants = listOfChrPositions.get(listOfChrPositionsCounter).getKnownVariants();
siteCounter+=variants[0].length()-1;
}
listOfChrPositionsCounter++;
continue;
}
}
}
else {
if (filterBitSet.fastGet(listOfChrPositionsCounter)) {
if (annotationKeys.contains("END")) {
int lengthOfPosBlock = Integer.parseInt((String) ga.getTextAnnotation("END")[0]) - listOfChrPositions.get(listOfChrPositionsCounter).getPosition();
siteCounter+=lengthOfPosBlock;
}
else{
//check insertion or deletion
String[] variants = listOfChrPositions.get(listOfChrPositionsCounter).getKnownVariants();
siteCounter+=variants[0].length()-1;
}
listOfChrPositionsCounter++;
continue;
}
}
if (maskBitSet.fastGet(listOfChrPositionsCounter)) {
//if true it means we have to mask all of these positions
//Check to see if we have an END anno
}
if (listOfChrPositions.get(listOfChrPositionsCounter).getPosition() <= siteCounter) {
if (annotationKeys.contains("END")) {
//this means we have a block
//Check the call and grab the corresponding allele values
String call = ga.getTextAnnotation("GT")[0];
boolean phased = true;
boolean haploid = false;
if (call.contains("/")) {
phased = false;
} else if (call.contains("|")) {
phased = true;
} else {
haploid = true;
}
String[] callSplit = phased ? call.split("\\|") : call.split("/"); //"|" must be escaped because split() takes a regex
int leftAllele = Integer.parseInt(callSplit[0]);
int rightAllele = leftAllele;
if (!haploid) {
rightAllele = Integer.parseInt(callSplit[1]);
}
int endPoint = Integer.parseInt(ga.getTextAnnotation("END")[0]) - 1;
//check to see if our requested end point is smaller than the region
if(lastSite-1<endPoint) {
endPoint = lastSite-1;
}
byte[] packedBytes = chromPositionMap.get(chrom);
int startSiteShifted = siteCounter-1; //shift to zero base
if (startSiteShifted > packedBytes.length * 2 || endPoint > packedBytes.length * 2) {
throw new IllegalArgumentException("GenomeSequenceBuilder.chromosomeSequence: requested sequence is out of range"); // requested sequence is out of range
}
String[] variants = listOfChrPositions.get(listOfChrPositionsCounter).getKnownVariants();
String leftAlleleString = variants[leftAllele];
//check depth
// String dpFullString = (String) annos.get("DP").toArray()[0];
String dpFullString = ga.getTextAnnotation("DP")[0];
// String minDPString = (String) annos.get("MIN_DP").toArray()[0];
//TODO check to see if we should assume only Ref or call for blocks
if (maskBitSet.fastGet(listOfChrPositionsCounter) || dpFullString.equals("0")) {
//if true we have a masking
for (int i = startSiteShifted; i <= endPoint; i++) {
// regionDepthCount+=0;
// regionZeroCoverageCount++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
} else if (leftAllele == 0) {
//fill in with reference sequence
//pull the sequence from siteCounter till you get to endPoint
//Zrm Apr17 fix for default rules
//check if homozygous or het based on GT calls
int depth = 0;
int minDepth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
for (int i = startSiteShifted; i <= endPoint; i++) {
// regionDepthCount+=depth;
// regionMinDepthCount+=minDepth;
byteList.add((byte) ((i % 2 == 0) ? ((packedBytes[i / 2] & 0xF0) >> 4) : (packedBytes[i / 2] & 0x0F)));
}
} else {
int depth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
for (int i = startSiteShifted; i <= endPoint; i++) {
// regionDepthCount+=depth;
// regionZeroCoverageCount++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
}
//shift up the siteCounter to match the end
siteCounter = endPoint + 1;
} else {
//it is likely a snp
//check the call and grab the correct allele
//siteCounter will increment correctly
String call = ga.getTextAnnotation("GT")[0];
boolean phased = true;
boolean haploid = false;
if (call.contains("/")) {
phased = false;
} else if (call.contains("|")) {
phased = true;
} else {
//haploid
haploid = true;
}
String[] callSplit = phased ? call.split("\\|") : call.split("/"); //"|" must be escaped because split() takes a regex
int leftAllele = Integer.parseInt(callSplit[0]);
int rightAllele = leftAllele;
if (!haploid) {
rightAllele = Integer.parseInt(callSplit[1]);
}
//Get the AD field and split it on commas
String adsFullString = ga.getTextAnnotation("AD")[0];
String dpFullString = ga.getTextAnnotation("DP")[0];
String[] adSplit = adsFullString.split(",");
//Get the known Variants
String[] variants = listOfChrPositions.get(listOfChrPositionsCounter).getKnownVariants();
boolean isHet = calcHet(adSplit);
if (isHet) {
//TODO handle hets correctly for indels and such
String leftAlleleString = variants[0];
for(int alleleCounter = 0; alleleCounter < leftAlleleString.length(); alleleCounter++) {
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
siteCounter+=leftAlleleString.length()-1;
// for(int i = 0; i < leftAlleleString.length();i++) {
// regionHetCount++;
// }
}
//ZRM April17 do calls based on depth only
//This block is not actually used as it has already been handled if it is a het or a homozygous ref(in an allele block)
else if (adSplit[0].equals("1") || adSplit[0].equals("2")) {
if (adSplit[1].equals("0")) {
//export the ref
String leftAlleleString = variants[0];
int depth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
if (maskBitSet.fastGet(listOfChrPositionsCounter)) {
//if true we have to mask it
for (int i = 0; i < leftAlleleString.length() && siteCounter+i < lastSite-1; i++) {
// regionDepthCount+=depth;
// regionNumberHomoRef1Or2Count++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
} else {
for (int i = 0; i < leftAlleleString.length() && siteCounter+i < lastSite-1; i++) {
// regionDepthCount+=depth;
// regionNumberHomoRef1Or2Count++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte(leftAlleleString.charAt(i)));
}
}
}
} else if (adSplit[1].equals("1") || adSplit[1].equals("2")) { //ZRM22 Jun19 ChangeforChristy
// } else if (adSplit[1].equals("0")) {
//call Ns
//export the ref
String leftAlleleString = variants[1];
int depth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
for (int i = 0; i < leftAlleleString.length(); i++) {
// regionDepthCount+=depth;
// regionNumberHomoAlt1Or2Count++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
} else if (Integer.parseInt(adSplit[1]) >= 3) { //ZRM22 Jun19 change for christy
// } else if (Integer.parseInt(adSplit[1]) >= 1) {
int depth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
String leftAlleleString = variants[1];
if (maskBitSet.fastGet(listOfChrPositionsCounter)) {
//if true we have to mask it
for (int i = 0; i < leftAlleleString.length(); i++) {
// regionAltCount++;
// regionNumberHomoAlt3PlusCount++;
// regionDepthCount+=depth;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
} else {
for (int i = 0; i < leftAlleleString.length(); i++) {
// regionAltCount++;
// regionNumberHomoAlt3PlusCount++;
// regionDepthCount+=depth;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte(leftAlleleString.charAt(i)));
}
siteCounter+=variants[0].length()-1;
}
}
else {
//in the case where we have 0 depth for each allele it will fall through all the cases
//let's stick in Ns for the number of reference basepairs; fix the old method header as well.
String leftAlleleString = variants[0];
for (int i = 0; i < leftAlleleString.length(); i++) {
// regionAltCount++;
// regionNumberHomoAlt3PlusCount++;
// regionDepthCount+=depth;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte(leftAlleleString.charAt(i)));
}
siteCounter+=variants[0].length()-1;
}
}
listOfChrPositionsCounter++;
} else {
//grab the reference as we don't have a GVCF record for the requested site
//Should this be the breaking point of the sequence??
//TODO should we mark these with Ns?
//With 0 coverage we want to export an N
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
// regionDepthCount+=0;
// regionZeroCoverageCount++;
}
}
}
// regionTotalExportedBPCount = byteList.size();
byte[] fullBytes = new byte[byteList.size()];
for(int i = 0; i < fullBytes.length; i++) {
fullBytes[i] = byteList.get(i);
}
return fullBytes;
}
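// Half-byte decoding, as used above: the reference is packed two 4-bit allele codes per byte,
// so 0-based position i lives in packedBytes[i / 2], in the high nibble for even i and the low
// nibble for odd i. For example, if packedBytes[0] == 0x12, position 0 decodes to 0x1 and
// position 1 decodes to 0x2 (NucleotideAlignmentConstants allele codes).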
@Override
//Code to get the Sequence in a string as well as the various stats we have collected.
// This should allow for safe multithreading for the stats.
public HashMap<String, String> chromosomeSequenceAndStats(Chromosome chrom, int startSite, int lastSite) {
long queryTimeStart = System.currentTimeMillis();
int regionTotalReferenceBPCount = 0;
int regionTotalExportedBPCount = 0;
int regionHetCount = 0;
int regionAltCount = 0;
int regionDepthCount = 0;
int regionGQCount = 0;
int regionMinDepthCount = 0;
int regionZeroCoverageCount = 0;
int regionNumberHomoRef1Or2Count = 0;
int regionNumberHomoRef3PlusCount = 0;
int regionNumberHomoAlt1Or2Count = 0;
int regionNumberHomoAlt3PlusCount = 0;
final int startSiteFinal = startSite;
final int lastSiteFinal = lastSite;
//add one as we grab the last site as well.
regionTotalReferenceBPCount = lastSite - startSite+1;
//zrm22 new for getting gvcf records.
//Basically the loop now changes to loop through the positionList to check if the allele should be the ref or not
// gvcfAnnotationsAndCalls.chromosomeSiteCount(chrom);
long getCurrentSubsetPositionListTimeStart = System.currentTimeMillis();
//use the index to get the currentList of positions
int[] currentIndex = positionIndex.get(chrom);
int startPositionListIndex = currentIndex[startSite/indexScaleFactor];
int endPositionListIndex = currentIndex[lastSite/indexScaleFactor+1];
int startPositionListCounter = 1;
while(startPositionListIndex==-1) {
startPositionListIndex = currentIndex[startSite/indexScaleFactor-startPositionListCounter];
startPositionListCounter++;
}
ArrayList<Position> listOfChrPositions = new ArrayList<>();
int actualStartIndex = Integer.MAX_VALUE;
if(endPositionListIndex==-1) {
for(int i = startPositionListIndex; i < startPositionListIndex+indexScaleFactor && i < gvcfAnnotationsAndCalls.size(); i++) {
if(gvcfAnnotationsAndCalls.get(i).getPosition()>=startSiteFinal && gvcfAnnotationsAndCalls.get(i).getPosition()<=lastSiteFinal) {
if(i < actualStartIndex) {
actualStartIndex = i;
}
listOfChrPositions.add(gvcfAnnotationsAndCalls.get(i));
}
}
}
else {
for (int i = startPositionListIndex; i <= endPositionListIndex; i++) {
if (gvcfAnnotationsAndCalls.get(i).getPosition() >= startSiteFinal && gvcfAnnotationsAndCalls.get(i).getPosition() <= lastSiteFinal) {
if(i < actualStartIndex) {
actualStartIndex = i;
}
listOfChrPositions.add(gvcfAnnotationsAndCalls.get(i));
}
}
}
long totalTimeToSubsetPosList = System.currentTimeMillis() - getCurrentSubsetPositionListTimeStart;
//sort things to make sure we can iterate properly
long sortPosTimeStart = System.currentTimeMillis();
Collections.sort(listOfChrPositions);
long totalTimeToSortPosList = System.currentTimeMillis() - sortPosTimeStart;
int actualStartPos = 0;
int startIndex = 0;
int actualEndPos = 0;
int endIndex = 0;
long addAPositionToStartTime = System.currentTimeMillis();
boolean addingAtStart = false;
if(listOfChrPositions.size()>0 && startPositionListIndex!=0) {
if(checkPositionCoverage(gvcfAnnotationsAndCalls.get(actualStartIndex-1),startSiteFinal)) {
startIndex = actualStartIndex -1;
listOfChrPositions.add(0,gvcfAnnotationsAndCalls.get(startIndex));
addingAtStart = true;
}
// for (int i = 1; i < gvcfAnnotationsAndCalls.size(); i++) {
// for (int i = startPositionListIndex; i < gvcfAnnotationsAndCalls.size(); i++) {
// if (gvcfAnnotationsAndCalls.get(i).getPosition() == listOfChrPositions.get(0).getPosition()) {
// if (checkPositionCoverage(gvcfAnnotationsAndCalls.get(i - 1), startSiteFinal)) {
// startIndex = i-1;
// listOfChrPositions.add(0,gvcfAnnotationsAndCalls.get(startIndex));
// addingAtStart = true;
// }
// break;
// }
// }
}
long totalTimeToAddStartPosition = System.currentTimeMillis() - addAPositionToStartTime;
//loop through from startSite till the first position in the GVCF export these alleles directly from the ref
int listOfChrPositionsCounter = 0;
ArrayList<Byte> byteList = new ArrayList<>();
if(listOfChrPositions.size()==0) {
//Export a single N for a missing sequence
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
else {
for (int siteCounter = startSite; siteCounter <= lastSite && listOfChrPositionsCounter < listOfChrPositions.size(); siteCounter++) {
GeneralAnnotation ga = listOfChrPositions.get(listOfChrPositionsCounter).getAnnotation();
Set<String> annotationKeys = ga.getAnnotationKeys();
if(addingAtStart) {
if (listOfChrPositionsCounter != 0) {
if (filterBitSet.fastGet(listOfChrPositionsCounter-1)) {
//get the size of the position
if (annotationKeys.contains("END")) {
int lengthOfPosBlock = Integer.parseInt((String) ga.getTextAnnotation("END")[0]) - listOfChrPositions.get(listOfChrPositionsCounter).getPosition();
siteCounter+=lengthOfPosBlock;
}
else{
//check insertion or deletion
String[] variants = listOfChrPositions.get(listOfChrPositionsCounter).getKnownVariants();
siteCounter+=variants[0].length()-1;
}
listOfChrPositionsCounter++;
continue;
}
}
}
else {
if (filterBitSet.fastGet(listOfChrPositionsCounter)) {
if (annotationKeys.contains("END")) {
int lengthOfPosBlock = Integer.parseInt((String) ga.getTextAnnotation("END")[0]) - listOfChrPositions.get(listOfChrPositionsCounter).getPosition();
siteCounter+=lengthOfPosBlock;
}
else{
//check insertion or deletion
String[] variants = listOfChrPositions.get(listOfChrPositionsCounter).getKnownVariants();
siteCounter+=variants[0].length()-1;
}
listOfChrPositionsCounter++;
continue;
}
}
if (maskBitSet.fastGet(listOfChrPositionsCounter)) {
//if true it means we have to mask all of these positions
//Check to see if we have an END anno
}
if (listOfChrPositions.get(listOfChrPositionsCounter).getPosition() <= siteCounter) {
if (annotationKeys.contains("END")) {
//this means we have a block
//Check the call and grab the corresponding allele values
String call = ga.getTextAnnotation("GT")[0];
boolean phased = true;
boolean haploid = false;
if (call.contains("/")) {
phased = false;
} else if (call.contains("|")) {
phased = true;
} else {
haploid = true;
}
String[] callSplit = phased ? call.split("\\|") : call.split("/"); //"|" must be escaped because split() takes a regex
int leftAllele = Integer.parseInt(callSplit[0]);
int rightAllele = leftAllele;
if (!haploid) {
rightAllele = Integer.parseInt(callSplit[1]);
}
int endPoint = Integer.parseInt(ga.getTextAnnotation("END")[0]) - 1;
//check to see if our requested end point is smaller than the region
if(lastSite-1<endPoint) {
endPoint = lastSite-1;
}
byte[] packedBytes = chromPositionMap.get(chrom);
int startSiteShifted = siteCounter-1; //shift to zero base
if (startSiteShifted > packedBytes.length * 2 || endPoint > packedBytes.length * 2) {
throw new IllegalArgumentException("GenomeSequenceBuilder.chromosomeSequence: requested sequence is out of range"); // requested sequence is out of range
}
String[] variants = listOfChrPositions.get(listOfChrPositionsCounter).getKnownVariants();
String leftAlleleString = variants[leftAllele];
//check depth
// String dpFullString = (String) annos.get("DP").toArray()[0];
String dpFullString = ga.getTextAnnotation("DP")[0];
// String minDPString = (String) annos.get("MIN_DP").toArray()[0];
//TODO check to see if we should assume only Ref or call for blocks
if (maskBitSet.fastGet(listOfChrPositionsCounter) || dpFullString.equals("0")) {
//if true we have a masking
for (int i = startSiteShifted; i <= endPoint; i++) {
regionDepthCount+=0;
regionZeroCoverageCount++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
} else if (leftAllele == 0) {
//fill in with reference sequence
//pull the sequence from siteCounter till you get to endPoint
//Zrm Apr17 fix for default rules
//check if homozygous or het based on GT calls
int depth = 0;
int minDepth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
for (int i = startSiteShifted; i <= endPoint; i++) {
regionDepthCount+=depth;
regionMinDepthCount+=minDepth;
byteList.add((byte) ((i % 2 == 0) ? ((packedBytes[i / 2] & 0xF0) >> 4) : (packedBytes[i / 2] & 0x0F)));
}
} else {
int depth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
for (int i = startSiteShifted; i <= endPoint; i++) {
regionDepthCount+=depth;
regionZeroCoverageCount++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
}
//shift up the siteCounter to match the end
siteCounter = endPoint + 1;
} else {
//it is likely a snp
//check the call and grab the correct allele
//siteCounter will increment correctly
String call = ga.getTextAnnotation("GT")[0];
boolean phased = true;
boolean haploid = false;
if (call.contains("/")) {
phased = false;
} else if (call.contains("|")) {
phased = true;
} else {
//haploid
haploid = true;
}
String[] callSplit = phased ? call.split("\\|") : call.split("/"); //"|" must be escaped because split() takes a regex
int leftAllele = Integer.parseInt(callSplit[0]);
int rightAllele = leftAllele;
if (!haploid) {
rightAllele = Integer.parseInt(callSplit[1]);
}
//Get the AD field and split it on commas
String adsFullString = ga.getTextAnnotation("AD")[0];
String dpFullString = ga.getTextAnnotation("DP")[0];
String[] adSplit = adsFullString.split(",");
//Get the known Variants
String[] variants = listOfChrPositions.get(listOfChrPositionsCounter).getKnownVariants();
boolean isHet = calcHet(adSplit);
if (isHet) {
//TODO handle hets correctly for indels and such
String leftAlleleString = variants[0];
siteCounter+=leftAlleleString.length()-1;
for(int i = 0; i < leftAlleleString.length();i++) {
regionHetCount++;
}
}
//ZRM April17 do calls based on depth only
else if (adSplit[0].equals("1") || adSplit[0].equals("2")) {
if (adSplit[1].equals("0")) {
//export the ref
String leftAlleleString = variants[0];
int depth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
if (maskBitSet.fastGet(listOfChrPositionsCounter)) {
//if true we have to mask it
for (int i = 0; i < leftAlleleString.length() && siteCounter+i < lastSite-1; i++) {
regionDepthCount+=depth;
regionNumberHomoRef1Or2Count++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
} else {
for (int i = 0; i < leftAlleleString.length() && siteCounter+i < lastSite-1; i++) {
regionDepthCount+=depth;
regionNumberHomoRef1Or2Count++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte(leftAlleleString.charAt(i)));
}
}
}
} else if (adSplit[1].equals("1") || adSplit[1].equals("2")) {
//call Ns
//export the ref
String leftAlleleString = variants[1];
int depth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
for (int i = 0; i < leftAlleleString.length(); i++) {
regionDepthCount+=depth;
regionNumberHomoAlt1Or2Count++;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
} else if (Integer.parseInt(adSplit[1]) >= 3) {
int depth = 0;
if(!dpFullString.equals("")) {
depth = Integer.parseInt(dpFullString);
}
String leftAlleleString = variants[1];
if (maskBitSet.fastGet(listOfChrPositionsCounter)) {
//if true we have to mask it
for (int i = 0; i < leftAlleleString.length(); i++) {
regionAltCount++;
regionNumberHomoAlt3PlusCount++;
regionDepthCount+=depth;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
}
} else {
for (int i = 0; i < leftAlleleString.length(); i++) {
regionAltCount++;
regionNumberHomoAlt3PlusCount++;
regionDepthCount+=depth;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte(leftAlleleString.charAt(i)));
}
siteCounter+=variants[0].length()-1;
}
}
else {
//in the case where we have 0 depth for each allele it will fall through all the cases
//let's stick in Ns for the number of reference basepairs; fix the old method header as well.
String leftAlleleString = variants[0];
for (int i = 0; i < leftAlleleString.length(); i++) {
// regionAltCount++;
// regionNumberHomoAlt3PlusCount++;
// regionDepthCount+=depth;
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte(leftAlleleString.charAt(i)));
}
siteCounter+=variants[0].length()-1;
}
}
listOfChrPositionsCounter++;
} else {
//grab the reference as we don't have a GVCF record for the requested site
//Should this be the breaking point of the sequence??
//TODO should we mark these with Ns?
//With 0 coverage we want to export an N
byteList.add(NucleotideAlignmentConstants.getNucleotideAlleleByte("N"));
regionDepthCount+=0;
regionZeroCoverageCount++;
}
}
}
regionTotalExportedBPCount = byteList.size();
byte[] fullBytes = new byte[byteList.size()];
for(int i = 0; i < fullBytes.length; i++) {
fullBytes[i] = byteList.get(i);
}
//Compile the HashMap of the sequence and stats to return
HashMap<String, String> sequenceAndStats = new HashMap<>();
sequenceAndStats.put("Sequence",NucleotideAlignmentConstants.nucleotideBytetoString(fullBytes));
sequenceAndStats.put("RefSize",""+regionTotalReferenceBPCount);
sequenceAndStats.put("Size", ""+regionTotalExportedBPCount);
sequenceAndStats.put("HetCount",""+regionHetCount);
sequenceAndStats.put("AltCount",""+regionAltCount);
sequenceAndStats.put("Depth",""+regionDepthCount);
sequenceAndStats.put("GQ",""+regionGQCount);
sequenceAndStats.put("Min_Depth",""+regionMinDepthCount);
sequenceAndStats.put("ZeroCoverageCount",""+regionZeroCoverageCount);
sequenceAndStats.put("HomoRefCount",""+regionNumberHomoRef1Or2Count);
sequenceAndStats.put("HomoAltLowDepthCount",""+regionNumberHomoAlt1Or2Count);
sequenceAndStats.put("HomoAltHighDepthCount",""+regionNumberHomoAlt3PlusCount);
long queryTimeEnd = System.currentTimeMillis();
// System.out.println("Current Query Time:"+(queryTimeEnd - queryTimeStart)+"ms\n" +
// "Time Taken to subset Pos List"+totalTimeToSubsetPosList+"ms\n" +
// "Time Taken to sort Pos List"+totalTimeToSortPosList+"ms\n" +
// "Time Taken to add a Position to start of List:"+totalTimeToAddStartPosition+"ms\n");
// System.out.println("Current Query Time:"+(queryTimeEnd - queryTimeStart)+"ms");
return sequenceAndStats;
}
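// Usage sketch (hypothetical region; assumes a GVCFGenomeSequence handle to this instance):
// the returned map carries the exported sequence plus the region counters as Strings.
//   HashMap<String, String> result = gvcfSeq.chromosomeSequenceAndStats(new Chromosome("1"), 1, 5000);
//   String fasta = result.get("Sequence");
//   int depth = Integer.parseInt(result.get("Depth"));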
@Override
//TODO add in functionality to handle GVCF annotations
public byte[] genomeSequence(long startSite, long lastSite) {
if(lastSite-startSite>Integer.MAX_VALUE) throw
new IllegalArgumentException("Less than "+Integer.MAX_VALUE+" sites must be requested at a time");
byte[] fullBytes=new byte[(int)(lastSite-startSite+1)];
long currentSiteToGet=startSite;
while(currentSiteToGet<lastSite) {
Map.Entry<Range<Long>,Chromosome> rangeChromEntry=wholeGenomeIndexMap.getEntry(currentSiteToGet);
int chrStart=(int)(currentSiteToGet-rangeChromEntry.getKey().lowerEndpoint());
int chrLast=(int)Math.min(rangeChromEntry.getKey().upperEndpoint()-rangeChromEntry.getKey().lowerEndpoint(),lastSite-rangeChromEntry.getKey().lowerEndpoint());
byte[] chromoSeq=chromosomeSequence(rangeChromEntry.getValue(), chrStart+1,chrLast+1); //+1 for 0 based genome, 1 based chromosomes
System.arraycopy(chromoSeq,0,fullBytes,(int)(currentSiteToGet-startSite),chromoSeq.length);
currentSiteToGet+=chromoSeq.length;
}
return fullBytes;
}
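// Coordinate sketch: genomeSequence takes 0-based offsets over the chromosomes concatenated in
// sorted order, e.g. with a first chromosome of length 1000, offsets 995-1004 map to 1-based
// sites 996-1000 of that chromosome and sites 1-5 of the next one (hence the +1 shift in the
// per-chromosome calls above).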
@Override
public int chromosomeSize(Chromosome chromosome) {
return chromLengthLookup.get(chromosome);
}
@Override
public long genomeSize() {
return genomeSize;
}
@Override
public int numberOfChromosomes() {
return chromPositionMap.size();
}
@Override
//TODO see if we need to fix this for GVCF annotations
public Map<Long, Tuple<Chromosome, Integer>> fullRefCoordinateToChromCoordinate(ArrayList<Long> coordinates) {
// Returns 0-based value from Chromosome array (values are stored as 0-based)
Map<Long, Tuple<Chromosome, Integer>> mappedCoordinates = new ConcurrentHashMap<>();
coordinates.stream().parallel().forEach(coordinate -> {
Map.Entry<Range<Long>,Chromosome> rangeChromEntry=wholeGenomeIndexMap.getEntry(coordinate);
Chromosome curChrom = rangeChromEntry.getValue();
long chromCoordinate = coordinate - rangeChromEntry.getKey().lowerEndpoint();
Tuple<Chromosome, Integer> chromWithCoordinate = new Tuple<>(curChrom, (int)chromCoordinate);
mappedCoordinates.put(coordinate, chromWithCoordinate);
});
return mappedCoordinates;
}
private boolean checkPositionCoverage(Position pos, int site) {
// SetMultimap<String, String> annos = pos.getAnnotation().getAnnotationAsMap();
GeneralAnnotation ga = pos.getAnnotation();
// if(annos.containsKey("END")) {
// int endPoint = Integer.parseInt((String)annos.get("END").toArray()[0]);
// if(pos.getPosition()<=site && site <= endPoint) {
// return true;
// }
// else {
// return false;
// }
//
// }
String[] endArray = ga.getTextAnnotation("END");
// if(ga.getAnnotationKeys().contains("END")) {
if(endArray.length!=0) {
// int endPoint = Integer.parseInt(ga.getTextAnnotation("END")[0]);
int endPoint = Integer.parseInt(endArray[0]);
if(pos.getPosition()<=site && site <= endPoint) {
return true;
}
else {
return false;
}
}
else if(pos.getPosition()==site) {
return true;
}
else {
String[] alleles = pos.getKnownVariants();
//check to see if we have a deletion and the reference covers the whole
if(alleles[0].length()+pos.getPosition()>=site) {
return true;
}
else {
return false;
}
}
}
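// Worked example: for a reference block at position 1001 with END=1100, any site in
// [1001, 1100] counts as covered; for a deletion record with REF "ACG" at position 2000 and no
// END annotation, any site up to 2003 (position + reference allele length) counts as covered.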
public Map<Chromosome, byte[]> getChrPosMap() {
return chromPositionMap;
}
public HashMap<Chromosome, ArrayList<ArrayList<Integer>>> getConsecutiveRegions() {
HashMap<Chromosome, ArrayList<ArrayList<Integer>>> consecRegions = new HashMap<>();
Set<Chromosome> chromosomeSet = chromosomes();
//Loop through each chromosome add to a Map
HashMap<Chromosome, RangeSet<Integer>> rangeMaps = new HashMap<>();
for(Chromosome chr : chromosomeSet) {
rangeMaps.put(chr,TreeRangeSet.create());
consecRegions.put(chr,new ArrayList<>());
}
for(int i = 0 ; i < gvcfAnnotationsAndCalls.size(); i++) {
if(filterBitSet.fastGet(i)) {
//if we have a position to filter out, we should probably break the fasta sequence
//to do this, just do not add in the range... RangeMap will handle it for you
continue;
}
Position currentPosition = gvcfAnnotationsAndCalls.get(i);
SetMultimap<String, String> annos = currentPosition.getAnnotation().getAnnotationAsMap();
if(annos.containsKey("END")) {
int endPoint = Integer.parseInt((String)annos.get("END").toArray()[0]);
rangeMaps.get(currentPosition.getChromosome()).add(Range.closed(currentPosition.getPosition(),endPoint+1));
}
else {
//check to make sure that the reference allele is more than one allele(A deletion)
String[] variants = currentPosition.getKnownVariants();
//TODO handle hets correctly for indels and such
String refAlleleString = variants[0];
if(refAlleleString.length()>1) {
//we have a deletion or an insertion+deletion as such our END point will need to take into account the size of the deletion
rangeMaps.get(currentPosition.getChromosome()).add(Range.closed(currentPosition.getPosition(),currentPosition.getPosition()+refAlleleString.length()));
}
else {
//This will cause the index of the next line in the GVCF file to be non consecutive with the previous
rangeMaps.get(currentPosition.getChromosome()).add(Range.closed(currentPosition.getPosition(), currentPosition.getPosition() + 1));
}
}
}
for(Chromosome chr : chromosomeSet) {
Set<Range<Integer>> rangeSet = rangeMaps.get(chr).asRanges();
for(Range<Integer> currentRange : rangeSet) {
ArrayList<Integer> currentList = new ArrayList<>();
currentList.add(currentRange.lowerEndpoint());
//We need to subtract 1 point so it will be [inclusive,inclusive] instead of [inclusive,exclusive)
currentList.add(currentRange.upperEndpoint()-1);
consecRegions.get(chr).add(currentList);
}
}
return consecRegions;
}
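// Output sketch: each chromosome maps to [startSite, endSite] pairs (1-based, inclusive) of
// GVCF-covered runs. For example, adjacent reference blocks 1001-1100, 1101-1200 and 1201-1300
// coalesce into a single [1001, 1300] entry, but if the middle record is filtered the result
// splits into [1001, 1100] and [1201, 1300].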
//TODO move this to a different class
public void writeFASTA(String fileName){
try {
BufferedWriter writer = new BufferedWriter(new FileWriter(fileName));
HashMap<Chromosome, ArrayList<ArrayList<Integer>>> consecutiveRegions = getConsecutiveRegions();
Set<Chromosome> chromosomes = chromosomes();
for(Chromosome chr : chromosomes) {
for(ArrayList<Integer> bounds : consecutiveRegions.get(chr)) {
writer.write(">Chr_"+chr.getChromosomeNumber()+"_StartSite_"+bounds.get(0)+"_EndSite_"+bounds.get(1));
writer.newLine();
writer.write(""+NucleotideAlignmentConstants.nucleotideBytetoString(chromosomeSequence(chr,bounds.get(0),bounds.get(1))));
writer.newLine();
}
}
writer.close();
}
catch(Exception e){
e.printStackTrace();
}
}
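// Output sketch (hypothetical coordinates): each covered run becomes its own FASTA entry, e.g.
//   >Chr_1_StartSite_1001_EndSite_1200
// followed by the exported bases for that run, so filtered or uncovered stretches start a new
// entry instead of being bridged.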
public BitSet getMaskBitSet() {
return maskBitSet;
}
public void setMaskBitSet(BitSet newMaskBitSet) {
maskBitSet = newMaskBitSet;
}
public BitSet getFilterBitSet() {
return filterBitSet;
}
public void setFilterBitSet(BitSet newFilterBitSet) {
filterBitSet = newFilterBitSet;
}
public void flipMaskBit(int index) {
maskBitSet.fastFlip(index);
}
public void flipFilterBit(int index) {
filterBitSet.fastFlip(index);
}
public PositionList getGVCFPositions() {
return gvcfAnnotationsAndCalls;
}
private boolean calcHet(String[] adSplit) {
boolean isHet = false;
int refDepth = Integer.parseInt(adSplit[0]);
int altDepth = Integer.parseInt(adSplit[1]);
if(refDepth>0 && altDepth>0) {
isHet = true;
}
return isHet;
}
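// Worked example: an AD field of "3,2" (3 reference reads, 2 alternate reads) is scored as
// heterozygous, while "7,0" or "0,5" is not; only the first two comma-separated depths are used.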
@Override
public void resetCounters() {
// regionTotalReferenceBPCount = 0;
// regionTotalExportedBPCount = 0;
// regionHetCount = 0;
// regionAltCount = 0;
// regionDepthCount = 0;
// regionGQCount = 0;
// regionMinDepthCount = 0;
// regionZeroCoverageCount = 0;
// regionNumberHomoRef1Or2Count = 0;
// regionNumberHomoAlt1Or2Count = 0;
// regionNumberHomoAlt3PlusCount = 0;
}
@Override
public HashMap<String, Integer> getPreviousRegionStats() {
HashMap<String, Integer> stats = new HashMap<>();
// stats.put("RefSize",regionTotalReferenceBPCount);
// stats.put("Size", regionTotalExportedBPCount);
// stats.put("HetCount",regionHetCount);
// stats.put("AltCount",regionAltCount);
// stats.put("Depth",regionDepthCount);
// stats.put("GQ",regionGQCount);
// stats.put("Min_Depth",regionMinDepthCount);
// stats.put("ZeroCoverageCount",regionZeroCoverageCount);
// stats.put("HomoRefCount",regionNumberHomoRef1Or2Count);
// stats.put("HomoAltLowDepthCount",regionNumberHomoAlt1Or2Count);
// stats.put("HomoAltHighDepthCount",regionNumberHomoAlt3PlusCount);
return stats;
}
@Override
public byte genotype(Chromosome chrom, int position) {
// TODO Auto-generated method stub
return 0;
}
@Override
public byte genotype(Chromosome chrom, Position positionObject) {
// TODO Auto-generated method stub
return 0;
}
@Override
public String genotypeAsString(Chromosome chrom, int position) {
// TODO Auto-generated method stub
return null;
}
@Override
public String genotypeAsString(Chromosome chrom, Position positionObject) {
// TODO Auto-generated method stub
return null;
}
@Override
public String genotypeAsString(Chromosome chrom, int startSite, int endSite) {
// TODO Auto-generated method stub
return null;
}
}