net.maizegenetics.pangenome.pipelineTests.SimpleGVCFReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome.pipelineTests;
import com.google.common.collect.Range;
import com.google.common.collect.RangeMap;
import com.google.common.collect.TreeRangeMap;
import net.maizegenetics.dna.map.*;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.util.Tuple;
import net.maizegenetics.util.Utils;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
/**
* Created by edbuckler on 6/22/17.
*/
public class SimpleGVCFReader {
RangeMap, String> callRangeMap= TreeRangeMap.create();
GenomeSequence refGenomeSequence;
public SimpleGVCFReader(String gvcfFileName, String refGenomeFile, int maxRowsToProcess, int minDepth) {
try {
BufferedReader gvcfFileReader = Utils.getBufferedReader(gvcfFileName);
if(refGenomeFile!=null) {
refGenomeSequence= GenomeSequenceBuilder.instance(refGenomeFile);
}
//Loop through the headers
String currentLine = "";
while (!(gvcfFileReader.readLine()).startsWith("#CHROM")) {
}
int counter=0;
while ((currentLine = gvcfFileReader.readLine()) != null) {
//System.out.println(currentLine);
String[] elements=currentLine.split("\t");
//Chromosome chromosome=new Chromosome(elements[0]);
Tuple position= new Tuple<>(Integer.parseInt(elements[0]), Integer.parseInt(elements[1]));
Tuple endPosition=position;
String call;
String[] elementsParsed = elements[9].split(":");
int depth = 0;
if(elementsParsed.length == 1) {
depth = minDepth+1;
}
else if(elementsParsed.length == 2) {
depth = Integer.parseInt(elementsParsed[1]);
}
else {
depth = Integer.parseInt(elements[9].split(":")[2]);
}
// int depth = elementsParsed.length==2?Integer.parseInt(elementsParsed[1]):Integer.parseInt(elements[9].split(":")[2]);//Temporary hack to make it work with VariantContexts in PHG
// int depth = Integer.parseInt(elements[9].split(":")[2]);
if(depth(Integer.parseInt(elements[0]), end);
call="REFRANGE";
} else {
if(elements[9].startsWith(".")) {call = "N";}
else if(elements[9].startsWith("0")) {call=elements[3];} //reference allele call
else {call=elements[4].split(",")[0];} //alternate allele call
}
callRangeMap.put(Range.closed(position,endPosition),call);
counter++;
if(counter>maxRowsToProcess) break;
}
System.out.println("SimpleGVCFReader.SimpleGVCFReader");
System.out.println("gvcfFileName = [" + gvcfFileName + "], rowsProcessed = [" + counter + "]");
System.out.println(callRangeMap.span().toString());
} catch(IOException e) {
e.printStackTrace();
}
}
public String genotype(Position position) {
return genotype(position.getChromosome().getChromosomeNumber(),position.getPosition());
}
public String genotype(int chromosome, int positionInChromosome) {
String result=callRangeMap.get(new Tuple<>(chromosome,positionInChromosome));
if(result==null) return "N";
if(result.equals("REFRANGE")) return reference(chromosome,positionInChromosome);
return result;
}
public boolean inReferenceRange(Position position) {
return inReferenceRange(position.getChromosome().getChromosomeNumber(),position.getPosition());
}
public boolean inReferenceRange(int chromosome, int positionInChromosome) {
String resultTuple=callRangeMap.get(new Tuple<>(chromosome,positionInChromosome));
if(resultTuple==null) return false;
return resultTuple.equals("REFRANGE");
}
public String reference(Position position) {
byte call=refGenomeSequence.chromosomeSequence(position.getChromosome(),position.getPosition(),position.getPosition())[0];
return NucleotideAlignmentConstants.getHaplotypeNucleotide(call);
}
public String reference(int chromosome, int positionInChromosome) {
return reference(new GeneralPosition.Builder(new Chromosome(""+chromosome), positionInChromosome).build());
}
public RangeMap, String> getSubMap(Range> subRange) {
return callRangeMap.subRangeMap(subRange);
}
}