net.maizegenetics.pangenome.hapcollapse.GVCFUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome.hapcollapse;
import htsjdk.variant.variantcontext.*;
import org.apache.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
/**
* Created by zrm22 on 9/6/17.
*/
public class GVCFUtils {
public static List convertVCFToGVCF(List vcfVariantContexts) {
List gvcfVariants = new ArrayList<>();
//need to loop through the list of Variant contexts from the VCF
//Keep a list of consecutive reference variants
List tempConsecutiveList = new ArrayList<>();
for(VariantContext currentVC : vcfVariantContexts) {
boolean isRef = isReferenceRecord(currentVC);
//check the tempList
if(tempConsecutiveList.size()==0) {
if(isRef) {
//add the record to the tempList
tempConsecutiveList.add(currentVC);
}
else {
//add the record to gvcfVariants
gvcfVariants.add(currentVC);
}
continue;
}
//When either a Variant is found or we have a missing record
VariantContext previousVC = tempConsecutiveList.get(tempConsecutiveList.size()-1);
if(isRef && isPreviousConsecutive(previousVC,currentVC)) {
//add to the tempList
tempConsecutiveList.add(currentVC);
}
else {
//Merge the Variants in the consecutiveList into one VariantContext and clear out the list
VariantContext mergedVariant = mergeRecords(tempConsecutiveList);
//clear the list
tempConsecutiveList = new ArrayList<>();
//Add the new VariantContext to the end of the gvcfVariants List
gvcfVariants.add(mergedVariant);
if(isRef) {
tempConsecutiveList.add(currentVC);
}
else {
gvcfVariants.add(removeExtraAnnotations(currentVC,currentVC.getEnd(),currentVC.getGenotype(0).getDP()));
}
}
}
if(tempConsecutiveList.size() > 0 ) {
//Merge the Variants in the consecutiveList into one VariantContext and clear out the list
VariantContext mergedVariant = mergeRecords(tempConsecutiveList);
//clear the list
tempConsecutiveList = new ArrayList<>();
//Add the new VariantContext to the end of the gvcfVariants List
gvcfVariants.add(mergedVariant);
}
return gvcfVariants;
}
private static boolean isPreviousConsecutive(VariantContext previousVC, VariantContext currentVC) {
//Check that the chromosome matches
if(!previousVC.getContig().equals(currentVC.getContig())) {
return false;
}
//Check the end position of the previous = currentVC.startPos-1
if(previousVC.getEnd() != currentVC.getStart()-1) {
return false;
}
//if it passes these checks, it is consecutiveReference
return true;
}
/**
* Takes a VariantContext record and determines if the
* data is for a reference range block ro a variant.
*
* @param currentVC
* @return true: is a reference record; false: is a variant record
*/
public static boolean isReferenceRecord(VariantContext currentVC) {
//check the allele list
List currentAlleles = currentVC.getAlleles();
//if we have only 1 allele its ref automatically
if(currentAlleles.size()==1) {
return true;
}
//if it has 2 check to see if one is and if so its ref
else if(currentAlleles.size()==2) {
boolean containsNonRef = false;
for(Allele currentAllele : currentAlleles) {
if(currentAllele.getBaseString().equals("")) {
// getBaseString has stripped
containsNonRef = true;
}
}
if(containsNonRef) {
return true;
}
else {
return false;
}
}
//otherwise it is a variant
else {
return false;
}
}
private static VariantContext mergeRecords(List refRecords) {
VariantContext firstVariant = refRecords.get(0);
VariantContext secondVariant = refRecords.get(refRecords.size()-1);
int averagedDepth = (int)refRecords.stream()
.filter(variantContext -> variantContext.hasGenotypes())
.mapToInt(variantContext -> variantContext.getGenotype(0).getDP())
.average()
.getAsDouble();
return removeExtraAnnotations(firstVariant,secondVariant.getEnd(),averagedDepth);
// //Build the genotypes
// GenotypeBuilder gtBuilder = new GenotypeBuilder(firstVariant.getGenotype(0))
// .noAD()
// .noGQ()
// .noPL();
//
//
// VariantContextBuilder vcBuilder = new VariantContextBuilder(firstVariant).stop(secondVariant.getEnd())
// .genotypesNoValidation(GenotypesContext.create(gtBuilder.make()))
// .noID()
// .rmAttribute("DP")
// .rmAttribute("AN");
//
// return vcBuilder.make();
}
private static VariantContext removeExtraAnnotations(VariantContext currentVariantContext, int endPosition, int averageDepth) {
//Build the genotypes
GenotypeBuilder gtBuilder = new GenotypeBuilder(currentVariantContext.getGenotype(0))
// .noAD()
.noGQ()
.noPL()
.DP(averageDepth);
//TODO merge the DP
VariantContextBuilder vcBuilder = new VariantContextBuilder(currentVariantContext)
.stop(endPosition)
.genotypesNoValidation(GenotypesContext.create(gtBuilder.make()))
.noID()
// .rmAttribute("DP")
.rmAttribute("AN")
.rmAttribute("AC")
.rmAttribute("AF")
.rmAttribute("FS")
.rmAttribute("MLEAC")
.rmAttribute("MLEAF")
.rmAttribute("MQ")
.rmAttribute("QD")
.rmAttribute("SOR")
.attribute("DP",averageDepth);
return vcBuilder.make();
}
}