All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.hapcollapse.GVCFUtils Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome.hapcollapse;

import htsjdk.variant.variantcontext.*;
import org.apache.log4j.Logger;

import java.util.ArrayList;
import java.util.List;


/**
 * Created by zrm22 on 9/6/17.
 */
public class GVCFUtils {
    public static List convertVCFToGVCF(List vcfVariantContexts) {
        List gvcfVariants = new ArrayList<>();
        //need to loop through the list of Variant contexts from the VCF
        //Keep a list of consecutive reference variants
        List tempConsecutiveList = new ArrayList<>();
        for(VariantContext currentVC : vcfVariantContexts) {
            boolean isRef = isReferenceRecord(currentVC);
            //check the tempList
            if(tempConsecutiveList.size()==0) {
                if(isRef) {
                    //add the record to the tempList
                    tempConsecutiveList.add(currentVC);
                }
                else {
                    //add the record to gvcfVariants
                    gvcfVariants.add(currentVC);
                }
                continue;
            }


            //When either a Variant is found or we have a missing record
            VariantContext previousVC = tempConsecutiveList.get(tempConsecutiveList.size()-1);

            if(isRef && isPreviousConsecutive(previousVC,currentVC)) {
                //add to the tempList
                tempConsecutiveList.add(currentVC);
            }
            else {
                //Merge the Variants in the consecutiveList into one VariantContext and clear out the list
                VariantContext mergedVariant = mergeRecords(tempConsecutiveList);
                //clear the list
                tempConsecutiveList = new ArrayList<>();
                //Add the new VariantContext to the end of the gvcfVariants List
                gvcfVariants.add(mergedVariant);
                if(isRef) {
                    tempConsecutiveList.add(currentVC);
                }
                else {
                    gvcfVariants.add(removeExtraAnnotations(currentVC,currentVC.getEnd(),currentVC.getGenotype(0).getDP()));
                }
            }

        }

        if(tempConsecutiveList.size() > 0 ) {
            //Merge the Variants in the consecutiveList into one VariantContext and clear out the list
            VariantContext mergedVariant = mergeRecords(tempConsecutiveList);
            //clear the list
            tempConsecutiveList = new ArrayList<>();
            //Add the new VariantContext to the end of the gvcfVariants List
            gvcfVariants.add(mergedVariant);
        }
        return gvcfVariants;
    }

    private static boolean isPreviousConsecutive(VariantContext previousVC, VariantContext currentVC) {
        //Check that the chromosome matches
        if(!previousVC.getContig().equals(currentVC.getContig())) {
            return false;
        }
        //Check the end position of the previous < currentVC.startPos-1 if so it is not overlapping
        if(previousVC.getEnd() < currentVC.getStart()-1) {
            return false;
        }

        //if it passes these checks, it is consecutiveReference
        //if the previousVC.end is greater than currentVC.start-1 then it is overlapping/consecutive
        return true;
    }

    /**
     * Takes a VariantContext record and determines if the 
     * data is for a reference range block ro a variant.
     * 
     * @param currentVC
     * @return true: is a reference record; false: is a variant record
     */
    public static boolean isReferenceRecord(VariantContext currentVC) {
        //check the allele list
        List currentAlleles = currentVC.getAlleles();
        //if we have only 1 allele its ref automatically
        if(currentAlleles.size()==1) {
            return true;
        }
        //if it has 2 check to see if one is  and if so its ref
        else if(currentAlleles.size()==2) {
            boolean containsNonRef = false;
            for(Allele currentAllele : currentAlleles) {
                if(currentAllele.getBaseString().equals("")) {
                    // getBaseString has  stripped
                    containsNonRef = true;
                }
            }

            if(containsNonRef) {
                return true;
            }
            else {
                return false;
            }
        }
        //otherwise it is a variant
        else {
            return false;
        }
    }

    private static VariantContext mergeRecords(List refRecords) {
        VariantContext firstVariant = refRecords.get(0);
        VariantContext secondVariant = refRecords.get(refRecords.size()-1);

        int averagedDepth = (int)refRecords.stream()
                .filter(variantContext -> variantContext.hasGenotypes())
                .mapToInt(variantContext -> variantContext.getGenotype(0).getDP())
                .average()
                .getAsDouble();

        return removeExtraAnnotations(firstVariant,secondVariant.getEnd(),averagedDepth);

//        //Build the genotypes
//        GenotypeBuilder gtBuilder = new GenotypeBuilder(firstVariant.getGenotype(0))
//                .noAD()
//                .noGQ()
//                .noPL();
//
//
//        VariantContextBuilder vcBuilder  = new VariantContextBuilder(firstVariant).stop(secondVariant.getEnd())
//                .genotypesNoValidation(GenotypesContext.create(gtBuilder.make()))
//                .noID()
//                .rmAttribute("DP")
//                .rmAttribute("AN");
//
//        return vcBuilder.make();
    }

    private static VariantContext removeExtraAnnotations(VariantContext currentVariantContext, int endPosition, int averageDepth) {
        //Build the genotypes
        GenotypeBuilder gtBuilder = new GenotypeBuilder(currentVariantContext.getGenotype(0))
//                .noAD()
                .noGQ()
                .noPL()
                .DP(averageDepth);

        //TODO merge the DP
        VariantContextBuilder vcBuilder  = new VariantContextBuilder(currentVariantContext)
                .stop(endPosition)
                .genotypesNoValidation(GenotypesContext.create(gtBuilder.make()))
                .noID()
//                .rmAttribute("DP")
                .rmAttribute("AN")
                .rmAttribute("AC")
                .rmAttribute("AF")
                .rmAttribute("FS")
                .rmAttribute("MLEAC")
                .rmAttribute("MLEAF")
                .rmAttribute("MQ")
                .rmAttribute("QD")
                .rmAttribute("SOR")
                .attribute("DP",averageDepth);

        return vcBuilder.make();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy