All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.processAssemblyGenomes.ResizeRefBlockPlugin Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome.processAssemblyGenomes;

import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFFileReader;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.map.GenomeSequence;
import net.maizegenetics.dna.map.GenomeSequenceBuilder;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.pangenome.hapCalling.HapCallingUtils;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.GeneratePluginCode;
import net.maizegenetics.plugindef.PluginParameter;
import org.apache.log4j.Logger;

import javax.swing.*;
import java.awt.Frame;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class ResizeRefBlockPlugin extends AbstractPlugin {
    private static final Logger myLogger = Logger.getLogger(ResizeRefBlockPlugin.class);

    private PluginParameter myVCFFile = new PluginParameter.Builder("normVCF", null, String.class).guiName("Normalized VCF File").required(true).inFile()
            .description("Normalized VCF file which needs to be corrected").build();

    private PluginParameter myVCFFileCorrected = new PluginParameter.Builder("correctedVCF", null, String.class).guiName("Output VCF File").required(true).outFile()
            .description("Corrected RefBlock VCF file").build();

    private PluginParameter myRef = new PluginParameter.Builder("ref",null, String.class).guiName("Reference").required(true).description("Reference fasta File").inFile().build();

    private PluginParameter myTaxon = new PluginParameter.Builder("taxon",null, String.class).guiName("Taxon Name").required(true).description("Name to be assigned for the taxon").build();



    public ResizeRefBlockPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    public ResizeRefBlockPlugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    @Override
    public DataSet processData(DataSet input) {
        resizeReferenceBlocks(vCFFile(),vCFFileCorrected());
        return null;
    }

    /**
     * Method to resize the Reference blocks contained in a GVCF file.
     *
     * If the Indel now overlaps a Reference Block, we resize the reference block END annotation to be 1 less than the start of the indel
     * If there is a GAP caused by the indel being moved, we resize the next Reference Block to be 1 plus the end of the indel.
     * @param inputFile
     * @param outputFile
     */
    private void resizeReferenceBlocks(String inputFile, String outputFile) {
        try(VCFFileReader inputVCFReader = new VCFFileReader(new File(inputFile),false)) {
            List variants = inputVCFReader.iterator().stream().collect(Collectors.toList());

            GenomeSequence gs = GenomeSequenceBuilder.instance(ref());

            List adjustedVariants = new ArrayList<>();
            adjustedVariants.add(variants.get(0));
            int errorCount = 0;
            for(int i = 1; i < variants.size()-1; i++) {

                VariantContext currentVC = variants.get(i);
                VariantContext prevVC = adjustedVariants.get(adjustedVariants.size()-1);
                VariantContext nextVC = variants.get(i+1);
                //Check to see if current Variant object is an indel
                //if so we may need to resize the reference block
                if(currentVC.isIndel()) {
                    //Check to see if its start site overlaps the previous entry
                    //If So we need to resize the previous block
                    //If not the indel must have already been in the leftmost position
                    if(currentVC.getStart() <= prevVC.getEnd()) {
                        if(!AssemblyProcessingUtils.isRefBlock(prevVC)) {
                            myLogger.warn("Overlapping indel with a nonRefBlock \n"+prevVC+"\n"+currentVC);
                            errorCount++;
                            continue;
                        }
                        //Pull out the previous variant
                        adjustedVariants.remove(adjustedVariants.size()-1);
                        if(prevVC.getStart() < currentVC.getStart()-1) {
                            adjustedVariants.add(createRefRangeVC(prevVC.getSampleNamesOrderedByName().get(0), Position.of(prevVC.getContig(), prevVC.getStart()), Position.of(prevVC.getContig(), currentVC.getStart() - 1), prevVC.getReference()));

                        }else {
                            myLogger.warn("Indel completely overlapping previous ref block, deleting previous ref \n"+prevVC+"\n"+currentVC);
                            errorCount++;
                        }
                    }
                    adjustedVariants.add(currentVC);
                    //check to see if there is a gap caused by shifting up the indel
                    if(currentVC.getEnd() < nextVC.getStart()-1) {
                        adjustedVariants.add(createRefRangeVC(taxon(),
                                Position.of(currentVC.getContig(),currentVC.getEnd()+1),
                                Position.of(nextVC.getContig(),nextVC.getEnd()),
                                Allele.create(gs.genotypeAsString(Chromosome.instance(currentVC.getContig()),currentVC.getEnd()+1),true)));
                        i++;
                    }
                }
                else {
                    adjustedVariants.add(currentVC);
                }

            }
            myLogger.info("Number of errors:"+errorCount);
            HapCallingUtils.writeVariantContextsToVCF(adjustedVariants,outputFile,ref(),taxon());

        }
        catch(Exception e) {
            myLogger.debug("Error resizing the VC Ref Blocks",e);
            throw new IllegalStateException("Error resizing the VCF Reference Blocks.",e);
        }


    }

    /**
     * Helper method to create a Reference Block VariantContext for a given range
     * @param assemblyTaxon
     * @param refRangeStart
     * @param refRangeEnd
     * @param refAllele
     * @return
     */
    private static VariantContext createRefRangeVC( String assemblyTaxon, Position refRangeStart, Position refRangeEnd, Allele refAllele) {
        Genotype gt = new GenotypeBuilder().name(assemblyTaxon).alleles(Arrays.asList(refAllele)).make();

        if (refRangeStart.getPosition() > refRangeEnd.getPosition()) {
            throw new IllegalStateException("createRefRangeVC - start postion greater than end: start=" +
                    refRangeStart.getPosition() + " end=" + refRangeEnd.getPosition());
        }
        VariantContextBuilder vcb = new VariantContextBuilder()
                .chr(refRangeStart.getChromosome().getName())
                .start(refRangeStart.getPosition())
                .stop(refRangeEnd.getPosition())
                .attribute("END",refRangeEnd.getPosition())
                .alleles(Arrays.asList(refAllele))
                .genotypes(gt);

        return vcb.make();
    }

    @Override
    public ImageIcon getIcon() {
        return null;
    }

    @Override
    public String getButtonName() {
        return ("Resize RefBlocks");
    }

    @Override
    public String getToolTipText() {

        return ("Resize Reference block errors caused by indel normalization");
    }
    // The following getters and setters were auto-generated.
    // Please use this method to re-generate.
    //
    // public static void main(String[] args) {
    //     GeneratePluginCode.generate(ResizeRefBlockPlugin.class);
    // }

    /**
     * Normalized VCF file which needs to be corrected
     *
     * @return Normalized VCF File
     */
    public String vCFFile() {
        return myVCFFile.value();
    }

    /**
     * Set Normalized VCF File. Normalized VCF file which
     * needs to be corrected
     *
     * @param value Normalized VCF File
     *
     * @return this plugin
     */
    public ResizeRefBlockPlugin vCFFile(String value) {
        myVCFFile = new PluginParameter<>(myVCFFile, value);
        return this;
    }

    /**
     * Corrected RefBlock VCF file
     *
     * @return Output VCF File
     */
    public String vCFFileCorrected() {
        return myVCFFileCorrected.value();
    }

    /**
     * Set Output VCF File. Corrected RefBlock VCF file
     *
     * @param value Output VCF File
     *
     * @return this plugin
     */
    public ResizeRefBlockPlugin vCFFileCorrected(String value) {
        myVCFFileCorrected = new PluginParameter<>(myVCFFileCorrected, value);
        return this;
    }

    /**
     * Reference fasta File
     *
     * @return Reference
     */
    public String ref() {
        return myRef.value();
    }

    /**
     * Set Reference. Reference fasta File
     *
     * @param value Reference
     *
     * @return this plugin
     */
    public ResizeRefBlockPlugin ref(String value) {
        myRef = new PluginParameter<>(myRef, value);
        return this;
    }

    /**
     * Name to be assigned for the taxon
     *
     * @return Taxon Name
     */
    public String taxon() {
        return myTaxon.value();
    }

    /**
     * Set Taxon Name. Name to be assigned for the taxon
     *
     * @param value Taxon Name
     *
     * @return this plugin
     */
    public ResizeRefBlockPlugin taxon(String value) {
        myTaxon = new PluginParameter<>(myTaxon, value);
        return this;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy