All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.db_loading.AddRefRangeAsAssemblyPlugin Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
/**
 * 
 */
package net.maizegenetics.pangenome.db_loading;

import java.awt.Frame;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.swing.ImageIcon;

import net.maizegenetics.pangenome.api.ConvertVariantContextToVariantInfo;
import net.maizegenetics.plugindef.GeneratePluginCode;
import org.apache.log4j.Logger;

import com.google.common.collect.Range;

import htsjdk.variant.variantcontext.VariantContext;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.map.GeneralPosition;
import net.maizegenetics.dna.map.GenomeSequence;
import net.maizegenetics.dna.map.GenomeSequenceBuilder;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.pangenome.api.CreateGraphUtils;
import net.maizegenetics.pangenome.api.ReferenceRange;
import net.maizegenetics.pangenome.processAssemblyGenomes.AssemblyProcessingUtils;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.PluginParameter;

import static net.maizegenetics.pangenome.db_loading.DBLoadingUtils.createPathNodesForGameteGrp;

/**
 * This class is largely obsolete.  It was used before the graph was able to take multiple methods
 * as parameters.
 *
 * This class was created to load a reference genome as an assembly.  The haplotype method name is
 * supplied through the haplotypeMethod parameter; the path method name defaults to "mummer4_PATH"
 * ("mummer4" being the assembly method name at the time of this writing).  All the variants are
 * reference records.  The addition of these haplotypes allows for pulling
 * (NOTE: this sentence was left unfinished by the original author).
 *
 * TODO: If it is deemed this class is still useful, it should be updated to create a gvcf file as is done in LoadAllIntervalsToPHGdbPlugin,
 *   store the gvcf file data to the genomeFileData table, and save the gvcfFileId to the anchorDataPHG object.
 * @author lcj34
 *
 */
public class AddRefRangeAsAssemblyPlugin extends AbstractPlugin {
    private static final Logger myLogger = Logger.getLogger(AddRefRangeAsAssemblyPlugin.class);

    // Plugin parameters.  They are not final because each fluent setter below replaces the
    // parameter object with a copy carrying the new value.
    // NOTE(review): the description below contains the typo "Referemce"; it is left untouched
    // here because it is user-facing runtime text.
    private PluginParameter<String> refGenome = new PluginParameter.Builder<>("ref", null, String.class).guiName("Reference Genome File").required(true)
            .description("Referemce Genome File for aligning against ").build();
    // NOTE(review): the GUI label "Genome Data File" does not match the description (a DB config file).
    private PluginParameter<String> configFile = new PluginParameter.Builder<>("configFile", null, String.class).guiName("Genome Data File").required(true)
            .description("Path to config file for accessing/loading the DB")
            .build();
    private PluginParameter<String> haplotypeMethod = new PluginParameter.Builder<>("haplotypeMethod", null, String.class).guiName("Haplotype Method Name").required(true)
            .description("Name of method used for processing the haplotypes.  Should match the method used to load assemblies ")
            .build();
    private PluginParameter<String> pathMethod = new PluginParameter.Builder<>("pathMethod", "mummer4_PATH", String.class)
            .guiName("Path Method Name").required(false)
            .description("OPTIONAL: Name of method used to create PHG Path.  Should match the assembly path method  - that default is mummer4_PATH")
            .build();
    private PluginParameter<String> lineName = new PluginParameter.Builder<>("lineName", null, String.class).guiName("Line Name").required(true)
            .description("Line name to be stored in the genotypes table, e.g. B73_Assembly")
            .build();

    // Fixed values passed to GenoHaploData for the reference-as-assembly line.
    private final String line_data = "Reference genome stored as an assembly, identical to the reference";
    private final int ploidy = 1;
    private final int hapNumber = 0;
    private final boolean genesPhased = true;
    private final boolean chromsPhased = true;
    private final float conf = 1;

    // Reference sequence built from the "ref" parameter; (re)assigned on every processData call.
    // Kept static and package-visible as in the original declaration.
    static GenomeSequence myRefSequence = null;

    public AddRefRangeAsAssemblyPlugin() {
        super(null, false);
    }

    public AddRefRangeAsAssemblyPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    public AddRefRangeAsAssemblyPlugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    /**
     * Stores the reference sequence of every reference range in the DB as a haplotype of the
     * line given by {@link #lineName()}, then stores a path over those haplotypes.
     *
     * @param input not used
     *
     * @return always null
     *
     * @throws IllegalStateException if no DB connection can be obtained, or if loading the
     *                               genotype/method/haplotype data fails (the cause is attached)
     */
    @Override
    public DataSet processData(DataSet input) {

        long totalTime = System.nanoTime();

        Connection dbConnect = DBLoadingUtils.connection(configFile(), false);
        if (dbConnect == null) {
            throw new IllegalStateException("AddRefRangeAsAssembly: no connection supplied!");
        }
        myLogger.info("AddRefRangeAsAssembly: have connection, create PHGdbAccess object");
        PHGDataWriter phg = new PHGdbAccess(dbConnect);

        try {
            myRefSequence = GenomeSequenceBuilder.instance(refGenome());

            myLogger.info("AddRefRangeAsAssembly:  finished GenomeSequenceBuilder for ref genome");

            // get reference range map from API
            Map<Integer, ReferenceRange> refRangeMap = CreateGraphUtils.referenceRangeMap(dbConnect);

            int gameteGrpId = loadReferenceHaplotypes(phg, refRangeMap);
            writeReferencePath(phg, dbConnect, gameteGrpId);
        } finally {
            // Always release the DB access object, including when the load or path step fails.
            // A failure to close is logged but not rethrown so it cannot mask an earlier exception.
            try {
                ((PHGdbAccess) phg).close();
            } catch (Exception exc) {
                myLogger.error("AddRefRangeAsAssembly: error closing the PHG db access object", exc);
            }
        }
        myLogger.info("\nFinished, TotalTime for AddRefRangeAsAssemblyPlugin was " + (System.nanoTime() - totalTime) / 1e9 + " seconds");
        return null;
    }

    /**
     * Writes the genotype, method, gamete group and per-range haplotype records for the line.
     *
     * @param phg         writer used for all DB inserts and lookups
     * @param refRangeMap reference range id mapped to its ReferenceRange
     *
     * @return the gamete group id looked up for the line's single gamete
     *
     * @throws IllegalStateException if any step fails; the original exception is the cause
     */
    private int loadReferenceHaplotypes(PHGDataWriter phg, Map<Integer, ReferenceRange> refRangeMap) {
        try {
            GenoHaploData ghd = new GenoHaploData(ploidy, false, lineName(), line_data, genesPhased, chromsPhased, hapNumber, conf);
            phg.putGenoAndHaploTypeData(ghd);

            int methodId = phg.getMethodIdFromName(haplotypeMethod()); // might already be in the db
            if (methodId == 0) {
                myLogger.info("Adding method " + haplotypeMethod() + " to db methods table");
                // Assuming this will never be TEST_ASSEMBLY_HAPLOTYPES
                methodId = phg.putMethod(haplotypeMethod(), DBLoadingUtils.MethodType.ASSEMBLY_HAPLOTYPES, pluginParameters());
            }

            // Load the gamete_groups and gamete_haplotypes table
            String nameWithHap = lineName() + "_" + hapNumber;
            List<String> gameteGroupList = new ArrayList<>();
            gameteGroupList.add(nameWithHap);
            phg.putGameteGroupAndHaplotypes(gameteGroupList);

            int gameteGrpId = phg.getGameteGroupIDFromTaxaList(gameteGroupList);

            // Haplotype data accumulated for the current chromosome, keyed by reference range id.
            Map<Integer, AnchorDataPHG> anchorSequences = new HashMap<>();
            String prevChrom = "";
            String curChrom = "";

            // When ordered numerically, the refRangeIds are in order for chrom 1-10 for
            // all the anchors, followed by all the interanchors.  So you'll have a stream of
            // them for chroms 1-10, then it will repeat (assuming you are on a species that has
            // 10 chromosomes).
            for (Map.Entry<Integer, ReferenceRange> entry : refRangeMap.entrySet()) {
                ReferenceRange refRange = entry.getValue();
                int refRangeID = entry.getKey();
                int anchorStart = refRange.start();
                int anchorEnd = refRange.end();
                Chromosome chr = refRange.chromosome();
                curChrom = chr.getName();
                if (!curChrom.equals(prevChrom)) {
                    // Chromosome changed: flush what was collected for the previous one.
                    if (!anchorSequences.isEmpty()) {
                        myLogger.info("Calling putHaplotypesData for chrom " + prevChrom);
                        phg.putHaplotypesData(gameteGrpId, methodId, anchorSequences, prevChrom, -1, -1);
                        anchorSequences.clear();
                    }
                    prevChrom = curChrom;
                }
                String anchorString = myRefSequence.genotypeAsString(chr, anchorStart, anchorEnd);
                Position intervalStart = new GeneralPosition.Builder(chr, anchorStart).build();
                Position intervalEnd = new GeneralPosition.Builder(chr, anchorEnd).build();
                Range<Position> intervalRange = Range.closed(intervalStart, intervalEnd);

                // ref and asm coordinates are the same here.
                // NOTE(review): the VariantContext returned by this call is never read.  The call is
                // kept only in case createRefRangeVC validates its inputs - confirm, then delete it.
                AssemblyProcessingUtils.createRefRangeVC(myRefSequence, lineName(), intervalStart, intervalEnd, intervalStart, intervalEnd);

                // Changes for PHG-485: assembly coordinates are the same as the reference
                // coordinates.  AsmFileId is the same as the reference asmFileId (genomeFileId)
                // which is always 1 as reference is loaded before anything else when the db is created.
                AnchorDataPHG adata = new AnchorDataPHG(intervalRange, chr.getName(), anchorStart, anchorEnd, ".",
                        refGenome(), anchorString, 1, -1);
                anchorSequences.put(refRangeID, adata);
            }

            if (!anchorSequences.isEmpty()) {
                // load haplotypes for the last chrom seen
                myLogger.info("calling putHaplotypesData for last chrom: " + curChrom);

                // If the reference has already been created, then perhaps the genomeFileId and gvcfFileId fields
                // should be determined by pulling the ref data.  You'd need to get the ref genoid based on the
                // line_name from genotypes that has is_reference set to true.  Then grab the id based on genoid
                // and type from the genomeFileData table.
                phg.putHaplotypesData(gameteGrpId, methodId, anchorSequences, curChrom, -1, -1);
            }
            return gameteGrpId;
        } catch (Exception exc) {
            // Fail fast: continuing would build a path for a gamete group that was never fully loaded.
            String message = "AddRefRangeAsAssembly: error loading reference haplotypes for line " + lineName();
            myLogger.error(message, exc);
            throw new IllegalStateException(message, exc);
        }
    }

    /**
     * Builds the path nodes for the gamete group, encodes them and stores them under the path method name.
     *
     * @param phg         writer used to store the path
     * @param dbConnect   connection handed to createPathNodesForGameteGrp
     * @param gameteGrpId gamete group whose haplotypes make up the path
     */
    private void writeReferencePath(PHGDataWriter phg, Connection dbConnect, int gameteGrpId) {
        byte[] pathBytes = DBLoadingUtils.encodePathsFromIntArray(
                createPathNodesForGameteGrp(lineName(), dbConnect, gameteGrpId));
        HashMap<String, String> methodParams = new HashMap<>();
        methodParams.put("notes","path created when assembly was loaded");

        // Assuming here we never create refRanges as assemblies as a test
        phg.putPathsData(pathName(), methodParams, lineName(), null, pathBytes, false);
        myLogger.info("Paths added to db for reference as assembly");
    }

    public static void main(String[] args) {
        GeneratePluginCode.generate(AddRefRangeAsAssemblyPlugin.class);
    }


    @Override
    public ImageIcon getIcon() {
        return null;
    }

    @Override
    public String getButtonName() {
        return ("Add reference ranges as assembly haplotypes");
    }

    @Override
    public String getToolTipText() {
        return ("Add reference ranges as assembly haplotypes");
    }

    /**
     * Reference Genome File for aligning against
     *
     * @return Reference Genome File
     */
    public String refGenome() {
        return refGenome.value();
    }

    /**
     * Set Reference Genome File. Reference Genome File for
     * aligning against
     *
     * @param value Reference Genome File
     *
     * @return this plugin
     */
    public AddRefRangeAsAssemblyPlugin refGenome(String value) {
        refGenome = new PluginParameter<>(refGenome, value);
        return this;
    }


    /**
     * Path to config file for accessing/loading the DB
     *
     * @return config file
     */
    public String configFile() {
        return configFile.value();
    }

    /**
     * Set config file. Path to config file for
     * accessing/loading the DB
     *
     * @param value config file
     *
     * @return this plugin
     */
    public AddRefRangeAsAssemblyPlugin configFile(String value) {
        configFile = new PluginParameter<>(configFile, value);
        return this;
    }

    /**
     * Name of method used for processing the haplotypes.
     * Should match the method used to load assemblies
     *
     * @return Haplotype Method Name
     */
    public String haplotypeMethod() {
        return haplotypeMethod.value();
    }

    /**
     * Set Haplotype Method Name. Name of method used for
     * processing the haplotypes.  Should match the method
     * used to load assemblies
     *
     * @param value Haplotype Method Name
     *
     * @return this plugin
     */
    public AddRefRangeAsAssemblyPlugin haplotypeMethod(String value) {
        haplotypeMethod = new PluginParameter<>(haplotypeMethod, value);
        return this;
    }

    /**
     * Name of method used to create PHG Path.  Should match
     * the assembly path method  - that default is mummer4_PATH
     *
     * @return Path Method Name
     */
    public String pathName() {
        return pathMethod.value();
    }

    /**
     * Set Path Method Name. Name of method used to create
     * PHG Path.  Should match the assembly path method  -
     * that default is mummer4_PATH
     *
     * @param value Path Method Name
     *
     * @return this plugin
     */
    public AddRefRangeAsAssemblyPlugin pathName(String value) {
        pathMethod = new PluginParameter<>(pathMethod, value);
        return this;
    }

    /**
     * Line name to be stored in the genotypes table, e.g.
     * B73_Assembly
     *
     * @return Line Name
     */
    public String lineName() {
        return lineName.value();
    }

    /**
     * Set Line Name. Line name to be stored in the genotypes
     * table, e.g. B73_Assembly
     *
     * @param value Line Name
     *
     * @return this plugin
     */
    public AddRefRangeAsAssemblyPlugin lineName(String value) {
        lineName = new PluginParameter<>(lineName, value);
        return this;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy