All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.hapCalling.ExportVCForTaxonMethodPlugin Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
/**
 * 
 */
package net.maizegenetics.pangenome.hapCalling;

import java.awt.Frame;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;

import javax.swing.ImageIcon;

import org.apache.log4j.Logger;

import com.google.common.collect.ImmutableMultimap;

import htsjdk.variant.variantcontext.VariantContext;
import net.maizegenetics.pangenome.api.CreateGraphUtils;
import net.maizegenetics.pangenome.api.HaplotypeGraph;
import net.maizegenetics.pangenome.api.HaplotypeGraphBuilderPlugin;
import net.maizegenetics.pangenome.api.HaplotypeNode;
import net.maizegenetics.pangenome.db_loading.DBLoadingUtils;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.GeneratePluginCode;
import net.maizegenetics.plugindef.PluginParameter;

/**
 * Exports variant context lists to vcf file.
 *
 * NOTE:  PathsToVCF is not used as that method only exports SNPs.
 *
 * Input:  taxon and method to identify haplotypes table entries.
 *         A single chromosome may also be supplied.  If no chromosome is given,
 *         data for all chromosomes will be pulled.
 * Output: vcf file
 *
 * Processing steps (see {@link #processData(DataSet)}):
 * <ol>
 *   <li>find every gamete_grp_id whose gametes belong to the requested taxon;</li>
 *   <li>find the haplotypes_id values for those groups that were created with the
 *       first haplotype method named in {@code methods} (optionally restricted to
 *       one chromosome);</li>
 *   <li>build a {@link HaplotypeGraph} limited to those haplotype ids, including
 *       variant contexts;</li>
 *   <li>write the variant contexts of all graph nodes to the output file.</li>
 * </ol>
 *
 * TODO:  create hmp or vcf based on file extension.
 * @author lcj34
 *
 */
@Deprecated
public class ExportVCForTaxonMethodPlugin extends AbstractPlugin {
    private static final Logger myLogger = Logger.getLogger(ExportVCForTaxonMethodPlugin.class);

    private PluginParameter<String> configFile = new PluginParameter.Builder<>("configFile", null, String.class)
            .required(true)
            .inFile()
            .description(" Config file that specifies database connection parameters")
            .build();

    private PluginParameter<String> taxon = new PluginParameter.Builder<>("taxon", null, String.class)
            .required(true)
            .description(" Name of taxon whose variant contexts should be pulled.")
            .build();

    private PluginParameter<String> methods = new PluginParameter.Builder<>("methods", null, String.class)
            .required(true)
            .description("Pairs of methods (haplotype method name and range group method name). Method pair separated by a comma, and pairs separated by semicolon. The range group is optional \n" +
                    "Usage: <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>")
            .build();

    private PluginParameter<String> outputFile = new PluginParameter.Builder<>("outputFile", null, String.class)
            .required(true)
            .outFile()
            .description("FUll path to output file created by this plugin .")
            .build();

    private PluginParameter<String> chrom = new PluginParameter.Builder<>("chrom", "all", String.class)
            .description("If a chrom is specified, pull only data for that chromosome.  If no chromosome is specified, data for all chromsomes will be pulled.")
            .build();

    /**
     * Creates a non-interactive plugin with no parent frame.
     */
    public ExportVCForTaxonMethodPlugin() {
        super(null, false);
    }

    /**
     * Creates a non-interactive plugin attached to the given frame.
     *
     * @param parentFrame parent frame passed through to {@link AbstractPlugin}
     */
    public ExportVCForTaxonMethodPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    /**
     * Creates a plugin attached to the given frame.
     *
     * @param parentFrame   parent frame passed through to {@link AbstractPlugin}
     * @param isInteractive interactive flag passed through to {@link AbstractPlugin}
     */
    public ExportVCForTaxonMethodPlugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    /**
     * Looks up the haplotypes of the configured taxon/method (and optional chromosome)
     * and writes their variant contexts to the configured output file.
     *
     * @param input ignored; all inputs come from the plugin parameters
     *
     * @return always {@code null}; the result of this plugin is the written file
     *
     * @throws IllegalStateException if no database connection can be obtained, a query
     *         fails, or no gamete groups / haplotypes are found for the request
     */
    @Override
    public DataSet processData(DataSet input) {

        // Connect to the database
        Connection conn = DBLoadingUtils.connection(configFile(), false);

        if (conn == null) {
            throw new IllegalStateException("ExportVCForTaxonMethodPlugin: could not get connection using info in config file " + configFile());
        }

        SortedSet<Integer> hapIdsSet;
        try {
            // All gamete group ids that include the specified taxon.
            SortedSet<Integer> taxonGameteGrpIdSet = queryGameteGroupIds(conn);

            if (taxonGameteGrpIdSet.isEmpty()) {
                // No nodes found for method and taxon - throw error
                throw new IllegalStateException("ExportVCForTaxonMethodPlugin: no gamete groups found in db for taxon " + taxon());
            }

            myLogger.info("\nExportVCForTaxonMethodPlugin:  number of gameteGroupIds for taxon: " + taxonGameteGrpIdSet.size());

            // We have all the gamete group ids that contain the specified taxon.  Now find the haplotypes
            // entries for those gamete_grp_ids where the method matches method parameter
            hapIdsSet = queryHaplotypeIds(conn, taxonGameteGrpIdSet);

            if (hapIdsSet.isEmpty()) {
                // No nodes found for method and taxon - throw error
                throw new IllegalStateException("ExportVCForTaxonMethodPlugin: no haplotypes_id found in db for taxon " + taxon()
                    + " with methods: " + methods() + " for chroms " + chrom());
            }
        } finally {
            // The connection is only used for the two id look-ups above; the graph builder
            // below is handed the config file, not this connection.
            closeConnection(conn);
        }

        myLogger.info("\nExportVCForTaxonMethodPlugin: calling hapGraph with methods: " + methods());
        // We have a haplotype id list, create the graph restricted to those haplotypes
        HaplotypeGraph hapGraph = new HaplotypeGraphBuilderPlugin(null, false)
                .configFile(configFile())
                .includeVariantContexts(true)
                .methods(methods())
                .hapids(hapIdsSet)
                .build();

        myLogger.info("\nExportVCForTaxonMethodPlugin: streaming graph nodes ");
        // Gather every node, pull the variant contexts from them, and export via htsjdk.
        List<HaplotypeNode> hapNodeList = hapGraph.nodeStream().collect(Collectors.toList());

        List<VariantContext> vcList = HapCallingUtils.getVariantContextFromHaplotypeNodeList(hapNodeList);
        HapCallingUtils.writeVariantContextsToVCF(vcList, outputFile(), null, taxon());

        myLogger.info("ExportVCForTaxonMethodPlugin finished!!");

        return null;
    }

    /**
     * Returns the sorted gamete_grp_id values of all gamete groups that contain the configured taxon.
     * The taxon name is bound as a statement parameter rather than concatenated into the SQL text.
     */
    private SortedSet<Integer> queryGameteGroupIds(Connection conn) {
        String query = "SELECT gamete_haplotypes.gamete_grp_id FROM gamete_haplotypes " +
                "INNER JOIN gametes ON gamete_haplotypes.gameteid = gametes.gameteid " +
                "INNER JOIN genotypes ON gametes.genoid = genotypes.genoid " +
                "WHERE genotypes.line_name = ? ORDER BY gamete_haplotypes.gamete_grp_id;";

        myLogger.info("ExportVCForTaxonMethodPlugin: query statement: " + query + " [line_name=" + taxon() + "]");

        SortedSet<Integer> gameteGrpIds = new TreeSet<>();
        try (PreparedStatement stmt = conn.prepareStatement(query)) {
            stmt.setString(1, taxon());
            try (ResultSet rs = stmt.executeQuery()) {
                while (rs.next()) {
                    gameteGrpIds.add(rs.getInt("gamete_grp_id"));
                }
            }
        } catch (Exception exc) {
            myLogger.debug(exc.getMessage(), exc);
            throw new IllegalStateException("ExportVCForTaxonMethodPlugin:  Problem querying the database: " + exc.getMessage(), exc);
        }
        return gameteGrpIds;
    }

    /**
     * Returns the sorted haplotypes_id values that belong to one of the given gamete groups and were
     * created with the first haplotype method named in {@code methods}, optionally limited to {@code chrom}.
     *
     * @param gameteGrpIds non-empty set of gamete group ids (the caller checks for emptiness)
     */
    private SortedSet<Integer> queryHaplotypeIds(Connection conn, SortedSet<Integer> gameteGrpIds) {
        // Only the haplotype method of the first method pair is used for this look-up.
        String method = methods().split(";")[0].split(",")[0];
        int methodId = CreateGraphUtils.methodId(conn, method);

        // A missing chrom value is treated like the default "all".
        boolean singleChrom = chrom() != null && !"all".equals(chrom());

        StringBuilder sb = new StringBuilder();
        sb.append("SELECT haplotypes_id FROM haplotypes ");
        if (singleChrom) {
            sb.append(" INNER JOIN reference_ranges on haplotypes.ref_range_id=reference_ranges.ref_range_id ");
            sb.append(" AND reference_ranges.chrom=?");
        }
        sb.append(" WHERE method_id = ");
        sb.append(methodId);
        sb.append(" AND gamete_grp_id in (");
        // These ids are integers read from the database, so they are safe to inline.
        sb.append(gameteGrpIds.stream().map(String::valueOf).collect(Collectors.joining(",")));
        sb.append(");");

        String query = sb.toString();
        myLogger.info("ExportVCForTaxonMethodPlugin: query statement: " + query
                + (singleChrom ? " [chrom=" + chrom() + "]" : ""));

        SortedSet<Integer> hapIds = new TreeSet<>();
        try (PreparedStatement stmt = conn.prepareStatement(query)) {
            if (singleChrom) {
                stmt.setString(1, chrom());
            }
            try (ResultSet rs = stmt.executeQuery()) {
                while (rs.next()) {
                    hapIds.add(rs.getInt("haplotypes_id"));
                }
            }
        } catch (Exception exc) {
            myLogger.debug(exc.getMessage(), exc);
            throw new IllegalStateException("ExportVCForTaxonMethodPlugin:  Problem querying the database: " + exc.getMessage(), exc);
        }
        return hapIds;
    }

    /**
     * Closes the connection, logging (not propagating) a failure so that a close problem
     * neither masks an earlier exception nor aborts an otherwise successful export.
     */
    private static void closeConnection(Connection conn) {
        try {
            conn.close();
        } catch (SQLException exc) {
            myLogger.warn("ExportVCForTaxonMethodPlugin: could not close database connection: " + exc.getMessage(), exc);
        }
    }

    /**
     * This plugin has no icon.
     *
     * @return always {@code null}
     */
    @Override
    public ImageIcon getIcon() {
        return null;
    }

    /**
     * @return the button label for this plugin
     */
    @Override
    public String getButtonName() {
        return "Export VCF for Taxon Method";
    }

    /**
     * @return the tool tip text for this plugin
     */
    @Override
    public String getToolTipText() {
        return "Export VCF for Taxon Method";
    }

    /**
     *  Config file that specifies database connection parameters
     *
     * @return Config File
     */
    public String configFile() {
        return configFile.value();
    }

    /**
     * Set Config File.  Config file that specifies database
     * connection parameters
     *
     * @param value Config File
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin configFile(String value) {
        configFile = new PluginParameter<>(configFile, value);
        return this;
    }

    /**
     *  Name of taxon whose variant contexts should be pulled.
     *
     * @return Taxon
     */
    public String taxon() {
        return taxon.value();
    }

    /**
     * Set Taxon.  Name of taxon whose variant contexts should
     * be pulled.
     *
     * @param value Taxon
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin taxon(String value) {
        taxon = new PluginParameter<>(taxon, value);
        return this;
    }

    /**
     * Pairs of methods (haplotype method name and range group
     * method name). Method pair separated by a comma, and
     * pairs separated by semicolon. The range group is optional
     *
     * Usage: {@code <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>}
     *
     * @return Methods
     */
    public String methods() {
        return methods.value();
    }

    /**
     * Set Methods. Pairs of methods (haplotype method name
     * and range group method name). Method pair separated
     * by a comma, and pairs separated by semicolon. The range
     * group is optional
     * Usage: {@code <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>}
     *
     * @param value Methods
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin methods(String value) {
        methods = new PluginParameter<>(methods, value);
        return this;
    }

    /**
     * Full path to output file created by this plugin.
     *
     * @return Output File
     */
    public String outputFile() {
        return outputFile.value();
    }

    /**
     * Set Output File. Full path to output file created by
     * this plugin.
     *
     * @param value Output File
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin outputFile(String value) {
        outputFile = new PluginParameter<>(outputFile, value);
        return this;
    }

    /**
     * If a chrom is specified, pull only data for that chromosome.
     *  If no chromosome is specified, data for all chromosomes
     * will be pulled.
     *
     * @return Chrom
     */
    public String chrom() {
        return chrom.value();
    }

    /**
     * Set Chrom. If a chrom is specified, pull only data
     * for that chromosome.  If no chromosome is specified,
     * data for all chromosomes will be pulled.
     *
     * @param value Chrom
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin chrom(String value) {
        chrom = new PluginParameter<>(chrom, value);
        return this;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy