All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.pipelineTests.CountConsensusTaxaAtRefRange Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
/**
 * 
 */
package net.maizegenetics.pangenome.pipelineTests;

import java.awt.Frame;
import java.io.BufferedWriter;
import java.io.IOException;
import java.sql.Connection;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import javax.swing.ImageIcon;

import net.maizegenetics.pangenome.api.HaplotypeGraphBuilderPlugin;
import org.apache.log4j.Logger;

import net.maizegenetics.pangenome.api.CreateGraphUtils;
import net.maizegenetics.pangenome.api.HaplotypeNode;
import net.maizegenetics.pangenome.api.ReferenceRange;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.GeneratePluginCode;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.util.Utils;

/**
 * Counts the number of taxa represented at each reference range for the
 * haplotype methods supplied, and writes the counts to a tab-delimited file.
 *
 * <p>For every reference range returned by
 * {@code CreateGraphUtils.createHaplotypeNodes}, the {@code numTaxa()} values of
 * all of its haplotype nodes are summed (so a taxon present in more than one
 * node of a range is counted once per node) and written as one output line:
 * {@code ReferenceRangeID <tab> ReferenceRangeInterval <tab> TaxaCount}.
 *
 * <p>NOTE:  Some intervals are not represented when the consensus data was created.
 * This is due to gvcf files returning no data, or another bug in the consensus creation
 * code.
 *
 * <p>Methods:  The first method added is for reference data.  The second is a ref_range group
 * for anchors, the third is a ref_range group for inter-anchors.
 *
 * <p>To find the total number of reference_ranges (focus only), run the following on your DB:
 * <pre>
 *   select count(*) from reference_ranges inner join ref_range_ref_range_method
 *     where ref_range_ref_range_method.method_id=2;
 * </pre>
 * To find total number of reference_ranges (non-focus only), run the following on your DB:
 * <pre>
 *   select count(*) from reference_ranges inner join ref_range_ref_range_method
 *     where ref_range_ref_range_method.method_id=3;
 * </pre>
 * NOTE(review): an earlier version of this comment joined on {@code ref_range_ref_range_group}
 * while filtering on {@code ref_range_ref_range_method}; the table name has been made
 * consistent here. Verify the table name against your schema, and presumably add a join
 * condition on the reference range id -- confirm the column names before running.
 *
 * <p>Current count (sept 25, 2017) should be 37804 (anchors) or 75448 (anchors + inter-anchors)
 *
 * @author lcj34
 *
 */
public class CountConsensusTaxaAtRefRange extends AbstractPlugin {
    private static final Logger myLogger = Logger.getLogger(CountConsensusTaxaAtRefRange.class);

    // The PluginParameter fields are intentionally non-final: the fluent setters
    // below replace them with a new PluginParameter carrying the supplied value.

    private PluginParameter<String> methods = new PluginParameter.Builder<>("methods", null, String.class)
            .required(true)
            .description("Pairs of methods (haplotype method name and range group method name). Method pair separated by a comma, and pairs separated by semicolon. The range group is optional \n" +
                    "Usage: <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>")
            .build();

    private PluginParameter<String> configFile = new PluginParameter.Builder<>("configFile", null, String.class)
            .required(true)
            .description("Config File: containing lines for host=<>, user=<>, password=<>, DB=<>, and DBtype=<> where DBtype must be either sqlite or postgres")
            .build();

    private PluginParameter<String> myOutputFile = new PluginParameter.Builder<>("outputFile", null, String.class)
            .required(true)
            .description("Output File")
            .build();

    /** Creates a non-interactive plugin with no parent frame. */
    public CountConsensusTaxaAtRefRange() {
        super(null, false);
    }

    /**
     * Creates a non-interactive plugin.
     *
     * @param parentFrame parent frame passed to the superclass
     */
    public CountConsensusTaxaAtRefRange(Frame parentFrame) {
        super(parentFrame, false);
    }

    /**
     * Creates the plugin.
     *
     * @param parentFrame parent frame passed to the superclass
     * @param isInteractive interactive flag passed to the superclass
     */
    public CountConsensusTaxaAtRefRange(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    /**
     * Connects to the database described by {@link #configFile()}, builds the
     * haplotype nodes for the configured {@link #methods()}, and writes one line
     * per reference range (id, interval string, summed taxa count) to
     * {@link #outputFile()}.
     *
     * @param input not used by this plugin
     *
     * @return always {@code null}; the result is the output file
     *
     * @throws IllegalStateException if the database connection, node creation, or
     *         writing of the output file fails; the original exception is
     *         attached as the cause
     */
    @Override
    public DataSet processData(DataSet input) {

        long startTime = System.nanoTime();

        // If db doesn't exist, this will fail.
        try (Connection conn = CreateGraphUtils.connection(configFile());
                BufferedWriter bw = Utils.getBufferedWriter(outputFile())) {

            myLogger.info("getting referenceRangeMap ...");
            // NOTE(review): the Integer key type of these two maps is an assumption
            // (presumably the DB ids) -- confirm against CreateGraphUtils.
            Map<Integer, ReferenceRange> refRangeMap = CreateGraphUtils.referenceRangeMap(conn);

            myLogger.info("Size of refRangeMap from db: " + refRangeMap.size() + ", create taxaListMap and haplotypeNodes");
            Map<Integer, TaxaList> taxaListMap = CreateGraphUtils.taxaListMap(conn);

            // Which ranges/nodes come back is determined by the method pairs passed in.
            TreeMap<ReferenceRange, List<HaplotypeNode>> consensusTaxaByRange = CreateGraphUtils.createHaplotypeNodes(conn, refRangeMap, taxaListMap,
                    HaplotypeGraphBuilderPlugin.convertMethods(methods()), true, false, null); // don't include variant context

            myLogger.info("Processing consensusTaxaByRange map, size of keySet : " + consensusTaxaByRange.size());
            writeTaxaCounts(bw, consensusTaxaByRange);

        } catch (Exception exc) {
            myLogger.debug(exc.getMessage(), exc);
            // Keep the cause so the original stack trace is not lost.
            throw new IllegalStateException("Problem creating CountConsensusTaxaAtRefRange: " + exc.getMessage(), exc);
        }

        myLogger.info("Time to process " + (System.nanoTime() - startTime) / 1e9 + " seconds");
        return null;

    }

    /**
     * Writes the header line followed by one tab-delimited line per reference
     * range: range id, interval string, and the sum of {@code numTaxa()} over the
     * range's haplotype nodes. Lines are written in the map's iteration order.
     *
     * @param writer destination for the report; not closed by this method
     * @param nodesByRange haplotype nodes keyed by reference range
     *
     * @throws IOException if writing fails
     */
    private static void writeTaxaCounts(BufferedWriter writer,
            Map<ReferenceRange, List<HaplotypeNode>> nodesByRange) throws IOException {

        writer.write("ReferenceRangeID\tReferenceRangeInterval\tTaxaCount\n");

        for (Map.Entry<ReferenceRange, List<HaplotypeNode>> entry : nodesByRange.entrySet()) {
            int taxaCount = 0;
            for (HaplotypeNode hapNode : entry.getValue()) {
                taxaCount += hapNode.numTaxa();
            }
            ReferenceRange refRange = entry.getKey();
            writer.write(refRange.id() + "\t" + refRange.intervalString() + "\t" + taxaCount + "\n");
        }
    }

    /**
     * This plugin has no icon.
     *
     * @return always {@code null}
     */
    @Override
    public ImageIcon getIcon() {
        return null;
    }

    @Override
    public String getButtonName() {
        return ("Num Consensus Taxa at Reference Range");
    }

    @Override
    public String getToolTipText() {
        return ("Num ConsensusTaxa at Reference Range");
    }

    /**
     * Pairs of methods (haplotype method name and range group
     * method name). Method pair separated by a comma, and
     * pairs separated by semicolon. The range group is optional
     *
     * Usage: {@code <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>}
     *
     * @return Methods
     */
    public String methods() {
        return methods.value();
    }

    /**
     * Set Methods. Pairs of methods (haplotype method name
     * and range group method name). Method pair separated
     * by a comma, and pairs separated by semicolon. The range
     * group is optional
     * Usage: {@code <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>}
     *
     * @param value Methods
     *
     * @return this plugin
     */
    public CountConsensusTaxaAtRefRange methods(String value) {
        methods = new PluginParameter<>(methods, value);
        return this;
    }

    /**
     * Config File
     *
     * @return config File
     */
    public String configFile() {
        return configFile.value();
    }

    /**
     * Set Config File. Config File
     *
     * @param value Config File
     *
     * @return this plugin
     */
    public CountConsensusTaxaAtRefRange configFile(String value) {
        configFile = new PluginParameter<>(configFile, value);
        return this;
    }

    /**
     * Output File
     *
     * @return Output File
     */
    public String outputFile() {
        return myOutputFile.value();
    }

    /**
     * Set Output File. Output File
     *
     * @param value Output File
     *
     * @return this plugin
     */
    public CountConsensusTaxaAtRefRange outputFile(String value) {
        myOutputFile = new PluginParameter<>(myOutputFile, value);
        return this;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy