net.maizegenetics.pangenome.pipelineTests.CountConsensusTaxaAtRefRange Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
/**
*
*/
package net.maizegenetics.pangenome.pipelineTests;
import java.awt.Frame;
import java.io.BufferedWriter;
import java.io.IOException;
import java.sql.Connection;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import javax.swing.ImageIcon;
import net.maizegenetics.pangenome.api.HaplotypeGraphBuilderPlugin;
import org.apache.log4j.Logger;
import net.maizegenetics.pangenome.api.CreateGraphUtils;
import net.maizegenetics.pangenome.api.HaplotypeNode;
import net.maizegenetics.pangenome.api.ReferenceRange;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.GeneratePluginCode;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.util.Utils;
/**
 * Counts the number of taxa represented at each reference range (genome interval),
 * based on the methods supplied, and writes one tab-delimited line per reference range
 * to the output file.
 *
 * NOTE: Some intervals are not represented when the consensus data was created.
 * This is due to gvcf files returning no data, or another bug in the consensus creation
 * code.
 *
 * Methods: The first method added is for reference data. The second is a ref_range group
 * for anchor, the third is a ref_range group for inter-anchors.
 *
 * To find the total number of reference_ranges (focus only), run the following on your DB:
 *
 *   select count(*) from reference_ranges inner join ref_range_ref_range_method
 *   where ref_range_ref_range_method.method_id=2;
 *
 * To find total number of reference_ranges (non-focus only), run the following on your DB:
 *
 *   select count(*) from reference_ranges inner join ref_range_ref_range_method
 *   where ref_range_ref_range_method.method_id=3;
 *
 * NOTE(review): an earlier version of this comment joined on ref_range_ref_range_group
 * while filtering on ref_range_ref_range_method; the queries above were made
 * self-consistent, but confirm the table name (and join condition) against your schema.
 *
 * Current count (sept 25, 2017) should be 37804 (anchors) or 75448 (anchors + inter-anchors)
 *
 * @author lcj34
 */
public class CountConsensusTaxaAtRefRange extends AbstractPlugin {

    private static final Logger myLogger = Logger.getLogger(CountConsensusTaxaAtRefRange.class);

    private PluginParameter<String> methods = new PluginParameter.Builder<>("methods", null, String.class)
            .required(true)
            .description("Pairs of methods (haplotype method name and range group method name). Method pair separated by a comma, and pairs separated by semicolon. The range group is optional \n" +
                    "Usage: <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>")
            .build();

    private PluginParameter<String> configFile = new PluginParameter.Builder<>("configFile", null, String.class)
            .required(true)
            .description("Config File: containing lines for host=<>, user=<>, password=<>, DB=<>, and DBtype=<> where DBtype must be either sqlite or postgres")
            .build();

    private PluginParameter<String> myOutputFile = new PluginParameter.Builder<>("outputFile", null, String.class)
            .required(true)
            .description("Output File")
            .build();

    /** Creates a non-interactive plugin with no parent frame. */
    public CountConsensusTaxaAtRefRange() {
        super(null, false);
    }

    /**
     * Creates a non-interactive plugin.
     *
     * @param parentFrame parent frame passed to the superclass
     */
    public CountConsensusTaxaAtRefRange(Frame parentFrame) {
        super(parentFrame, false);
    }

    /**
     * Creates a plugin.
     *
     * @param parentFrame parent frame passed to the superclass
     * @param isInteractive interactive flag passed to the superclass
     */
    public CountConsensusTaxaAtRefRange(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    /**
     * Connects to the database described by {@link #configFile()}, builds the haplotype
     * nodes for the configured {@link #methods()}, and writes a header line followed by
     * one line per reference range (id, interval string, summed taxa count) to
     * {@link #outputFile()}.
     *
     * @param input not used by this plugin
     *
     * @return always {@code null}
     *
     * @throws IllegalStateException if anything fails while reading the database or
     *         writing the output; the original exception is attached as the cause
     */
    @Override
    public DataSet processData(DataSet input) {
        long totalTime = System.nanoTime();

        // If db doesn't exist, this will fail.
        try (Connection conn = CreateGraphUtils.connection(configFile());
             BufferedWriter bw = Utils.getBufferedWriter(outputFile())) {

            myLogger.info("getting referenceRangeMap ...");
            Map<Integer, ReferenceRange> refRangeMap = CreateGraphUtils.referenceRangeMap(conn);
            myLogger.info("Size of refRangeMap from db: " + refRangeMap.keySet().size() + ", create taxaListMap and haplotypeNodes");

            Map<Integer, TaxaList> taxaListMap = CreateGraphUtils.taxaListMap(conn);
            TreeMap<ReferenceRange, List<HaplotypeNode>> consensusTaxaByRange = CreateGraphUtils.createHaplotypeNodes(conn, refRangeMap, taxaListMap,
                    HaplotypeGraphBuilderPlugin.convertMethods(methods()), true, false, null); // don't include variant context

            // count number of taxa present at each reference range, print to outFile
            String header = "ReferenceRangeID\tReferenceRangeInterval\tTaxaCount\n";
            bw.write(header);
            myLogger.info("Processing consensusTaxaByRange map, size of keySet : " + consensusTaxaByRange.keySet().size());
            writeTaxaCounts(bw, consensusTaxaByRange);
        } catch (Exception exc) {
            myLogger.debug(exc.getMessage(), exc);
            // Chain the cause so the original stack trace is not lost to callers.
            throw new IllegalStateException("Problem creating CountConsensusTaxaAtRefRange: " + exc.getMessage(), exc);
        }

        System.out.println("\nTime to process " + (System.nanoTime() - totalTime) / 1e9 + " seconds");
        return null;
    }

    /**
     * Writes one tab-delimited line (reference range id, interval string, taxa count)
     * per map entry, in the map's iteration order. The taxa count is the sum of
     * {@code numTaxa()} over all haplotype nodes at that reference range.
     *
     * @param writer destination for the lines
     * @param nodesByRange haplotype nodes keyed by reference range
     *
     * @throws IllegalStateException if a line cannot be written
     */
    private static void writeTaxaCounts(BufferedWriter writer, Map<ReferenceRange, List<HaplotypeNode>> nodesByRange) {
        try {
            for (Map.Entry<ReferenceRange, List<HaplotypeNode>> entry : nodesByRange.entrySet()) {
                int count = 0;
                for (HaplotypeNode hapNode : entry.getValue()) {
                    count += hapNode.numTaxa();
                }

                ReferenceRange refRange = entry.getKey();
                StringBuilder refRangeTaxaCount = new StringBuilder()
                        .append(refRange.id()).append("\t")
                        .append(refRange.intervalString()).append("\t")
                        .append(count).append("\n");
                writer.write(refRangeTaxaCount.toString());
            }
        } catch (IOException ioe) {
            myLogger.debug(ioe.getMessage(), ioe);
            throw new IllegalStateException("Problem writing CountConsensusTaxaAtRefRange: " + ioe.getMessage(), ioe);
        }
    }

    /**
     * This plugin has no icon.
     *
     * @return always {@code null}
     */
    @Override
    public ImageIcon getIcon() {
        return null;
    }

    /**
     * @return the button label for this plugin
     */
    @Override
    public String getButtonName() {
        return ("Num Consensus Taxa at Reference Range");
    }

    /**
     * @return the tool tip text for this plugin
     */
    @Override
    public String getToolTipText() {
        return ("Num ConsensusTaxa at Reference Range");
    }

    /**
     * Pairs of methods (haplotype method name and range group
     * method name). Method pair separated by a comma, and
     * pairs separated by semicolon. The range group is optional.
     *
     * Usage: {@code <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>}
     *
     * @return Methods
     */
    public String methods() {
        return methods.value();
    }

    /**
     * Set Methods. Pairs of methods (haplotype method name
     * and range group method name). Method pair separated
     * by a comma, and pairs separated by semicolon. The range
     * group is optional.
     *
     * Usage: {@code <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>}
     *
     * @param value Methods
     *
     * @return this plugin
     */
    public CountConsensusTaxaAtRefRange methods(String value) {
        methods = new PluginParameter<>(methods, value);
        return this;
    }

    /**
     * Config File
     *
     * @return config File
     */
    public String configFile() {
        return configFile.value();
    }

    /**
     * Set Config File.
     *
     * @param value Config File
     *
     * @return this plugin
     */
    public CountConsensusTaxaAtRefRange configFile(String value) {
        configFile = new PluginParameter<>(configFile, value);
        return this;
    }

    /**
     * Output File
     *
     * @return Output File
     */
    public String outputFile() {
        return myOutputFile.value();
    }

    /**
     * Set Output File.
     *
     * @param value Output File
     *
     * @return this plugin
     */
    public CountConsensusTaxaAtRefRange outputFile(String value) {
        myOutputFile = new PluginParameter<>(myOutputFile, value);
        return this;
    }
}