/*
 * net.maizegenetics.pangenome.hapCalling.ExportVCForTaxonMethodPlugin
 * PHG - Practical Haplotype Graph
 *
 * Artifact-repository page text carried over with this listing (not part of the source):
 * "Maven / Gradle / Ivy", "Go to download", "Show more of this group",
 * "Show more artifacts with this name", "Show all versions of phg", "Show documentation".
 */
/**
*
*/
package net.maizegenetics.pangenome.hapCalling;
import java.awt.Frame;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import javax.swing.ImageIcon;
import org.apache.log4j.Logger;
import com.google.common.collect.ImmutableMultimap;
import htsjdk.variant.variantcontext.VariantContext;
import net.maizegenetics.pangenome.api.CreateGraphUtils;
import net.maizegenetics.pangenome.api.HaplotypeGraph;
import net.maizegenetics.pangenome.api.HaplotypeGraphBuilderPlugin;
import net.maizegenetics.pangenome.api.HaplotypeNode;
import net.maizegenetics.pangenome.db_loading.DBLoadingUtils;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.GeneratePluginCode;
import net.maizegenetics.plugindef.PluginParameter;
/**
 * Exports variant context lists to a VCF file.
 *
 * NOTE: PathsToVCF is not used as that method only exports SNPs.
 *
 * Input: taxon and method to identify haplotypes table entries.
 * A single chromosome may also be supplied. If no chromosome is given,
 * data for all chromosomes will be pulled.
 * Output: vcf file
 *
 * TODO: create hmp or vcf based on file extension.
 * @author lcj34
 *
 */
@Deprecated
public class ExportVCForTaxonMethodPlugin extends AbstractPlugin {

    private static final Logger myLogger = Logger.getLogger(ExportVCForTaxonMethodPlugin.class);

    /** Value of the {@code chrom} parameter that disables chromosome filtering. */
    private static final String ALL_CHROMS = "all";

    private PluginParameter<String> configFile = new PluginParameter.Builder<>("configFile", null, String.class).required(true)
            .inFile()
            .description(" Config file that specifies database connection parameters")
            .build();

    private PluginParameter<String> taxon = new PluginParameter.Builder<>("taxon", null, String.class).required(true)
            .description(" Name of taxon whose variant contexts should be pulled.")
            .build();

    // NOTE(review): the "Usage:" placeholders were lost in the listing this file was taken from
    // (only ",;,;" survived). They are reconstructed here from the description sentence —
    // confirm the wording against the upstream source.
    private PluginParameter<String> methods = new PluginParameter.Builder<>("methods", null, String.class)
            .required(true)
            .description("Pairs of methods (haplotype method name and range group method name). Method pair separated by a comma, and pairs separated by semicolon. The range group is optional \n" +
                    "Usage: <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>")
            .build();

    private PluginParameter<String> outputFile = new PluginParameter.Builder<>("outputFile", null, String.class).required(true)
            .outFile()
            .description("FUll path to output file created by this plugin .")
            .build();

    private PluginParameter<String> chrom = new PluginParameter.Builder<>("chrom", ALL_CHROMS, String.class)
            .description("If a chrom is specified, pull only data for that chromosome. If no chromosome is specified, data for all chromsomes will be pulled.")
            .build();

    public ExportVCForTaxonMethodPlugin() {
        super(null, false);
    }

    public ExportVCForTaxonMethodPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    public ExportVCForTaxonMethodPlugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    /**
     * Looks up the haplotype ids that belong to the configured taxon and haplotype method
     * (optionally restricted to one chromosome), builds a haplotype graph for those ids with
     * variant contexts included, and writes the variant contexts of every graph node to
     * {@link #outputFile()}.
     *
     * @param input not used by this plugin
     *
     * @return always {@code null}
     *
     * @throws IllegalStateException if no database connection can be obtained, a query fails,
     *         or no gamete groups / haplotype ids are found for the requested taxon and method
     */
    @Override
    public DataSet processData(DataSet input) {
        SortedSet<Integer> hapIdsSet = findHaplotypeIds();

        myLogger.info("\nExportVCForTaxonMethodPlugin: calling hapGraph with methods: " + methods());
        // We have a haplotype id list, create the graph restricted to those ids.
        HaplotypeGraph hapGraph = new HaplotypeGraphBuilderPlugin(null, false)
                .configFile(configFile())
                .includeVariantContexts(true)
                .methods(methods())
                .hapids(hapIdsSet)
                .build();

        myLogger.info("\nExportVCForTaxonMethodPlugin: streaming graph nodes ");
        List<HaplotypeNode> hapNodeList = hapGraph.nodeStream().collect(Collectors.toList());

        List<VariantContext> vcList = HapCallingUtils.getVariantContextFromHaplotypeNodeList(hapNodeList);
        HapCallingUtils.writeVariantContextsToVCF(vcList, outputFile(), null, taxon());
        myLogger.info("ExportVCForTaxonMethodPlugin finished!!");
        return null;
    }

    /**
     * Opens a database connection, runs both lookup queries and closes the connection again.
     *
     * @return the non-empty, sorted set of haplotype ids for the configured taxon/method/chrom
     */
    private SortedSet<Integer> findHaplotypeIds() {
        // A null resource is legal in try-with-resources; it is simply not closed.
        try (Connection conn = DBLoadingUtils.connection(configFile(), false)) {
            if (conn == null) {
                throw new IllegalStateException("ExportVCForTaxonMethodPlugin: could not get connection using info in config file " + configFile());
            }

            // All gamete group ids whose gamete group includes the specified taxon.
            SortedSet<Integer> taxonGameteGrpIdSet = queryGameteGroupIds(conn);
            if (taxonGameteGrpIdSet.isEmpty()) {
                // No gamete groups found for taxon - throw error
                throw new IllegalStateException("ExportVCForTaxonMethodPlugin: no gamete groups found in db for taxon " + taxon());
            }
            myLogger.info("\nExportVCForTaxonMethodPlugin: number of gameteGroupIds for taxon: " + taxonGameteGrpIdSet.size());

            // Now find the haplotypes entries for those gamete_grp_ids where the method matches.
            SortedSet<Integer> hapIdsSet = queryHaplotypeIds(conn, taxonGameteGrpIdSet);
            if (hapIdsSet.isEmpty()) {
                // No nodes found for method and taxon - throw error
                throw new IllegalStateException("ExportVCForTaxonMethodPlugin: no haplotypes_id found in db for taxon " + taxon()
                        + " with methods: " + methods() + " for chroms " + chrom());
            }
            return hapIdsSet;
        } catch (SQLException exc) {
            // The query helpers wrap their own SQLExceptions, so this is a failure while closing.
            myLogger.debug(exc.getMessage(), exc);
            throw new IllegalStateException("ExportVCForTaxonMethodPlugin: Problem closing the database connection: " + exc.getMessage(), exc);
        }
    }

    /**
     * Queries the gamete_grp_id values linked (via gametes and genotypes) to the configured taxon.
     * The taxon name is bound as a statement parameter rather than concatenated into the SQL.
     */
    private SortedSet<Integer> queryGameteGroupIds(Connection conn) {
        String query = "SELECT gamete_haplotypes.gamete_grp_id FROM gamete_haplotypes " +
                "INNER JOIN gametes ON gamete_haplotypes.gameteid = gametes.gameteid " +
                "INNER JOIN genotypes ON gametes.genoid = genotypes.genoid " +
                "WHERE genotypes.line_name = ? ORDER BY gamete_haplotypes.gamete_grp_id";
        myLogger.info("ExportVCForTaxonMethodPlugin: query statement: " + query + " [line_name=" + taxon() + "]");

        SortedSet<Integer> gameteGrpIds = new TreeSet<>();
        try (PreparedStatement ps = conn.prepareStatement(query)) {
            ps.setString(1, taxon());
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    gameteGrpIds.add(rs.getInt("gamete_grp_id"));
                }
            }
        } catch (SQLException exc) {
            myLogger.debug(exc.getMessage(), exc);
            throw new IllegalStateException("ExportVCForTaxonMethodPlugin: Problem querying the database: " + exc.getMessage(), exc);
        }
        return gameteGrpIds;
    }

    /**
     * Queries the haplotypes_id values for the given gamete groups whose method matches the first
     * haplotype method named in {@link #methods()}, optionally restricted to {@link #chrom()}.
     */
    private SortedSet<Integer> queryHaplotypeIds(Connection conn, SortedSet<Integer> gameteGrpIds) {
        // Only the haplotype method of the first method pair is used for the id lookup.
        String method = methods().split(";")[0].split(",")[0];
        int methodId = CreateGraphUtils.methodId(conn, method);

        // A missing chrom value is treated like the default "all".
        boolean filterByChrom = chrom() != null && !ALL_CHROMS.equals(chrom());

        StringBuilder sb = new StringBuilder();
        sb.append("SELECT haplotypes_id FROM haplotypes ");
        if (filterByChrom) {
            sb.append(" INNER JOIN reference_ranges on haplotypes.ref_range_id=reference_ranges.ref_range_id ");
            sb.append(" AND reference_ranges.chrom=?");
        }
        sb.append(" WHERE method_id = ?");
        sb.append(" AND gamete_grp_id in (");
        // The ids are Integers read from the database, so inlining them cannot inject SQL.
        sb.append(gameteGrpIds.stream().map(String::valueOf).collect(Collectors.joining(",")));
        sb.append(")");
        String query = sb.toString();
        myLogger.info("ExportVCForTaxonMethodPlugin: query statement: " + query
                + " [chrom=" + chrom() + ", method_id=" + methodId + "]");

        SortedSet<Integer> hapIds = new TreeSet<>();
        try (PreparedStatement ps = conn.prepareStatement(query)) {
            // Placeholders are numbered in textual order: the JOIN clause precedes the WHERE clause.
            int paramIndex = 1;
            if (filterByChrom) {
                ps.setString(paramIndex++, chrom());
            }
            ps.setInt(paramIndex, methodId);
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    hapIds.add(rs.getInt("haplotypes_id"));
                }
            }
        } catch (SQLException exc) {
            myLogger.debug(exc.getMessage(), exc);
            throw new IllegalStateException("ExportVCForTaxonMethodPlugin: Problem querying the database: " + exc.getMessage(), exc);
        }
        return hapIds;
    }

    @Override
    public ImageIcon getIcon() {
        return null;
    }

    @Override
    public String getButtonName() {
        return "Export VCF for Taxon Method";
    }

    @Override
    public String getToolTipText() {
        return "Export VCF for Taxon Method";
    }

    /**
     * Config file that specifies database connection parameters
     *
     * @return Config File
     */
    public String configFile() {
        return configFile.value();
    }

    /**
     * Set Config File. Config file that specifies database
     * connection parameters
     *
     * @param value Config File
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin configFile(String value) {
        configFile = new PluginParameter<>(configFile, value);
        return this;
    }

    /**
     * Name of taxon whose variant contexts should be pulled.
     *
     * @return Taxon
     */
    public String taxon() {
        return taxon.value();
    }

    /**
     * Set Taxon. Name of taxon whose variant contexts should
     * be pulled.
     *
     * @param value Taxon
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin taxon(String value) {
        taxon = new PluginParameter<>(taxon, value);
        return this;
    }

    /**
     * Pairs of methods (haplotype method name and range group
     * method name). Method pair separated by a comma, and
     * pairs separated by semicolon. The range group is optional
     *
     * Usage: {@code <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>}
     *
     * @return Methods
     */
    public String methods() {
        return methods.value();
    }

    /**
     * Set Methods. Pairs of methods (haplotype method name
     * and range group method name). Method pair separated
     * by a comma, and pairs separated by semicolon. The range
     * group is optional
     * Usage: {@code <haplotype method name1>,<range group name1>;<haplotype method name2>,<range group name2>;<haplotype method name3>}
     *
     * @param value Methods
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin methods(String value) {
        methods = new PluginParameter<>(methods, value);
        return this;
    }

    /**
     * Full path to output file created by this plugin.
     *
     * @return Output File
     */
    public String outputFile() {
        return outputFile.value();
    }

    /**
     * Set Output File. Full path to output file created by
     * this plugin.
     *
     * @param value Output File
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin outputFile(String value) {
        outputFile = new PluginParameter<>(outputFile, value);
        return this;
    }

    /**
     * If a chrom is specified, pull only data for that chromosome.
     * If no chromosome is specified, data for all chromosomes
     * will be pulled.
     *
     * @return Chrom
     */
    public String chrom() {
        return chrom.value();
    }

    /**
     * Set Chrom. If a chrom is specified, pull only data
     * for that chromosome. If no chromosome is specified,
     * data for all chromosomes will be pulled.
     *
     * @param value Chrom
     *
     * @return this plugin
     */
    public ExportVCForTaxonMethodPlugin chrom(String value) {
        chrom = new PluginParameter<>(chrom, value);
        return this;
    }
}