net.maizegenetics.analysis.gobii.GWAS_IFLPlugin Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage
disequilibrium.
/**
*
*/
package net.maizegenetics.analysis.gobii;
import java.awt.Frame;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.swing.ImageIcon;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.GeneratePluginCode;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.util.DirectoryCrawler;
import net.maizegenetics.util.Utils;
/**
* The tables populated from this plugin are described in TAS-1162
*
* The plugin takes files of gwas data and adds them to the gwas_data
* in a GOBII instance. It is assume the gwas_method and gwas_experiment
* tables associated with this data have already been populated. These
* are all proprietary tables currently only in use by Buckler Lab.
*
* TO speed up procesing, the experimentId and methodIds are hard-coded
* in the juint that calls this plugin. Those tables are generally small,
* and if you have to go to it to get the name, you might as well just
* input the ID anda save GOBII IFL processing time.
*
* The values stored in the "values" column will be stored as "real" in the gwas_data table
* This is because there is 1 "value" field, which holds values for all data, of any type.
* The method table will provide specifics on how to interpret each statistic.
*
* In addition to the .gz files of gwas data, a mapping file of
* phenotype names to IDs is created - data pulled from b4R table.
* @author lcj34
*
*/
public class GWAS_IFLPlugin extends AbstractPlugin
{
private PluginParameter b4rConfigFile= new PluginParameter.Builder<>("b4rConfigFile",null,String.class).guiName("B4R Config File").required(true)
.description("DB config file containing connection information to the B4r database").build();
private PluginParameter inputFile= new PluginParameter.Builder<>("inputFile",null,String.class).guiName("Input File").required(true)
.description("Tab-delimited Input File containing a header line and entries, or Directory containing Tab-delimited files of gwas data. \nIf parameter is a directory, each file must contain a header line, and the files must end with .txt or .txt.gz").build();
private PluginParameter methodIds= new PluginParameter.Builder<>("methodIds",null,String.class).guiName("Method IDs").required(true)
.description("Method Id from the method table. Must be in same order as the statNames, one for each statname.").build();
private PluginParameter expID= new PluginParameter.Builder<>("expId",null,String.class).guiName("GWAS Experiment ID").required(true)
.description("ID of the GWAS experiment to which this data belongs. This experiment must already existin the gwas_experiment table.").build();
private PluginParameter statNames = new PluginParameter.Builder<>("statNames",null,String.class).guiName("Statistic Names").required(true)
.description("Comma separated list of column names from which to pull data. \nThese names must match values in the statistics field of the gwas_method table for the specified method name.").build();
private PluginParameter outputDir= new PluginParameter.Builder<>("outputDir",null,String.class).guiName("Path of output directory").required(true)
.description("Full path name of directory to which output files will be written. If no user prefix, then end with /").build();
// private PluginParameter methodIds= new PluginParameter.Builder<>("methodIds",null,String.class).guiName("GWAS Method").required(true)
// .description("Method id's for each used to create the statistics, e.g. Fast Association, etc").build();
// private PluginParameter traitMapFile = new PluginParameter.Builder<>("traitMapFile",null,String.class).guiName("Phenotype Trait Mapping File").required(true)
// .description("tab-delimited File containing a Trait and an ID column to be used to map gwas traits with the B4R ID.").build();
public GWAS_IFLPlugin(Frame parentFrame, boolean isInteractive) {
super(parentFrame, isInteractive);
}
public GWAS_IFLPlugin() {
super(null, false);
}
private static HashMap traitWriters= null;
private static HashMap traitHM= null;
private static HashMap methodHM = null; // this is trait (from traitHM)/method_id
private static int chrCol= -1;
private static int markerCol= -1;
private static int posCol= -1;
private static int traitCol = -1;
@Override
public DataSet processData(DataSet input) {
long totalTime = System.nanoTime();
File dataFile = new File(inputFile());
if (!dataFile.exists()) {
System.out.println("ERROR - input file doesn't exit: " + inputFile());
return null;
}
// Create list of files
List directoryFiles = new ArrayList<>();
String inputFileGlob="glob:*{txt,txt.gz,}";
if (dataFile.isDirectory()) {
System.out.println("LCJ - input file is a directory");
directoryFiles= DirectoryCrawler.listPaths(inputFileGlob, Paths.get(inputFile.value()).toAbsolutePath());
Collections.sort(directoryFiles);
} else {
Path inputPath= Paths.get(inputFile()).toAbsolutePath();
directoryFiles.add(inputPath);
}
System.out.println("LCJ - postProcessParamers: size of DirectoryFiles is " + directoryFiles.size());
for (int idx = 0;idx traitIDMap = new HashMap();
getTraitIds( dbConnection, query, traitIDMap);
System.out.println("LCJ - Number of entries from b4r trait map: " + traitIDMap.size());
// Create header line for output file
StringBuilder sb = new StringBuilder();
// process the data
try {
// OK, this is more complicated. For each line in Terry's file, we
// need to create 3 db lines.
// We need to have multiple writers - one for each data column. in each
// file, we write all the same data
// (phenotype_id/chr/position/method_name/experiment_name/statistic_name/value)
// but to each one we add the column type, e.g. p-value, or r2. This goes
// under "statistic_name"
// Each one of these files is written to the gwas_data table, but IFL uses the
// method_name and the value_name column to find the method_id from the gwas_method
// table.
// IFL will find expeirment_id from experiment_name column
String headerLine = "phenotype_id\tmarker\tchr\tposition\texperiment_id\tmethod_id\tstatistic_name\tvalue\n";
writeHeaderLineToFiles(headerLine);
sb.setLength(0); // reset the buffer
for (int idx = 0; idx < directoryFiles.size(); idx++) {
int totalLines = 0;
long time=System.nanoTime();
Path infile = directoryFiles.get(idx);
String infileString = infile.toString();
System.out.println("GWAS_IFLPlugin: processing file " + infileString);
filebr = Utils.getBufferedReader(infileString, 1 << 22);
// first line should contain headers. Find the columns for each data to process
// and header line must be consistent with all files in the directory
String line = filebr.readLine(); // toss the header
// read the data, write to the files. Each of these files will have the same
// headers and all will be written into the IFL map table
sb.setLength(0);
int count = 0;
while ((line=filebr.readLine()) != null) {
sb.setLength(0);
String[] tokens = line.split("\t");
// FInd phenotype id
int phenotypeId = traitIDMap.get(tokens[traitCol]);
sb.append(Integer.toString(phenotypeId));
sb.append("\t");
sb.append(tokens[markerCol]);
sb.append("\t");
sb.append(tokens[chrCol]);
sb.append("\t");
sb.append(tokens[posCol]);
sb.append("\t");
sb.append(expID());
sb.append("\t");
writeValues(tokens,sb.toString());
}
System.out.println("Process took " + (System.nanoTime() - time)/1e9 + " seconds for file " + infileString);
}
System.out.println("TOtalTime for all files: " + (System.nanoTime() - totalTime)/1e9);
filebr.close();
Shutdown();
} catch (IOException ioe) {
System.out.println("LCJ - caught exception readding or writing data file " );
ioe.printStackTrace();
}
return null;
}
// Grab the trait and id's from the B4R phenotype table. These will be used
// to map traits from the input file to phenotype Ids to store in the gwas_data
// table, field phenotype_id.
private static void getTraitIds(Connection conn, String query, Map traitIDMap) {
try {
ResultSet rs = conn.createStatement().executeQuery(query);
while (rs.next()) {
int id = rs.getInt("id");
String trait = rs.getString("name");
traitIDMap.put(trait, id);
}
} catch (SQLException sqle) {
System.out.println("getTraitIds barfed on query: " + query);
sqle.printStackTrace();
return;
}
return;
}
private static boolean findColumns( String headerLine, String statNames, String methodIds, String outputDir) {
System.out.println("LCJ - header line: " + headerLine);
String [] headers = headerLine.split("\t");
int index = 0;
for (String header : headers) {
if (header.trim().toUpperCase().equals("CHR")) {
chrCol = index;
} else if (header.trim().toUpperCase().equals("POS")) {
posCol = index;
} else if (header.trim().toUpperCase().equals("TRAIT")) {
traitCol = index;
} else if (header.trim().toUpperCase().equals("MARKER")) {
markerCol = index;
}
index++;
}
if (chrCol == -1 || posCol == -1 || traitCol == -1 || markerCol == -1) {
System.out.println("LCJ - didn't find chr or pos or trait column - quitting");
return false;
}
//Get indices of column names for trait statistics
int ncols = 0;
if (statNames!=null && !statNames.equalsIgnoreCase("null") ) {
traitHM= new HashMap<>();
methodHM = new HashMap<>();
traitWriters= new HashMap<>();
int search= -1;
String[] traitTokens = statNames.split(",");
String[] methodTokens = methodIds.split(",");
for (String str:traitTokens) {
for (int col = 0; col < headers.length; col++) {
if (str.equalsIgnoreCase(headers[col])) {
search= col;
break;
}
}
if (search<0) {
System.out.println("Cannot find column "+str);
return false;
} else {
String strToStore = str;
if (str.equals("p")) strToStore = "pvalue"; // Terry had jus "p", which I don't like
traitHM.put(search, strToStore);
int methodIdx = Arrays.asList(traitTokens).indexOf(str); // use "str" to find "p", not pvalue
methodHM.put(strToStore, methodTokens[methodIdx]);
ncols++;
System.out.println("Found "+str +" in column "+search);
}
}
}
// create the file writers
if (traitHM!=null) {
for (Integer idx:traitHM.keySet()) {
String outFile = outputDir + traitHM.get(idx) + ".gwas_data"; // IFL wants table name as suffix
BufferedWriter statWriter = Utils.getBufferedWriter(outFile);
traitWriters.put(idx,statWriter);
}
}
if (ncols<1) throw new IllegalStateException("No valid columns to read in!");
return true;
}
// Each file gets same header line
private void writeHeaderLineToFiles(String headerline) {
try {
if (traitHM!=null) {
for (Integer idx:traitWriters.keySet()) {
// write the full line
traitWriters.get(idx).write(headerline);
}
}
} catch (IOException ioe) {
System.out.println("LCJ - error writing trait files");
ioe.printStackTrace();
}
}
private void writeValues(String[] next, String initialLine) {
// next contains the dataline. We pull only the trait we want from the column
// that contains that trait. traitHM contains a list of column. The name
// of the trait goes into the "statistic_name" column. The value goes in last
// header order:
// String headerLine = "phenotype_id\tchr\tposition\texperiment_id\tmethod_id\tstatistic_name\tvalue\n";
// NOTE: statistic name is ignored, I kept in so I'd know the value
try {
if (traitHM!=null) {
for (Integer idx:traitWriters.keySet()) {
// write the full line
String methodId = methodHM.get(traitHM.get(idx)) + "\t";
String lineToWrite = initialLine + methodId + traitHM.get(idx) + "\t" + next[idx.intValue()] + "\n";
traitWriters.get(idx).write(lineToWrite);
}
}
} catch (IOException ioe) {
System.out.println("LCJ - error writing trait files");
ioe.printStackTrace();
}
}
private void Shutdown() {
try {
if (traitHM!=null) {
for (Integer i:traitWriters.keySet()) {
traitWriters.get(i).close();
}
}
} catch (Exception exc) {
System.out.println("Problem with shutdown");
exc.printStackTrace();
}
}
@Override
public ImageIcon getIcon() {
// TODO Auto-generated method stub
return null;
}
@Override
public String getButtonName() {
// TODO Auto-generated method stub
return null;
}
@Override
public String getToolTipText() {
// TODO Auto-generated method stub
return null;
}
public static void main(String[] args) {
GeneratePluginCode.generate(GWAS_IFLPlugin.class);
}
// The following getters and setters were auto-generated.
// Please use this method to re-generate.
//
// public static void main(String[] args) {
// GeneratePluginCode.generate(GWAS_IFLPlugin.class);
// }
/**
* Convenience method to run plugin with one return object.
*/
// TODO: Replace with specific type.
// public runPlugin(DataSet input) {
// return () performFunction(input).getData(0).getData();
// }
/**
* Tab-delimited Input File containing a header line and
* entries, or Directory containing Tab-delimited files
* of gwas data.
* If parameter is a directory, each file must contain
* a header line, and the files must end with .txt or
* .txt.gz
*
* @return Input File
*/
public String inputFile() {
return inputFile.value();
}
/**
* Set Input File. Tab-delimited Input File containing
* a header line and entries, or Directory containing
* Tab-delimited files of gwas data.
* If parameter is a directory, each file must contain
* a header line, and the files must end with .txt or
* .txt.gz
*
* @param value Input File
*
* @return this plugin
*/
public GWAS_IFLPlugin inputFile(String value) {
inputFile = new PluginParameter<>(inputFile, value);
return this;
}
/**
* Name of GWAS method as it appears in the name field
* of the gwas_method table
*
* @return Method Name
*/
public String methodIds() {
return methodIds.value();
}
/**
* Set Method Name. Name of GWAS method as it appears
* in the name field of the gwas_method table
*
* @param value Method Name
*
* @return this plugin
*/
public GWAS_IFLPlugin methodIds(String value) {
methodIds = new PluginParameter<>(methodIds, value);
return this;
}
/**
* Name of the GWAS experiment to which this data belongs.
* This name must already exist in the name field of the
* gwas_experiment table.
*
* @return GWAS Experiment Name
*/
public String expID() {
return expID.value();
}
/**
* Set GWAS Experiment Name. Name of the GWAS experiment
* to which this data belongs. This name must already
* exist in the name field of the gwas_experiment table.
*
* @param value GWAS Experiment Name
*
* @return this plugin
*/
public GWAS_IFLPlugin expID(String value) {
expID = new PluginParameter<>(expID, value);
return this;
}
/**
* Comma separated list of column names from which to
* pull data.
* These names must match values in the statistics field
* of the gwas_method table for the specified method name.
*
* @return Statistic Names
*/
public String statNames() {
return statNames.value();
}
/**
* Set Statistic Names. Comma separated list of column
* names from which to pull data.
* These names must match values in the statistics field
* of the gwas_method table for the specified method name.
*
* @param value Statistic Names
*
* @return this plugin
*/
public GWAS_IFLPlugin statNames(String value) {
statNames = new PluginParameter<>(statNames, value);
return this;
}
/**
* Full path name of directory to which output files will
* be written, must end with a /
*
* @return Path of output directory
*/
public String outputDir() {
return outputDir.value();
}
/**
* Set Path of output directory. Full path name of directory
* to which output files will be written, must end with
* a /
*
* @param value Path of output directory
*
* @return this plugin
*/
public GWAS_IFLPlugin outputDir(String value) {
outputDir = new PluginParameter<>(outputDir, value);
return this;
}
// /**
// * Method used to create the statistics, e.g. Fast Association,
// * etc
// *
// * @return GWAS Method
// */
// public String method() {
// return method.value();
// }
//
// /**
// * Set GWAS Method. Method used to create the statistics,
// * e.g. Fast Association, etc
// *
// * @param value GWAS Method
// *
// * @return this plugin
// */
// public GWAS_IFLPlugin method(String value) {
// method = new PluginParameter<>(method, value);
// return this;
// }
/**
* DB config file containing connection information to
* the B4r database
*
* @return B4R Config File
*/
public String b4rConfigFile() {
return b4rConfigFile.value();
}
/**
* Set B4R Config File. DB config file containing connection
* information to the B4r database
*
* @param value B4R Config File
*
* @return this plugin
*/
public GWAS_IFLPlugin b4rConfigFile(String value) {
b4rConfigFile = new PluginParameter<>(b4rConfigFile, value);
return this;
}
// /**
// * tab-delimited File containing a Trait and an ID column
// * to be used to map gwas traits with the B4R ID.
// *
// * @return Phenotype Trait Mapping File
// */
// public String traitMapFile() {
// return traitMapFile.value();
// }
//
// /**
// * Set Phenotype Trait Mapping File. tab-delimited File
// * containing a Trait and an ID column to be used to map
// * gwas traits with the B4R ID.
// *
// * @param value Phenotype Trait Mapping File
// *
// * @return this plugin
// */
// public GWAS_IFLPlugin traitMapFile(String value) {
// traitMapFile = new PluginParameter<>(traitMapFile, value);
// return this;
// }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy