net.maizegenetics.analysis.gobii.UpdateMarkerAndDNA_idxes Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage
disequilibrium.
/**
*
*/
package net.maizegenetics.analysis.gobii;
import java.io.BufferedWriter;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import org.apache.log4j.Logger;
import net.maizegenetics.util.Utils;
/**
* Once I have the datasets fixed, this class should not be needed.
*
* What it does: Initially the marker_idx and dnarun_idx columns of the dataset_marker
* and dataset_dnarun tables respectively were not populated. They are now needed and
* are populated. Kevin Palis created a couple scripts to handle populating these fields
* in tables when they were missing. These scripts live with the gobii_ifl_scripts
* on CBSU, and are called update_marker_idx.py and update_dnarun_idx.py. For GOBII,
* they are in the repository at the same level as the gobii_ifl.py scripts.
*
* The file below creates an intermediate file that will be worked on by the preprocess_ifile.py
* script. You can also run the gobii_ifl.py script instead if you uncomment the "return" statement
* that occurs after the preprocess_ifile.py script has been called.
*
* Here is the order:
* 1. Run this class to create the needed files (DS_X.mh5i and DS_X.sh5i)
* 2. sftp these files to cbsudc01.tc.cornell into /workdir/lcj34/postgresFiles/update_idxes_files dir
* 3. Run the file through the gobii_ifl.py script (change the script to return after the preprocess_ifile.py step !!)
* python gobii_ifl.py -c postgresql://lcj34:@localhost:5432/gobii_maize2 -i /workdir/lcj34/postgresFiles/update_idxes_files/DS_5.sh5i -o /tmp/ -v
* 4. Run the /tmp/ppd_* file created in step 3 through the update_dnarun_idx.py or update_marker_idx.py script
* python update_dnarun_idx.py "postgresql://lcj34:@cbsudc01.tc.cornell.edu/gobii_maize2" /tmp/ppd_DS_5.sh5i 5
* 5. Verify the db has values for dataset_marker.marker_idx and dataset_dnarun.dnarun_idx for
* the specified dataset_id.
* 6. Change the gobii_ifl.py script to re-comment the "return" after the preprocess_ifile.py call
*
* @author lcj34
*
*/
public class UpdateMarkerAndDNA_idxes {
    private static final Logger myLogger = Logger.getLogger(UpdateMarkerAndDNA_idxes.class);

    // Fetch-size hint passed to each Statement so large result sets come back in batches.
    private static final int FETCH_SIZE = 100000;

    /**
     * Creates the two tab-delimited intermediate files for one dataset:
     * <ul>
     * <li>{@code <outputDir>DS_<datasetID>.sh5i} - header
     * {@code dnarun_name, d_name, experiment_id}; one row per dnarun linked to the
     * dataset (ordered by dnarun_id) holding the dnarun name twice followed by
     * {@code experimentID}.</li>
     * <li>{@code <outputDir>DS_<datasetID>.mh5i} - header
     * {@code marker_name, m_name, platform_id}; one row per marker linked to the
     * dataset (ordered by marker_id) holding the marker name twice followed by
     * {@code platformID}.</li>
     * </ul>
     * Failures are reported on stdout with a stack trace and are not rethrown; the
     * "Files written" message is printed only when both files were written and closed
     * successfully. The writers, statements, result sets and the database connection
     * are always closed, including on failure.
     *
     * @param configFile   database configuration file handed to {@code GOBIIDbUtils.connectToDB}
     * @param outputDir    prefix for the output files; it is concatenated as-is, so it must
     *                     end with a path separator
     * @param datasetID    dataset_id whose dnarun and marker names are exported
     * @param platformID   value written in the platform_id column of the marker file
     * @param experimentID value written in the experiment_id column of the dnarun file
     */
    public static void createIdxValues(String configFile, String outputDir, int datasetID, int platformID, int experimentID) {
        String dnarunFile = outputDir + "DS_" + datasetID + ".sh5i";
        String markerFile = outputDir + "DS_" + datasetID + ".mh5i";

        long time = System.nanoTime();
        try {
            // try-with-resources closes everything in reverse order, even when a query or
            // write fails. A null resource is permitted here and is simply not closed.
            try (BufferedWriter writerRunID = Utils.getBufferedWriter(dnarunFile);
                 BufferedWriter writerMarkerID = Utils.getBufferedWriter(markerFile);
                 Connection dbConnection = GOBIIDbUtils.connectToDB(configFile)) {

                if (dbConnection == null) {
                    throw new IllegalStateException("UpdateMarkerAndDNA_idxes: Problem connecting to database.");
                }
                dbConnection.setAutoCommit(false); // required for Cursor processing (fetchSize)

                // dnarun file: needs the dnarun.name field of every dnarun in the dataset
                String query = "select name from dnarun,dataset_dnarun where dataset_dnarun.dnarun_id=dnarun.dnarun_id and dataset_dnarun.dataset_id='"
                        + datasetID + "' order by dnarun.dnarun_id;";
                myLogger.info("processData: query statement for dnarun: " + query);
                System.out.println("UpdateMarkerAndDNA_idxes: execute query: " + query);
                writeNameRows(dbConnection, query, writerRunID, "dnarun_name\td_name\texperiment_id\n", experimentID);
                System.out.printf("TotalTime for dnarun_name query %g sec%n", (double) (System.nanoTime() - time) / 1e9);

                // marker file: needs the marker.name field of every marker in the dataset
                query = "select name from marker, dataset_marker where marker.marker_id=dataset_marker.marker_id and dataset_marker.dataset_id='"
                        + datasetID + "' order by marker.marker_id;";
                myLogger.info("processData: query statement: " + query);
                System.out.println("UpdateMarkerAndDNA_idxes: execute query: " + query);
                writeNameRows(dbConnection, query, writerMarkerID, "marker_name\tm_name\tplatform_id\n", platformID);
                // Elapsed time is measured from the start of the method, so it includes the dnarun step.
                System.out.printf("TotalTime for marker_name query: %g sec%n", (double) (System.nanoTime() - time) / 1e9);
            }
            // Reached only when both files were written and closed without error.
            System.out.println("\nFiles written to " + dnarunFile + " and " + markerFile);
        } catch (Exception exc) {
            System.out.println("UpdateMarkerAndDNA_idxes: caught exception processing writing files");
            exc.printStackTrace();
        }
    }

    /**
     * Runs {@code query} and writes {@code header} followed by one line per result row:
     * the row's "name" column twice, then {@code id}, tab-separated.
     * The name is duplicated because, per the original author's note, the first copy
     * is converted by the downstream script.
     */
    private static void writeNameRows(Connection dbConnection, String query, BufferedWriter writer, String header, int id)
            throws java.io.IOException, java.sql.SQLException {
        String idField = Integer.toString(id); // constant for every row, so convert once
        try (Statement st = dbConnection.createStatement()) {
            st.setFetchSize(FETCH_SIZE); // should return results in batches
            try (ResultSet rs = st.executeQuery(query)) {
                writer.write(header);
                while (rs.next()) {
                    String name = rs.getString("name");
                    writer.write(name);
                    writer.write("\t");
                    writer.write(name);
                    writer.write("\t");
                    writer.write(idField);
                    writer.write("\n");
                }
            }
        }
    }

    /**
     * Runs {@link #createIdxValues} with the hard-coded developer settings below.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String configFile = "/Users/lcj34/notes_files/gobiiANDBms/gobii_loading/dbConfigFile_maize2.txt";
        //String datasetName = "ZeaGBSv27impV5_20160209_AGPv2_282";
        String outputDir = "/Users/lcj34/notes_files/gobiiANDBms/gobii_loading/update_idxes/";
        int datasetID = 5;
        int platformID = 3; // needed for marker file
        int experimentID = 4; // needed for dnarun file
        createIdxValues(configFile, outputDir, datasetID, platformID, experimentID); // writes both the dnarun and marker files
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy