net.maizegenetics.pangenome.fastaExtraction.CreateDBLoadScripts Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome.fastaExtraction;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
/**
* Simple Main program to create the script used to load all of the haplotype_caller fastas into the db
* TODO Refactor the logic and move execution to a plugin
* Created by zrm22 on 5/12/17.
*/
public class CreateDBLoadScripts {

    /** A "cp" backup command for the db is emitted after every this-many load commands. */
    private static final int BACKUP_INTERVAL = 10;

    /**
     * Entry point. Generates a load script using the hard-coded paths below.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String args[]) {
        CreateDBLoadScripts app = new CreateDBLoadScripts();
        // Earlier invocation, kept for reference:
        // app.createLoadScript("/Users/zrm22/Desktop/listOfFastas.txt","/Users/zrm22/Desktop/fullFastaDBLoadScript.sh",
        // "./v4anchors_allChroms_mergedPlus1000orGap_md5Hash.db","./PHGUploadFiles/", "./load_sequences_output/",
        // "./dbInProgressBackups/");
        app.createLoadScript("/Users/zrm22/PanGenome/UploadFastas/listOfRobertsFastas.txt","/Users/zrm22/PanGenome/UploadFastas/fullRobertFastaDBLoadScript.sh",
                "./v4anchors_allChroms_mergedPlus1000orGap_md5Hash.db","./PHGUploadFiles/RobertsFiles/", "./load_sequences_output/",
                "./dbInProgressBackupsRobert/");
    }

    /**
     * Writes a shell script containing one LoadHapSequencesToPHGdb.jar invocation per fasta path
     * listed in the input file, plus a "cp" command that backs up the db after every
     * {@value #BACKUP_INTERVAL} load commands. Every generated line is also echoed to standard out.
     *
     * <p>The taxa name for each fasta is the part of its file name before the first "_".
     * Blank lines in the list file are skipped and do not count towards the backup interval.
     * Paths are written into the script verbatim (unquoted).
     *
     * <p>Both files are closed (and the script flushed) even when generation fails part-way.
     * Any exception is reported via its stack trace rather than propagated.
     *
     * @param listOfFastaFileName text file holding one fasta file path per line
     * @param outputFileName path of the shell script to create (overwritten if present)
     * @param pathToDB db path passed to every load command; its file name up to the first "."
     *                 is used to name the backup copies
     * @param pathToLoadParamFiles prefix prepended directly to "&lt;taxa&gt;_load_data.txt";
     *                             must end with a path separator
     * @param loadingExportPath output folder passed to the loader and used as the prefix for
     *                          the redirected "loadHapSequences_&lt;taxa&gt;.txt" log; must end
     *                          with a path separator
     * @param tempDBBackupDirectory prefix prepended directly to the backup db file name; must
     *                              end with a path separator
     */
    public void createLoadScript(String listOfFastaFileName, String outputFileName,String pathToDB, String pathToLoadParamFiles, String loadingExportPath, String tempDBBackupDirectory) {
        // try-with-resources guarantees both streams are closed, and the writer flushed,
        // on every exit path.
        try (BufferedReader reader = new BufferedReader(new FileReader(listOfFastaFileName));
             BufferedWriter writer = new BufferedWriter(new FileWriter(outputFileName))) {

            String dbNameNoExtension = firstOrEmpty(lastOrEmpty(pathToDB.split("/")).split("[.]"));

            int counter = 1;
            String currentLine;
            while ((currentLine = reader.readLine()) != null) {
                // A blank line (e.g. a trailing newline in the list) has no fasta to load.
                if (currentLine.trim().isEmpty()) {
                    continue;
                }

                String loadCommand = buildLoadCommand(
                        pathToDB, currentLine, pathToLoadParamFiles, extractTaxaName(currentLine), loadingExportPath);
                writer.write(loadCommand);
                writer.newLine();
                System.out.println(loadCommand);

                if (counter % BACKUP_INTERVAL == 0) {
                    // Copy the db out temporarily so a failed load does not lose earlier work.
                    String copyCommand = buildBackupCommand(pathToDB, tempDBBackupDirectory, dbNameNoExtension, counter);
                    writer.write(copyCommand);
                    writer.newLine();
                    System.out.println(copyCommand);
                }
                counter++;
            }
        }
        catch(Exception e) {
            // Best-effort tool: report the failure but do not propagate it to the caller.
            e.printStackTrace();
        }
    }

    /**
     * Returns the taxa name for a fasta path: the file name (text after the last "/")
     * up to its first "_".
     */
    private static String extractTaxaName(String fastaPath) {
        String fastaFileName = lastOrEmpty(fastaPath.split("/"));
        return firstOrEmpty(fastaFileName.split("_"));
    }

    /**
     * Builds one loader invocation. Example of the resulting line:
     * time java -jar -Xms200g -Xmx225g ./LoadHapSequencesToPHGdb.jar ./db.db fastas/T1_x.fa params/T1_load_data.txt none out/ >out/loadHapSequences_T1.txt
     */
    private static String buildLoadCommand(String pathToDB, String fastaPath, String pathToLoadParamFiles,
                                           String taxaName, String loadingExportPath) {
        StringBuilder command = new StringBuilder();
        command.append("time java -jar -Xms200g -Xmx225g ./LoadHapSequencesToPHGdb.jar ");
        // db info
        command.append(pathToDB).append(" ");
        // fasta info
        command.append(fastaPath).append(" ");
        // parameter file info
        command.append(pathToLoadParamFiles).append(taxaName).append("_load_data.txt none ");
        // folder to hold the output files, then redirect the loader's stdout into it
        command.append(loadingExportPath).append(" >");
        command.append(loadingExportPath).append("loadHapSequences_").append(taxaName).append(".txt");
        return command.toString();
    }

    /** Builds the "cp" command that snapshots the db after {@code taxaLoaded} load commands. */
    private static String buildBackupCommand(String pathToDB, String tempDBBackupDirectory,
                                             String dbNameNoExtension, int taxaLoaded) {
        StringBuilder command = new StringBuilder();
        command.append("cp ").append(pathToDB).append(" ");
        command.append(tempDBBackupDirectory).append(dbNameNoExtension);
        command.append("_").append(taxaLoaded).append("_taxaLoaded.db");
        return command.toString();
    }

    /**
     * Returns the last element of {@code parts}, or "" when the array is empty.
     * String.split drops trailing empty strings, so an input such as "/" splits to a
     * zero-length array.
     */
    private static String lastOrEmpty(String[] parts) {
        return parts.length == 0 ? "" : parts[parts.length - 1];
    }

    /** Returns the first element of {@code parts}, or "" when the array is empty. */
    private static String firstOrEmpty(String[] parts) {
        return parts.length == 0 ? "" : parts[0];
    }
}