/*
 * net.maizegenetics.pangenome.fastaExtraction.CreateDBLoadScripts (Maven / Gradle / Ivy)
 *
 * PHG - Practical Haplotype Graph
 * Note: this source was taken from an artifact-browsing page (download / "show more artifacts" /
 * "show all versions of phg" / documentation links). That page reported that a newer version
 * of the phg artifact exists: 1.10.
 */
package net.maizegenetics.pangenome.fastaExtraction;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;

/**
 * Simple main program that creates the shell script used to load all of the haplotype_caller fastas into the db.
 * TODO Refactor the logic and move execution to a plugin
 * Created by zrm22 on 5/12/17.
 */
public class CreateDBLoadScripts {

    /** Number of load commands written between two consecutive database backup ("cp") commands. */
    private static final int BACKUP_INTERVAL = 10;

    public static void main(String[] args) {
        CreateDBLoadScripts app = new CreateDBLoadScripts();
//        app.createLoadScript("/Users/zrm22/Desktop/listOfFastas.txt","/Users/zrm22/Desktop/fullFastaDBLoadScript.sh",
//                "./v4anchors_allChroms_mergedPlus1000orGap_md5Hash.db","./PHGUploadFiles/", "./load_sequences_output/",
//                "./dbInProgressBackups/");

        app.createLoadScript("/Users/zrm22/PanGenome/UploadFastas/listOfRobertsFastas.txt","/Users/zrm22/PanGenome/UploadFastas/fullRobertFastaDBLoadScript.sh",
                "./v4anchors_allChroms_mergedPlus1000orGap_md5Hash.db","./PHGUploadFiles/RobertsFiles/", "./load_sequences_output/",
                "./dbInProgressBackupsRobert/");
    }

    /**
     * Creates the loading script from a file that lists one fasta path per line.
     *
     * <p>For every non-blank line of the list file, one {@code LoadHapSequencesToPHGdb.jar} command is written to
     * the output file and echoed to standard out. After every {@value #BACKUP_INTERVAL} load commands a
     * {@code cp} command is added that copies the database into the backup directory. Blank lines in the list
     * file are skipped and do not count towards the backup interval.
     *
     * <p>Path arguments are concatenated as given, so directory arguments are expected to end with a
     * {@code /}. Any failure (for example an unreadable list file) is reported on standard error and is not
     * propagated to the caller.
     *
     * @param listOfFastaFileName   file holding one fasta file path per line
     * @param outputFileName        script file to create (overwritten if it already exists)
     * @param pathToDB              path to the database file passed to every load command
     * @param pathToLoadParamFiles  directory prefix of the {@code <taxon>_load_data.txt} parameter files
     * @param loadingExportPath     directory prefix used for the loader output and the redirected log file
     * @param tempDBBackupDirectory directory prefix that receives the periodic database copies
     */
    public void createLoadScript(String listOfFastaFileName, String outputFileName,String pathToDB, String pathToLoadParamFiles, String loadingExportPath, String tempDBBackupDirectory) {
        // try-with-resources guarantees that both files are closed (and the writer flushed) even when an
        // exception interrupts the loop.
        try (BufferedReader reader = new BufferedReader(new FileReader(listOfFastaFileName));
             BufferedWriter writer = new BufferedWriter(new FileWriter(outputFileName))) {

            // The backup copies are named after the db file name up to its first '.' character.
            String[] pathToDBSplit = pathToDB.split("/");
            String dbName = pathToDBSplit[pathToDBSplit.length - 1];
            String dbNameNoExtension = dbName.split("[.]")[0];

            int counter = 1;
            String currentLine;
            while ((currentLine = reader.readLine()) != null) {
                // A blank (often trailing) line carries no fasta path; emitting a command for it would
                // reference a parameter file without a taxon name.
                if (currentLine.trim().isEmpty()) {
                    continue;
                }

                String taxaName = extractTaxaName(currentLine);
                String loadCommand = buildLoadCommand(pathToDB, currentLine, pathToLoadParamFiles, taxaName, loadingExportPath);
                writer.write(loadCommand);
                writer.newLine();
                System.out.println(loadCommand);

                if (counter % BACKUP_INTERVAL == 0) {
                    //Copy out the db out temporarily
                    String copyCommand = buildBackupCommand(pathToDB, tempDBBackupDirectory, dbNameNoExtension, counter);
                    writer.write(copyCommand);
                    writer.newLine();
                    System.out.println(copyCommand);
                }
                counter++;
            }
        }
        catch (Exception e) {
            // Deliberately broad: this method has always reported failures instead of throwing them.
            System.err.println("Failed to create load script " + outputFileName + " from " + listOfFastaFileName);
            e.printStackTrace();
        }
    }

    /**
     * Returns the taxon name of a fasta path: the part of the file name (text after the last "/") that
     * precedes the first "_" character.
     */
    private static String extractTaxaName(String fastaPath) {
        String[] pathParts = fastaPath.split("/");
        String fastaFileName = pathParts[pathParts.length - 1];
        return fastaFileName.split("_")[0];
    }

    /**
     * Builds one load command line, for example:
     * time java -jar -Xms200g -Xmx225g ./LoadHapSequencesToPHGdb.jar ./my.db fastas/T1_x.fa params/T1_load_data.txt none out/ >out/loadHapSequences_T1.txt
     */
    private static String buildLoadCommand(String pathToDB, String fastaPath, String pathToLoadParamFiles, String taxaName, String loadingExportPath) {
        StringBuilder command = new StringBuilder();
        command.append("time java -jar -Xms200g -Xmx225g ./LoadHapSequencesToPHGdb.jar ");
        //db info
        command.append(pathToDB).append(" ");
        //fasta info
        command.append(fastaPath).append(" ");
        //parameter file info
        command.append(pathToLoadParamFiles).append(taxaName).append("_load_data.txt none ");
        //Folder to hold the output files, followed by the redirect of the loader's standard out
        command.append(loadingExportPath);
        command.append(" >").append(loadingExportPath).append("loadHapSequences_").append(taxaName).append(".txt");
        return command.toString();
    }

    /** Builds the "cp" command that copies the database into the backup directory, tagged with the taxa count. */
    private static String buildBackupCommand(String pathToDB, String tempDBBackupDirectory, String dbNameNoExtension, int taxaLoaded) {
        return "cp " + pathToDB + " " + tempDBBackupDirectory + dbNameNoExtension + "_" + taxaLoaded + "_taxaLoaded.db";
    }
}