All downloads are free. Search and download functionality uses the official Maven repository.

net.maizegenetics.pangenome.fastaExtraction.CreateFastaDBFiles Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome.fastaExtraction;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Simple one-off command line tool to create the "load data" files that accompany
 * FASTA uploads to the db.
 *
 * <p>For every non-blank line (taxon name) of a taxa list file, one two-line,
 * tab-delimited {@code <name>_load_data.txt} file is written to the output folder:
 * a fixed header row followed by a single data row.
 *
 * TODO create these files when you extract FASTA from GVCF.  No need for this step.
 * Created by zrm22 on 5/11/17.
 */
public class CreateFastaDBFiles {
    /** Column header written as the first line of every load-data file. */
    private static final String HEADER = "Genotype\tHapnumber\tDataline\tploidy\treference\tgenesPhased\tchromsPhased\tconfidence\tMethod\tMethodDetails\tRefVersion";

    /** Value written in the RefVersion column of every data row. */
    private static final String REF_VERSION = "B73v4_gffGenes_plus1000_trimmed";

    /** Suffix appended to the taxon name to form the output file name. */
    private static final String FILE_SUFFIX = "_load_data.txt";

    /** Taxa list used by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_TAXA_LIST = "/Users/zrm22/PanGenome/DBUploadFiles/listOfFiles.txt";

    /** Output folder used by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_OUTPUT_FOLDER = "/Users/zrm22/PanGenome/DBUploadFiles/UploadFiles/";

    /**
     * Entry point. Runs {@link #createInbredFiles(String, String)}.
     *
     * @param args optional: {@code args[0]} is the taxa list file and {@code args[1]} is the
     *             output folder (which must end with a path separator, since file names are
     *             appended to it directly). With fewer than two arguments the built-in
     *             default paths are used, matching the tool's previous behavior.
     */
    public static void main(String args[]) {
        CreateFastaDBFiles app = new CreateFastaDBFiles();
        if (args.length >= 2) {
            app.createInbredFiles(args[0], args[1]);
        } else {
            app.createInbredFiles(DEFAULT_TAXA_LIST, DEFAULT_OUTPUT_FOLDER);
        }
    }

    /**
     * Writes one load-data file per taxon, with the taxon name embedded in the
     * Genotype ({@code <name>_Haplotype_Caller}), Method and MethodDetails columns.
     *
     * <p>Failures are reported on standard error and processing stops; no exception
     * is propagated to the caller.
     *
     * @param taxaListFile path of a text file with one taxon name per line; blank lines are skipped
     * @param outputFolder folder prefix to which {@code <name>_load_data.txt} is appended verbatim
     */
    public void createFiles(String taxaListFile, String outputFolder) {
        // try-with-resources guarantees the reader is closed even when a write fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(taxaListFile))) {
            String currentLine;
            while ((currentLine = reader.readLine()) != null) {
                if (currentLine.trim().isEmpty()) {
                    // A blank line would otherwise yield a file named "_load_data.txt" with an empty genotype.
                    continue;
                }
                String dataLine = currentLine + "_Haplotype_Caller\t0\t" + currentLine
                        + " from Haplotype Caller\t1\tfalse\tfalse\tfalse\t1\t" + currentLine
                        + "_HAPLOTYPE_CALLER\t" + currentLine + " generated using Haplotype Caller\t" + REF_VERSION;
                writeLoadFile(outputFolder + currentLine + FILE_SUFFIX, dataLine);
                System.out.println(currentLine + FILE_SUFFIX);
            }
        } catch (Exception e) {
            // Deliberately best-effort: report and return rather than crash the caller.
            System.err.println("Failed to create load files from " + taxaListFile);
            e.printStackTrace();
        }
    }

    /**
     * Writes one load-data file per taxon using the plain taxon name as Genotype and
     * a fixed {@code Haplotype_caller} method.
     *
     * <p>For names starting with {@code ZEAxpp} or {@code MAI}, the output file name uses
     * only the part of the name before the first {@code '-'}; the data row still carries
     * the full name.
     *
     * <p>Failures are reported on standard error and processing stops; no exception
     * is propagated to the caller.
     *
     * @param taxaListFile path of a text file with one taxon name per line; blank lines are skipped
     * @param outputFolder folder prefix to which the output file name is appended verbatim
     */
    public void createInbredFiles(String taxaListFile, String outputFolder) {
        try (BufferedReader reader = new BufferedReader(new FileReader(taxaListFile))) {
            String currentLine;
            while ((currentLine = reader.readLine()) != null) {
                if (currentLine.trim().isEmpty()) {
                    continue;
                }
                String fileStem = currentLine;
                if (currentLine.startsWith("ZEAxpp") || currentLine.startsWith("MAI")) {
                    // The prefix is non-empty, so split always yields at least one element here.
                    fileStem = currentLine.split("-")[0];
                }
                String dataLine = currentLine + "\t0\t" + currentLine
                        + " from Haplotype Caller\t1\tfalse\tfalse\tfalse\t1\tHaplotype_caller\tFile generated using Haplotype Caller\t"
                        + REF_VERSION;
                writeLoadFile(outputFolder + fileStem + FILE_SUFFIX, dataLine);
                // The console message intentionally keeps the full taxon name, as before.
                System.out.println(currentLine + FILE_SUFFIX);
            }
        } catch (Exception e) {
            System.err.println("Failed to create load files from " + taxaListFile);
            e.printStackTrace();
        }
    }

    /** Writes the header row, a line separator and the given data row (no trailing newline) to {@code outputFileName}. */
    private void writeLoadFile(String outputFileName, String dataLine) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFileName))) {
            writer.write(HEADER);
            writer.newLine();
            writer.write(dataLine);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy