All downloads are free. The search and download functionality uses the official Maven repository.

net.maizegenetics.pangenome.processAssemblyGenomes.CreateContigFastaFromAssemblyGenomePlugin Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
/**
 * 
 */
package net.maizegenetics.pangenome.processAssemblyGenomes;

import java.awt.Frame;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;

import javax.swing.ImageIcon;

import org.apache.log4j.Logger;

import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.util.Utils;

/**
 * Splits every sequence of an assembly genome fasta file into contigs, using
 * N's as the separators.  Bases are read until an N is encountered, the bases
 * seen so far are written as their own contig to the contig fasta output file,
 * the run of N's is skipped, and the next contig starts at the following base.
 * Sequence is upper-cased before splitting, so lower-case n's also act as
 * separators and all output sequence is upper case.
 * 
 * This algorithm is subject to change during pipeline testing.
 * 
 * Each sequence idline will be 
 *      {@code >assemblyChrom:assemblyStart:assemblyEnd}
 * where assemblyStart and assemblyEnd are 1-based, inclusive positions of the
 * contig on the original assembly sequence.
 *   
 * INPUT: 
 *   1.  The assembly genome file to process
 *   
 * OUTPUT:
 *   1.  The newly created fasta file of contigs, written to
 *       {@code <outputDir><assembly>_asContigs.fa}.
 *   
 * @author lcj34
 *
 */
public class CreateContigFastaFromAssemblyGenomePlugin extends AbstractPlugin {
    private static final Logger myLogger = Logger.getLogger(CreateContigFastaFromAssemblyGenomePlugin.class);

    // Parameters are typed as String so the value() getters below return String without casts.
    private PluginParameter<String> myGenomeFile = new PluginParameter.Builder<String>("genomeFile", null, String.class).guiName("Assembly Genome").required(true).inFile()
            .description("Input assembly genome file from which to pull sequence").build();
    private PluginParameter<String> myAssembly = new PluginParameter.Builder<String>("assembly", null, String.class).guiName("Assembly Name").required(true)
            .description("Name of assembly to append to output files").build();
    private PluginParameter<String> myOutputDir = new PluginParameter.Builder<String>("o", null, String.class).guiName("Output Directory").required(true).outDir()
            .description("Output directory including trailing / for writing fasta and fastq files to use as input for BWA-MEM and other tools").build();
    
    public CreateContigFastaFromAssemblyGenomePlugin() {
        super(null, false);
    }

    public CreateContigFastaFromAssemblyGenomePlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    public CreateContigFastaFromAssemblyGenomePlugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }
    
    /**
     * Reads the assembly genome fasta, splits each sequence on N's and writes
     * the resulting contigs to {@code outputDir() + assembly() + "_asContigs.fa"}.
     * The output directory value is concatenated as-is, so it must already end
     * with a path separator.
     *
     * @param input not read by this method
     *
     * @return always null; the result of this plugin is the file written to disk
     *
     * @throws IllegalStateException if anything fails while reading the genome
     *         file or writing the contig file; the original exception is attached
     *         as the cause
     */
    @Override
    public DataSet processData(DataSet input) {
        myLogger.info("In processData");
        
        String fastaFile = outputDir() + assembly() + "_asContigs.fa";
        try (BufferedReader rd = Utils.getBufferedReader(genomeFile());
             BufferedWriter fastaBW = Utils.getBufferedWriter(fastaFile)) {
            String fileLine;
            String fastaChr = "-1";
  
            StringBuilder contigSB = new StringBuilder();
            // Current decision is to include scaffolds. If that decision changes,
            // filter them out here.
            while ((fileLine = rd.readLine()) != null) {
                if (fileLine.startsWith(">"))  {
                    // A new id line closes the previous sequence: split it and write it out.
                    if (contigSB.length() > 0) { // all chroms
                        createContigs(fastaChr,contigSB.toString().toUpperCase(),fastaBW);
                    }
                    contigSB.setLength(0);
                    // The sequence name is the id line with every '>' removed,
                    // truncated at the first space.
                    fastaChr = fileLine.replace(">", "");
                    fastaChr = fastaChr.split(" ")[0];
                } else {
                    contigSB.append(fileLine); // keep appending until we hit next id line.
                }
            }
            // process last one
            if (contigSB.length() > 0) {
                createContigs(fastaChr,contigSB.toString().toUpperCase(),fastaBW);
            }

        } catch (Exception exc) {
            myLogger.error(exc.getMessage(), exc);
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new IllegalStateException("CreateContigFastaFromAssemblyGenomePlugin: error reading or writing file " + exc.getMessage(), exc);
        }
        
        return null;
    }
    
    
    /**
     * Chops a sequence into contigs, splitting on N's, and writes each contig
     * as one fasta record with the idline {@code >chrom:start:end}, where start
     * and end are 1-based, inclusive positions within {@code sequence}.
     * Only the upper-case character 'N' is treated as a separator; the caller
     * upper-cases the sequence before calling.
     *
     * @param chrom    name used as the first field of each idline
     * @param sequence full sequence to split
     * @param fastaBW  writer the contig records are written to
     */
    private static void createContigs(String chrom, String sequence, 
            BufferedWriter fastaBW) {
        myLogger.info(" createContigs, processing chrom : " + chrom + ", length " + sequence.length());
        int seqLength = sequence.length();
        // 0-based index of the first base of the contig being scanned, or -1 while
        // no contig is open (at the very start, or inside a run of N's).
        int openStart = -1;
        for (int idx = 0; idx < seqLength; idx++) {
            if (sequence.charAt(idx) == 'N') {
                if (openStart >= 0) {
                    // The contig covers 0-based [openStart, idx), which is
                    // 1-based inclusive openStart+1 .. idx.
                    String idline = ">" + chrom + ":" + (openStart + 1) + ":" + idx;
                    writeFasta(idline, sequence.substring(openStart, idx), fastaBW);
                    openStart = -1;
                }
            } else if (openStart < 0) {
                openStart = idx;
            }
        }
        // A contig that runs to the end of the sequence has no trailing N to close it.
        if (openStart >= 0) {
            String idline = ">" + chrom + ":" + (openStart + 1) + ":" + seqLength;
            writeFasta(idline, sequence.substring(openStart), fastaBW);
        }
    }
    
    /**
     * Writes one fasta record: the idline, a newline, the sequence on a single
     * line, and a trailing newline.
     *
     * @param idline   complete id line, including the leading '>'
     * @param sequence contig sequence
     * @param fastaBW  writer to append the record to
     *
     * @throws IllegalStateException if the write fails; the IOException is
     *         attached as the cause
     */
    private static void writeFasta(String idline,String sequence, BufferedWriter fastaBW) {
        try {
            // Written piecewise so a large contig is not copied into a temporary string first.
            fastaBW.write(idline);
            fastaBW.write("\n");
            fastaBW.write(sequence);
            fastaBW.write("\n");
        } catch (IOException ioe) {
            myLogger.error(ioe.getMessage(),ioe);
            throw new IllegalStateException("CreateContigFastaFromAssemblyGenomePlugin: error writing fasta file " + ioe.getMessage(), ioe);
        }
    }
    
    @Override
    public String getButtonName() {
        return ("Contig Fasta from Assembly Genome");
    }
    @Override
    public String getToolTipText() {
        return ("Contig fasta from Assembly Genome");
    }

    // The following getters and setters were auto-generated.
    // Please use this method to re-generate.
    //
//     public static void main(String[] args) {
//         GeneratePluginCode.generate(CreateContigFastaFromAssemblyGenomePlugin.class);
//     }

    // Convenience method to run plugin with one return object.
    // TODO: Replace  with specific type.
//    public  runPlugin(DataSet input) {
//        return () performFunction(input).getData(0).getData();
//    }

    /**
     * Input assembly genome file from which to pull sequence
     *
     * @return Assembly Genome
     */
    public String genomeFile() {
        return myGenomeFile.value();
    }

    /**
     * Set Assembly Genome. Input assembly genome file from
     * which to pull sequence
     *
     * @param value Assembly Genome
     *
     * @return this plugin
     */
    public CreateContigFastaFromAssemblyGenomePlugin genomeFile(String value) {
        myGenomeFile = new PluginParameter<>(myGenomeFile, value);
        return this;
    }

    /**
     * Name of the assembly. It forms the start of the output file name,
     * which is the assembly name followed by "_asContigs.fa".
     *
     * @return Assembly Name
     */
    public String assembly() {
        return myAssembly.value();
    }

    /**
     * Set Assembly Name. The name forms the start of the output file
     * name, which is the assembly name followed by "_asContigs.fa".
     *
     * @param value Assembly Name
     *
     * @return this plugin
     */
    public CreateContigFastaFromAssemblyGenomePlugin assembly(String value) {
        myAssembly = new PluginParameter<>(myAssembly, value);
        return this;
    }
    /**
     * Directory the contig fasta file is written to. The value is
     * prepended to the file name as-is, so it must include the trailing /.
     *
     * @return Output Directory
     */
    public String outputDir() {
        return myOutputDir.value();
    }

    /**
     * Set Output Directory. Directory the contig fasta file is written
     * to; the value must include the trailing /.
     *
     * @param value Output Directory
     *
     * @return this plugin
     */
    public CreateContigFastaFromAssemblyGenomePlugin outputDir(String value) {
        myOutputDir = new PluginParameter<>(myOutputDir, value);
        return this;
    }
    

    /**
     * No icon is defined for this plugin.
     *
     * @return always null
     */
    @Override
    public ImageIcon getIcon() {
        return null;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy