All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.multiSequenceAlignment.RemoveLongRunNs Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome.multiSequenceAlignment;

import net.maizegenetics.util.Utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Simple test utility (script) which collapses long runs of consecutive Ns in each Fasta file named in a list of files.
 * TODO Refactor the methods to be more object oriented.
 * Created by zrm22 on 6/7/17.
 */
public class RemoveLongRunNs {

    /** Longest run of consecutive 'N' characters that is kept; any further Ns in the same run are dropped. */
    private static final int MAX_CONSECUTIVE_NS = 2;

    /** Suffix (including the extension) appended to the base name of every exported file. */
    private static final String EXPORT_SUFFIX = "_LongNsRemoved.fa";

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the file listing one input path per line, {@code args[1]} is the
     *             output directory, {@code args[2]} is the path of the list of exported files to write
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: RemoveLongRunNs <listOfFiles> <outputDirectory> <outputListOfFiles>");
            System.exit(1);
        }
        RemoveLongRunNs app = new RemoveLongRunNs();
        System.out.println("Created object");
        app.run(args[0], args[1], args[2]);
    }


    /**
     * Reads the list of input files, echoing each line to standard output, then processes every listed
     * file and writes the list of exported files. Any exception is printed and otherwise ignored.
     *
     * @param listOfFiles       path of a text file holding one input file path per line; blank lines are skipped
     * @param outputDirectory   prefix for every exported file; it is concatenated directly with the file
     *                          name, so it must end with a path separator to act as a directory
     * @param outputListOfFiles path of the file that receives one exported file name per line
     */
    public void run(String listOfFiles, String outputDirectory, String outputListOfFiles) {
        try {
            List<String> lines = new ArrayList<>();
            // try-with-resources closes the reader even when reading fails part way through.
            try (BufferedReader reader = new BufferedReader(new FileReader(listOfFiles))) {
                String currLine;
                while ((currLine = reader.readLine()) != null) {
                    System.out.println(currLine);
                    // A blank line is not a path; processing it would create "<outputDirectory>_LongNsRemoved.fa".
                    if (!currLine.trim().isEmpty()) {
                        lines.add(currLine);
                    }
                }
            }
            loadRemoveNsAndExport(lines, outputDirectory, outputListOfFiles);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Applies the Remove N algorithm to every file in the list, in order, and then writes the names of
     * the exported Fasta files, one per line, as the list of files for the next step.
     *
     * @param lines          input file paths
     * @param outputDir      prefix prepended directly to each exported file name
     * @param outputFileName path of the file that receives the exported file names
     */
    private void loadRemoveNsAndExport(List<String> lines, String outputDir, String outputFileName) {
        List<String> exportedFileNameList = lines.stream()
                .map(line -> removeNsFromFile(line, outputDir))
                .collect(Collectors.toList());

        // Write each exported name to the output list file; the writer is closed even on failure.
        try (BufferedWriter listOfFileWriter = Utils.getBufferedWriter(outputFileName)) {
            for (String exportedFileName : exportedFileNameList) {
                listOfFileWriter.write(exportedFileName);
                listOfFileWriter.newLine();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies one input file to {@code outputDir + <base name> + "_LongNsRemoved.fa"}. Lines starting
     * with {@code >} are copied unchanged; every other line is passed through {@link #removeLongNs(String)}.
     * The base name is the part of the last "/"-separated path element that precedes its first ".".
     *
     * <p>Failures are printed and processing continues with the next file. The value returned after a
     * failure is whatever name had been computed so far: an empty string if the failure happened before
     * the export name was built, otherwise the export name (the file may then be incomplete).
     *
     * @param inputPath path of the file to read
     * @param outputDir prefix prepended directly to the exported file name
     * @return the exported file name, or an empty string as described above
     */
    private String removeNsFromFile(String inputPath, String outputDir) {
        String exportedFileName = "";
        String[] pathParts = inputPath.split("/");

        try (BufferedReader reader = Utils.getBufferedReader(inputPath)) {
            String justFileName = pathParts[pathParts.length - 1];
            // Keep only the text before the first dot, which pulls off the .gz and .fa.
            String fileNameNoExt = justFileName.split("\\.")[0];

            exportedFileName = outputDir + fileNameNoExt + EXPORT_SUFFIX;
            try (BufferedWriter writer = Utils.getBufferedWriter(exportedFileName)) {
                String currLine;
                while ((currLine = reader.readLine()) != null) {
                    // Id lines are exported as they are; sequence lines go through the removeN method first.
                    String outputLine = currLine.startsWith(">") ? currLine : removeLongNs(currLine);
                    writer.write(outputLine);
                    writer.newLine();
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }

        return exportedFileName;
    }

    /**
     * Shortens every run of consecutive upper-case {@code 'N'} characters to at most
     * {@value #MAX_CONSECUTIVE_NS} characters. All other characters, including lower-case
     * {@code 'n'}, are copied unchanged. The run counter starts at zero on every call, so a run
     * that continues across two separately processed lines is shortened within each line on its own.
     *
     * @param anchorSequence the sequence text to process
     * @return the sequence with long runs of Ns shortened
     */
    private String removeLongNs(String anchorSequence) {
        StringBuilder longNRemovedBuilder = new StringBuilder(anchorSequence.length());

        int nCounter = 0;
        for (int i = 0; i < anchorSequence.length(); i++) {
            char base = anchorSequence.charAt(i);
            if (base == 'N') {
                if (nCounter < MAX_CONSECUTIVE_NS) {
                    longNRemovedBuilder.append(base);
                    nCounter++;
                }
            }
            else {
                nCounter = 0;
                longNRemovedBuilder.append(base);
            }
        }

        return longNRemovedBuilder.toString();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy