net.maizegenetics.pangenome.multiSequenceAlignment.RemoveLongRunNs Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome.multiSequenceAlignment;
import net.maizegenetics.util.Utils;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.stream.Collectors;
/**
 * Simple test utility (script) that shortens long runs of {@code N} in a list of FASTA files.
 *
 * <p>Given a text file naming one input file per line, every input file is copied to a new
 * {@code *_LongNsRemoved.fa} file in which each run of consecutive uppercase {@code N}
 * characters on a sequence line is truncated to at most two. The names of the files that were
 * written are then stored, one per line, in a second output file for the next step.
 *
 * TODO Refactor out the methods to be more object oriented
 * Created by zrm22 on 6/7/17.
 */
public class RemoveLongRunNs {

    /** Longest run of consecutive {@code N} characters that is kept in a sequence line. */
    private static final int MAX_CONSECUTIVE_NS = 2;

    /**
     * Command line entry point.
     *
     * @param args {@code args[0]} file listing the input files (one per line),
     *             {@code args[1]} output directory prefix, {@code args[2]} file that receives
     *             the list of exported file names
     */
    public static void main(String[] args) {
        if (args == null || args.length < 3) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            System.err.println(
                    "Usage: RemoveLongRunNs <listOfFiles> <outputDirectory> <outputListOfFiles>");
            System.exit(1);
        }
        RemoveLongRunNs app = new RemoveLongRunNs();
        System.out.println("Created object");
        app.run(args[0], args[1], args[2]);
    }

    /**
     * Setup the run of the tool and start it exporting files.
     *
     * <p>Reads every line of {@code listOfFiles} (echoing each one to standard out) and hands
     * the collected lines to the export step. Any exception is reported with a stack trace and
     * the method returns normally.
     *
     * @param listOfFiles       path of a text file holding one input file name per line
     * @param outputDirectory   prefix prepended verbatim to every exported file name; no path
     *                          separator is added, so it should end with one
     * @param outputListOfFiles path of the file that receives the exported file names
     */
    public void run(String listOfFiles, String outputDirectory, String outputListOfFiles) {
        try {
            ArrayList<String> lines = new ArrayList<>();
            // try-with-resources closes the list file even when reading fails part way.
            try (BufferedReader reader = new BufferedReader(new FileReader(listOfFiles))) {
                String currLine;
                while ((currLine = reader.readLine()) != null) {
                    System.out.println(currLine);
                    lines.add(currLine);
                }
            }
            loadRemoveNsAndExport(lines, outputDirectory, outputListOfFiles);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Method will go through each line in the list and apply the Remove N algorithm and export
     * both the changed Fasta file and a list of files for the next step.
     *
     * <p>Files are processed in list order. A file that fails is still represented in the
     * output list by whatever export name had been determined when the failure happened
     * (possibly an empty line), so the list always has one line per input line.
     *
     * @param lines          input file names, one per element
     * @param outputDir      prefix prepended verbatim to every exported file name
     * @param outputFileName path of the file that receives the exported file names
     */
    private void loadRemoveNsAndExport(ArrayList<String> lines, String outputDir, String outputFileName) {
        ArrayList<String> exportedFileNameList = lines.stream()
                .map(line -> exportWithoutLongNs(line, outputDir))
                .collect(Collectors.toCollection(ArrayList::new));

        //Loop through the list and add each name to the outputFile
        try (BufferedWriter listOfFileWriter = Utils.getBufferedWriter(outputFileName)) {
            for (String exportedFileName : exportedFileNameList) {
                listOfFileWriter.write(exportedFileName);
                listOfFileWriter.newLine();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies one input file to its {@code _LongNsRemoved.fa} counterpart, truncating long runs
     * of N on every line that is not an id line.
     *
     * <p>The export name is {@code outputDir} + the part of the input's file name before its
     * first {@code '.'} + {@code "_LongNsRemoved.fa"}. Errors are reported with a stack trace
     * and do not propagate, so one bad input does not stop the remaining files.
     *
     * @param inputFile path of the file to read; {@code '/'} is treated as the path separator
     * @param outputDir prefix prepended verbatim to the exported file name
     * @return the export name, or the empty string when the failure happened before the name
     *         was determined
     */
    private String exportWithoutLongNs(String inputFile, String outputDir) {
        String exportedFileName = "";
        // The reader is opened before the name is derived so that a file which cannot be
        // opened still yields the empty name, exactly as before.
        try (BufferedReader reader = Utils.getBufferedReader(inputFile)) {
            String[] pathParts = inputFile.split("/");
            String justFileName = pathParts[pathParts.length - 1];
            //pull off the .gz and .fa (everything from the first '.' on)
            String fileNameNoExt = justFileName.split("\\.")[0];
            exportedFileName = outputDir + fileNameNoExt + "_LongNsRemoved.fa";

            try (BufferedWriter writer = Utils.getBufferedWriter(exportedFileName)) {
                String currLine;
                while ((currLine = reader.readLine()) != null) {
                    // Id lines (starting with '>') are exported as is; every other line goes
                    // through the removeN method first.
                    String outputLine = currLine.startsWith(">") ? currLine : removeLongNs(currLine);
                    writer.write(outputLine);
                    writer.newLine();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return exportedFileName;
    }

    /**
     * Simple utility to shorten long runs of consecutive Ns.
     *
     * <p>Only uppercase {@code 'N'} is counted. Because this is applied one line at a time, a
     * run that continues across a line break is truncated separately on each line.
     *
     * @param anchorSequence a single sequence line
     * @return the line with every run of {@code N} cut down to at most
     *         {@code MAX_CONSECUTIVE_NS} characters; all other characters are kept in order
     */
    private static String removeLongNs(String anchorSequence) {
        StringBuilder longNRemovedBuilder = new StringBuilder(anchorSequence.length());
        int nCounter = 0;
        for (int i = 0; i < anchorSequence.length(); i++) {
            char base = anchorSequence.charAt(i);
            if (base == 'N') {
                // Keep only the first MAX_CONSECUTIVE_NS Ns of the current run.
                if (nCounter < MAX_CONSECUTIVE_NS) {
                    longNRemovedBuilder.append(base);
                    nCounter++;
                }
            } else {
                // Any other character ends the run.
                nCounter = 0;
                longNRemovedBuilder.append(base);
            }
        }
        return longNRemovedBuilder.toString();
    }
}