net.maizegenetics.analysis.gbs.KeepSpecifiedReadsinFastqPlugin Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate trait associations, evolutionary patterns, and linkage
disequilibrium.
/*
* KeepSpecifiedReadsinFastqPlugin
*/
package net.maizegenetics.analysis.gbs;
import java.awt.Frame;
import java.io.BufferedWriter;
import java.io.File;
import javax.swing.ImageIcon;
import net.maizegenetics.dna.map.TOPMInterface;
import net.maizegenetics.dna.map.TOPMUtils;
import net.maizegenetics.dna.tag.FastqReader;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.util.ArgsEngine;
import net.maizegenetics.util.DirectoryCrawler;
import net.maizegenetics.util.Utils;
import org.apache.log4j.Logger;
/**
 * Copies fastq files from an input directory to an output directory, keeping
 * only the reads whose tag is found in the supplied TOPM on one of the
 * requested chromosomes and whose mapped start or end position lies within the
 * requested position range (both bounds inclusive).
 *
 * Command line options are described by {@link #printUsage()} and must be
 * supplied through {@link #setParameters(String[])} before
 * {@link #performFunction(DataSet)} is called.
 *
 * @author terry
 */
public class KeepSpecifiedReadsinFastqPlugin extends AbstractPlugin {

    private static final Logger myLogger = Logger.getLogger(KeepSpecifiedReadsinFastqPlugin.class);
    /** File names (case-insensitive) that are treated as fastq input. */
    private static final String FASTQ_FILENAME_REGEX = "(?i).*\\.fq$|.*\\.fq\\.gz$|.*\\.fastq$|.*_fastq\\.txt$|.*_fastq\\.gz$|.*_fastq\\.txt\\.gz$|.*_sequence\\.txt$|.*_sequence\\.txt\\.gz$";
    private ArgsEngine myArgsEngine = null;
    private String[] myInputFastqFileNames = null;
    private String myOutputDir = null;
    private String myTOPMFilename = null;
    private String myKeyFilename = null;
    private String myEnzyme = null;
    private TOPMInterface myTOPM = null;
    private int[] myChrs;
    private int myStartPos;
    private int myEndPos;

    /**
     * Creates the plugin (non-interactive).
     *
     * @param parentFrame parent frame handed to {@link AbstractPlugin}
     */
    public KeepSpecifiedReadsinFastqPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    /**
     * Reads the TOPM and then filters every input fastq file into a file of
     * the same name in the output directory.
     *
     * Any exception stops processing of the remaining files and is logged; it
     * is not propagated to the caller.
     *
     * @param input ignored
     * @return always null
     */
    @Override
    public DataSet performFunction(DataSet input) {
        try {
            myTOPM = TOPMUtils.readTOPM(myTOPMFilename);
            for (String fastqFile : myInputFastqFileNames) {
                myLogger.info("Processing Fastq: " + fastqFile);
                String outputFile = myOutputDir + "/" + Utils.getFilename(fastqFile);
                myLogger.info("Output Fastq: " + outputFile);
                filterFastq(fastqFile, outputFile);
            }
        } catch (Exception e) {
            myLogger.error("KeepSpecifiedReadsinFastqPlugin: performFunction: problem filtering fastq files", e);
        }
        return null;
    }

    /**
     * Writes to outputFastq every record of inputFastq that maps into the
     * requested region. Reader and writer are always closed, also on failure.
     *
     * A file whose name cannot be parsed by
     * {@link #getParseBarcodeRead(String)} is skipped (no output is created).
     *
     * @param inputFastq fastq file to read
     * @param outputFastq fastq file to create
     * @throws Exception if reading, parsing or writing fails
     */
    private void filterFastq(String inputFastq, String outputFastq) throws Exception {
        ParseBarcodeRead thePBR = getParseBarcodeRead(inputFastq);
        if (thePBR == null) {
            // Details were already logged by getParseBarcodeRead().
            myLogger.error("Skipping Fastq (file name could not be parsed): " + inputFastq);
            return;
        }

        BufferedWriter writer = null;
        FastqReader reader = null;
        try {
            writer = Utils.getBufferedWriter(outputFastq);
            if (writer == null) {
                // Defensive: fail with a clear message rather than a NullPointerException.
                throw new IllegalStateException("KeepSpecifiedReadsinFastqPlugin: filterFastq: Unable to open output file: " + outputFastq);
            }
            reader = new FastqReader(inputFastq);
            for (String[] current = reader.getNext(); current != null; current = reader.getNext()) {
                if (isReadInRegion(thePBR, current)) {
                    // A record is written back as its four entries, one per line.
                    for (int i = 0; i < 4; i++) {
                        writer.write(current[i]);
                        writer.write("\n");
                    }
                }
            }
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                if (writer != null) {
                    writer.close();
                }
            }
        }
    }

    /**
     * Decides whether a fastq record should be kept.
     *
     * The record is kept when it parses to a read, the read's tag is present
     * in the TOPM, the tag's chromosome is one of the requested chromosomes
     * and the tag's start OR end position lies within
     * [myStartPos, myEndPos]. Note that a tag whose start and end lie on
     * opposite sides of the range (spanning it completely) is not kept.
     *
     * @param thePBR barcode parser for the file the record came from
     * @param record record as returned by FastqReader.getNext(); entries 1 and
     * 3 are handed to the parser (presumably sequence and quality - verify
     * against FastqReader)
     * @return true if the record should be written to the output
     */
    private boolean isReadInRegion(ParseBarcodeRead thePBR, String[] record) {
        ReadBarcodeResult rr = thePBR.parseReadIntoTagAndTaxa(record[1], record[3], true, 0);
        if (rr == null) {
            return false;
        }
        int index = myTOPM.getTagIndex(rr.getRead());
        if (index < 0) {
            return false;
        }
        if (!isMatchingChr(myTOPM.getChromosome(index))) {
            return false;
        }
        int start = myTOPM.getStartPosition(index);
        int end = myTOPM.getEndPosition(index);
        boolean startInRange = (start >= myStartPos) && (start <= myEndPos);
        boolean endInRange = (end >= myStartPos) && (end <= myEndPos);
        return startInRange || endInRange;
    }

    /** Logs the command line options understood by this plugin. */
    private void printUsage() {
        myLogger.info(
                "\nThe options for the KeepSpecifiedReadsinFastqPlugin:\n"
                + "-input Input directory containing fastq files\n"
                + "-topm TOPM\n"
                + "-k Key File\n"
                + "-e Enzyme\n"
                + "-output Output directory for resulting fastq files\n"
                + "-chrs Comma separated list of chromosomes. (no spaces)\n"
                + "-startPosition Start Position\n"
                + "-endPosition End Position\n");
    }

    /**
     * Parses and validates the command line options. All options listed by
     * {@link #printUsage()} are required.
     *
     * @param args command line arguments
     * @throws IllegalArgumentException if an option is missing or invalid:
     * input/output is not a directory, no fastq files are found, the TOPM or
     * key file does not exist, input and output directory are the same, a
     * chromosome or position is not an integer, or start is larger than end
     */
    @Override
    public void setParameters(String[] args) {
        if (args.length == 0) {
            printUsage();
            throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
        }

        if (myArgsEngine == null) {
            myArgsEngine = new ArgsEngine();
            myArgsEngine.add("-input", "-input", true);
            myArgsEngine.add("-topm", "-topm", true);
            myArgsEngine.add("-k", "-k", true);
            myArgsEngine.add("-e", "-e", true);
            myArgsEngine.add("-output", "-output", true);
            myArgsEngine.add("-chrs", "-chrs", true);
            myArgsEngine.add("-startPosition", "-startPosition", true);
            myArgsEngine.add("-endPosition", "-endPosition", true);
        }
        myArgsEngine.parse(args);

        // Input directory and the fastq files it contains.
        String tempDirectory = myArgsEngine.getString("-input");
        if ((tempDirectory != null) && tempDirectory.length() != 0) {
            File inputDirectory = new File(tempDirectory);
            if (!inputDirectory.isDirectory()) {
                printUsage();
                throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: The input name you supplied is not a directory: " + tempDirectory);
            }
            myInputFastqFileNames = DirectoryCrawler.listFileNames(FASTQ_FILENAME_REGEX, inputDirectory.getAbsolutePath());
            // Null must be tested before the length is read.
            if (myInputFastqFileNames == null || myInputFastqFileNames.length == 0) {
                printUsage();
                throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: No fastq files in: " + tempDirectory);
            } else {
                myLogger.info("setParameters: Using these fastq files:");
                for (String filename : myInputFastqFileNames) {
                    myLogger.info("setParameters: found fastq: " + filename);
                }
            }
        } else {
            printUsage();
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: No input directory specified.");
        }

        // TOPM file.
        myTOPMFilename = myArgsEngine.getString("-topm");
        if ((myTOPMFilename == null) || (myTOPMFilename.length() == 0)) {
            printUsage();
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: Must define TOPM");
        }
        File tempFile = new File(myTOPMFilename);
        if (!tempFile.exists()) {
            printUsage();
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: The TOPM file doesn't exist: " + myTOPMFilename);
        }

        // Key file.
        myKeyFilename = myArgsEngine.getString("-k");
        if ((myKeyFilename == null) || (myKeyFilename.length() == 0)) {
            printUsage();
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: Must define Key File");
        }
        tempFile = new File(myKeyFilename);
        if (!tempFile.exists()) {
            printUsage();
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: The Key file doesn't exist: " + myKeyFilename);
        }

        // Enzyme.
        myEnzyme = myArgsEngine.getString("-e");
        if ((myEnzyme == null) || (myEnzyme.length() == 0)) {
            printUsage();
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: Must define Enzyme");
        }

        // Output directory; must differ from the input directory because
        // output files reuse the input file names.
        myOutputDir = myArgsEngine.getString("-output");
        if ((myOutputDir != null) && myOutputDir.length() != 0) {
            File outputDirectory = new File(myOutputDir);
            if (!outputDirectory.isDirectory()) {
                printUsage();
                throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: The output name you supplied is not a directory: " + myOutputDir);
            }
        } else {
            printUsage();
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: No output directory specified.");
        }
        if (myOutputDir.equals(tempDirectory)) {
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: Output Directory should be different from Input Directory");
        }

        // Chromosomes.
        String tempChrs = myArgsEngine.getString("-chrs");
        if ((tempChrs == null) || tempChrs.length() == 0) {
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: No chromosomes specified.");
        }
        String[] tokenChrs = tempChrs.split(",");
        myChrs = new int[tokenChrs.length];
        for (int i = 0; i < tokenChrs.length; i++) {
            try {
                myChrs[i] = Integer.parseInt(tokenChrs[i]);
            } catch (NumberFormatException e) {
                throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: chromosome not a number: " + tokenChrs[i]);
            }
        }

        // Position range.
        String tempStart = myArgsEngine.getString("-startPosition");
        if ((tempStart == null) || tempStart.length() == 0) {
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: No Start Position specified.");
        }
        try {
            myStartPos = Integer.parseInt(tempStart);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: Start Position not a number: " + tempStart);
        }

        String tempEnd = myArgsEngine.getString("-endPosition");
        if ((tempEnd == null) || tempEnd.length() == 0) {
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: No End Position specified.");
        }
        try {
            myEndPos = Integer.parseInt(tempEnd);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: End Position not a number: " + tempEnd);
        }

        if (myStartPos > myEndPos) {
            throw new IllegalArgumentException("KeepSpecifiedReadsinFastqPlugin: setParameters: Start Position: " + myStartPos + " can't be larger than End Position: " + myEndPos);
        }
    }

    /**
     * @param chr chromosome to test
     * @return true if chr is one of the chromosomes given with -chrs
     */
    private boolean isMatchingChr(int chr) {
        for (int candidate : myChrs) {
            if (candidate == chr) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the barcode parser for a fastq file. Two of the
     * underscore-delimited parts of the file name are passed to
     * ParseBarcodeRead together with the key file and enzyme; which parts
     * depends on how many parts the name has (3, 4, 5 or 6). Going by the
     * documented name formats these are the flowcell and the lane.
     *
     * @param fastqFile fastq file name (may include a path)
     * @return the parser, or null (after logging an error) if the file name
     * does not have 3 to 6 underscore-delimited parts
     */
    private ParseBarcodeRead getParseBarcodeRead(String fastqFile) {
        String[] np = Utils.getFilename(fastqFile).split("_");
        ParseBarcodeRead result;
        switch (np.length) {
            case 3:
                result = new ParseBarcodeRead(myKeyFilename, myEnzyme, np[0], np[1]);
                break;
            case 4:
                result = new ParseBarcodeRead(myKeyFilename, myEnzyme, np[0], np[2]);
                break;
            case 5:
            case 6:
                result = new ParseBarcodeRead(myKeyFilename, myEnzyme, np[1], np[3]);
                break;
            default:
                myLogger.error("Error in parsing file name:");
                myLogger.error(" The filename does not contain between 3 and 6 underscore-delimited values.");
                myLogger.error(" Expect: flowcell_lane_fastq.txt OR code_flowcell_s_lane_fastq.txt");
                myLogger.error(" Filename: " + fastqFile);
                return null;
        }
        myLogger.info("Total barcodes found in lane:" + result.getBarCodeCount());
        return result;
    }

    /** @return always null; this plugin has no icon */
    @Override
    public ImageIcon getIcon() {
        return null;
    }

    /**
     * Not supported by this plugin.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String getButtonName() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Not supported by this plugin.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String getToolTipText() {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy