All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.bio.seq.format.FASTA Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq.format;

import com.hfg.bio.seq.BioSequence;
import com.hfg.bio.seq.BioSequenceFactory;
import com.hfg.util.StringUtil;
import com.hfg.util.io.LettersOnlyReader;

import java.io.*;
import java.util.Collection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

//------------------------------------------------------------------------------
/**
 FASTA sequence format.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class FASTA extends ReadableSeqFormatBase implements WritableSeqFormat { private Integer mLineLength = sDefaultLineLength; private static int sDefaultLineLength = 75; private static Pattern sHeaderLinePattern = Pattern.compile(">(\\S+)(?:\\s+(.*?))?"); //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public FASTA() { super(null); } //--------------------------------------------------------------------------- public FASTA(BioSequenceFactory inSeqFactory) { super(inSeqFactory); } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- public FASTA setLineLength(Integer 
inValue) { mLineLength = inValue; return this; } //--------------------------------------------------------------------------- public Integer getLineLength() { return mLineLength; } //--------------------------------------------------------------------------- public T readRecord(BufferedReader inReader) throws SeqIOException { if (null == getBioSequenceFactory()) { throw new SeqIOException("No BioSequence factory has been specified!"); } T seq; try { seq = getBioSequenceFactory().createSeqObj(); String line; while ((line = inReader.readLine()) != null) { // Skip comment lines. if (line.startsWith("#") || line.startsWith("//")) { continue; } if (line.startsWith(">")) { line = line.trim(); if (seq.getID() != null) { throw new SeqIOException("Multiple header lines found in the sequence record!"); } Matcher m = sHeaderLinePattern.matcher(line); if (m.matches()) { seq.setID(m.group(1)); seq.setDescription(m.group(2)); } else { throw new SeqIOException("The header line" + StringUtil.singleQuote(line) + " is not in proper FASTA format!"); } break; } } // The rest of the record should be sequence // Cleanup the sequence to remove spaces and numbers Reader filterReader = new FASTASeqFilterReader(inReader); seq.setSequence(filterReader); filterReader.close(); } catch (Exception e) { if (e instanceof SeqIOException) { throw (SeqIOException) e; } else { throw new SeqIOException(e); } } return seq; } //--------------------------------------------------------------------------- public boolean isEndOfRecord(String inLine) { return inLine.startsWith(">"); } //--------------------------------------------------------------------------- public boolean hasJanusDelimiter() { return true; } //--------------------------------------------------------------------------- public String write(Collection inSeqs) throws SeqIOException { StringWriter writer = new StringWriter(); for (T seq : inSeqs) { write(seq, writer); } return writer.toString(); } 
//--------------------------------------------------------------------------- public String write(T inSeq) throws SeqIOException { StringWriter writer = new StringWriter(); write(inSeq, writer); return writer.toString(); } //--------------------------------------------------------------------------- public void write(T inSeq, OutputStream inStream) throws SeqIOException { Writer writer = new OutputStreamWriter(inStream); write(inSeq, writer); try { writer.flush(); } catch (Exception e) { throw new SeqIOException(e); } } //--------------------------------------------------------------------------- public void write(T inSeq, Writer inWriter) throws SeqIOException { Reader seqReader = null; BufferedWriter writer = null; try { try { if (writer instanceof BufferedWriter) { writer = (BufferedWriter) inWriter; } else { writer = new BufferedWriter(inWriter, 8196); } // Write the header line writer.write(">"); writer.write(inSeq.getID()); if (StringUtil.isSet(inSeq.getDescription())) { writer.write(" " + inSeq.getDescription()); } writer.write("\n"); // Write the sequence lines seqReader = inSeq.getSequenceReader(); // A null line length indicates that we should write the whole sequence on one line int bufferSize = (mLineLength != null ? 
mLineLength : 2048); char[] buffer = new char[bufferSize]; int numBytesRead; while ((numBytesRead = seqReader.read(buffer)) != -1) { writer.write(buffer, 0, numBytesRead); if (mLineLength != null) { writer.write("\n"); } } if (null == mLineLength) { writer.write("\n"); } } finally { if (seqReader != null) { seqReader.close(); } if (writer != null) { writer.flush(); } } } catch (SeqIOException e) { throw e; } catch (Exception e) { throw new SeqIOException(e); } } private class FASTASeqFilterReader extends LettersOnlyReader { int mPrevChar = -1; //--------------------------------------------------------------------------- public FASTASeqFilterReader(Reader inReader) { super(inReader); } //--------------------------------------------------------------------------- @Override public int read() throws IOException { int returnChar; do { returnChar = innerRead(); if (mPrevChar == '\n' && returnChar == '>') { throw new SeqIOException("Multiple header lines found in the sequence record!"); } if (returnChar != ' ') { mPrevChar = returnChar; } } while (returnChar >= 0 && (Character.isWhitespace(returnChar) || Character.isDigit(returnChar))); return returnChar; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy