// Source listing: com.hfg.bio.seq.format.FASTA (Maven / Gradle / Ivy artifact com_hfg).
// com.hfg: xml, html, svg, and bioinformatics utility library.
package com.hfg.bio.seq.format;
import com.hfg.bio.seq.BioSequence;
import com.hfg.bio.seq.BioSequenceFactory;
import com.hfg.util.StringUtil;
import com.hfg.util.io.LettersOnlyReader;
import java.io.*;
import java.util.Collection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
//------------------------------------------------------------------------------
/**
FASTA sequence format.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class FASTA extends ReadableSeqFormatBase implements WritableSeqFormat
{
private Integer mLineLength = sDefaultLineLength;
private static int sDefaultLineLength = 75;
private static Pattern sHeaderLinePattern = Pattern.compile(">(\\S+)(?:\\s+(.*?))?");
//###########################################################################
// CONSTRUCTORS
//###########################################################################
//---------------------------------------------------------------------------
public FASTA()
{
super(null);
}
//---------------------------------------------------------------------------
public FASTA(BioSequenceFactory inSeqFactory)
{
super(inSeqFactory);
}
//###########################################################################
// PUBLIC METHODS
//###########################################################################
//---------------------------------------------------------------------------
public FASTA setLineLength(Integer inValue)
{
mLineLength = inValue;
return this;
}
//---------------------------------------------------------------------------
public Integer getLineLength()
{
return mLineLength;
}
//---------------------------------------------------------------------------
public T readRecord(BufferedReader inReader)
throws SeqIOException
{
if (null == getBioSequenceFactory())
{
throw new SeqIOException("No BioSequence factory has been specified!");
}
T seq;
try
{
seq = getBioSequenceFactory().createSeqObj();
String line;
while ((line = inReader.readLine()) != null)
{
// Skip comment lines.
if (line.startsWith("#")
|| line.startsWith("//"))
{
continue;
}
if (line.startsWith(">"))
{
line = line.trim();
if (seq.getID() != null)
{
throw new SeqIOException("Multiple header lines found in the sequence record!");
}
Matcher m = sHeaderLinePattern.matcher(line);
if (m.matches())
{
seq.setID(m.group(1));
seq.setDescription(m.group(2));
}
else
{
throw new SeqIOException("The header line" + StringUtil.singleQuote(line) + " is not in proper FASTA format!");
}
break;
}
}
// The rest of the record should be sequence
// Cleanup the sequence to remove spaces and numbers
Reader filterReader = new FASTASeqFilterReader(inReader);
seq.setSequence(filterReader);
filterReader.close();
}
catch (Exception e)
{
if (e instanceof SeqIOException)
{
throw (SeqIOException) e;
}
else
{
throw new SeqIOException(e);
}
}
return seq;
}
//---------------------------------------------------------------------------
public boolean isEndOfRecord(String inLine)
{
return inLine.startsWith(">");
}
//---------------------------------------------------------------------------
public boolean hasJanusDelimiter()
{
return true;
}
//---------------------------------------------------------------------------
public String write(Collection inSeqs)
throws SeqIOException
{
StringWriter writer = new StringWriter();
for (T seq : inSeqs)
{
write(seq, writer);
}
return writer.toString();
}
//---------------------------------------------------------------------------
public String write(T inSeq)
throws SeqIOException
{
StringWriter writer = new StringWriter();
write(inSeq, writer);
return writer.toString();
}
//---------------------------------------------------------------------------
public void write(T inSeq, OutputStream inStream)
throws SeqIOException
{
Writer writer = new OutputStreamWriter(inStream);
write(inSeq, writer);
try
{
writer.flush();
}
catch (Exception e)
{
throw new SeqIOException(e);
}
}
//---------------------------------------------------------------------------
public void write(T inSeq, Writer inWriter)
throws SeqIOException
{
Reader seqReader = null;
BufferedWriter writer = null;
try
{
try
{
if (writer instanceof BufferedWriter)
{
writer = (BufferedWriter) inWriter;
} else
{
writer = new BufferedWriter(inWriter, 8196);
}
// Write the header line
writer.write(">");
writer.write(inSeq.getID());
if (StringUtil.isSet(inSeq.getDescription()))
{
writer.write(" " + inSeq.getDescription());
}
writer.write("\n");
// Write the sequence lines
seqReader = inSeq.getSequenceReader();
// A null line length indicates that we should write the whole sequence on one line
int bufferSize = (mLineLength != null ? mLineLength : 2048);
char[] buffer = new char[bufferSize];
int numBytesRead;
while ((numBytesRead = seqReader.read(buffer)) != -1)
{
writer.write(buffer, 0, numBytesRead);
if (mLineLength != null)
{
writer.write("\n");
}
}
if (null == mLineLength)
{
writer.write("\n");
}
}
finally
{
if (seqReader != null)
{
seqReader.close();
}
if (writer != null)
{
writer.flush();
}
}
}
catch (SeqIOException e)
{
throw e;
}
catch (Exception e)
{
throw new SeqIOException(e);
}
}
private class FASTASeqFilterReader extends LettersOnlyReader
{
int mPrevChar = -1;
//---------------------------------------------------------------------------
public FASTASeqFilterReader(Reader inReader)
{
super(inReader);
}
//---------------------------------------------------------------------------
@Override
public int read()
throws IOException
{
int returnChar;
do
{
returnChar = innerRead();
if (mPrevChar == '\n'
&& returnChar == '>')
{
throw new SeqIOException("Multiple header lines found in the sequence record!");
}
if (returnChar != ' ')
{
mPrevChar = returnChar;
}
}
while (returnChar >= 0
&& (Character.isWhitespace(returnChar)
|| Character.isDigit(returnChar)));
return returnChar;
}
}
}