com.hfg.bio.seq.translation.NucleicAcidTranslator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.seq.translation;
import com.hfg.bio.seq.NucleicAcid;
import com.hfg.bio.seq.Protein;
import com.hfg.bio.Strand;
import com.hfg.bio.seq.ProteinFactory;
import com.hfg.util.StringUtil;
import java.io.FilterReader;
import java.io.IOException;
import java.io.Reader;
//------------------------------------------------------------------------------
/**
Translates nucleic acid sequences into protein.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class NucleicAcidTranslator
{
   // Table used to convert codons into amino acids. Defaults to the standard table.
   private TranslationTable mTranslationTable = StandardTranslationTable.getInstance();

   // Factory used to create the Protein objects returned by translate().
   private ProteinFactory mProteinFactory = new ProteinFactory();

   /** Name of the attribute under which translate() stores the translation frame on the protein. */
   public static final String FRAME_ATT = "translation frame";

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates a translator that uses the standard translation table.
    */
   public NucleicAcidTranslator()
   {
   }

   //---------------------------------------------------------------------------
   /**
    Creates a translator that uses the specified translation table.
    @param inTranslationTable the table to use when converting codons into amino acids
    */
   public NucleicAcidTranslator(TranslationTable inTranslationTable)
   {
      mTranslationTable = inTranslationTable;
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Specifies the factory to use when creating the translated protein objects.
    @param inValue the protein factory to use
    @return this translator (to allow method chaining)
    */
   public NucleicAcidTranslator setProteinFactory(ProteinFactory inValue)
   {
      mProteinFactory = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    Translates the specified nucleic acid in the specified frame.
    <p>
    The returned protein is created by the protein factory, is given the ID and
    description of the nucleic acid, and has the frame stored as the attribute
    {@link #FRAME_ATT}. For a frame on the plus strand the sequence is read as-is;
    otherwise the reverse complement is read. The frame's offset is skipped before
    the first codon is read.
    </p>
    @param inSeq   the nucleic acid to translate
    @param inFrame the frame (strand and offset) in which to translate
    @return the translated protein
    @throws TranslationException if the nucleic acid has no sequence or if any
            other problem is encountered while translating
    */
   public Protein translate(NucleicAcid inSeq, TranslationFrame inFrame)
      throws TranslationException
   {
      Protein protein = mProteinFactory.createSeqObj();
      protein.setID(inSeq.getID());
      protein.setDescription(inSeq.getDescription());
      protein.setAttribute(FRAME_ATT, inFrame);

      try
      {
         if (0 == inSeq.length())
         {
            throw new RuntimeException("The nucleic acid " + StringUtil.singleQuote(inSeq.getID()) + " doesn't have a sequence!");
         }

         Reader naReader = (inFrame.getStrand().equals(Strand.PLUS) ? inSeq.getSequenceReader() : inSeq.getReverseComplementSequenceReader());

         // Reader.skip() is allowed to skip fewer characters than requested, and a
         // short skip would silently shift the reading frame.
         skipFully(naReader, inFrame.getOffset());

         TranslationReader translationReader = new TranslationReader(naReader);
         protein.setSequence(translationReader);
      }
      catch (Exception e)
      {
         throw new TranslationException("Problem during translation of " + inSeq.getID() + " in frame " + inFrame + "!", e);
      }

      return protein;
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   // Skips inCount characters, retrying while the reader keeps making progress.
   // skip() is always called at least once so that an invalid (negative) count is
   // still rejected by the reader itself.
   private static void skipFully(Reader inReader, long inCount)
      throws IOException
   {
      long remaining = inCount;
      do
      {
         long skipped = inReader.skip(remaining);
         if (skipped <= 0)
         {
            // No progress. Nothing more can be skipped (end of the stream).
            break;
         }

         remaining -= skipped;
      }
      while (remaining > 0);
   }

   //###########################################################################
   // INNER CLASS
   //###########################################################################

   // In order to be able to efficiently deal with large nucleotide sequences,
   // the translation operation is performed in a Reader. Nucleotides are pulled
   // from the wrapped reader one codon at a time and the resulting amino acids
   // are served from an internal buffer.
   private class TranslationReader extends FilterReader
   {
      private final char[] mAABuffer    = new char[1024];  // translated residues waiting to be read
      private final char[] mCodonBuffer = new char[3];     // the codon currently being translated
      private int     mBufferIndex = 0;                    // next position in mAABuffer to hand out
      private int     mBufferLimit = 0;                    // number of valid residues in mAABuffer
      private boolean mEndOfStream;                        // true once the wrapped reader is exhausted

      //------------------------------------------------------------------------
      protected TranslationReader(Reader reader)
      {
         super(reader);
      }

      //------------------------------------------------------------------------
      /**
       Returns the next translated residue or -1 if there are no more.
       */
      @Override
      public int read()
         throws IOException
      {
         if (mBufferIndex >= mBufferLimit
             && ! mEndOfStream)
         {
            fillBuffer();
         }

         return (mBufferIndex < mBufferLimit ? mAABuffer[mBufferIndex++] : -1);
      }

      //------------------------------------------------------------------------
      /**
       Reads up to inLength translated residues into inBuffer starting at inOffset.
       @return the number of residues stored or -1 if the end of the stream was
               reached before any residue could be stored
       @throws IndexOutOfBoundsException if inOffset / inLength do not describe a
               valid region of inBuffer
       */
      @Override
      public int read(char[] inBuffer, int inOffset, int inLength)
         throws IOException
      {
         if (inOffset < 0
             || inLength < 0
             || inLength > inBuffer.length - inOffset)
         {
            throw new IndexOutOfBoundsException();
         }

         if (0 == inLength)
         {
            // Per the Reader contract a zero-length request must not consume anything.
            return 0;
         }

         int numChars = 0;
         int nextChar;
         do
         {
            nextChar = read();
            // NOTE(review): a NUL (0) value is consumed but not stored here whereas
            // read() returns it to the caller - confirm whether this is intentional.
            if (nextChar > 0)
            {
               inBuffer[inOffset + numChars] = (char) nextChar;
               numChars++;
            }
         }
         while (nextChar >= 0
                && numChars < inLength);

         return (-1 == nextChar && 0 == numChars ? -1 : numChars);
      }

      //------------------------------------------------------------------------
      // Translates codons from the wrapped reader until the residue buffer is full
      // or the wrapped reader is exhausted.
      private void fillBuffer()
         throws IOException
      {
         int bufferIndex = 0;
         while (bufferIndex < mAABuffer.length)
         {
            if (! readCodon())
            {
               mEndOfStream = true;
               break;
            }

            String codonString = new String(mCodonBuffer);

            char aa;
            if (codonString.contains("-"))
            {
               // A codon containing a gap character is reported as a gap.
               aa = '-';
            }
            else
            {
               aa = mTranslationTable.translateCodon(codonString);
            }

            mAABuffer[bufferIndex++] = aa;
         }

         mBufferIndex = 0;
         mBufferLimit = bufferIndex;
      }

      //------------------------------------------------------------------------
      // Fills the codon buffer with the next three nucleotides. Returns false if the
      // end of the wrapped reader was reached first; any trailing partial codon is
      // discarded. A reader may legitimately return fewer characters than requested,
      // so reading continues until the codon is complete.
      private boolean readCodon()
         throws IOException
      {
         int filled = 0;
         while (filled < mCodonBuffer.length)
         {
            int count = in.read(mCodonBuffer, filled, mCodonBuffer.length - filled);
            if (count < 0)
            {
               return false;
            }

            if (0 == count)
            {
               // The wrapped reader isn't making progress; fail instead of spinning.
               throw new TranslationException("Problem reading a full codon!");
            }

            filled += count;
         }

         return true;
      }
   }
}