com.hfg.bio.seq.translation.CodonTable Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.seq.translation;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import com.hfg.bio.Nucleotide;
import com.hfg.bio.taxonomy.ncbi.NCBITaxon;
import com.hfg.exception.ProgrammingException;
import com.hfg.math.Range;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedSet;
import com.hfg.util.StringUtil;
import com.hfg.util.io.StreamUtil;
import com.hfg.xml.HfgXML;
import com.hfg.xml.XMLName;
import com.hfg.xml.XMLTag;
//------------------------------------------------------------------------------
/**
Table that maps codons to their respective amino acids.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class CodonTable implements TranslationTable
{
// XML Tag names
public static final XMLName XML_CODON_TABLE = new XMLName("CodonTable", HfgXML.HFG_NAMESPACE);
// XML Attribute names
public static final XMLName XML_NAME_ATT = new XMLName("name", HfgXML.HFG_NAMESPACE);
public static final XMLName XML_SPECIES_ATT = new XMLName("species", HfgXML.HFG_NAMESPACE);
public static final CodonTable HUMAN = new CodonTable("codondata/human.cod.xml.gz");
public static final CodonTable MOUSE = new CodonTable("codondata/mouse.cod.xml.gz");
private static final String sKazusaUrlTemplate = "http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=%d&aa=1&style=N";
private static final Pattern sKazusaLinePattern = Pattern.compile("\\s*(\\w{3})\\s+([\\w\\*])\\s+([\\.\\d]+)\\s+([\\.\\d]+)\\s+\\(\\s*(\\d+)\\)");
private static Random sRandom = new Random(System.currentTimeMillis());
private String mName;
private NCBITaxon mTaxon;
private Set mCodons = new OrderedSet<>(65);
private Map mCodonToAAMap = new HashMap<>(65);
private Map> mAAToCodonsMap = new HashMap<>(26);
//###########################################################################
// CONSTRUCTORS
//###########################################################################
//---------------------------------------------------------------------------
/**
Creates a codon table for the specified species. No codon data is loaded by
this constructor; the table only records the taxon.
@param inSpecies the taxon this table is associated with
*/
public CodonTable(NCBITaxon inSpecies)
{
   this.mTaxon = inSpecies;
}
//---------------------------------------------------------------------------
/**
Creates a codon table from its XML representation and then computes the
degenerate codons for the loaded data.
@param inXMLTag the CodonTable XML tag to read
*/
public CodonTable(XMLTag inXMLTag)
{
   this.fromXMLTag(inXMLTag);
   this.init();
}
//---------------------------------------------------------------------------
/**
Creates a codon table from an XML resource resolved relative to this class.
Resources whose path ends with ".gz" are transparently gunzipped.
@param inRsrcPath the resource path of the (optionally gzipped) XML codon table
@throws ProgrammingException if the resource cannot be found, read, or parsed
*/
protected CodonTable(String inRsrcPath)
{
   InputStream rawStream = getClass().getResourceAsStream(inRsrcPath);
   if (null == rawStream)
   {
      throw new ProgrammingException("The rsrc " + inRsrcPath + " couldn't be found!?");
   }

   try
   {
      // Start out pointing at the raw stream so that it still gets closed
      // if wrapping it in a GZIPInputStream fails.
      InputStream stream = rawStream;
      try
      {
         if (inRsrcPath.endsWith(".gz"))
         {
            stream = new GZIPInputStream(rawStream);
         }

         fromXMLTag(new XMLTag(stream));
      }
      finally
      {
         // Close on both the success and the failure path so the resource isn't leaked.
         StreamUtil.close(stream);
      }
   }
   catch (Exception e)
   {
      throw new ProgrammingException(e);
   }

   init();
}
//###########################################################################
// PUBLIC METHODS
//###########################################################################
//---------------------------------------------------------------------------
/**
@return the name of this codon table or null if no name has been set
*/
public String name()
{
   return this.mName;
}
//---------------------------------------------------------------------------
/**
Sets the name of this codon table.
@param inValue the new name
@return this CodonTable to allow method chaining
*/
private CodonTable setName(String inValue)
{
   this.mName = inValue;
   return this;
}
//---------------------------------------------------------------------------
/**
Dynamically retrieves codon usage data from the Codon Usage Database
(http://www.kazusa.or.jp/codon/) for this table's species. Any codon data
already held by this table is replaced.
@throws IllegalStateException if no species has been set for this table
@throws Exception if the data cannot be retrieved or parsed
*/
public void retrieveDataFromKazusa()
   throws Exception
{
   if (null == mTaxon)
   {
      // Fail with an explicit message instead of a bare NullPointerException.
      throw new IllegalStateException("No species has been specified for this codon table so codon usage data cannot be retrieved!");
   }

   URL url = new URL(String.format(sKazusaUrlTemplate, mTaxon.getTaxonId()));
   URLConnection connection = url.openConnection();

   // Make sure the connection's stream is released even if parsing fails.
   try (InputStream stream = connection.getInputStream())
   {
      parseKazusaData(stream);
   }
}
//---------------------------------------------------------------------------
/**
Builds the XML representation of this codon table: a CodonTable tag carrying
the optional name and species attributes with one subtag per codon.
@return the XML tag describing this table
*/
public XMLTag toXMLTag()
{
   XMLTag tableTag = new XMLTag(XML_CODON_TABLE);

   String tableName = name();
   if (null != tableName)
   {
      tableTag.setAttribute(XML_NAME_ATT, tableName);
   }

   if (null != mTaxon)
   {
      // The species is written out by its scientific name.
      String scientificName = mTaxon.getScientificName();
      tableTag.setAttribute(XML_SPECIES_ATT, scientificName);
   }

   if (null == mCodons)
   {
      return tableTag;
   }

   for (Codon codon : mCodons)
   {
      XMLTag codonTag = codon.toXMLTag();
      tableTag.addSubtag(codonTag);
   }

   return tableTag;
}
//---------------------------------------------------------------------------
/**
Registers a codon with this table: it is added to the set of known codons,
to the codon-to-amino-acid lookup, and to the set of codons for its amino acid.
@param inValue the codon (with its amino acid assigned) to add
@return this CodonTable to allow method chaining
*/
public CodonTable addCodon(Codon inValue)
{
   mCodons.add(inValue);
   mCodonToAAMap.put(inValue, inValue.getAA());

   Set<Codon> codons = mAAToCodonsMap.get(inValue.getAA());
   if (null == codons)
   {
      // First codon seen for this amino acid.
      codons = new HashSet<>(10);
      mAAToCodonsMap.put(inValue.getAA(), codons);
   }

   codons.add(inValue);

   return this;
}
//---------------------------------------------------------------------------
/**
Looks up the table's own Codon object for a nucleotide triplet. The returned
object can contain the AA mapping and codon usage data.
@param inCodon the string representation of the Codon object to retrieve
@return the Codon object corresponding to the specified nucleotide triplet
        or null if this table does not contain it
*/
public Codon getCodon(String inCodon)
{
   Codon target = new Codon(inCodon);

   for (Codon candidate : mCodons)
   {
      if (candidate.equals(target))
      {
         return candidate;
      }
   }

   return null;
}
//--------------------------------------------------------------------------
/**
@return an unmodifiable view of all codons held by this table
*/
public Set<Codon> getCodons()
{
   return Collections.unmodifiableSet(mCodons);
}
//---------------------------------------------------------------------------
/**
Returns the codons registered for the specified amino acid. The lookup uses
the character exactly as given (no case conversion is performed).
@param inAA the amino acid character for which codons should be returned
@return an unmodifiable view of the codons for the amino acid or null if none are registered
*/
public Set<Codon> getCodonsForAA(char inAA)
{
   Set<Codon> codons = mAAToCodonsMap.get(inAA);

   // Hand out a read-only view (as getCodons() does) so that callers can't
   // corrupt the table's internal amino acid to codon mapping.
   return (codons != null ? Collections.unmodifiableSet(codons) : null);
}
//---------------------------------------------------------------------------
/**
Picks a codon for the specified amino acid at random, weighted by codon usage
bias, with no minimum bias requirement.
@param inAA the amino acid character for which codons should be selected
@return the randomly selected Codon
*/
public Codon getCodonForAA_viaWeightedSelection(char inAA)
{
   float noMinimumBias = 0.0f;
   return getCodonForAA_viaWeightedSelection(inAA, noMinimumBias);
}
//---------------------------------------------------------------------------
/**
Returns a weighted randomly selected codon meeting a specified minimum usage bias
for the specified amino acid. Each eligible codon is chosen with a probability
proportional to its usage bias.
@param inAA the amino acid character for which codons should be selected (case-insensitive)
@param inMinBias the minimum usage bias (frequency) for codons to be considered
@return the randomly selected Codon or null if no codon with a positive bias
        meets the minimum bias
@throws RuntimeException if no codons are registered for the amino acid or if
        a codon for the amino acid lacks usage bias data
*/
public Codon getCodonForAA_viaWeightedSelection(char inAA, float inMinBias)
{
   // Force to uppercase
   char aa = Character.toUpperCase(inAA);

   Set<Codon> codons = mAAToCodonsMap.get(aa);
   if (! CollectionUtil.hasValues(codons))
   {
      throw new RuntimeException("No codons specified for amino acid " + StringUtil.singleQuote(inAA) + "!");
   }

   // First pass: total the bias of all codons that meet the minimum.
   float totalWeight = 0.0f;
   for (Codon codon : codons)
   {
      CodonUsage usage = codon.getCodonUsage();
      if (null == usage
          || null == usage.getBias())
      {
         // Report missing data explicitly instead of failing with a bare NullPointerException.
         throw new RuntimeException("No codon usage bias is available for codon " + codon
                                    + " (amino acid " + StringUtil.singleQuote(inAA) + ")!");
      }

      if (usage.getBias() >= inMinBias)
      {
         totalWeight += usage.getBias();
      }
   }

   // Nothing eligible (or only zero/NaN weights) - there is nothing to select from.
   // Written as a negated comparison so that a NaN total is caught as well.
   if (! (totalWeight > 0))
   {
      return null;
   }

   // Second pass: walk the cumulative weights until the random target is passed.
   // Scaling the target by the total avoids dividing every bias by the total.
   float target = sRandom.nextFloat() * totalWeight;
   float cumulativeWeight = 0.0f;
   Codon selectedCodon = null;
   for (Codon codon : codons)
   {
      if (codon.getCodonUsage().getBias() >= inMinBias)
      {
         // Remember the last eligible codon so that floating point rounding
         // can never leave us without a selection.
         selectedCodon = codon;

         cumulativeWeight += codon.getCodonUsage().getBias();
         if (target < cumulativeWeight)
         {
            break;
         }
      }
   }

   return selectedCodon;
}
//---------------------------------------------------------------------------
/**
Translates a nucleotide triplet into its amino acid.
@param inCodon the string representation of the codon to translate
@return the amino acid character for the codon or 'X' if the codon is not in this table
*/
public char translateCodon(String inCodon)
{
   Codon codon = new Codon(inCodon);
   return translateCodon(codon);
}
//---------------------------------------------------------------------------
/**
Translates a codon into its amino acid.
@param inCodon the codon to translate
@return the amino acid character for the codon or 'X' if the codon is not in this table
*/
public char translateCodon(Codon inCodon)
{
   Character aa = mCodonToAAMap.get(inCodon);
   if (null == aa)
   {
      // Unknown codon
      return 'X';
   }

   return aa;
}
//---------------------------------------------------------------------------
/**
Reports whether this table holds codon usage data. Only a single codon (the
first codon of the first amino acid entry) is inspected.
@return true if the inspected codon has codon usage data; false if it does not
        or if the table has no amino acid entries
*/
public boolean containsCodonUsageData()
{
   if (mAAToCodonsMap.isEmpty())
   {
      return false;
   }

   Set<Codon> firstCodonSet = mAAToCodonsMap.values().iterator().next();
   Codon firstCodon = firstCodonSet.iterator().next();

   return (firstCodon.getCodonUsage() != null);
}
//###########################################################################
// PRIVATE METHODS
//###########################################################################
//---------------------------------------------------------------------------
// Calculate degenerate codons.
// For each amino acid, the codons are grouped by their first two nucleotides.
// Whenever every base represented by a degenerate nucleotide occurs in the
// third position for a given two-base prefix, the corresponding degenerate codon
// (prefix + degenerate nucleotide) is registered as translating to that amino acid.
private void init()
{
   for (Map.Entry<Character, Set<Codon>> aaEntry : mAAToCodonsMap.entrySet())
   {
      Character aa = aaEntry.getKey();
      Set<Codon> codonSet = aaEntry.getValue();

      // pos 1 nucleotide -> pos 2 nucleotide -> set of pos 3 nucleotides
      Map<Nucleotide, Map<Nucleotide, Set<Nucleotide>>> codonNucleotideMap = new HashMap<>(2);
      for (Codon codon : codonSet)
      {
         String codonString = codon.toString();
         Nucleotide pos1Nuc = Nucleotide.valueOf(codonString.charAt(0));
         Nucleotide pos2Nuc = Nucleotide.valueOf(codonString.charAt(1));
         Nucleotide pos3Nuc = Nucleotide.valueOf(codonString.charAt(2));

         Map<Nucleotide, Set<Nucleotide>> pos2Map = codonNucleotideMap.get(pos1Nuc);
         if (null == pos2Map)
         {
            pos2Map = new HashMap<>(3);
            codonNucleotideMap.put(pos1Nuc, pos2Map);
         }

         Set<Nucleotide> pos3Set = pos2Map.get(pos2Nuc);
         if (null == pos3Set)
         {
            pos3Set = new HashSet<>(4);
            pos2Map.put(pos2Nuc, pos3Set);
         }

         pos3Set.add(pos3Nuc);
      }

      for (Map.Entry<Nucleotide, Map<Nucleotide, Set<Nucleotide>>> pos1Entry : codonNucleotideMap.entrySet())
      {
         Nucleotide pos1Nuc = pos1Entry.getKey();

         for (Map.Entry<Nucleotide, Set<Nucleotide>> pos2Entry : pos1Entry.getValue().entrySet())
         {
            Nucleotide pos2Nuc = pos2Entry.getKey();
            Set<Nucleotide> pos3Set = pos2Entry.getValue();

            for (Nucleotide degenerateNuc : Nucleotide.degenerateValues())
            {
               // The degenerate nucleotide only applies if ALL of the bases it
               // stands for are present in the third position.
               boolean matches = true;
               for (Nucleotide base : degenerateNuc.getDegeneracy())
               {
                  if (! pos3Set.contains(base))
                  {
                     matches = false;
                     break;
                  }
               }

               if (matches)
               {
                  // Locale.ROOT keeps the uppercasing independent of the JVM's default locale.
                  Codon degenerateCodon = new Codon(("" + pos1Nuc.getOneLetterCode() + pos2Nuc.getOneLetterCode() + degenerateNuc.getOneLetterCode()).toUpperCase(Locale.ROOT));

                  // Not calling addCodon() because we don't want to add degenerate codons to mAAToCodonsMap.
                  mCodons.add(degenerateCodon);
                  mCodonToAAMap.put(degenerateCodon, aa);
               }
            }
         }
      }
   }
}
//---------------------------------------------------------------------------
/**
Populates this table from its XML representation: the optional name and species
attributes plus one codon per Codon subtag. If codon usage data is present but
carries no bias values, the bias values are calculated from the codon counts.
@param inXMLTag the CodonTable XML tag to read
*/
private void fromXMLTag(XMLTag inXMLTag)
{
   inXMLTag.verifyTagName(XML_CODON_TABLE);

   String name = inXMLTag.getAttributeValue(XML_NAME_ATT);
   if (StringUtil.isSet(name))
   {
      setName(name);
   }

   String species = inXMLTag.getAttributeValue(XML_SPECIES_ATT);
   if (StringUtil.isSet(species))
   {
      // TODO: Use the taxon id instead?
      Set<NCBITaxon> taxons = NCBITaxon.getByName(species);
      if (CollectionUtil.hasValues(taxons))
      {
         // If the name resolves to several taxons, the first one returned is used.
         mTaxon = taxons.iterator().next();
      }
   }

   List<XMLTag> codonTags = inXMLTag.getSubtagsByName(Codon.XML_CODON);
   if (CollectionUtil.hasValues(codonTags))
   {
      for (XMLTag codonTag : codonTags)
      {
         addCodon(new Codon(codonTag));
      }
   }

   if (containsCodonUsageData())
   {
      // Is bias data present?
      // Inspect the same codon that containsCodonUsageData() just checked so
      // that its usage data is known to be non-null here.
      CodonUsage codonUsage = mAAToCodonsMap.values().iterator().next().iterator().next().getCodonUsage();
      if (null == codonUsage.getBias())
      {
         calculateBiasValues();
      }
   }
}
//---------------------------------------------------------------------------
/**
Discards all codon data held by this table (the codon set and both lookup maps).
The name and species are left untouched.
*/
private void clearCodonData()
{
   this.mCodonToAAMap.clear();
   this.mAAToCodonsMap.clear();
   this.mCodons.clear();
}
//---------------------------------------------------------------------------
/**
Replaces this table's codon data with the data parsed from a Codon Usage
Database response. Every match of the codon entry pattern on a line becomes
one codon with its amino acid and usage values.
@param inContent the stream containing the response to parse; it is closed by this method
@throws Exception if the content cannot be read or if the number of parsed codons is not 64
*/
private void parseKazusaData(InputStream inContent)
   throws Exception
{
   clearCodonData();

   // try-with-resources so that the reader is closed even if parsing fails.
   try (BufferedReader reader = new BufferedReader(new InputStreamReader(inContent)))
   {
      String line;
      while ((line = reader.readLine()) != null)
      {
         line = line.trim();
         if (! StringUtil.isSet(line))
         {
            continue;
         }

         Matcher m = sKazusaLinePattern.matcher(line);

         // A line can hold several codon entries. Each search resumes one
         // character past the end of the previous match.
         int offset = 0;
         while (offset < line.length()
                && m.find(offset))
         {
            Codon codon = new Codon(m.group(1)).setAA(m.group(2).charAt(0));

            CodonUsage usage = new CodonUsage();
            usage.setBias(Float.parseFloat(m.group(3)));
            usage.setFreqPer1000(Float.parseFloat(m.group(4)));
            usage.setNumber(Integer.parseInt(m.group(5)));
            codon.setCodonUsage(usage);

            addCodon(codon);

            offset = m.end() + 1;
         }
      }
   }

   if (mCodons.size() != 64)
   {
      throw new RuntimeException("Problem retrieving codon data. " + mCodons.size() + " codons were parsed instead of 64!");
   }
}
//---------------------------------------------------------------------------
/**
Calculates the usage bias of every codon as its share of the total codon count
for its amino acid (codon count / sum of the counts of all codons for that amino acid).
Requires every registered codon to carry codon usage data with a count.
*/
private void calculateBiasValues()
{
   for (Set<Codon> codons : mAAToCodonsMap.values())
   {
      // Accumulate as a long so that large counts can't overflow.
      long totalNum = 0;
      for (Codon codon : codons)
      {
         totalNum += codon.getCodonUsage().getNumber();
      }

      for (Codon codon : codons)
      {
         // Avoid 0/0 (NaN) when no occurrences were recorded for the amino acid.
         float bias = (totalNum > 0 ? codon.getCodonUsage().getNumber() / (float) totalNum : 0.0f);
         codon.getCodonUsage().setBias(bias);
      }
   }
}
}