com.hfg.bio.phylogeny.JukesCantorModel Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.phylogeny;
import com.hfg.bio.seq.BioSequence;
//------------------------------------------------------------------------------
/**
Implementation of the Jukes-Cantor distance matrix calculation model.
It assumes that all changes are equally likely.
Jukes, T.H., Cantor, C.R. (1969). "Evolution of protein molecules".
In Munro, H.N. (ed.), Mammalian Protein Metabolism. New York: Academic Press. pp. 21-132.
For a quick overview, see
http://en.wikipedia.org/wiki/Jukes-Cantor_model
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class JukesCantorModel implements DistanceMatrixModel
{
   //---------------------------------------------------------------------------
   /**
    Returns the model name as required by the DistanceMatrixModel interface.

    @return the fixed display name "Jukes-Cantor"
    */
   public String name()
   {
      return "Jukes-Cantor";
   }

   //---------------------------------------------------------------------------
   /**
    Calculates the Jukes-Cantor distance score for a pair of sequences.
    The sequences must be aligned to be the same length.
    <p>
    The score is d = -(3/4) * ln(1 - (4/3) * p), where p is the fraction of
    compared positions at which the two sequences differ. Comparison is
    case-insensitive. Positions where both sequences have a gap ('-') are
    excluded from the position count; a gap opposite a residue counts as a
    mismatch.
    <p>
    If no position differs (including the case where there is nothing to
    compare, such as empty or all-gap sequences) the result is exactly 0.
    When p reaches or exceeds 3/4 the argument of the logarithm is zero or
    negative, so the result is positive infinity or NaN respectively; callers
    that can encounter such divergent pairs should check for these values.

    @param inSeq1 the first aligned sequence
    @param inSeq2 the second aligned sequence
    @return the Jukes-Cantor distance between the two sequences
    @throws IllegalArgumentException if the two sequences differ in length
    */
   public float calculateDistance(BioSequence inSeq1, BioSequence inSeq2)
   {
      if (inSeq1.length() != inSeq2.length())
      {
         throw new IllegalArgumentException("The length of seq1 [" + inSeq1.length() + "] and seq2 [" + inSeq2.length() + "] don't match!");
      }

      String seq1 = inSeq1.getSequence();
      String seq2 = inSeq2.getSequence();

      int mismatches = 0;
      int length = 0;

      for (int i = 0; i < seq1.length(); i++)
      {
         // Fold case one character at a time. Unlike String.toUpperCase(), this
         // does not depend on the default locale and can never change the
         // number of characters, so the two indexes always stay in step.
         char char1 = Character.toUpperCase(seq1.charAt(i));
         char char2 = Character.toUpperCase(seq2.charAt(i));

         if (char1 != char2) mismatches++;

         // If both seqs are gapped at this position, don't count it against length.
         if (! (char1 == '-' && char2 == '-')) length++;
      }

      // Identical sequences (or nothing to compare) have distance zero. Returning
      // here avoids both the 0/0 division when length is zero and the negative
      // zero that -(3/4) * ln(1) would otherwise produce.
      if (mismatches == 0)
      {
         return 0f;
      }

      // At least one mismatch implies length > 0, so the division is safe.
      return (float) (- (3/4f) * Math.log(1 - (4/3f) * (mismatches / (float) length)));
   }
}