All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.bio.phylogeny.JukesCantorModel Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.phylogeny;

import com.hfg.bio.seq.BioSequence;

//------------------------------------------------------------------------------
/**
 Implementation of the Jukes-Cantor distance matrix calculation model.
 It assumes that all changes are equally likely.
 

Jukes, T.H., Cantor, C.R. (1969). "Evolution of protein molecules". In Munro, H.N. (ed.), Mammalian protein metabolism. New York: Academic Press. pp. 21-123.

For a quick overview, see http://en.wikipedia.org/wiki/Jukes-Cantor_model

@author J. Alex Taylor, hairyfatguy.com */ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class JukesCantorModel implements DistanceMatrixModel { //--------------------------------------------------------------------------- /** Returns the model name as required by the DistanceMatrixModel interface. */ public String name() { return "Jukes-Cantor"; } //--------------------------------------------------------------------------- /** Calculates the Jukes-Cantor distance score for a pair of sequences. The sequences must be aligned to be the same length. 
*/ public float calculateDistance(BioSequence inSeq1, BioSequence inSeq2) { if (inSeq1.length() != inSeq2.length()) { throw new RuntimeException("The length of seq1 [" + inSeq1.length() + "] and seq2 [" + inSeq2.length() + "] don't match!"); } int mismatches = 0; int length = 0; String seq1 = inSeq1.getSequence().toUpperCase(); String seq2 = inSeq2.getSequence().toUpperCase(); for (int i = 0; i < seq1.length(); i++) { char char1 = seq1.charAt(i); char char2 = seq2.charAt(i); if (char1 != char2) mismatches++; // If both seqs are gapped at this position, don't count it against length. if (! (char1 == '-' && char2 == '-')) length++; } return (float) (- (3/4f) * Math.log(1 - (4/3f) *(mismatches / (float) length))); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy