All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.bio.phylogeny.NJ Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.phylogeny;

import com.hfg.network.Edge;

import java.util.Map;
import java.util.HashMap;
import java.util.Collection;
import java.util.Iterator;

//------------------------------------------------------------------------------
/**
 * NJ (Neighbor-Joining) method of phylogenetic tree construction.
 * First described by Saitou N, Nei M (1987). "The neighbor-joining method: a new
 * method for reconstructing phylogenetic trees". Mol Biol Evol 4 (4): 406-425.
 * See wikipedia.
 * Note that distances in the resulting tree will not exactly match those from
 * the input distance matrix.
 * @author J. Alex Taylor, hairyfatguy.com
 */
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------

public class NJ implements TreeMethod
{

   //**************************************************************************
   // PUBLIC METHODS
   //**************************************************************************

   //--------------------------------------------------------------------------
   @Override
   public String toString()
   {
      return getClass().getSimpleName();
   }

   //--------------------------------------------------------------------------
   @Override
   public boolean equals(Object inObj2)
   {
      return (inObj2 != null
            && inObj2.getClass().equals(getClass()));
   }

   //--------------------------------------------------------------------------
   public PhylogeneticTree constructTree(DistanceMatrix inDistanceMatrix)
   {
      int nodeIndex = 1;
      Map nodeMap = new HashMap<>();

      // Start with a star tree
      //         A
      //    F    |    B
      //      \  |  /
      //       \ | /
      //        \|/
      //        /|\
      //       / | \
      //      /  |  \
      //    E    |    C
      //         D
      //

      PhyloNode centerNode = new PhyloNode();
      for (String otu : inDistanceMatrix.keySet())
      {
         PhyloNode otuNode = new PhyloNode().setLabel(otu);
         nodeMap.put(otu, otuNode);
         centerNode.addEdge(otuNode, null);
      }

      // Clone the distance matrix so we can consume it
      DistanceMatrix matrix = inDistanceMatrix.clone();

      while (matrix.numKeys() > 2)
      {
         Map netDivergenceMap = calculateNetDivergenceMap(matrix);
         DistanceMatrix qMatrix = calculateQMatrix(matrix, netDivergenceMap);

         // Note: OTU stands for 'Operational Taxonomic Unit'

         Edge shortestEdge = qMatrix.getShortestEdge();
         String minOTU1 = shortestEdge.getFrom();
         String minOTU2 = shortestEdge.getTo();
         float  minDistance = matrix.getDistance(minOTU1, minOTU2);

         String newNodeName = "_" + (nodeIndex++);
         PhyloNode newNode = new PhyloNode().setLabel(newNodeName);
         centerNode.addEdge(newNode, null);
         nodeMap.put(newNodeName, newNode);

         float distance1 = minDistance / 2
                           + (netDivergenceMap.get(minOTU1) - netDivergenceMap.get(minOTU2)) / (2 * (matrix.numKeys() - 2));
         float distance2 = minDistance - distance1;

         // Deal with Negative branch lengths
         // From : http://www.deduveinstitute.be/~opperd/private/neighbor.html
         // "As the neighbor-joining algorithm seeks to represent the data in the form of an additive tree, it can assign
         // a negative length to the branch. Here the interpretation of branch lengths as an estimated number of substitutions
         // gets into difficulties. When this occurs it is adviced to set the branch length to zero and transfer the difference
         // to the adjacent branch length so that the total distance between an adjacent pair of terminal nodes remains
         // unaffected. This does not alter the overall topology of the tree (Kuhner and Felsenstein, 1994)."
         if (distance1 < 0)
         {
            distance2 += -distance1;
            distance1 = 0;
         }

         PhyloNode node1 = nodeMap.get(minOTU1);
         node1.removeEdgeFrom(centerNode);
         newNode.addEdge(node1, distance1);
         nodeMap.remove(node1.getLabel());

         PhyloNode node2 = nodeMap.get(minOTU2);
         node2.removeEdgeFrom(centerNode);
         newNode.addEdge(node2, distance2);
         nodeMap.remove(node2.getLabel());

         matrix.addKey(newNodeName);
         for (String key : matrix.keySet())
         {
            if (key.equals(minOTU1) || key.equals(minOTU2) || key.equals(newNodeName)) continue;
            float distance = (matrix.getDistance(key, minOTU1) + matrix.getDistance(key, minOTU2) - minDistance) / 2;
            matrix.setDistance(newNodeName, key, distance);
         }
         matrix.removeKey(minOTU1);
         matrix.removeKey(minOTU2);
      }

      // Now remove the center node and connect the last two nodes.
      for (PhyloNode node : nodeMap.values())
      {
         node.removeEdgeFrom(centerNode);
      }

      Iterator iter = nodeMap.keySet().iterator();
      String otu1 = iter.next();
      String otu2 = iter.next();

      nodeMap.get(otu1).addEdge(nodeMap.get(otu2), matrix.getDistance(otu1, otu2));

      PhylogeneticTree tree = new PhylogeneticTree();

      PhyloNode startingNode = nodeMap.get(otu1);
      if (nodeMap.get(otu2).getEdges().size() > startingNode.getEdges().size())
      {
         startingNode = nodeMap.get(otu2);
      }

      tree.setRootNode(startingNode);

//      tree.rootTreeByMidpointMethod();
//      tree.orderByNodeCount();

      return tree;
   }

   //**************************************************************************
   // PRIVATE METHODS
   //**************************************************************************

   //--------------------------------------------------------------------------
   private static DistanceMatrix calculateQMatrix(DistanceMatrix inDistanceMatrix, Map inNetDivergenceMap)
   {
      DistanceMatrix qMatrix = new DistanceMatrix();

      Collection matrixKeys = inDistanceMatrix.keySet();
      int N = matrixKeys.size();
      for (String key : matrixKeys)
      {
         float netDivergence1 = inNetDivergenceMap.get(key);
         for (String key2 : matrixKeys)
         {
            if (! key.equals(key2))
            {
               float distance = inDistanceMatrix.getDistance(key, key2) - (netDivergence1 + inNetDivergenceMap.get(key2)) / (float) (N - 2);
               qMatrix.setDistance(key, key2, distance);
            }
         }
      }

      return qMatrix;
   }


   //--------------------------------------------------------------------------
   private static Map calculateNetDivergenceMap(DistanceMatrix inDistanceMatrix)
   {
      Map netDivergenceMap = new HashMap<>(inDistanceMatrix.numKeys());

      Collection matrixKeys = inDistanceMatrix.keySet();
      for (String key : matrixKeys)
      {
         float netDivergence = inDistanceMatrix.getNetDivergence(key);

         netDivergenceMap.put(key, netDivergence);
      }

      return netDivergenceMap;
   }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy