com.hfg.bio.phylogeny.NJ Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.phylogeny;
import com.hfg.network.Edge;
import java.util.Map;
import java.util.HashMap;
import java.util.Collection;
import java.util.Iterator;
//------------------------------------------------------------------------------
/**
* NJ (Neighbor-Joining) method of phylogenetic tree construction.
* First described by Saitou N, Nei M (1987). "The neighbor-joining method: a new
* method for reconstructing phylogenetic trees". Mol Biol Evol 4 (4): 406-425.
* See wikipedia.
* Note that distances in the resulting tree will not exactly match those from
* the input distance matrix.
* @author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class NJ implements TreeMethod
{
//**************************************************************************
// PUBLIC METHODS
//**************************************************************************
//--------------------------------------------------------------------------
@Override
public String toString()
{
return getClass().getSimpleName();
}
//--------------------------------------------------------------------------
@Override
public boolean equals(Object inObj2)
{
return (inObj2 != null
&& inObj2.getClass().equals(getClass()));
}
//--------------------------------------------------------------------------
public NewickTree constructTree(DistanceMatrix inDistanceMatrix)
{
int nodeIndex = 1;
Map nodeMap = new HashMap<>();
// Start with a star tree
// A
// F | B
// \ | /
// \ | /
// \|/
// /|\
// / | \
// / | \
// E | C
// D
//
PhyloNode centerNode = new PhyloNode();
for (String otu : inDistanceMatrix.keySet())
{
PhyloNode otuNode = new PhyloNode().setLabel(otu);
nodeMap.put(otu, otuNode);
centerNode.addEdge(otuNode, null);
}
// Clone the distance matrix so we can consume it
DistanceMatrix matrix = inDistanceMatrix.clone();
while (matrix.keySet().size() > 2)
{
Map netDivergenceMap = calculateNetDivergenceMap(matrix);
DistanceMatrix qMatrix = calculateQMatrix(matrix, netDivergenceMap);
// Note: OTU stands for 'Operational Taxonomic Unit'
Edge shortestEdge = qMatrix.getShortestEdge();
String minOTU1 = shortestEdge.getFrom();
String minOTU2 = shortestEdge.getTo();
float minDistance = matrix.getDistance(minOTU1, minOTU2);
String newNodeName = "_" + (nodeIndex++);
PhyloNode newNode = new PhyloNode().setLabel(newNodeName);
centerNode.addEdge(newNode, null);
nodeMap.put(newNodeName, newNode);
float distance1 = minDistance / 2
+ (netDivergenceMap.get(minOTU1) - netDivergenceMap.get(minOTU2)) / (2 * (matrix.keySet().size() - 2));
float distance2 = minDistance - distance1;
// Deal with Negative branch lengths
// From : http://www.deduveinstitute.be/~opperd/private/neighbor.html
// "As the neighbor-joining algorithm seeks to represent the data in the form of an additive tree, it can assign
// a negative length to the branch. Here the interpretation of branch lengths as an estimated number of substititions
// gets into difficulties. When this occurs it is adviced to set the branch length to zero and transfer the difference
// to the adjacent branch length so that the total distance between an adjacent pair of terminal nodes remains
// unaffected. This does not alter the overall topology of the tree (Kuhner and Felsenstein, 1994)."
if (distance1 < 0)
{
distance2 += -distance1;
distance1 = 0;
}
PhyloNode node1 = nodeMap.get(minOTU1);
node1.removeEdgeFrom(centerNode);
newNode.addEdge(node1, distance1);
nodeMap.remove(node1.getLabel());
PhyloNode node2 = nodeMap.get(minOTU2);
node2.removeEdgeFrom(centerNode);
newNode.addEdge(node2, distance2);
nodeMap.remove(node2.getLabel());
matrix.addKey(newNodeName);
for (String key : matrix.keySet())
{
if (key.equals(minOTU1) || key.equals(minOTU2) || key.equals(newNodeName)) continue;
float distance = (matrix.getDistance(key, minOTU1) + matrix.getDistance(key, minOTU2) - minDistance) / 2;
matrix.setDistance(newNodeName, key, distance);
}
matrix.removeKey(minOTU1);
matrix.removeKey(minOTU2);
}
// Now remove the center node and connect the last two nodes.
for (PhyloNode node : nodeMap.values())
{
node.removeEdgeFrom(centerNode);
}
Iterator iter = nodeMap.keySet().iterator();
String otu1 = iter.next();
String otu2 = iter.next();
nodeMap.get(otu1).addEdge(nodeMap.get(otu2), matrix.getDistance(otu1, otu2));
NewickTree tree = new NewickTree();
PhyloNode startingNode = nodeMap.get(otu1);
if (nodeMap.get(otu2).getEdges().size() > startingNode.getEdges().size())
{
startingNode = nodeMap.get(otu2);
}
tree.setRootNode(startingNode);
// tree.rootTreeByMidpointMethod();
// tree.orderByNodeCount();
return tree;
}
//**************************************************************************
// PRIVATE METHODS
//**************************************************************************
//--------------------------------------------------------------------------
private static DistanceMatrix calculateQMatrix(DistanceMatrix inDistanceMatrix, Map inNetDivergenceMap)
{
DistanceMatrix qMatrix = new DistanceMatrix();
Collection matrixKeys = inDistanceMatrix.keySet();
int N = matrixKeys.size();
for (String key : matrixKeys)
{
float netDivergence1 = inNetDivergenceMap.get(key);
for (String key2 : matrixKeys)
{
if (! key.equals(key2))
{
float distance = inDistanceMatrix.getDistance(key, key2) - (netDivergence1 + inNetDivergenceMap.get(key2)) / (float) (N - 2);
qMatrix.setDistance(key, key2, distance);
}
}
}
return qMatrix;
}
//--------------------------------------------------------------------------
private static Map calculateNetDivergenceMap(DistanceMatrix inDistanceMatrix)
{
Map netDivergenceMap = new HashMap<>(inDistanceMatrix.size());
Collection matrixKeys = inDistanceMatrix.keySet();
for (String key : matrixKeys)
{
float netDivergence = 0;
for (String key2 : matrixKeys)
{
if (key.equals(key2)) continue;
netDivergence += inDistanceMatrix.getDistance(key, key2);
}
netDivergenceMap.put(key, netDivergence);
}
return netDivergenceMap;
}
}