com.hfg.bio.phylogeny.UPGMA Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.phylogeny;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import com.hfg.network.Edge;
//------------------------------------------------------------------------------
/**
* UPGMA (unweighted pair-group method using arithmetic averages) method of
* phylogenetic tree construction.
*
* See the Wikipedia article on UPGMA (https://en.wikipedia.org/wiki/UPGMA) for a description of the algorithm.
* Note that distances in the resulting tree will not exactly match those from
* the input distance matrix.
*
* @author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class UPGMA implements TreeMethod
{
   //**************************************************************************
   // CONSTRUCTORS
   //**************************************************************************

   //---------------------------------------------------------------------------
   /**
    * Creates a UPGMA tree builder. The class declares no fields, so instances carry no state.
    */
   public UPGMA()
   {
   }

   //**************************************************************************
   // PUBLIC METHODS
   //**************************************************************************

   //--------------------------------------------------------------------------
   /**
    * Returns the simple class name as the display name of this method.
    */
   @Override
   public String toString()
   {
      return getClass().getSimpleName();
   }

   //--------------------------------------------------------------------------
   /**
    * Two tree methods are considered equal when they are of exactly the same class;
    * there is no instance state to compare.
    *
    * @param inObj2 the object to compare against (may be null)
    * @return true if inObj2 is non-null and has the same runtime class as this object
    */
   @Override
   public boolean equals(Object inObj2)
   {
      return (inObj2 != null
              && inObj2.getClass().equals(getClass()));
   }

   //--------------------------------------------------------------------------
   /**
    * Hash code consistent with {@link #equals(Object)}: since equality depends only
    * on the runtime class, so does the hash.
    */
   @Override
   public int hashCode()
   {
      return getClass().hashCode();
   }

   //--------------------------------------------------------------------------
   /**
    * Builds a tree by repeatedly joining the two closest clusters in the distance matrix.
    * <p>
    * Each join creates a new internal node placed at half the joined distance above its
    * leaves. The distance from the merged cluster to every remaining cluster is the
    * average of the two old distances weighted by the number of leaves in each merged
    * cluster (the "unweighted pair-group" rule, in which every leaf contributes equally).
    * </p>
    * <p>
    * If the supplied matrix does not report itself as consumable, a clone is reduced
    * instead of the supplied instance. Otherwise the supplied matrix itself is reduced.
    * The matrix that was reduced is flagged via setIsConsumed() before returning.
    * </p>
    *
    * @param inDistanceMatrix pairwise distances between the items to be clustered
    * @return a tree whose root is the last cluster formed; for a single-key matrix the
    *         root is a lone leaf labelled with that key
    */
   public NewickTree constructTree(DistanceMatrix inDistanceMatrix)
   {
      int nodeIndex = 1;

      // Internal nodes created so far that have not yet been joined into a parent, by matrix key.
      Map<String, PhyloNode> nodeMap = new HashMap<>();

      // Leaf counts of the internal clusters in nodeMap. Keys that are absent are original leaves (size 1).
      Map<String, Integer> clusterSizes = new HashMap<>();

      DistanceMatrix matrix = inDistanceMatrix;
      if (! matrix.isConsumable())
      {
         matrix = matrix.clone();
      }

      while (matrix.keySet().size() > 1)
      {
         Edge<String> shortestEdge = matrix.getShortestEdge();
         String minKey1 = shortestEdge.getFrom();
         String minKey2 = shortestEdge.getTo();
         float minDistance = shortestEdge.getDistance();

         // The new node sits halfway between the two clusters being joined.
         float newNodeHeight = minDistance / 2;

         int size1 = getClusterSize(clusterSizes, minKey1);
         int size2 = getClusterSize(clusterSizes, minKey2);
         int mergedSize = size1 + size2;

         String newNodeName = "_" + (nodeIndex++);
         PhyloNode newNode = new PhyloNode();
         nodeMap.put(newNodeName, newNode);

         attachChild(newNode, minKey1, newNodeHeight, nodeMap);
         attachChild(newNode, minKey2, newNodeHeight, nodeMap);

         clusterSizes.remove(minKey1);
         clusterSizes.remove(minKey2);
         clusterSizes.put(newNodeName, mergedSize);

         // Reduce the matrix
         matrix.addKey(newNodeName);
         for (String key : matrix.keySet())
         {
            if (key.equals(minKey1) || key.equals(minKey2) || key.equals(newNodeName))
            {
               continue;
            }

            // Weight by leaf count so every leaf contributes equally. A plain (d1 + d2) / 2
            // would be WPGMA rather than UPGMA once the clusters differ in size.
            float avgDistance = (size1 * matrix.getDistance(minKey1, key)
                                 + size2 * matrix.getDistance(minKey2, key)) / mergedSize;
            matrix.setDistance(newNodeName, key, avgDistance);
         }

         // It is (slightly) faster to remove both keys at once
         Set<String> keys = new HashSet<>(2);
         keys.add(minKey1);
         keys.add(minKey2);
         matrix.removeKeys(keys);
      }

      String rootKey = matrix.keySet().iterator().next();
      PhyloNode rootNode = nodeMap.get(rootKey);
      if (null == rootNode)
      {
         // No join ever happened (the matrix held a single key), so the tree is just that leaf.
         rootNode = new PhyloNode().setLabel(rootKey);
      }

      NewickTree tree = new NewickTree();
      tree.setRootNode(rootNode);
      tree.orderByNodeCount();

      matrix.setIsConsumed();

      return tree;
   }

   //**************************************************************************
   // PRIVATE METHODS
   //**************************************************************************

   //--------------------------------------------------------------------------
   /**
    * Returns the number of leaves under the cluster with the given matrix key.
    * Keys with no recorded size are original matrix entries, i.e. single leaves.
    */
   private static int getClusterSize(Map<String, Integer> inClusterSizes, String inKey)
   {
      Integer size = inClusterSizes.get(inKey);
      return (size != null ? size : 1);
   }

   //--------------------------------------------------------------------------
   /**
    * Hangs the cluster identified by inKey beneath inParent.
    * A key with no pending internal node becomes a new labelled leaf at the full parent
    * height. Otherwise the pending node is removed from inPendingNodes and attached with
    * the parent height minus that node's own max distance to a leaf.
    */
   private static void attachChild(PhyloNode inParent, String inKey, float inParentHeight,
                                   Map<String, PhyloNode> inPendingNodes)
   {
      if (! inPendingNodes.containsKey(inKey))
      {
         PhyloNode childNode = new PhyloNode().setLabel(inKey);
         inParent.addEdge(childNode, inParentHeight);
      }
      else
      {
         PhyloNode childNode = inPendingNodes.get(inKey);
         float distance = inParentHeight - childNode.getMaxDistanceToLeaf();
         inParent.addEdge(childNode, distance);
         inPendingNodes.remove(inKey);
      }
   }
}