org.biojava.nbio.phylo.DistanceTreeEvaluator Maven / Gradle / Ivy
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.phylo;
import org.forester.evoinference.matrix.distance.DistanceMatrix;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
* Check the accuracy of a Distance Tree by least squares error (LSE) of the
* Tree branch lengths and the original Distance Matrix.
*
* @author Scooter Willis
* @author Aleix Lafita
*
*/
public class DistanceTreeEvaluator {
private static final Logger logger = LoggerFactory
.getLogger(DistanceTreeEvaluator.class);
/** Prevent instantiation */
private DistanceTreeEvaluator() {
}
/**
* Evaluate the goodness of fit of a given tree to the original distance
* matrix. The returned value is the coefficient of variation, i.e. the
* square root of the LS error normalized by the mean.
*
* This measure can also give an estimate of the quality of the distance
* matrix, because a bad fit may mean that the distance is non-additive.
*
* @param tree
* Phylogenetic Distance Tree to evaluate
* @param matrix
* Distance Matrix with the original distances
* @return the square root of the average tree LS error normalized by the
* average tree distance (coefficient of variation, CV).
*/
public static double evaluate(Phylogeny tree, DistanceMatrix matrix) {
int numSequences = matrix.getSize();
List externalNodes = tree.getExternalNodes();
HashMap externalNodesHashMap = new HashMap();
Set path = new HashSet();
for (PhylogenyNode node : externalNodes) {
externalNodesHashMap.put(node.getName(), node);
}
int count = 0;
double averageMatrixDistance = 0.0;
double averageTreeDistance = 0.0;
double averageTreeErrorDistance = 0.0;
for (int row = 0; row < numSequences - 1; row++) {
String nodeName1 = matrix.getIdentifier(row);
PhylogenyNode node1 = externalNodesHashMap.get(nodeName1);
markPathToRoot(node1, path);
for (int col = row + 1; col < numSequences; col++) {
count++;
String nodeName2 = matrix.getIdentifier(col);
PhylogenyNode node2 = externalNodesHashMap.get(nodeName2);
double distance = matrix.getValue(col, row);
averageMatrixDistance = averageMatrixDistance + distance;
PhylogenyNode commonParent = findCommonParent(node2, path);
if (commonParent != null) {
double treeDistance = getNodeDistance(commonParent, node1)
+ getNodeDistance(commonParent, node2);
averageTreeDistance += treeDistance;
averageTreeErrorDistance += (distance - treeDistance)
* (distance - treeDistance);
logger.info("{} {} Distance: {}Tree: {} difference: {}",
nodeName1, nodeName2, distance, treeDistance,
Math.abs(distance - treeDistance));
} else {
logger.warn("Unable to find common parent with {} {}",
node1, node2);
}
}
path.clear();
}
averageMatrixDistance /= count;
averageTreeDistance /= count;
averageTreeErrorDistance /= count;
logger.info("Average matrix distance: {}", averageMatrixDistance);
logger.info("Average tree distance: {}", averageTreeDistance);
logger.info("Average LS error: {}", averageTreeErrorDistance);
return Math.sqrt(averageTreeErrorDistance) / averageMatrixDistance;
}
private static double getNodeDistance(PhylogenyNode parentNode,
PhylogenyNode childNode) {
double distance = 0.0;
while (childNode != parentNode) {
distance = distance + childNode.getDistanceToParent();
childNode = childNode.getParent();
}
return distance;
}
private static PhylogenyNode findCommonParent(PhylogenyNode node,
Set path) {
while (!path.contains(node)) {
node = node.getParent();
}
return node;
}
private static void markPathToRoot(PhylogenyNode node,
Set path) {
path.add(node);
while (!node.isRoot()) {
node = node.getParent();
path.add(node);
}
}
}