All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.phylo.DistanceTreeEvaluator Maven / Gradle / Ivy

The newest version!
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.phylo;

import org.forester.evoinference.matrix.distance.DistanceMatrix;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Check the accuracy of a Distance Tree by least squares error (LSE) of the
 * Tree branch lengths and the original Distance Matrix.
 *
 * @author Scooter Willis
 * @author Aleix Lafita
 *
 */
public class DistanceTreeEvaluator {

	private static final Logger logger = LoggerFactory
			.getLogger(DistanceTreeEvaluator.class);

	/** Prevent instantiation */
	private DistanceTreeEvaluator() {
	}

	/**
	 * Evaluate the goodness of fit of a given tree to the original distance
	 * matrix. The returned value is the coefficient of variation, i.e. the
	 * square root of the LS error normalized by the mean.
	 * 

* This measure can also give an estimate of the quality of the distance * matrix, because a bad fit may mean that the distance is non-additive. * * @param tree * Phylogenetic Distance Tree to evaluate * @param matrix * Distance Matrix with the original distances * @return the square root of the average tree LS error normalized by the * average tree distance (coefficient of variation, CV). */ public static double evaluate(Phylogeny tree, DistanceMatrix matrix) { int numSequences = matrix.getSize(); List externalNodes = tree.getExternalNodes(); HashMap externalNodesHashMap = new HashMap(); Set path = new HashSet(); for (PhylogenyNode node : externalNodes) { externalNodesHashMap.put(node.getName(), node); } int count = 0; double averageMatrixDistance = 0.0; double averageTreeDistance = 0.0; double averageTreeErrorDistance = 0.0; for (int row = 0; row < numSequences - 1; row++) { String nodeName1 = matrix.getIdentifier(row); PhylogenyNode node1 = externalNodesHashMap.get(nodeName1); markPathToRoot(node1, path); for (int col = row + 1; col < numSequences; col++) { count++; String nodeName2 = matrix.getIdentifier(col); PhylogenyNode node2 = externalNodesHashMap.get(nodeName2); double distance = matrix.getValue(col, row); averageMatrixDistance = averageMatrixDistance + distance; PhylogenyNode commonParent = findCommonParent(node2, path); if (commonParent != null) { double treeDistance = getNodeDistance(commonParent, node1) + getNodeDistance(commonParent, node2); averageTreeDistance += treeDistance; averageTreeErrorDistance += (distance - treeDistance) * (distance - treeDistance); logger.info("{} {} Distance: {}Tree: {} difference: {}", nodeName1, nodeName2, distance, treeDistance, Math.abs(distance - treeDistance)); } else { logger.warn("Unable to find common parent with {} {}", node1, node2); } } path.clear(); } averageMatrixDistance /= count; averageTreeDistance /= count; averageTreeErrorDistance /= count; logger.info("Average matrix distance: {}", averageMatrixDistance); logger.info("Average tree distance: {}", averageTreeDistance); logger.info("Average LS error: {}", averageTreeErrorDistance); return Math.sqrt(averageTreeErrorDistance) / averageMatrixDistance; } private static double getNodeDistance(PhylogenyNode parentNode, PhylogenyNode childNode) { double distance = 0.0; while (childNode != parentNode) { distance = distance + childNode.getDistanceToParent(); childNode = childNode.getParent(); } return distance; } private static PhylogenyNode findCommonParent(PhylogenyNode node, Set path) { while (!path.contains(node)) { node = node.getParent(); } return node; } private static void markPathToRoot(PhylogenyNode node, Set path) { path.add(node); while (!node.isRoot()) { node = node.getParent(); path.add(node); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy