net.maizegenetics.taxa.tree.TreeUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage
disequilibrium.
// TreeUtils.java
//
// (c) 1999-2001 PAL Development Core Team
//
// This package may be distributed under the
// terms of the Lesser GNU General Public License (LGPL)
package net.maizegenetics.taxa.tree;
import java.io.IOException;
import net.maizegenetics.util.FormattedOutput;
import net.maizegenetics.stats.math.MersenneTwisterFast;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListBuilder;
import net.maizegenetics.taxa.Taxon;
import java.io.PrintWriter;
import java.io.Writer;
/**
* various utility functions on trees.
*
* @author Alexei Drummond
* @author Korbinian Strimmer
* @author Matthew Goode
*/
public class TreeUtils {
/**
* computes Robinson-Foulds (1981) distance between two trees
*
* @param t1 tree 1
* @param t2 tree 2
*
* Definition: Assuming that t1 is the reference tree, let fn be the false
* negatives, i.e. the number of edges in t1 missing in t2, and fp the
* number of false positives, i.e. the number of edges in t2 missing in t1.
* The RF distance is then (fn + fp)/2
*/
public static double getRobinsonFouldsDistance(Tree t1, Tree t2) {
SplitSystem s1 = SplitUtils.getSplits(t1);
return getRobinsonFouldsDistance(s1, t2);
}
/**
* computes Robinson-Foulds (1981) distance between two trees
*
* @param s1 tree 1 (as represented by a SplitSystem)
* @param t2 tree 2
*/
public static double getRobinsonFouldsDistance(SplitSystem s1, Tree t2) {
TaxaList idGroup = s1.getIdGroup();
SplitSystem s2 = SplitUtils.getSplits(idGroup, t2);
if (s1.getLabelCount() != s2.getLabelCount()) {
throw new IllegalArgumentException("Number of labels must be the same!");
}
int ns1 = s1.getSplitCount();
int ns2 = s2.getSplitCount();
// number of splits in t1 missing in t2
int fn = 0;
for (int i = 0; i < ns1; i++) {
if (!s2.hasSplit(s1.getSplit(i))) {
fn++;
}
}
// number of splits in t2 missing in t1
int fp = 0;
for (int i = 0; i < ns2; i++) {
if (!s1.hasSplit(s2.getSplit(i))) {
fp++;
}
}
return 0.5 * ((double) fp + (double) fn);
}
/**
* computes Robinson-Foulds (1981) distance between two trees rescaled to a
* number between 0 and 1
*
* @param t1 tree 1
* @param t2 tree 2
*/
public static double getRobinsonFouldsRescaledDistance(Tree t1, Tree t2) {
SplitSystem s1 = SplitUtils.getSplits(t1);
return getRobinsonFouldsRescaledDistance(s1, t2);
}
/**
* computes Robinson-Foulds (1981) distance between two trees rescaled to a
* number between 0 and 1
*
* @param s1 tree 1 (as represented by a SplitSystem)
* @param t2 tree 2
*/
public static double getRobinsonFouldsRescaledDistance(SplitSystem s1, Tree t2) {
return getRobinsonFouldsDistance(s1, t2) / (double) s1.getSplitCount();
}
private static MersenneTwisterFast random = new MersenneTwisterFast();
/**
* Returns a uniformly distributed random node from the tree, including both
* internal and external nodes.
*/
public static Node getRandomNode(Tree tree) {
int index = random.nextInt(tree.getExternalNodeCount() + tree.getInternalNodeCount());
if (index >= tree.getExternalNodeCount()) {
return tree.getInternalNode(index - tree.getExternalNodeCount());
} else {
return tree.getExternalNode(index);
}
}
/**
* @return the first found node that has a certain name (as determined by
* the nodes Taxon) in the tree defined by a root node.
* @param tree The Tree supposidly containing such a named node
* @param name The name of the node to find.
* @return The node with the name, or null if no such node exists
* @see net.maizegenetics.pal.taxa.Taxon , Node
*/
public static final Node getNodeByName(Tree tree, String name) {
return getNodeByName(tree.getRoot(), name);
}
/**
* @return the first found node that has a certain name (as determined by
* the nodes Taxon) in the tree defined by a root node.
* @param root The root node of a tree
* @param name The name of the node to find.
* @return The node with the name, or null if no such node exists
* @see net.maizegenetics.pal.taxa.Taxon , Node
*/
public static final Node getNodeByName(Node root, String name) {
if (root.getIdentifier().getName().equals(name)) {
return root;
}
for (int i = 0; i < root.getChildCount(); i++) {
Node result = getNodeByName(root.getChild(i), name);
if (result != null) {
return result;
}
}
return null;
}
/**
* Rotates branches by leaf count. WARNING: assumes binary tree!
*/
public static void rotateByLeafCount(Tree tree) {
rotateByLeafCount(tree.getRoot());
}
/**
* get list of the identifiers of the external nodes
*
* @return leaf identifier group
*/
public static final TaxaList getLeafIdGroup(Tree tree) {
tree.createNodeList();
TaxaListBuilder labelList = new TaxaListBuilder();
for (int i = 0; i < tree.getExternalNodeCount(); i++) {
labelList.add(tree.getExternalNode(i).getIdentifier());
}
return labelList.build();
}
/**
* print a this tree in New Hampshire format (including distances and
* internal labels)
*
* @param out output stream
*/
public static void printNH(Tree tree, PrintWriter out) throws IOException {
printNH(tree, out, true, true);
}
/**
* print this tree in New Hampshire format
*
* @param out output stream
* @param printLengths boolean variable determining whether branch lengths
* should be included in output
* @param printInternalLabels boolean variable determining whether internal
* labels should be included in output
*/
public static void printNH(Tree tree, PrintWriter out,
boolean printLengths, boolean printInternalLabels) throws IOException {
NodeUtils.printNH(out, tree.getRoot(),
printLengths, printInternalLabels);
out.println(";");
}
/**
* a tree that has been re-rooted at the given internal node. The original
* root of the tree must have had at least 3 children.
*/
public static void reroot(Tree tree, Node node) {
reroot(node);
tree.setRoot(node);
}
/**
* Return node with the highest average minimum and maximum distance. Ed
* thinks this is roughly the midpoint - need to look this up
*
* @param tree
* @return
*/
public static Node findMidpointNode(Tree tree) {
Node midNode = null;
double maxAvgDist = -1;
for (int i = 0; i < tree.getInternalNodeCount(); i++) {
Node iNode = tree.getInternalNode(i);
double[] d = NodeUtils.getPathLengthInfo(iNode);
double avg = (d[0] + d[1]) / 2.0;
if (maxAvgDist < avg) {
maxAvgDist = avg;
midNode = iNode;
}
}
return midNode;
}
/*
* compute distance of external node a to all other leaves
* (computational complexity of this method is only O(n), following
* D.Bryant and P. Wadell. 1998. MBE 15:1346-1359)
*
* @param tree tree
* @param a node
* @param dist array for the node-to-node distance distances
* @param idist array for the distance between a and all internal nodes
* @param countEdges boolean variable deciding whether the actual
* branch lengths are used in computing the distance
* or whether simply all edges larger or equal a certain
* threshold length are counted (each with weight 1.0)
* @param epsilon minimum branch length for a which an edge is counted
*/
public static void computeAllDistances(Tree tree,
int a, double[] dist, double[] idist,
boolean countEdges, double epsilon) {
tree.createNodeList();
dist[a] = 0.0;
Node node = tree.getExternalNode(a);
computeNodeDist(node, node.getParent(), dist, idist, countEdges, epsilon);
}
private static void computeNodeDist(Node origin, Node center,
double[] dist, double[] idist,
boolean countEdges, double epsilon) {
int indexCenter = center.getNumber();
int indexOrigin = origin.getNumber();
double[] distCenter;
double[] distOrigin;
if (center.isLeaf()) {
distCenter = dist;
} else {
distCenter = idist;
}
if (origin.isLeaf()) {
distOrigin = dist;
} else {
distOrigin = idist;
}
double len;
double tmp;
if (origin.getParent() == center) {
// center is parent of origin
tmp = origin.getBranchLength();
} else {
// center is child of origin
tmp = center.getBranchLength();
}
if (countEdges) // count all edges >= epsilon
{
if (tmp < epsilon) {
len = 0.0;
} else {
len = 1.0;
}
} else // use branch lengths
{
len = tmp;
}
distCenter[indexCenter] = distOrigin[indexOrigin] + len;
if (!center.isLeaf()) {
for (int i = 0; i < center.getChildCount(); i++) {
Node c = center.getChild(i);
if (c != origin) {
computeNodeDist(center, c, dist, idist, countEdges, epsilon);
}
}
if (!center.isRoot()) {
Node p = center.getParent();
if (p != origin) {
computeNodeDist(center, p, dist, idist, countEdges, epsilon);
}
}
}
}
private static Node[] path;
/**
* compute distance between two external nodes
*
* @param tree tree
* @param a external node 1
* @param b external node 2
*
* @return distance between node a and b
*/
public static final double computeDistance(Tree tree, int a, int b) {
tree.createNodeList();
int maxLen = tree.getInternalNodeCount() + 1;
if (path == null || path.length < maxLen) {
path = new Node[maxLen];
}
// len might be different from path.length
int len = findPath(tree, a, b);
double dist = 0.0;
for (int i = 0; i < len; i++) {
dist += path[i].getBranchLength();
}
return dist;
}
// Find path between external nodes a and b
// After calling this method path contains all nodes
// with edges lying between a and b (including a and b)
// (note that the node lying on the intersection of a-root
// and b-root is NOT contained because this node does
// not contain a branch of the path)
// The length of the path is also returned
private static final int findPath(Tree tree, int a, int b) {
// clean path
for (int i = 0; i < path.length; i++) {
path[i] = null;
}
// path from node a to root
Node node = tree.getExternalNode(a);
int len = 0;
path[len] = node;
len++;
while (!node.isRoot()) {
node = node.getParent();
path[len] = node;
len++;
}
// find intersection with path from node b to root
Node stopNode = null;
node = tree.getExternalNode(b);
while (!node.isRoot()) {
node = node.getParent();
int pos = findInPath(node);
if (pos != -1) {
len = pos;
stopNode = node;
break;
}
}
// fill rest of path
node = tree.getExternalNode(b);
path[len] = node;
len++;
node = node.getParent();
while (node != stopNode) {
path[len] = node;
len++;
node = node.getParent();
}
// clean rest
for (int i = len; i < path.length; i++) {
path[i] = null;
}
return len;
}
private static final int findInPath(Node node) {
for (int i = 0; i < path.length; i++) {
if (path[i] == node) {
return i;
} else if (path[i] == null) {
return -1;
}
}
return -1;
}
/**
* Rotates branches by leaf count. WARNING: assumes binary tree!
*/
private static void rotateByLeafCount(Node node) {
if (!node.isLeaf()) {
if (NodeUtils.getLeafCount(node.getChild(0))
> NodeUtils.getLeafCount(node.getChild(1))) {
Node temp = node.getChild(0);
node.removeChild(0);
node.addChild(temp);
}
for (int i = 0; i < node.getChildCount(); i++) {
rotateByLeafCount(node.getChild(i));
}
}
}
public static void report(Tree tree, Writer out) throws IOException {
printASCII(tree, out);
out.write("\n");
branchInfo(tree, out);
out.write("\n");
heightInfo(tree, out);
}
private static FormattedOutput format;
private static double proportion;
private static int minLength;
private static boolean[] umbrella;
private static int[] position;
private static int numExternalNodes;
private static int numInternalNodes;
private static int numBranches;
// Print picture of current tree in ASCII
private static void printASCII(Tree tree, Writer out) throws IOException {
format = FormattedOutput.getInstance();
tree.createNodeList();
numExternalNodes = tree.getExternalNodeCount();
numInternalNodes = tree.getInternalNodeCount();
numBranches = numInternalNodes + numExternalNodes - 1;
umbrella = new boolean[numExternalNodes];
position = new int[numExternalNodes];
minLength = (Integer.toString(numBranches)).length() + 1;
int MAXCOLUMN = 40;
Node root = tree.getRoot();
if (root.getNodeHeight() == 0.0) {
NodeUtils.lengths2Heights(root);
}
proportion = (double) MAXCOLUMN / root.getNodeHeight();
for (int n = 0; n < numExternalNodes; n++) {
umbrella[n] = false;
}
position[0] = 1;
for (int i = root.getChildCount() - 1; i > -1; i--) {
printNodeInASCII(out, root.getChild(i), 1, i, root.getChildCount());
if (i != 0) {
putCharAtLevel(out, 0, '|');
out.write("\n");
}
}
}
// Print branch information
private static void branchInfo(Tree tree, Writer out) throws IOException {
//
// CALL PRINTASCII FIRST !!!
//
// check if some SE values differ from the default zero
boolean showSE = false;
for (int i = 0; i < numExternalNodes && showSE == false; i++) {
if (tree.getExternalNode(i).getBranchLengthSE() != 0.0) {
showSE = true;
}
if (i < numInternalNodes - 1) {
if (tree.getInternalNode(i).getBranchLengthSE() != 0.0) {
showSE = true;
}
}
}
format.displayIntegerWhite(out, numExternalNodes);
out.write(" Length ");
if (showSE) {
out.write("S.E. ");
}
out.write("Label ");
if (numInternalNodes > 1) {
format.displayIntegerWhite(out, numBranches);
out.write(" Length ");
if (showSE) {
out.write("S.E. ");
}
out.write("Label");
}
out.write("\n");
for (int i = 0; i < numExternalNodes; i++) {
format.displayInteger(out, i + 1, numExternalNodes);
out.write(" ");
format.displayDecimal(out, tree.getExternalNode(i).getBranchLength(), 5);
out.write(" ");
if (showSE) {
format.displayDecimal(out, tree.getExternalNode(i).getBranchLengthSE(), 5);
out.write(" ");
}
format.displayLabel(out, tree.getExternalNode(i).getIdentifier().getName(), 10);
if (i < numInternalNodes - 1) {
format.multiplePrint(out, ' ', 5);
format.displayInteger(out, i + 1 + numExternalNodes, numBranches);
out.write(" ");
format.displayDecimal(out, tree.getInternalNode(i).getBranchLength(), 5);
out.write(" ");
if (showSE) {
format.displayDecimal(out, tree.getInternalNode(i).getBranchLengthSE(), 5);
out.write(" ");
}
format.displayLabel(out, tree.getInternalNode(i).getIdentifier().getName(), 10);
}
out.write("\n");
}
}
// Print height information
private static void heightInfo(Tree tree, Writer out) throws IOException {
//
// CALL PRINTASCII FIRST
//
if (tree.getRoot().getNodeHeight() == 0.0) {
NodeUtils.lengths2Heights(tree.getRoot());
}
// check if some SE values differ from the default zero
//boolean showSE = false;
//for (int i = 0; i < numInternalNodes && showSE == false; i++)
//{
// if (tree.getInternalNode(i).getNodeHeightSE() != 0.0)
// {
// showSE = true;
// }
//}
format.displayIntegerWhite(out, numExternalNodes);
out.write(" Height ");
format.displayIntegerWhite(out, numBranches);
out.write(" Height ");
//if (showSE) out.print("S.E.");
out.write("\n");
for (int i = 0; i < numExternalNodes; i++) {
format.displayInteger(out, i + 1, numExternalNodes);
out.write(" ");
format.displayDecimal(out, tree.getExternalNode(i).getNodeHeight(), 7);
out.write(" ");
if (i < numInternalNodes) {
format.multiplePrint(out, ' ', 5);
if (i == numInternalNodes - 1) {
out.write("R");
format.multiplePrint(out, ' ', Integer.toString(numBranches).length() - 1);
} else {
format.displayInteger(out, i + 1 + numExternalNodes, numBranches);
}
out.write(" ");
format.displayDecimal(out, tree.getInternalNode(i).getNodeHeight(), 7);
out.write(" ");
}
out.write("\n");
}
}
private static void printNodeInASCII(Writer out, Node node, int level, int m, int maxm) throws IOException {
position[level] = (int) (node.getBranchLength() * proportion);
if (position[level] < minLength) {
position[level] = minLength;
}
if (node.isLeaf()) // external branch
{
if (m == maxm - 1) {
umbrella[level - 1] = true;
}
printlnNodeWithNumberAndLabel(out, node, level);
if (m == 0) {
umbrella[level - 1] = false;
}
} else // internal branch
{
for (int n = node.getChildCount() - 1; n > -1; n--) {
printNodeInASCII(out, node.getChild(n), level + 1, n, node.getChildCount());
if (m == maxm - 1 && n == node.getChildCount() / 2) {
umbrella[level - 1] = true;
}
if (n != 0) {
if (n == node.getChildCount() / 2) {
printlnNodeWithNumberAndLabel(out, node, level);
} else {
for (int i = 0; i < level + 1; i++) {
if (umbrella[i]) {
putCharAtLevel(out, i, '|');
} else {
putCharAtLevel(out, i, ' ');
}
}
out.write("\n");
}
}
if (m == 0 && n == node.getChildCount() / 2) {
umbrella[level - 1] = false;
}
}
}
}
private static void printlnNodeWithNumberAndLabel(Writer out, Node node, int level) throws IOException {
for (int i = 0; i < level - 1; i++) {
if (umbrella[i]) {
putCharAtLevel(out, i, '|');
} else {
putCharAtLevel(out, i, ' ');
}
}
putCharAtLevel(out, level - 1, '+');
int branchNumber;
if (node.isLeaf()) {
branchNumber = node.getNumber() + 1;
} else {
branchNumber = node.getNumber() + 1 + numExternalNodes;
}
String numberAsString = Integer.toString(branchNumber);
int numDashs = position[level] - numberAsString.length();
for (int i = 0; i < numDashs; i++) {
out.write('-');
}
out.write(numberAsString);
if (node.isLeaf()) {
out.write(" " + node.getIdentifier() + "\n");
} else {
if (!node.getIdentifier().equals(Taxon.ANONYMOUS)) {
out.write("(" + node.getIdentifier() + ")");
}
out.write("\n");
}
}
private static void putCharAtLevel(Writer out, int level, char c) throws IOException {
int n = position[level] - 1;
for (int i = 0; i < n; i++) {
out.write(' ');
}
out.write(c);
}
/**
* make given node the root node
*
* @param node new root node
*/
private static void reroot(Node node) {
if (node.isRoot() || node.isLeaf()) {
return;
}
if (!node.getParent().isRoot()) {
reroot(node.getParent());
}
// Now the parent of node is root
if (node.getParent().getChildCount() < 3) {
// Rerooting not possible
return;
}
// Exchange branch label, length et cetera
NodeUtils.exchangeInfo(node.getParent(), node);
// Rearrange topology
Node parent = node.getParent();
NodeUtils.removeChild(parent, node);
node.addChild(parent);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy