// org.biojava.nbio.alignment.GuideTree (Maven / Gradle / Ivy)
// The newest version!
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* Created on July 1, 2010
* Author: Mark Chapman
*/
package org.biojava.nbio.alignment;
import org.biojava.nbio.core.alignment.SimpleProfile;
import org.biojava.nbio.alignment.template.GuideTreeNode;
import org.biojava.nbio.alignment.template.PairwiseSequenceScorer;
import org.biojava.nbio.core.alignment.template.Profile;
import org.biojava.nbio.core.alignment.template.ProfilePair;
import org.biojava.nbio.core.sequence.AccessionID;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.biojava.nbio.phylo.ForesterWrapper;
import org.biojava.nbio.phylo.TreeConstructor;
import org.biojava.nbio.phylo.TreeConstructorType;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import javax.swing.tree.TreeNode;
import java.util.*;
import java.util.concurrent.Future;
/**
* Implements a data structure for a guide tree used during progressive multiple sequence alignment. Leaf
* {@link Node}s correspond to single {@link Sequence}s. Internal {@link Node}s correspond to multiple sequence
* alignments. The root {@link Node} corresponds to the full multiple sequence alignment.
*
* @author Mark Chapman
* @param each {@link Sequence} in the tree is of type S
* @param each element of a {@link Sequence} is a {@link Compound} of type C
*/
public class GuideTree, C extends Compound> implements Iterable> {
private List sequences;
private List> scorers;
private BasicSymmetricalDistanceMatrix distances;
private String newick;
private Node root;
/**
* Creates a guide tree for use during progressive multiple sequence alignment.
*
* @param sequences the {@link List} of {@link Sequence}s to align
* @param scorers list of sequence pair scorers, one for each pair of sequences given
*/
public GuideTree(List sequences, List> scorers) {
this.sequences = Collections.unmodifiableList(sequences);
this.scorers = Collections.unmodifiableList(scorers);
distances = new BasicSymmetricalDistanceMatrix(sequences.size());
for (int i = 0, n = 0; i < sequences.size(); i++) {
AccessionID id = sequences.get(i).getAccession();
String str = (id == null) ? Integer.toString(i + 1) : id.getID();
distances.setIdentifier(i, str);
for (int j = i+1; j < sequences.size(); j++) {
double dist = scorers.get(n++).getDistance();
distances.setValue(i, j, dist);
}
}
BasicSymmetricalDistanceMatrix distclone = ForesterWrapper.cloneDM(distances);
Phylogeny phylogeny = TreeConstructor.distanceTree(distclone, TreeConstructorType.NJ);
newick = phylogeny.toString();
root = new Node(phylogeny.getRoot(), null);
}
/**
* Returns a sequence pair score for all {@link Sequence} pairs in the given {@link List}.
*
* @return list of sequence pair scores
*/
public double[] getAllPairsScores() {
double[] scores = new double[scorers.size()];
int n = 0;
for (PairwiseSequenceScorer scorer : scorers) {
scores[n++] = scorer.getScore();
}
return scores;
}
/**
* Returns the distance matrix used to construct this guide tree. The scores have been normalized.
*
* @return the distance matrix used to construct this guide tree
*/
public double[][] getDistanceMatrix() {
double[][] matrix = new double[distances.getSize()][distances.getSize()];
for (int i = 0; i < matrix.length; i++) {
for (int j = i+1; j < matrix.length; j++) {
matrix[i][j] = matrix[j][i] = distances.getValue(i, j);
}
}
return matrix;
}
/**
* Returns the root {@link Node} which corresponds to the full multiple sequence alignment.
*
* @return the root node
*/
public Node getRoot() {
return root;
}
/**
* Returns the similarity matrix used to construct this guide tree. The scores have not been normalized.
*
* @return the similarity matrix used to construct this guide tree
*/
public double[][] getScoreMatrix() {
double[][] matrix = new double[sequences.size()][sequences.size()];
for (int i = 0, n = 0; i < matrix.length; i++) {
matrix[i][i] = scorers.get(i).getMaxScore();
for (int j = i+1; j < matrix.length; j++) {
matrix[i][j] = matrix[j][i] = scorers.get(n++).getScore();
}
}
return matrix;
}
/**
* Returns the {@link Sequence}s which make up the leaves of this tree.
*
* @return the sequences which make up the leaves of this tree
*/
public List getSequences() {
return sequences;
}
// method for Iterable
/**
* Returns a post-order {@link Iterator} that traverses the tree from leaves to root.
*/
@Override
public Iterator> iterator() {
return new PostOrderIterator();
}
// method from Object
@Override
public String toString() {
return newick;
}
/**
* Implements a data structure for the node in a guide tree used during progressive multiple sequence alignment.
*/
public class Node implements GuideTreeNode {
private GuideTreeNode parent, child1, child2;
private double distance;
private String name;
private boolean isLeaf, isVisited;
private Profile profile;
private Future> profileFuture;
private Node(PhylogenyNode node, Node parent) {
this.parent = parent;
distance = node.getDistanceToParent();
name = node.getName();
if(isLeaf = node.isExternal()) {
profile = new SimpleProfile<>(sequences.get(distances.getIndex(name)));
} else {
child1 = new Node(node.getChildNode1(), this);
child2 = new Node(node.getChildNode2(), this);
}
}
// methods for GuideTreeNode
@Override
public GuideTreeNode getChild1() {
return child1;
}
@Override
public GuideTreeNode getChild2() {
return child2;
}
@Override
public double getDistanceToParent() {
return distance;
}
@Override
public String getName() {
return name;
}
@Override
public Profile getProfile() {
return profile;
}
@Override
public Future> getProfileFuture() {
return profileFuture;
}
@Override
public void setProfile(Profile profile) {
this.profile = profile;
profileFuture = null;
}
@Override
public void setProfileFuture(Future> profileFuture) {
this.profileFuture = profileFuture;
profile = null;
}
// methods for TreeNode
@Override
public Enumeration> children() {
Vector> children = new Vector<>();
children.add(getChild1());
children.add(getChild2());
return children.elements();
}
@Override
public boolean getAllowsChildren() {
return !isLeaf();
}
@Override
public GuideTreeNode getChildAt(int childIndex) {
if (childIndex == 1) {
return getChild1();
} else if (childIndex == 2) {
return getChild2();
}
throw new IndexOutOfBoundsException();
}
@Override
public int getChildCount() {
return 2;
}
@Override
public int getIndex(TreeNode child) {
return getChildAt(1) == child ? 1 : (getChildAt(2) == child ? 2 : -1);
}
@Override
public GuideTreeNode getParent() {
return parent;
}
@Override
public boolean isLeaf() {
return isLeaf;
}
// helper methods for iterator
private void clearVisited() {
isVisited = false;
if (!isLeaf()) {
((Node) getChild1()).clearVisited();
((Node) getChild2()).clearVisited();
}
}
private boolean isVisited() {
return isVisited;
}
private void visit() {
isVisited = true;
}
}
// helper class that defines the default post-order (leaves to root) traversal
private class PostOrderIterator implements Iterator> {
private Stack nodes;
private PostOrderIterator() {
getRoot().clearVisited();
nodes = new Stack<>();
nodes.push(getRoot());
}
// methods for Iterator
@Override
public boolean hasNext() {
return !nodes.isEmpty();
}
@Override
public GuideTreeNode next() {
if(!hasNext()){
throw new NoSuchElementException();
}
while (hasNext()) {
Node next = nodes.peek(), child1 = (Node) next.getChild1(), child2 = (Node) next.getChild2();
if (child1 != null && !child1.isVisited()) {
nodes.push(child1);
} else if (child2 != null && !child2.isVisited()) {
nodes.push(child2);
} else {
next.visit();
return nodes.pop();
}
}
return null;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
}