All downloads are free. The search and download functionality uses the official Maven repository.

demo.DemoDistanceTree Maven / Gradle / Ivy

The newest version!
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package demo;

import java.io.InputStream;
import java.util.LinkedHashMap;
import java.util.Map;

import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.MultipleSequenceAlignment;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.io.FastaReader;
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
import org.biojava.nbio.phylo.DistanceMatrixCalculator;
import org.biojava.nbio.phylo.DistanceTreeEvaluator;
import org.biojava.nbio.phylo.ForesterWrapper;
import org.biojava.nbio.phylo.TreeConstructor;
import org.biojava.nbio.phylo.TreeConstructorType;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.matrix.distance.DistanceMatrix;
import org.forester.phylogeny.Phylogeny;

/**
 * This demo contains the CookBook example to create a phylogenetic tree from a
 * multiple sequence alignment (MSA).
 *
 * @author Scooter Willis
 * @author Aleix Lafita
 *
 */
public class DemoDistanceTree {

	/**
	 * Runs the demo: loads an example protein MSA from a FASTA classpath
	 * resource, computes a dissimilarity-score distance matrix with
	 * BLOSUM62, builds a distance tree with the NJ algorithm, prints the
	 * tree as a Newick string together with the elapsed time, and finally
	 * prints the evaluation score of the tree as a percentage.
	 *
	 * @param args
	 *            command line arguments (not used)
	 * @throws Exception
	 *             if the example alignment cannot be found or read, or if
	 *             any of the tree construction steps fails
	 */
	public static void main(String[] args) throws Exception {

		// 0. This is just to load an example MSA from a FASTA file
		Map<String, ProteinSequence> proteinSequences;

		// try-with-resources closes the stream even if parsing fails
		try (InputStream inStream = TreeConstructor.class
				.getResourceAsStream("/PF00104_small.fasta")) {

			// getResourceAsStream signals a missing resource with null
			if (inStream == null) {
				throw new IllegalStateException(
						"Example alignment /PF00104_small.fasta "
								+ "was not found on the classpath");
			}

			FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
					new FastaReader<>(
					inStream,
					new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
					new ProteinSequenceCreator(AminoAcidCompoundSet
							.getAminoAcidCompoundSet()));

			proteinSequences = fastaReader.process();
		}

		MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound> msa =
				new MultipleSequenceAlignment<>();

		for (ProteinSequence proteinSequence : proteinSequences.values()) {
			msa.addAlignedSequence(proteinSequence);
		}

		// Start of the timed section: everything after the MSA is loaded
		long readT = System.currentTimeMillis();

		// 1. Calculate the evolutionary distance matrix (can take long)
		SubstitutionMatrix<AminoAcidCompound> blosum62 =
				SubstitutionMatrixHelper.getBlosum62();
		DistanceMatrix distances = DistanceMatrixCalculator
				.dissimilarityScore(msa, blosum62);

		// 2. Construct a distance tree using the NJ algorithm
		Phylogeny phylo = TreeConstructor.distanceTree(
				(BasicSymmetricalDistanceMatrix) distances,
				TreeConstructorType.NJ);

		// The reported time covers both the distance matrix and the tree
		long treeT = System.currentTimeMillis();
		// NOTE(review): the boolean presumably toggles writing branch
		// distances into the Newick output - confirm in ForesterWrapper
		String newick = ForesterWrapper.getNewickString(phylo, true);
		System.out.println(newick);
		System.out.println("Tree Construction: " + (treeT - readT) + " ms.");

		// 3. Evaluate the goodness of fit of the tree
		double cv = DistanceTreeEvaluator.evaluate(phylo, distances);
		// The cast truncates the percentage towards zero
		System.out.println("CV of the tree: " + (int) (cv * 100) + " %");

	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy