// net.maizegenetics.taxa.tree.NewickUtils.kt
package net.maizegenetics.taxa.tree
import net.maizegenetics.taxa.TaxaList
import org.apache.commons.lang.StringUtils
import org.apache.log4j.Logger
import java.io.BufferedWriter
import java.io.File
import java.lang.Double
import java.util.*
import kotlin.collections.HashMap
// These utilities are related to the Newick tree format.
// Logger shared by the functions in this file.
private val myLogger = Logger.getLogger("net.maizegenetics.taxa.tree.NewickUtils")
/**
 * Creates a [Tree] from the given Newick formatted file.
 *
 * All lines of the file are concatenated (line breaks are dropped) and
 * surrounding whitespace is trimmed before parsing.
 *
 * @param filename path of the Newick file to read
 * @return the parsed tree
 * @throws IllegalArgumentException if the number of '(' and ')' differ, or
 * if the content (including an empty file) does not end with a semicolon
 */
fun read(filename: String): Tree {

    val newick = File(filename).readLines().joinToString(separator = "").trim()

    require(newick.count { it == '(' } == newick.count { it == ')' }) {
        "NewickUtils: read: $filename: number of open parenthesis doesn't match number of close parenthesis."
    }

    // lastOrNull() so that an empty file is reported as a format error
    // instead of surfacing as a NoSuchElementException.
    require(newick.lastOrNull() == ';') {
        "NewickUtils: read: $filename: doesn't end with semicolon."
    }

    // Drop the terminating semicolon before parsing the root node.
    return SimpleTree(makeNode(newick.dropLast(1)))

}
/**
 * Creates a node of the tree and recursively creates the children of that node.
 *
 * @param newick Newick text of a single node: an optional parenthesised,
 * comma separated child list followed by an optional name and an optional
 * ":branchLength"
 * @return the node described by [newick], with its children attached
 * @throws NumberFormatException if the text after ':' is not a number
 */
private fun makeNode(newick: String): Node {

    // Everything after the last ')' is "name:branchLength" of this node.
    // For a leaf (no parenthesis) this is the whole string.
    val nameBranchLength = newick.substringAfterLast(')')

    // The node name comes before the :. If no : present, this is the name
    val rawName = nameBranchLength.substringBefore(':').replace("'", "")
    val name = if (rawName.toDoubleOrNull() != null) {
        // Names that are numbers are changed to empty string
        // These are support values from 0-100, that can be ignored for our purposes
        ""
    } else {
        // Newick format considers underscores to be spaces
        rawName.replace("_", " ")
    }

    val branchLength = if (nameBranchLength.contains(':')) nameBranchLength.substringAfter(':').toDouble() else 0.0
    val result = SimpleNode(name, branchLength)

    // Differs only when the text contains a ')' i.e. this node has children.
    if (nameBranchLength != newick) {

        // Nesting depth relative to this node's own child list.
        var parenthesisCount = 0
        var currentStr = StringBuilder()

        newick.substringAfter('(').substringBeforeLast(')').forEach { ch ->
            when (ch) {
                '(' -> {
                    parenthesisCount++
                    currentStr.append(ch)
                }
                ')' -> {
                    parenthesisCount--
                    currentStr.append(ch)
                }
                ',' -> {
                    if (parenthesisCount == 0) {
                        // A top-level comma ends the current child.
                        result.addChild(makeNode(currentStr.toString()))
                        currentStr = StringBuilder()
                    } else {
                        // Comma belongs to a nested child list.
                        currentStr.append(ch)
                    }
                }
                else -> currentStr.append(ch)
            }
        }

        // The last child is not followed by a comma.
        if (currentStr.isNotEmpty()) result.addChild(makeNode(currentStr.toString()))

    }

    return result

}
/**
 * Writes the given tree to a Newick formatted file.
 *
 * @param filename path of the file to write
 * @param tree tree to write
 * @param includeBranchLengths whether non-zero branch lengths are written
 * @throws IllegalStateException if anything goes wrong while writing; the
 * original exception is attached as the cause
 */
fun write(filename: String, tree: Tree, includeBranchLengths: Boolean = true) {

    try {
        File(filename).bufferedWriter().use { writer ->
            write(tree.root, writer, includeBranchLengths)
            // NOTE(review): terminator reconstructed; read() in this file
            // rejects content that does not end with ';'.
            writer.append(";\n")
        }
    } catch (e: Exception) {
        myLogger.debug(e.message, e)
        throw IllegalStateException("NewickUtils: write: problem writing: $filename.\n${e.message}", e)
    }

}
/**
 * Recursively writes a node and its children to the writer.
 *
 * @param node node to write
 * @param writer destination
 * @param includeBranchLengths whether non-zero branch lengths are written
 */
private fun write(node: Node, writer: BufferedWriter, includeBranchLengths: Boolean) {

    // NOTE(review): the writer.append calls were lost from the source this
    // was recovered from; they are reconstructed from the Newick grammar and
    // from what makeNode() in this file accepts. Confirm against upstream.
    if (!node.isLeaf) {
        writer.append("(")
        for (i in 0 until node.childCount) {
            if (i != 0) writer.append(",")
            write(node.getChild(i), writer, includeBranchLengths)
        }
        writer.append(")")
    }

    node.identifier?.name?.let {
        if (it.isNotEmpty()) {
            // Newick represents spaces in names as underscores.
            val name = it.replace(' ', '_')
            writer.append("'").append(name).append("'")
        }
    }

    if (includeBranchLengths && node.branchLength != 0.0) {
        // Locale.ROOT guarantees a '.' decimal separator regardless of the
        // default locale; a ',' would be read back as a child separator.
        val lengthStr = String.format(Locale.ROOT, "%.7f", node.branchLength)
            .trimEnd('0')
            .trimEnd('.')
        writer.append(":").append(lengthStr)
    }

}
// Key under which mergeTrees stores the unnamed root node it creates to join trees.
private const val MERGE_ROOT_NODE = "MERGE_ROOT_NODE"
/**
 * This merges two or more trees into one tree.
 * Nodes are merged when the names are the same.
 *
 * Example:
 * (AA:2,BB:2,CC:2);
 * merged with
 * (XX:2,YY:2,ZZ:2)AA;
 * produces a tree equivalent to
 * ((XX:2,YY:2,ZZ:2)AA:2,BB:2,CC:2);
 *
 * @param trees the trees to merge; at least two are required
 * @return a new tree built from copies of the input nodes
 * @throws IllegalArgumentException if fewer than two trees are supplied, or
 * if a named node has differently named parents in different trees
 */
fun mergeTrees(trees: List<Tree>): Tree {

    require(trees.size >= 2) { "NewickUtils: mergeTrees: must supply at least 2 trees." }

    // Named nodes of the merged tree, by name.
    val nameToNode = HashMap<String, Node>()
    // Maps each input node (by identity) to its node in the merged tree.
    val nodeToNode = IdentityHashMap<Node, Node>()

    lateinit var rootNode: Node

    trees.forEach { tree ->

        val nodes = tree.nodes()

        nodes.forEach { node ->

            // NOTE(review): initializer reconstructed; the source this was
            // recovered from had lost the right-hand side.
            val nodeName = node.identifier?.name ?: ""
            val existingNode = nameToNode[nodeName]
            val existingParentNode = nodeToNode[node.parent]
            val existingParentName = existingParentNode?.identifier?.name
            val parentName = node.parent?.identifier?.name

            if (nodeToNode.isEmpty()) {
                // Very first node seen: it becomes the initial root.
                rootNode = SimpleNode(nodeName, node.branchLength)
                nodeToNode[node] = rootNode
                if (nodeName.isNotEmpty()) nameToNode[nodeName] = rootNode
            } else if (existingNode != null) {
                // A node with this name already exists in the merged tree.
                if (existingParentName != null && parentName != null && existingParentName != parentName) {
                    throw IllegalArgumentException("NewickUtils: mergeTrees: node: $nodeName has different parents ($parentName, ${existingParentName}) in different trees.")
                }
                if (existingNode.branchLength != node.branchLength) {
                    myLogger.warn("mergeTrees: nodes named: $nodeName in different trees have different branch lengths.")
                }
                nodeToNode[node] = existingNode
            } else if (existingParentNode == null) {
                // New node whose parent is not in the merged tree: hang it
                // (and the current root) under a shared unnamed merge root.
                // NOTE(review): branch results and addChild reconstructed.
                val mergeRoot = nameToNode[MERGE_ROOT_NODE] ?: SimpleNode("", 0.0).also {
                    nameToNode[MERGE_ROOT_NODE] = it
                }
                if (rootNode != mergeRoot) mergeRoot.addChild(rootNode)
                val newChild = SimpleNode(nodeName, node.branchLength)
                mergeRoot.addChild(newChild)
                nodeToNode[node] = newChild
                if (nodeName.isNotEmpty()) nameToNode[nodeName] = newChild
                rootNode = mergeRoot
            } else {
                // New node whose parent is already in the merged tree.
                // NOTE(review): addChild reconstructed.
                val newChild = SimpleNode(nodeName, node.branchLength)
                existingParentNode.addChild(newChild)
                nodeToNode[node] = newChild
                if (nodeName.isNotEmpty()) nameToNode[nodeName] = newChild
            }

        }

    }

    return SimpleTree(rootNode)

}
/**
 * Extension function that returns all nodes of the tree.
 *
 * @return a new list filled by [addChildren], starting from the root
 */
fun Tree.nodes(): List<Node> {
    val result = mutableListOf<Node>()
    addChildren(result, this.root)
    return result
}
/**
 * Recursively adds the given node and its descendants to the list.
 * A node is added before its children.
 *
 * @param nodes list that receives the nodes
 * @param node root of the subtree to add
 */
private fun addChildren(nodes: MutableList<Node>, node: Node) {
    // NOTE(review): this add was lost from the source this was recovered
    // from; without it the list would stay empty.
    nodes.add(node)
    for (i in 0 until node.childCount) {
        addChildren(nodes, node.getChild(i))
    }
}
/**
 * Returns a tree that is a subset of the
 * original containing only the taxa specified.
 *
 * @param tree original tree
 * @param taxaList taxa whose names select the leaves to keep
 * @return the subset tree
 */
fun subsetTree(tree: Tree, taxaList: TaxaList): Tree {
    // NOTE(review): initializer reconstructed (only "{ }" survived);
    // assumes TaxaList iterates taxa exposing `name` — confirm.
    val nameList = taxaList.map { it.name }
    return subsetTree(tree, nameList)
}
/**
 * Returns a tree that is a subset of the
 * original containing only the names specified.
 *
 * @param tree original tree
 * @param namesToKeep names of the leaves to keep
 * @return a tree built from the nodes retained by [keepNode]
 */
fun subsetTree(tree: Tree, namesToKeep: List<String>): Tree {
    return SimpleTree(keepNode(tree.root, namesToKeep))
}
/**
 * Recursively iterates over the tree looking for nodes
 * that are specified by the list of names.
 *
 * @param node root of the subtree to filter
 * @param namesToKeep names of the leaves to keep
 * @return a copy of the retained subtree, or null if nothing below
 * [node] is kept
 */
private fun keepNode(node: Node, namesToKeep: List<String>): Node? {

    if (node.isLeaf) {
        // A leaf is kept only when it has a name that is in the list.
        val leafName = node.identifier?.name ?: return null
        return if (namesToKeep.contains(leafName)) SimpleNode(leafName, node.branchLength) else null
    }

    val childNodesToKeep = mutableListOf<Node>()
    for (i in 0 until node.childCount) {
        keepNode(node.getChild(i), namesToKeep)?.let { childNodesToKeep.add(it) }
    }

    return when (childNodesToKeep.size) {
        0 -> null
        1 -> {
            // Collapse a node with a single surviving child: the child
            // absorbs this node's branch length and takes its place.
            val onlyChild = childNodesToKeep[0]
            onlyChild.branchLength += node.branchLength
            onlyChild
        }
        else -> {
            val parent = SimpleNode(node.identifier?.name, node.branchLength)
            childNodesToKeep.forEach { parent.addChild(it) }
            parent
        }
    }

}
/**
 * This removes the branch lengths from
 * the given tree.
 *
 * @param tree original tree
 * @return a new tree built from the copy of the root made by the
 * node-level overload
 */
fun removeBranchLengths(tree: Tree): Tree {
    return SimpleTree(removeBranchLengths(tree.root))
}
/**
 * Recursively iterates over the tree removing
 * branch lengths.
 *
 * @param node root of the subtree to copy
 * @return a copy of [node] and its descendants with branch length 0.0
 */
private fun removeBranchLengths(node: Node): Node {
    val result = SimpleNode(node.identifier?.name, 0.0)
    for (i in 0 until node.childCount) {
        // NOTE(review): loop body reconstructed; it was lost from the
        // source this was recovered from.
        result.addChild(removeBranchLengths(node.getChild(i)))
    }
    return result
}
/**
 * This converts tree node names based on the
 * conversion file specified.
 *
 * Each line of the file must hold the current name and the new name
 * separated by a tab. Underscores in both names are treated as spaces.
 *
 * @param tree original tree
 * @param filename path of the tab separated conversion file
 * @return a new tree with the converted names
 * @throws IllegalArgumentException if a line does not have exactly two
 * tab separated fields
 */
fun convertNames(tree: Tree, filename: String): Tree {

    val conversions = mutableMapOf<String, String>()

    // forEachLine closes the underlying reader, including when a
    // malformed line makes the lambda throw.
    File(filename).forEachLine { line ->
        val temp = line.split("\t")
        if (temp.size != 2) {
            throw IllegalArgumentException("NewickUtils: convertNames: each line of file: $filename should have two names separated by a tab.\nOffending line: $line")
        }
        conversions[temp[0].trim().replace('_', ' ')] = temp[1].trim().replace('_', ' ')
    }

    return SimpleTree(convertNames(tree.root, conversions))

}
/**
 * Recursively iterates over the tree changing node names
 * that are specified in the map.
 *
 * @param node root of the subtree to copy
 * @param conversions current name to new name
 * @return a copy of [node] and its descendants; names found in
 * [conversions] are replaced, other names and all branch lengths are kept
 */
private fun convertNames(node: Node, conversions: Map<String, String>): Node {
    val name = conversions[node.identifier?.name] ?: node.identifier?.name
    // Keep the original branch length: renaming must not alter the tree's
    // geometry (previously every copied node was given length 0.0).
    val result = SimpleNode(name, node.branchLength)
    for (i in 0 until node.childCount) {
        result.addChild(convertNames(node.getChild(i), conversions))
    }
    return result
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy