
// biokotlin.kegg.Kegg.kt (Maven / Gradle / Ivy)
package biokotlin.kegg
import krangl.DataFrame
import krangl.deparseRecords
/**
 * Convenience entry points for building single KEGG entries and for listing
 * whole KEGG databases.
 *
 * The single-entry functions ([gene], [pathway], [ortholog]) parse their arguments with
 * [KeggEntry.of]; the `all*` functions return the result of the matching [KeggDB] `list` call.
 */
object Kegg {
    /**
     * Create a [KeggGene] based on the db and kid.
     * Follows the rules of [KeggEntry.of] for accessing.
     * E.g. gene("zma:542318"), gene("T01088:542318"), or gene("zma","542318")
     */
    fun gene(vararg dbKid: String): KeggGene = KeggEntry.of(*dbKid).gene()

    /**
     * List the genes of a single organism.
     *
     * @param orgCode organism code (e.g. "zma"); validated with [KeggCache.isOrgCode]
     * @return the data frame produced by listing [KeggDB.genes] for [orgCode]
     * @throws IllegalArgumentException if [orgCode] is rejected by [KeggCache.isOrgCode]
     */
    fun allGenes(orgCode: String): DataFrame {
        require(KeggCache.isOrgCode(orgCode)) { "Org code: '$orgCode' is not valid" }
        return KeggDB.genes.list(orgCode)
    }

    /**
     * Create a [KeggPathway] based on the db and kid.
     * Follows the rules of [KeggEntry.of] for accessing.
     * E.g. pathway("path:zma00500"), pathway("path","zma00500")
     */
    fun pathway(vararg dbKid: String): KeggPathway = KeggEntry.of(*dbKid).pathway()

    /**
     * List pathways, optionally restricted to one organism.
     *
     * @param orgCode organism code to restrict the listing to, or `null` (the default) to
     * list [KeggDB.pathway] with an empty query
     * @return the data frame produced by listing [KeggDB.pathway]
     * @throws IllegalArgumentException if a non-null [orgCode] is rejected by [KeggCache.isOrgCode]
     */
    fun allPathways(orgCode: String? = null): DataFrame {
        // A null code means "no organism filter", so only non-null codes are validated.
        require(orgCode == null || KeggCache.isOrgCode(orgCode)) { "Org code: '$orgCode' is not valid" }
        return KeggDB.pathway.list(orgCode.orEmpty())
    }

    /**
     * Create a [KeggOrtholog] based on the db and kid.
     * Follows the rules of [KeggEntry.of] for accessing.
     * E.g. ortholog("ko:K01214"), ortholog("K01214")
     */
    fun ortholog(vararg dbKid: String): KeggOrtholog = KeggEntry.of(*dbKid).ortholog()

    /**
     * List the [KeggDB.orthology] database using an empty query.
     *
     * @return the data frame produced by listing [KeggDB.orthology]
     */
    fun allOrthologs(): DataFrame = KeggDB.orthology.list("")
}
/**
 * The set of supported Kegg DBs along with their [abbr] and [kidPrefix].
 * Kegg data is generally accessed through a combination of `db:kid` (e.g. "path:zma00500"),
 * where each kid starts with a prefix.
 *
 * Organism specific data is prefaced by an organism code (e.g. "zma:542318").
 *
 * The entry names are sent to the server as-is (see [info], [find] and [list]), so they must
 * not be renamed.
 *
 * @property abbr abbreviation of the database; empty for [genes]
 * @property kidPrefix prefixes that a kid belonging to this database may start with
 */
enum class KeggDB(val abbr: String, val kidPrefix: List<String>) {
    /**KEGG pathway maps*/
    pathway("path", listOf("map", "ko", "ec", "rn", "")),

    /**BRITE functional hierarchies*/
    brite("br", listOf("br", "jp", "ko", "")),

    /**KEGG modules*/
    module("md", listOf("M", "_M")),

    /**KO functional orthologs*/
    orthology("ko", listOf("K")),

    /**KEGG organisms*/
    genome("gn", listOf("T")),

    /**Genes in KEGG organisms - composite DBs - one for each species plus vg for virus*/
    genes("", emptyList()),

    /**Small molecules*/
    compound("cpd", listOf("C")),

    /**Glycan*/
    glycan("gl", listOf("G")),

    /**Biochemical reactions*/
    reaction("rn", listOf("R")),

    /**Reaction class*/
    rclass("rc", listOf("RC")),

    /**Enzyme nomenclature*/
    enzyme("ec", emptyList()),

    /**Network elements*/
    network("ne", listOf("N")),

    // Not yet supported:
    // variant,
    // disease, drug, dgroup, environ, ligand,

    /**Kegg all the DBs*/
    kegg("kegg", emptyList()),

    /**Organisms covered by the KEGG - not official DB*/
    organism("org", emptyList())
    ;

    /**Provides basic statistics on the Kegg Database*/
    fun info(): String = KeggServer.query(KeggOperations.info, this.name)

    /**
     * Runs a `find` operation for [query] against this database.
     *
     * The response is split into lines and each line into tab-separated fields; only lines
     * with exactly two fields are kept.
     *
     * @param query the search text passed to the server unchanged
     * @return a data frame with the columns `kid` and `name`
     */
    fun find(query: String): DataFrame {
        return KeggServer.query(KeggOperations.find, this.name, query).lines()
            .map { it.split("\t") }
            .filter { it.size == 2 } // drops non-record lines (the response has an EOF line)
            .deparseRecords { mapOf("kid" to it[0], "name" to it[1]) }
        //TODO might want to provide KeggEntry column
    }

    /**
     * Runs a `list` operation for this database.
     *
     * The query sent to the server is:
     * - [query] itself when this is [genes],
     * - the entry name when [query] is empty,
     * - `name/query` otherwise.
     *
     * Only lines with two or four tab-separated fields are kept, and only the first two
     * fields of each line are used.
     *
     * @param query organism code or other restriction; may be empty
     * @return a data frame with the columns `dbKid` and `name`
     */
    fun list(query: String): DataFrame {
        val adjQuery = when {
            this == genes -> query
            query.isEmpty() -> this.name
            else -> "${this.name}/$query"
        }
        return KeggServer.query(KeggOperations.list, adjQuery).lines()
            .map { it.split("\t") }
            //
            // Add it.size == 4 to handle the case where there are more columns in the output.
            // I don't think the KEGG API has a standardized specification for the output of the list command.
            // Example: https://rest.kegg.jp/list/zma
            //
            .filter { it.size == 2 || it.size == 4 } // drops non-record lines (the response has an EOF line)
            .deparseRecords { mapOf("dbKid" to it[0], "name" to it[1]) }
    }

    /**
     * Query the Kegg Database and provide the raw string response.
     *
     * The [query] is passed to the `get` operation unchanged; the receiver entry is not
     * added to it.
     */
    fun getText(query: String): String = KeggServer.query(KeggOperations.get, query)

    companion object {
        /**
         * Map to look up the [KeggDB] based on the KID prefix.
         *
         * Prefixes declared by more than one entry ("ko" and "" appear in both [pathway] and
         * [brite]) resolve to the entry declared last, because later pairs overwrite earlier
         * ones in `toMap()`.
         */
        internal val prefixToDB: Map<String, KeggDB> = values()
            .flatMap { db -> db.kidPrefix.map { it to db } }.toMap()

        /**
         * Map to look up the [KeggDB] based on the database abbreviation.
         * Does not include the organism DBs; the empty abbreviation maps to [genes].
         */
        private val abbrToDB: Map<String, KeggDB> = values().associateBy { it.abbr }

        /**Evaluates whether the database abbreviation is valid*/
        fun validDBAbbr(abbr: String): Boolean = abbrToDB.containsKey(abbr)

        /**
         * Resolves a database abbreviation or organism code to its [KeggDB].
         *
         * @param abbr a database abbreviation, or an organism code accepted by [KeggCache.isOrgCode]
         * @return the entry registered for [abbr], or [genes] when [abbr] is an organism code
         * @throws IllegalStateException if [abbr] is neither a known abbreviation nor an organism code
         */
        fun getKeggDB(abbr: String): KeggDB {
            return abbrToDB.getOrElse(abbr) {
                if (KeggCache.isOrgCode(abbr)) genes
                else error("Kegg Entry abbreviation $abbr is invalid")
            }
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy