All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.brapi.BrAPIConnection.kt Maven / Gradle / Ivy

package net.maizegenetics.analysis.brapi

import khttp.get
import khttp.responses.Response
import kotlinx.serialization.json.*
import net.maizegenetics.dna.factor.FeatureTable
import net.maizegenetics.dna.factor.FeatureTableBuilder
import net.maizegenetics.dna.map.Chromosome
import net.maizegenetics.dna.map.GenomicFeature
import net.maizegenetics.dna.map.GenomicFeatureList
import net.maizegenetics.gui.basicLoggingInfo
import net.maizegenetics.taxa.TaxaList
import net.maizegenetics.taxa.TaxaListBuilder
import net.maizegenetics.taxa.Taxon
import net.maizegenetics.util.setupDebugLogging
import org.apache.logging.log4j.LogManager
import kotlin.system.measureNanoTime


class BrAPIConnection(val baseURL: String) {

    private val logger = LogManager.getLogger(BrAPIConnection::class.java)

    /**
     * Returns list of variant sets available from BrAPI Server.
     *
     * Example: http://cbsudc01.biohpc.cornell.edu/brapi/v2/variantsets
     */
    fun getVariantsets(): List? {

        val url = "$baseURL/variantsets"
        logger.info("getVariantsets: query: $url")
        val json = Json.parseToJsonElement(get(url).text)

        val variantSetsArray = json.jsonObject["result"]?.jsonObject?.get("data")?.jsonArray

        return variantSetsArray
                ?.map { variantset(it.jsonObject) }
                ?.toList()

    }

    /**
     * Returns the variant set information for the given id.
     *
     * Example: http://cbsudc01.biohpc.cornell.edu/brapi/v2/variantsets/Ames_MergedReadMapping_AllLines_Haploid
     */
    fun getVariantset(id: String): VariantSet? {

        val url = "$baseURL/variantsets/$id"
        logger.info("getVariantset: query: $url")
        val json = Json.parseToJsonElement(get(url).text)

        return json.jsonObject["result"]?.jsonObject?.let {
            variantset(it)
        }

    }

    private fun variantset(json: JsonObject): VariantSet {
        return VariantSet(json["variantSetDbId"].toString(), json["variantCount"].toString().toInt(), json["callSetCount"].toString().toInt())
    }

    /**
     * Returns a list of genomic features (i.e. position ranges))
     * for the given variant set id.
     *
     * Example: http://cbsudc01.biohpc.cornell.edu/brapi/v2/variantsets/Ames_MergedReadMapping_AllLines_Haploid/variants
     */
    fun getVariants(id: String): GenomicFeatureList {

        val url = "$baseURL/variantsets/$id/variants"
        logger.info("getVariants: query: $url")
        val json = Json.parseToJsonElement(get(url, timeout = 0.0).text)

        val builder = GenomicFeatureList.Builder()
        json.jsonObject["result"]?.jsonObject?.get("data")?.jsonArray
                ?.forEach { builder.add(genomicFeature(it.jsonObject)) }

        val result = builder.build()

        logger.info("getVariants: number of features: ${result.size}")

        return result

    }

    private fun genomicFeature(json: JsonElement): GenomicFeature {
        val chromosome = Chromosome.instance(getStringAttribute(json, "referenceName"))
        val startPos = json.jsonObject["start"].toString().toInt()
        val endPos = json.jsonObject["end"].toString().toInt()
        val name = getStringFirstArrayElement(json, "variantNames")
        val id = getStringAttribute(json, "variantDbId")
        return GenomicFeature(chromosome, startPos, chromosome, endPos, name, id)
    }

    /**
     * Returns a list of taxa for the given variant set id.
     *
     * Example: http://cbsudc01.biohpc.cornell.edu/brapi/v2/variantsets/Ames_MergedReadMapping_AllLines_Haploid/callsets
     */
    fun getCallsets(id: String): TaxaList {

        val url = "$baseURL/variantsets/$id/callsets"
        logger.info("getCallsets: query: $url")
        val json = Json.parseToJsonElement(get(url, timeout = 0.0).text)

        val builder = TaxaListBuilder()
        json.jsonObject["result"]?.jsonObject?.get("data")?.jsonArray
                ?.forEach { builder.add(taxon(it)) }

        val result = builder.build()

        logger.info("getCallsets: number of taxa: ${result.numberOfTaxa()}")

        return result

    }

    private fun taxon(json: JsonElement): Taxon {
        return Taxon(getStringAttribute(json, "callSetName"))
    }

    private val callsPageSize = 1

    /**
     * Returns a feature table for the given variant set id.
     *
     * Example: http://cbsudc01.biohpc.cornell.edu/brapi/v2/variantsets/Ames_MergedReadMapping_AllLines_Haploid/calls?page=1
     */
    fun getCalls(id: String): FeatureTable {

        val builder = FeatureTableBuilder(getCallsets(id), getVariants(id))

        var url = "$baseURL/variantsets/$id/calls?pageSize=$callsPageSize"
        logger.info("getCalls: query: $url")
        var json = Json.parseToJsonElement(get(url, timeout = 0.0).text)

        val metadata = metadata(json.jsonObject)

        val numPages = metadata["pagination.totalPages"]?.toInt()
                ?: error("BrAPIConnection: getCalls: can't get number of pages.")

        setGenotypes(json, builder)

        (1 until numPages).forEach {
            url = "$baseURL/variantsets/$id/calls?pageSize=$callsPageSize;page=$it"
            logger.info("getCalls: query: $url")
            lateinit var response: Response
            var notFinished = true
            while (notFinished) {
                try {
                    response = get(url, timeout = 0.0)
                    notFinished = false
                } catch (e: Exception) {
                    logger.warn("getCalls: BrAPI Server query failed: ${e.message}")
                }
            }
            json = Json.parseToJsonElement(response.text)
            setGenotypes(json, builder)
        }

        return builder.build()

    }

    private fun setGenotypes(json: JsonElement, builder: FeatureTableBuilder) {
        json.jsonObject["result"]?.jsonObject?.get("data")?.jsonArray
                ?.forEach { genotype ->
                    val taxon = getStringAttribute(genotype, "callSetName")
                    val id = getStringAttribute(genotype, "variantDbId")
                    val genotypeArray = genotype.jsonObject["genotype"]?.jsonObject?.get("values")?.jsonArray
                    check(genotypeArray != null) { "BrAPIConnection: getCalls: genotype values can't be null" }
                    val genotypeValues = genotypeArray.map { it.toString() }
                    builder.set(taxon, id, genotypeValues)
                }
    }

    private fun getStringAttribute(element: JsonElement, attribute: String): String {
        return element.jsonObject[attribute].toString().removePrefix("\"").removeSuffix("\"").trim()
    }

    private fun getStringFirstArrayElement(element: JsonElement, attribute: String): String {
        return element.jsonObject[attribute]?.jsonArray?.get(0).toString().removePrefix("\"").removeSuffix("\"").trim()
    }

}

data class VariantSet(val id: String, val numVariants: Int, val numCallsets: Int)

fun main() {

    setupDebugLogging()
    basicLoggingInfo()

    val connection = BrAPIConnection("http://cbsudc01.biohpc.cornell.edu/brapi/v2")

    //val taxa = connection.getCallsets("MergedReadMapping_AllNamParents_Haploid")
    //taxa.take(100).forEach { println(it) }

    //taxa.forEachIndexed { index, taxon ->
    //    if (taxon.name == "Z001E0104" || taxon.name == "Z001E0103") println("matched: $index  name: ${taxon.name}")
    //}

    //System.exit(0)

    //println(connection.getVariantsets())

    //println(connection.getVariantset("Ames_MergedReadMapping_AllLines_Haploid"))

    //println(connection.getVariants("Ames_MergedReadMapping_AllLines_Haploid").toList())

    //println(connection.getCallsets("Ames_MergedReadMapping_AllLines_Haploid").numberOfTaxa())

    var featureTable: FeatureTable
    val time = measureNanoTime {
        featureTable = connection.getCalls("MergedReadMapping_AllNamParents_Haploid")

        println("num of features: ${featureTable.numFeatures()}")
        println("num of taxa: ${featureTable.numTaxa()}")
    }

    val taxonIndex = featureTable.taxa.indexOf("Z001E0104")
    featureTable.forEach { site ->
        val genotype = site.genotypeAsString(taxonIndex)
        println("taxon: Z001E0104  id: ${site.feature.id}  name: ${site.feature.name}  chr: ${site.feature.startChr}  pos: ${site.feature.startPos}  genotype: $genotype")
    }

    println("time: ${time / 1e9} secs")

}

fun main1() {

    val connection = BrAPIConnection("http://cbsudc01.biohpc.cornell.edu/brapi/v2")

    //val taxa = connection.getCallsets("MergedReadMapping_AllNamParents_Haploid")
    //taxa.take(100).forEach { println(it) }

    //taxa.forEachIndexed { index, taxon ->
    //    if (taxon.name == "Z001E0104" || taxon.name == "Z001E0103") println("matched: $index  name: ${taxon.name}")
    //}

    //System.exit(0)

    //println(connection.getVariantsets())

    //println(connection.getVariantset("Ames_MergedReadMapping_AllLines_Haploid"))

    //println(connection.getVariants("Ames_MergedReadMapping_AllLines_Haploid").toList())

    //println(connection.getCallsets("Ames_MergedReadMapping_AllLines_Haploid").numberOfTaxa())

    var featureTable: FeatureTable
    val time = measureNanoTime {
        featureTable = connection.getCalls("MergedReadMapping_AllNamParents_Haploid")

        println("num of features: ${featureTable.numFeatures()}")
        println("num of taxa: ${featureTable.numTaxa()}")
    }

    val taxonIndex = featureTable.taxa.indexOf("Z001E0104")
    featureTable.forEach { site ->
        site.forEach { alleles ->
            // TODO
        }
        val genotype = site.genotypeAsString(taxonIndex)
        println("taxon: Z001E0104  id: ${site.feature.id}  name: ${site.feature.name}  chr: ${site.feature.startChr}  pos: ${site.feature.startPos}  genotype: $genotype")
    }

    println("time: ${time / 1e9} secs")

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy