All Downloads are FREE. Search and download functionalities are using the official Maven repository.

biokotlin.genome.GetCovIDFromMAFMultiThread.kt Maven / Gradle / Ivy

package biokotlin.genome

import biokotlin.util.bufferedReader
import kotlinx.coroutines.*
import kotlinx.coroutines.channels.Channel
import java.io.File
import java.nio.file.Files

/**
 * This class has methods which create coverage and identity counts
 * for a list of mafFiles.
 *
 * THe main function called by a user is getCoverageAndIdentityFromMAFS
 * coverage/identity is computed for a specific contig found in the MAF files
 *
 */
class GetCovIDFromMAFMultiThread {
    private val resultChannel = Channel>()

    fun getCoverageAndIdentityFromMAFs(
        userContig: String,
        start: Int,
        end: Int,
        mafDir: String
    ): Pair {

        val startTime = System.nanoTime()
        // get list of .maf files from the user provided folder
        val mafFiles = File(mafDir).walk()
            .filter { item -> Files.isRegularFile(item.toPath()) }
            .filter { item -> item.toString().endsWith(".maf") }
            .toList()

        val jobs = mutableListOf()
        for (mafFile in mafFiles) {
            jobs.add(CoroutineScope(Dispatchers.IO).launch {
                readFile(mafFile,start, end, userContig)
            })
        }

        CoroutineScope(Dispatchers.Default).launch {
            jobs.forEach { it.join() }
            resultChannel.close()
            val totalTime = (System.nanoTime() - startTime)/1e9
            println("getCoverageAndIdentityFromMAFs-MF: processing to closing result channel took: ${totalTime} seconds")
        }

        val totalTime = (System.nanoTime() - startTime)/1e9
        println("getCoverageAndIdentityFromMAFs-MF: outside the loop, before combineResults: ${totalTime} seconds")
        return runBlocking { combineResults(start,end) }

    }

    /**
     * This function takes a user contig,  start/stop positions which are 1-based and inclusive/inclusive
     * and a MAF file.
     * From this input it creates 2 IntArrays which are returned as a Pair of 
     */
    private suspend fun readFile(file: File, start:Int, end:Int, userContig:String) {

        val userSpan = (start..end)
        val coverage = IntArray(userSpan.count())
        val identity = IntArray(userSpan.count())
        bufferedReader(file.toString()).use { reader ->

            var mafBlock = readMafBlock(reader)
            while (mafBlock != null) {
                // filter the strings, only keep the "s" lines
                val filteredMafBlock = mafBlock.filter { it.startsWith("s")}
                // the first entry should be the ref
                val refData = filteredMafBlock.get(0)
                // Maf files are white space separated - could be tabs or spaces
                val refSplitLine = refData.split("\\s+".toRegex())
                val alignContig = refSplitLine[1]
                val refStart = refSplitLine[2].toInt()
                val refSize = refSplitLine[3].toInt()

                // Determine if the alignment ref contig matches the user specified contig,
                // and if the alignment overlaps the user requested positions.
                var skip = false
                if ((refStart+1 > end) || (refStart + refSize < start) || (!alignContig.equals(userContig))) {
                    skip = true
                }
                if (!skip) {
                    // process the counts
                    calculateCoverageAndIdentity(filteredMafBlock, coverage, identity, userSpan)
                }
                mafBlock = readMafBlock(reader)
            }
        }

        val result: Pair = Pair(coverage, identity)

        resultChannel.send(result)

    }


    // This method takes all the coverage and identity counts from the individually
    // read files and concatenates them into 1 return result.
    private suspend fun combineResults(start:Int, end:Int): Pair {

        val startTime = System.nanoTime()
        val userSpan = (start..end)
        val coverage = IntArray(userSpan.count())
        val identity = IntArray(userSpan.count())

        for (result in resultChannel) {

            var resultCov = result.first
            var resultId = result.second
            for (idx in 0 until userSpan.count()) {
                coverage[idx] += resultCov[idx]
                identity[idx] += resultId[idx]
            }
        }
        val combined: Pair = Pair(coverage, identity)
        // This time is close to the amount of time it takes to run all the MAFs because
        // it starts as soon as the first result file appears, or rather as soon as we
        // exit the for-loop, which is nearly immediately.  A better judge of time is
        // a calling method that times how long it takes the call to this class's
        // getCoverageAndIdentityFromMAFs() function
        val totalTime = (System.nanoTime() - startTime)/1e9
        println("combineResults-MF: finished  combineResults in : ${totalTime} seconds")
        return combined

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy