au.csiro.variantspark.utils.FactorVariable.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of variant-spark_2.11 Show documentation
Show all versions of variant-spark_2.11 Show documentation
Genomic variants interpretation toolkit
The newest version!
package au.csiro.variantspark.utils
/**
 * An array of integer category codes together with an optional selection of positions.
 *
 * @param values      category code per position; each code read by [[counts]] is used as an
 *                    index into an array of length `nCategories`, so it must lie in
 *                    `[0, nCategories)` or the lookup throws
 * @param nCategories length of the array returned by [[counts]]
 * @param subset      positions of `values` to consider; `None` means every position
 */
case class FactorVariable(values: Array[Int], nCategories: Int,
    subset: Option[Array[Int]] = None) {

  /** Positions of `values` currently selected: the subset if present, otherwise all of them. */
  def indices: Iterator[Int] = subset match {
    case Some(selected) => selected.iterator
    case None => values.indices.iterator
  }

  /**
   * Tallies how many selected positions carry each category code.
   *
   * @return a fresh array of length `nCategories`; element `c` is the number of selected
   *         positions whose value equals `c`
   */
  def counts: Array[Int] = {
    val tally = Array.fill(nCategories)(0)
    for (position <- indices) {
      tally(values(position)) += 1
    }
    tally
  }

  /**
   * Returns a factor over the same `values` and `nCategories`, restricted to `indexes`.
   * The given positions replace any existing subset rather than being combined with it.
   */
  def apply(indexes: Array[Int]): FactorVariable =
    copy(subset = Some(indexes))
}
/** Factory and label-counting helpers for [[FactorVariable]]. */
object FactorVariable {

  /**
   * Builds a factor over all of `values`, taking the number of categories to be the largest
   * code plus one.
   *
   * An empty array yields a factor with zero categories; calling `max` on an empty array
   * would otherwise throw `UnsupportedOperationException`.
   *
   * @param values category code per position
   */
  def apply(values: Array[Int]): FactorVariable =
    apply(values, if (values.isEmpty) 0 else values.max + 1)

  /**
   * Counts how often each label occurs among the selected positions.
   *
   * @param currentSet positions of `labels` to inspect
   * @param labels     label per position; each inspected label is used as an index into the
   *                   result, so it must lie in `[0, labelCount)` or the lookup throws
   * @param labelCount length of the returned array
   * @return a fresh array whose element `c` is the number of positions in `currentSet`
   *         labelled `c`
   */
  def classCounts(currentSet: Array[Int], labels: Array[Int], labelCount: Int): Array[Int] = {
    val labelCounts = Array.fill(labelCount)(0)
    currentSet.foreach(i => labelCounts(labels(i)) += 1)
    labelCounts
  }

  /**
   * Picks a label from the per-label counts of the selected positions by delegating to
   * `ArraysUtils.maxIndex`.
   *
   * NOTE(review): presumably this is the index of the largest count (the most frequent
   * label); tie-breaking and empty-input behaviour are whatever `ArraysUtils.maxIndex`
   * defines - confirm against that helper.
   */
  def labelMode(currentSet: Array[Int], labels: Array[Int], labelCount: Int): Int = {
    val labelCounts = classCounts(currentSet, labels, labelCount)
    ArraysUtils.maxIndex(labelCounts)
  }
}