commonMain.jetbrains.datalore.plot.base.stat.Bin2dStat.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lets-plot-common Show documentation
Show all versions of lets-plot-common Show documentation
Lets-Plot JVM package without rendering part
/*
* Copyright (c) 2020. JetBrains s.r.o.
* Use of this source code is governed by the MIT license that can be found in the LICENSE file.
*/
package jetbrains.datalore.plot.base.stat
import jetbrains.datalore.base.gcommon.collect.ClosedRange
import jetbrains.datalore.plot.base.Aes
import jetbrains.datalore.plot.base.DataFrame
import jetbrains.datalore.plot.base.StatContext
import jetbrains.datalore.plot.base.data.TransformVar
import jetbrains.datalore.plot.base.util.MutableDouble
import jetbrains.datalore.plot.common.data.SeriesUtil
import jetbrains.datalore.plot.common.data.SeriesUtil.ensureApplicableRange
import jetbrains.datalore.plot.common.data.SeriesUtil.expand
import jetbrains.datalore.plot.common.data.SeriesUtil.isSubTiny
import kotlin.math.floor
/**
* Default stat for geom_bin2d
*
* @param binCountX Number of bins (overridden by binWidth).
* @param binCountY Number of bins (overridden by binWidth).
* @param binWidthX Used to compute binCount such that the bins cover the range of the data.
* @param binWidthY Used to compute binCount such that the bins cover the range of the data.
* @param drop if TRUE removes all cells with 0 counts.
*
* Computed values:
*
* count - number of points in bin
* density - density of points in bin, scaled to integrate to 1
* ncount - count, scaled to maximum of 1
* ndensity - density, scaled to maximum of 1
*/
class Bin2dStat(
    binCountX: Int = DEF_BINS,
    binCountY: Int = DEF_BINS,
    binWidthX: Double? = DEF_BINWIDTH,
    binWidthY: Double? = DEF_BINWIDTH,
    private val drop: Boolean = DEF_DROP
) : BaseStat(DEF_MAPPING) {
    private val binOptionsX = BinStatUtil.BinOptions(binCountX, binWidthX)
    private val binOptionsY = BinStatUtil.BinOptions(binCountY, binWidthY)

    /**
     * Aesthetics read by this stat: X, Y and WEIGHT.
     */
    override fun consumes(): List<Aes<*>> {
        return listOf(Aes.X, Aes.Y, Aes.WEIGHT)
    }

    /**
     * Bins the X/Y values of [data] into a rectangular grid and returns a data frame with
     * the bin centers ([Stats.X], [Stats.Y]), the weighted count per bin ([Stats.COUNT])
     * and the per-bin density ([Stats.DENSITY]).
     *
     * Returns the empty stat data frame when the required X/Y values are missing or when
     * the stat context provides no overall X or Y range.
     */
    override fun apply(data: DataFrame, statCtx: StatContext, messageConsumer: (s: String) -> Unit): DataFrame {
        if (!hasRequiredValues(data, Aes.X, Aes.Y)) {
            return withEmptyStatValues()
        }

        val xRange = statCtx.overallXRange() ?: return withEmptyStatValues()
        val yRange = statCtx.overallYRange() ?: return withEmptyStatValues()

        // Initial bin width and count: computed on the raw range (guarded against a zero span).
        val xRangeInit = adjustRangeInitial(xRange)
        val yRangeInit = adjustRangeInitial(yRange)

        val xCountAndWidthInit = BinStatUtil.binCountAndWidth(SeriesUtil.span(xRangeInit), binOptionsX)
        val yCountAndWidthInit = BinStatUtil.binCountAndWidth(SeriesUtil.span(yRangeInit), binOptionsY)

        // Final bin width and count: re-computed on the range expanded using the initial bin width.
        val xRangeFinal = adjustRangeFinal(xRange, xCountAndWidthInit.width)
        val yRangeFinal = adjustRangeFinal(yRange, yCountAndWidthInit.width)

        val xCountAndWidthFinal = BinStatUtil.binCountAndWidth(SeriesUtil.span(xRangeFinal), binOptionsX)
        val yCountAndWidthFinal = BinStatUtil.binCountAndWidth(SeriesUtil.span(yRangeFinal), binOptionsY)

        val countTotal = xCountAndWidthFinal.count * yCountAndWidthFinal.count
        val densityNormalizingFactor =
            densityNormalizingFactor(SeriesUtil.span(xRangeFinal), SeriesUtil.span(yRangeFinal), countTotal)

        val binsData = computeBins(
            data.getNumeric(TransformVar.X),
            data.getNumeric(TransformVar.Y),
            xRangeFinal.lowerEnd,
            yRangeFinal.lowerEnd,
            xCountAndWidthFinal.count,
            yCountAndWidthFinal.count,
            xCountAndWidthFinal.width,
            yCountAndWidthFinal.width,
            BinStatUtil.weightAtIndex(data),
            densityNormalizingFactor
        )

        return DataFrame.Builder()
            .putNumeric(Stats.X, binsData.x)
            .putNumeric(Stats.Y, binsData.y)
            .putNumeric(Stats.COUNT, binsData.count)
            .putNumeric(Stats.DENSITY, binsData.density)
            .build()
    }

    /**
     * Accumulates the weights of all finite (x, y) points into grid cells and emits one
     * row per cell of the `binCountX` x `binCountY` grid.
     *
     * @param xValues X values; entries rejected by [SeriesUtil.allFinite] (or null) are skipped.
     * @param yValues Y values, indexed in parallel with [xValues].
     * @param xStart lower edge of the first bin along X.
     * @param yStart lower edge of the first bin along Y.
     * @param binCountX number of bins along X.
     * @param binCountY number of bins along Y.
     * @param binWidth bin size along X.
     * @param binHeight bin size along Y.
     * @param weightAtIndex weight of the data point at the given index.
     * @param densityNormalizingFactor multiplier applied to `count / totalCount` to obtain the density.
     * @return bin centers, counts and densities; empty cells are omitted when `drop` is set.
     */
    private fun computeBins(
        xValues: List<Double?>,
        yValues: List<Double?>,
        xStart: Double,
        yStart: Double,
        binCountX: Int,
        binCountY: Int,
        binWidth: Double,
        binHeight: Double,
        weightAtIndex: (Int) -> Double,
        densityNormalizingFactor: Double
    ): Bins2dData {
        var totalCount = 0.0
        val countByBinIndexKey = HashMap<Pair<Int, Int>, MutableDouble>()
        for (dataIndex in xValues.indices) {
            val x = xValues[dataIndex]
            val y = yValues[dataIndex]
            // The explicit null checks let the compiler smart-cast x and y to Double below.
            if (x == null || y == null || !SeriesUtil.allFinite(x, y)) {
                continue
            }

            val weight = weightAtIndex(dataIndex)
            totalCount += weight

            val binIndexX = floor((x - xStart) / binWidth).toInt()
            val binIndexY = floor((y - yStart) / binHeight).toInt()
            countByBinIndexKey
                .getOrPut(Pair(binIndexX, binIndexY)) { MutableDouble(0.0) }
                .getAndAdd(weight)
        }

        val xs = ArrayList<Double>()
        val ys = ArrayList<Double>()
        val counts = ArrayList<Double>()
        val densities = ArrayList<Double>()

        // Centers of the first bin along each axis.
        val x0 = xStart + binWidth / 2
        val y0 = yStart + binHeight / 2

        for (xIndex in 0 until binCountX) {
            for (yIndex in 0 until binCountY) {
                // Cells that received no points have no map entry and count as 0.
                val count = countByBinIndexKey[Pair(xIndex, yIndex)]?.get() ?: 0.0

                if (drop && count == 0.0) {
                    continue
                }

                xs.add(x0 + xIndex * binWidth)
                ys.add(y0 + yIndex * binHeight)
                counts.add(count)
                // NOTE: when totalCount is 0.0 and empty cells are kept (drop == false),
                // this evaluates 0.0 / 0.0 and the density is NaN.
                val density = count / totalCount * densityNormalizingFactor
                densities.add(density)
            }
        }

        return Bins2dData(xs, ys, counts, densities)
    }

    /**
     * Parallel lists describing the computed bins: centers ([x], [y]), [count] and [density].
     */
    class Bins2dData(
        internal val x: List<Double>,
        internal val y: List<Double>,
        internal val count: List<Double>,
        internal val density: List<Double>
    )

    /**
     * Holder for a total weight and a map of weighted counts keyed by an (Int, Int) pair.
     */
    class Bins2dWeightedCounts(
        internal val total: Double,
        internal val countByBinXY: Map<Pair<Int, Int>, Double>
    )

    companion object {
        const val DEF_BINS = 30
        val DEF_BINWIDTH: Double? = null
        const val DEF_DROP = true

        private val DEF_MAPPING: Map<Aes<*>, DataFrame.Variable> = mapOf(
            Aes.X to Stats.X,
            Aes.Y to Stats.Y,
            Aes.FILL to Stats.COUNT
        )

        private fun adjustRangeInitial(r: ClosedRange<Double>): ClosedRange<Double> {
            // span can't be 0
            return ensureApplicableRange(r)
        }

        private fun adjustRangeFinal(r: ClosedRange<Double>, binWidth: Double): ClosedRange<Double> {
            return if (isSubTiny(r)) {
                // 0 span always becomes 1
                expand(r, 0.5, 0.5)
            } else {
                // Expand range by half of bin width (arbitrary choice - can be any positive number) to
                // avoid data points lying on the outer edges of the marginal bins.
                val exp = binWidth / 2.0
                expand(r, exp, exp)
            }
        }

        /**
         * Returns the inverse of a single bin's area, where the bin area is
         * `xSpan * ySpan / count`.
         */
        private fun densityNormalizingFactor(
            xSpan: Double,
            ySpan: Double,
            count: Int
        ): Double {
            // density should integrate to 1.0
            val area = xSpan * ySpan
            val binArea = area / count
            return 1.0 / binArea
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy