
// org.jetbrains.kotlinx.dataframe.api.valueCounts.kt (Maven / Gradle / Ivy)
package org.jetbrains.kotlinx.dataframe.api
import org.jetbrains.kotlinx.dataframe.Column
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
import org.jetbrains.kotlinx.dataframe.impl.nameGenerator
import kotlin.reflect.KProperty
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf
// region DataColumn
/**
 * Row schema of a value-counts result: exposes the number of occurrences
 * recorded for a row through the [count] property.
 */
@DataSchema
public interface ValueCount {
    /** Number of occurrences. */
    public val count: Int
}
/**
 * Default name of the counts column, taken from the name of the [ValueCount.count] property
 * so the two cannot drift apart.
 */
internal val defaultCountColumnName: String = ValueCount::count.name
/**
 * Counts how many times each distinct value occurs in this column.
 *
 * The result has two columns: the first keeps this column's name and holds the distinct values,
 * the second holds the number of occurrences of each value.
 *
 * @param sort when `true`, rows are ordered by count; when `false`, the grouping order is kept.
 * @param ascending direction used when [sort] is `true`: smallest counts first if `true`,
 * largest counts first otherwise.
 * @param dropNA when `true`, values for which `isNA` holds are excluded and the value column
 * is declared non-nullable; when `false`, nullability follows [DataColumn.hasNulls].
 * @param resultColumn name of the counts column; if it equals this column's name, `"1"` is
 * appended to avoid a duplicate column name.
 * @return a frame of distinct values and their counts.
 */
public fun <T> DataColumn<T>.valueCounts(
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName
): DataFrame<ValueCount> {
    // groupBy preserves key insertion order, so unsorted output follows first occurrence.
    val tallies = toList().groupBy { it }.map { (value, occurrences) -> value to occurrences.size }

    // Filtering before sorting gives the same rows as sorting first (the sorts are stable)
    // while sorting fewer entries.
    val kept = if (dropNA) tallies.filter { !it.first.isNA } else tallies
    val ordered = when {
        !sort -> kept
        ascending -> kept.sortedBy { it.second }
        else -> kept.sortedByDescending { it.second }
    }

    val nullable = !dropNA && hasNulls()
    val values = DataColumn.create(name(), ordered.map { it.first }, type().withNullability(nullable))

    // The counts column must not share its name with the values column.
    val countName = if (resultColumn == name()) "${resultColumn}1" else resultColumn
    val counts = DataColumn.create(countName, ordered.map { it.second }, typeOf<Int>())

    return dataFrameOf(values, counts).cast()
}
// endregion
// region DataFrame
/**
 * Counts how many times each distinct combination of values occurs across rows.
 *
 * The (optionally narrowed) frame is wrapped into a single column group, counted with the
 * column-level `valueCounts`, and then ungrouped again, so the result contains the
 * participating columns followed by the counts column.
 *
 * @param sort when `true`, rows are ordered by count.
 * @param ascending direction used when [sort] is `true`.
 * @param dropNA when `true`, the frame is passed through `dropNA()` before counting.
 * @param resultColumn requested name of the counts column; it is made unique with respect to
 * the columns that take part in the counting.
 * @param columns selector of the columns to count over; `null` means all columns.
 * @return a frame of distinct rows and their counts.
 */
public fun <T> DataFrame<T>.valueCounts(
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName,
    columns: ColumnsSelector<T, *>? = null
): DataFrame<ValueCount> {
    val selected = if (columns != null) select(columns) else this
    val df = if (dropNA) selected.dropNA() else selected

    val rows by columnGroup()
    // Uniqueness is checked against the columns that actually end up in the result (`df`),
    // not against the receiver: unselected columns must not force a rename, and columns
    // renamed by the selector must be taken into account.
    val countName = df.nameGenerator().addUnique(resultColumn)

    return df.asColumnGroup(rows)
        .asDataColumn()
        .valueCounts(sort, ascending, dropNA, countName)
        .ungroup(rows)
        .cast()
}
/**
 * Counts distinct row combinations over the columns identified by name.
 *
 * Delegates to the selector-based `valueCounts` overload.
 *
 * @param columns names of the columns to count over.
 * @param sort when `true`, rows are ordered by count.
 * @param ascending direction used when [sort] is `true`.
 * @param dropNA forwarded unchanged to the selector-based overload.
 * @param resultColumn requested name of the counts column.
 * @return a frame of distinct rows and their counts.
 */
public fun <T> DataFrame<T>.valueCounts(
    vararg columns: String,
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName
): DataFrame<ValueCount> = valueCounts(sort, ascending, dropNA, resultColumn) { columns.toColumns() }
/**
 * Counts distinct row combinations over the columns identified by column references.
 *
 * Delegates to the selector-based `valueCounts` overload.
 *
 * @param columns references to the columns to count over.
 * @param sort when `true`, rows are ordered by count.
 * @param ascending direction used when [sort] is `true`.
 * @param dropNA forwarded unchanged to the selector-based overload.
 * @param resultColumn requested name of the counts column.
 * @return a frame of distinct rows and their counts.
 */
public fun <T> DataFrame<T>.valueCounts(
    vararg columns: Column,
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName
): DataFrame<ValueCount> = valueCounts(sort, ascending, dropNA, resultColumn) { columns.toColumns() }
/**
 * Counts distinct row combinations over the columns identified by Kotlin properties.
 *
 * Delegates to the selector-based `valueCounts` overload.
 *
 * @param columns properties identifying the columns to count over.
 * @param sort when `true`, rows are ordered by count.
 * @param ascending direction used when [sort] is `true`.
 * @param dropNA forwarded unchanged to the selector-based overload.
 * @param resultColumn requested name of the counts column.
 * @return a frame of distinct rows and their counts.
 */
public fun <T> DataFrame<T>.valueCounts(
    vararg columns: KProperty<*>,
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName
): DataFrame<ValueCount> = valueCounts(sort, ascending, dropNA, resultColumn) { columns.toColumns() }
// endregion
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy