All downloads are free. Search and download functionality uses the official Maven repository.

org.jetbrains.kotlinx.dataframe.api.valueCounts.kt Maven / Gradle / Ivy

package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.Column
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
import org.jetbrains.kotlinx.dataframe.impl.nameGenerator
import kotlin.reflect.KProperty
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf

// region DataColumn

/**
 * Data schema declaring the integer `count` column.
 *
 * The name of [count] is read reflectively to produce the default name of the
 * count column, so renaming the property changes that default.
 */
@DataSchema
public interface ValueCount {
    /** Number of occurrences recorded for a row. */
    public val count: Int
}

/** Default name of the count column, taken from the name of the [ValueCount.count] property. */
internal val defaultCountColumnName: String = ValueCount::count.name

/**
 * Counts how many times each distinct value occurs in this column.
 *
 * The result is built from two columns: one carrying this column's name and holding
 * the distinct values, and one holding the number of occurrences of each value.
 *
 * NOTE(review): the generic type arguments of this signature appear to be missing in
 * this copy of the file — confirm against the upstream source before compiling.
 *
 * @param sort when `true`, rows are ordered by their count; when `false`, rows keep the
 *   iteration order produced by `groupBy`.
 * @param ascending only consulted when [sort] is `true`: `true` orders counts ascending,
 *   `false` orders them descending.
 * @param dropNA when `true`, entries whose value satisfies `isNA` are removed and the value
 *   column is created as non-nullable; when `false`, the value column's nullability follows
 *   `hasNulls()`.
 * @param resultColumn name of the count column. If it equals this column's name, `"1"` is
 *   appended so the two columns do not share a name.
 * @return a frame created from the value column and the count column.
 */
public fun  DataColumn.valueCounts(
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName
): DataFrame {
    val counted = toList().groupBy { it }.map { (value, occurrences) -> value to occurrences.size }
    val ordered = when {
        !sort -> counted
        ascending -> counted.sortedBy { it.second }
        else -> counted.sortedByDescending { it.second }
    }
    // Filtering happens after sorting; both sorts are stable, so relative order is unaffected.
    val kept = if (dropNA) ordered.filterNot { it.first.isNA } else ordered

    // With NA entries dropped the value column is declared non-nullable.
    val nulls = !dropNA && hasNulls()
    val values = DataColumn.create(name(), kept.map { it.first }, type().withNullability(nulls))

    val countName = if (resultColumn == name()) "${resultColumn}1" else resultColumn
    // Counts are group sizes (Int), so the reified type argument has to be spelled out:
    // nothing at this call site lets the compiler infer it.
    val counts = DataColumn.create(countName, kept.map { it.second }, typeOf<Int>())
    return dataFrameOf(values, counts).cast()
}

// endregion

// region DataFrame

/**
 * Counts how many times each distinct row occurs, optionally restricted to selected columns.
 *
 * The (selected) columns are wrapped into a single column group, that group is counted as
 * one column via the column-level `valueCounts`, and the group is ungrouped again in the result.
 *
 * @param sort forwarded to the column-level `valueCounts`.
 * @param ascending forwarded to the column-level `valueCounts`.
 * @param dropNA when `true`, `dropNA()` is applied to the frame before counting; the flag is
 *   also forwarded to the column-level `valueCounts`.
 * @param resultColumn requested name of the count column; the name actually used is whatever
 *   `nameGenerator().addUnique(resultColumn)` returns for this frame.
 * @param columns selector of the columns to count over; `null` means all columns of this frame.
 * @return the counted frame after ungrouping and `cast()`.
 */
public fun  DataFrame.valueCounts(
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName,
    columns: ColumnsSelector? = null
): DataFrame {
    val selected = if (columns == null) this else select(columns)
    val source = if (dropNA) selected.dropNA() else selected

    // NOTE(review): the accessor presumably takes its column name from the property name,
    // so `rows` is kept as is — confirm before renaming.
    val rows by columnGroup()
    // The name generator is obtained from the receiver frame, not from the selection.
    val countName = nameGenerator().addUnique(resultColumn)

    val rowsAsColumn = source.asColumnGroup(rows).asDataColumn()
    val counted = rowsAsColumn.valueCounts(sort, ascending, dropNA, countName)
    return counted.ungroup(rows).cast()
}

/**
 * Counts distinct value combinations over the columns with the given names.
 *
 * Thin wrapper: turns [columns] into a column selector with `toColumns()` and delegates
 * to the selector-based `valueCounts` overload, forwarding every other argument unchanged.
 */
public fun  DataFrame.valueCounts(
    vararg columns: String,
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName
): DataFrame {
    return valueCounts(
        sort = sort,
        ascending = ascending,
        dropNA = dropNA,
        resultColumn = resultColumn
    ) { columns.toColumns() }
}
/**
 * Counts distinct value combinations over the given column references.
 *
 * Thin wrapper: turns [columns] into a column selector with `toColumns()` and delegates
 * to the selector-based `valueCounts` overload, forwarding every other argument unchanged.
 */
public fun  DataFrame.valueCounts(
    vararg columns: Column,
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName
): DataFrame {
    return valueCounts(
        sort = sort,
        ascending = ascending,
        dropNA = dropNA,
        resultColumn = resultColumn
    ) { columns.toColumns() }
}
/**
 * Counts distinct value combinations over the columns identified by the given properties.
 *
 * Thin wrapper: turns [columns] into a column selector with `toColumns()` and delegates
 * to the selector-based `valueCounts` overload, forwarding every other argument unchanged.
 */
public fun  DataFrame.valueCounts(
    vararg columns: KProperty<*>,
    sort: Boolean = true,
    ascending: Boolean = false,
    dropNA: Boolean = true,
    resultColumn: String = defaultCountColumnName
): DataFrame {
    return valueCounts(
        sort = sort,
        ascending = ascending,
        dropNA = dropNA,
        resultColumn = resultColumn
    ) { columns.toColumns() }
}

// endregion




© 2015 - 2025 Weber Informatics LLC | Privacy Policy