All downloads are free. Search and download functionality uses the official Maven repository.

org.jetbrains.kotlinx.dataframe.api.toDataFrame.kt Maven / Gradle / Ivy

package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.AnyBaseColumn
import org.jetbrains.kotlinx.dataframe.AnyCol
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.Column
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.api.mapNotNullValues
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.io.read
import org.jetbrains.kotlinx.dataframe.typeClass
import java.io.File
import java.net.URL
import kotlin.reflect.KClass
import kotlin.reflect.KProperty

// region read DataFrame from objects

/**
 * Converts this collection of objects into a [DataFrame] typed by [T].
 *
 * Delegates to the DSL-based `toDataFrame` overload with a single
 * `properties(depth = 1)` instruction.
 *
 * @return the data frame produced by the DSL-based overload.
 */
public inline fun <reified T> Iterable<T>.toDataFrame(): DataFrame<T> = toDataFrame {
    properties(depth = 1)
}

/**
 * Converts this collection of objects into a [DataFrame] using the instructions given in [body].
 *
 * The reified class of [T] and [body] are forwarded to `createDataFrameImpl`.
 *
 * @param body configuration block executed with a [CreateDataFrameDsl] receiver.
 * @return the data frame produced by `createDataFrameImpl`.
 */
public inline fun <reified T> Iterable<T>.toDataFrame(noinline body: CreateDataFrameDsl<T>.() -> Unit): DataFrame<T> =
    createDataFrameImpl(T::class, body)

/**
 * Converts this collection of objects into a [DataFrame], passing [props] as the traversal roots.
 *
 * Delegates to the DSL-based `toDataFrame` overload with
 * `properties(roots = props, depth = depth)`.
 *
 * @param props properties forwarded as `roots` to [CreateDataFrameDsl.properties].
 * @param depth traversal depth forwarded to [CreateDataFrameDsl.properties]; defaults to 1.
 */
public inline fun <reified T> Iterable<T>.toDataFrame(vararg props: KProperty<*>, depth: Int = 1): DataFrame<T> =
    toDataFrame {
        properties(roots = props, depth = depth)
    }

/**
 * Reads the values of this column into data frames where that is applicable.
 *
 * - Columns of kind [ColumnKind.Group] or [ColumnKind.Frame], and columns for which
 *   `isPrimitive()` is true, are returned unchanged.
 * - Columns whose `typeClass` is [File] or [URL] are mapped through `DataFrame.read`
 *   via `mapNotNullValues` (presumably leaving `null` values as they are; confirm
 *   against `mapNotNullValues`).
 * - Any other column has its values converted with `createDataFrameImpl` using
 *   `properties(depth = 1)`; the result is wrapped as a column group carrying this column's name.
 *
 * @return the original column or the column built as described above.
 */
public inline fun <reified T> DataColumn<T>.read(): AnyCol = when (kind()) {
    ColumnKind.Group, ColumnKind.Frame -> this
    else -> when {
        isPrimitive() -> this
        typeClass == File::class -> cast<File?>().mapNotNullValues { DataFrame.read(it) }
        typeClass == URL::class -> cast<URL?>().mapNotNullValues { DataFrame.read(it) }
        else -> values().createDataFrameImpl(typeClass) {
            // The DSL is created from the runtime class, so it is cast back to the column's static type.
            (this as CreateDataFrameDsl<T>).properties(depth = 1)
        }.asColumnGroup(name()).asDataColumn()
    }
}

/**
 * Replaces every column selected by [columns] with the result of calling `read()` on it.
 */
public fun <T> DataFrame<T>.read(columns: ColumnsSelector<T, *>): DataFrame<T> = replace(columns).with { it.read() }

/** Same as the selector-based `read`, with the columns given by name. */
public fun <T> DataFrame<T>.read(vararg columns: String): DataFrame<T> = read { columns.toColumns() }

/** Same as the selector-based `read`, with the columns given as property references. */
public fun <T> DataFrame<T>.read(vararg columns: KProperty<*>): DataFrame<T> = read { columns.toColumns() }

/** Same as the selector-based `read`, with the columns given as [Column] references. */
public fun <T> DataFrame<T>.read(vararg columns: Column): DataFrame<T> = read { columns.toColumns() }

/**
 * Builds a [DataFrame] from a collection of rows.
 *
 * If every row reports the very same data frame instance from `df()` (compared by identity),
 * the result is that frame indexed by the list of row indices, in iteration order.
 * Otherwise (rows from different frames, or an empty receiver) each row is converted with
 * `toDataFrame()` and the results are combined with `concat()`.
 */
@JvmName("toDataFrameT")
public fun <T> Iterable<DataRow<T>>.toDataFrame(): DataFrame<T> {
    // Stays non-null only while all rows seen so far share one source frame.
    var uniqueDf: DataFrame<T>? = null
    for (row in this) {
        if (uniqueDf == null) {
            uniqueDf = row.df()
        } else if (uniqueDf !== row.df()) {
            uniqueDf = null
            break
        }
    }
    return if (uniqueDf != null) {
        val permutation = map { it.index }
        uniqueDf[permutation]
    } else {
        map { it.toDataFrame() }.concat()
    }
}

/**
 * Builds a data frame from this collection of columns by passing it to `dataFrameOf`.
 */
@JvmName("toDataFrameAnyColumn")
public fun Iterable<AnyBaseColumn>.toDataFrame(): AnyFrame = dataFrameOf(this)

/**
 * Builds a [DataFrame] from pairs of column path and column, nesting columns into groups by path.
 *
 * - A pair with an empty path is ignored.
 * - A pair with a single-segment path becomes a top-level column, renamed to a name made unique
 *   by [ColumnNameGenerator].
 * - Pairs with longer paths are collected per first path segment; each collection is turned
 *   into a nested data frame by a recursive call on the remaining path segments and wrapped
 *   into a column group.
 *
 * Top-level columns and groups keep the position at which their name was first encountered.
 *
 * @return the assembled data frame, cast to the requested schema type [T].
 */
@JvmName("toDataFramePairColumnPathAnyCol")
public fun <T> Iterable<Pair<ColumnPath, AnyBaseColumn>>.toDataFrameFromPairs(): DataFrame<T> {
    val nameGenerator = ColumnNameGenerator()
    // Parallel lists indexed by top-level column position:
    // a position holds either a ready column (group entry is null) or a pending group (column entry is null).
    val columnNames = mutableListOf<String>()
    val columnGroups = mutableListOf<MutableList<Pair<ColumnPath, AnyBaseColumn>>?>()
    val columns = mutableListOf<AnyBaseColumn?>()
    // Unique top-level name -> position in the lists above.
    val columnIndices = mutableMapOf<String, Int>()
    // Original first path segment -> unique name assigned to its group.
    val columnGroupName = mutableMapOf<String, String>()

    forEach { (path, col) ->
        when (path.size) {
            0 -> {
                // Nothing to place: an empty path is skipped.
            }
            1 -> {
                val name = path[0]
                val uniqueName = nameGenerator.addUnique(name)
                val index = columns.size
                columnNames.add(uniqueName)
                columnGroups.add(null)
                columns.add(col.rename(uniqueName))
                columnIndices[uniqueName] = index
            }
            else -> {
                val name = path[0]
                // All nested paths sharing a first segment go into the same group.
                val uniqueName = columnGroupName.getOrPut(name) {
                    nameGenerator.addUnique(name)
                }
                val index = columnIndices.getOrPut(uniqueName) {
                    columnNames.add(uniqueName)
                    columnGroups.add(mutableListOf())
                    columns.add(null)
                    columns.size - 1
                }
                val list = columnGroups[index]!!
                list.add(path.drop(1) to col)
            }
        }
    }
    // Materialize every pending group into a column group built from its nested pairs.
    columns.indices.forEach { index ->
        val group = columnGroups[index]
        if (group != null) {
            val nestedDf = group.toDataFrameFromPairs<Unit>()
            val col = DataColumn.createColumnGroup(columnNames[index], nestedDf)
            assert(columns[index] == null)
            columns[index] = col
        } else assert(columns[index] != null)
    }
    return columns.map { it!! }.toDataFrame().cast()
}

/**
 * Builds a data frame from pairs of column path and values.
 *
 * Each value collection is turned into a column by `guessColumnType`, named after the last
 * segment of its path, and the resulting pairs are assembled by the column-based
 * `toDataFrameFromPairs`.
 */
@JvmName("toDataFrameColumnPathAnyNullable")
public fun Iterable<Pair<ColumnPath, Iterable<Any?>>>.toDataFrameFromPairs(): AnyFrame {
    return map { it.first to guessColumnType(it.first.last(), it.second.asList()) }.toDataFrameFromPairs<Unit>()
}

/**
 * Builds a data frame from pairs of column name and values.
 *
 * Each name is wrapped into a [ColumnPath], each value collection is turned into a column by
 * `guessColumnType`, and the resulting pairs are assembled by the column-based
 * `toDataFrameFromPairs`.
 */
public fun Iterable<Pair<String, Iterable<Any?>>>.toDataFrameFromPairs(): AnyFrame {
    return map { ColumnPath(it.first) to guessColumnType(it.first, it.second.asList()) }.toDataFrameFromPairs<Unit>()
}

/**
 * Receiver type of the optional configuration block accepted by `CreateDataFrameDsl.properties`.
 */
public interface TraversePropertiesDsl {

    /**
     * Excludes the given [properties].
     *
     * NOTE(review): presumably the excluded properties are skipped during property traversal;
     * confirm against the implementation.
     */
    public fun exclude(vararg properties: KProperty<*>)

    /**
     * Skips instances of the given [classes] when transforming values into ColumnGroups and
     * FrameColumns, and stores them in a ValueColumn instead.
     */
    public fun preserve(vararg classes: KClass<*>)
}

/**
 * Reified shortcut for [TraversePropertiesDsl.preserve] that passes the single class `T::class`.
 */
public inline fun <reified T> TraversePropertiesDsl.preserve(): Unit = preserve(T::class)

/**
 * DSL for describing which columns to create from a collection of objects.
 *
 * @param T type of the elements in [source].
 * @property source the objects from which column values are computed.
 */
public abstract class CreateDataFrameDsl<T>(public val source: Iterable<T>) {

    /**
     * Adds [column] to the data frame being built, optionally at the given [path].
     */
    public abstract fun add(column: AnyBaseColumn, path: ColumnPath? = null)

    /** Adds this column under the path consisting of the single segment [name]. */
    public infix fun AnyBaseColumn.into(name: String): Unit = add(this, pathOf(name))

    /** Adds this column under the given [path]. */
    public infix fun AnyBaseColumn.into(path: ColumnPath): Unit = add(this, path)

    /**
     * Adds columns derived from object properties.
     *
     * @param roots properties to start from.
     * @param depth traversal depth; defaults to 1.
     * @param body optional configuration block with a [TraversePropertiesDsl] receiver.
     */
    public abstract fun properties(
        vararg roots: KProperty<*>,
        depth: Int = 1,
        body: (TraversePropertiesDsl.() -> Unit)? = null
    )

    /**
     * Creates a column from the results of applying [expression] to every element of [source].
     * The column is returned and is not added to the data frame.
     */
    public inline fun <reified R> expr(noinline expression: (T) -> R): DataColumn<R> =
        source.map { expression(it) }.toColumn()

    /**
     * Adds a column called [name] whose values are the results of applying [expression]
     * to every element of [source]. The column is created with `Infer.Nulls`.
     */
    public inline fun <reified R> add(name: String, noinline expression: (T) -> R): Unit =
        add(source.map { expression(it) }.toColumn(name, Infer.Nulls))

    /** Infix form of [add] that uses this string as the column name. */
    public inline infix fun <reified R> String.from(noinline expression: (T) -> R): Unit =
        add(this, expression)

    /** Infix form of [add] that uses this property's `columnName` as the column name. */
    public inline infix fun <reified R> KProperty<R>.from(noinline expression: (T) -> R): Unit =
        add(columnName, expression)

    /**
     * Adds a column named after this property's `columnName`. Its values come from
     * [InferType.expression] and the column is created with `DataColumn.createWithTypeInference`.
     */
    public inline infix fun <reified R> KProperty<R>.from(inferType: InferType<T, R>): Unit =
        add(DataColumn.createWithTypeInference(columnName, source.map { inferType.expression(it) }))

    /** Wrapper around [expression] that selects the type-inferring `from` overload. */
    public data class InferType<T, R>(val expression: (T) -> R)

    /** Wraps [expression] into an [InferType] for use with the type-inferring `from` overload. */
    public inline fun <reified R> inferType(noinline expression: (T) -> R): InferType<T, R> = InferType(expression)

    /**
     * Runs [builder] in the context of this string.
     *
     * NOTE(review): presumably the string names a column group that receives the columns
     * added inside [builder]; confirm against the implementation.
     */
    public abstract operator fun String.invoke(builder: CreateDataFrameDsl<T>.() -> Unit)
}

// endregion

// region Create DataFrame from Map

/**
 * Builds a data frame from a map of column name to values.
 *
 * Each entry becomes a column created with `DataColumn.createWithTypeInference`, named by
 * the entry's key.
 */
public fun Map<String, Iterable<Any?>>.toDataFrame(): AnyFrame {
    return map { DataColumn.createWithTypeInference(it.key, it.value.asList()) }.toDataFrame()
}

/**
 * Builds a data frame from a map of column path to values.
 *
 * Each entry becomes a column created with `DataColumn.createWithTypeInference`, named by
 * the last segment of its path. The path/column pairs are then assembled by
 * `toDataFrameFromPairs`.
 */
@JvmName("toDataFrameColumnPathAnyNullable")
public fun Map<ColumnPath, Iterable<Any?>>.toDataFrame(): AnyFrame {
    return map { it.key to DataColumn.createWithTypeInference(it.key.last(), it.value.asList()) }.toDataFrameFromPairs<Unit>()
}

// endregion




© 2015 - 2025 Weber Informatics LLC | Privacy Policy