
// Listing header (not Kotlin source): org.jetbrains.kotlinx.dataframe.api.toDataFrame.kt — Maven / Gradle / Ivy
package org.jetbrains.kotlinx.dataframe.api
import org.jetbrains.kotlinx.dataframe.AnyBaseColumn
import org.jetbrains.kotlinx.dataframe.AnyCol
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.Column
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.api.mapNotNullValues
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.io.read
import org.jetbrains.kotlinx.dataframe.typeClass
import java.io.File
import java.net.URL
import kotlin.reflect.KClass
import kotlin.reflect.KProperty
// region read DataFrame from objects
/**
 * Converts this iterable of objects into a [DataFrame].
 *
 * Delegates to the DSL-based `toDataFrame { ... }` overload with a body that only calls
 * `properties(depth = 1)`.
 *
 * @param T element type of the iterable; reified because the DSL overload needs `T::class`.
 * @return the [DataFrame] produced by the DSL overload.
 */
public inline fun <reified T> Iterable<T>.toDataFrame(): DataFrame<T> = toDataFrame {
    properties(depth = 1)
}
/**
 * Converts this iterable of objects into a [DataFrame] whose columns are described by [body].
 *
 * Passes `T::class` and [body] to `createDataFrameImpl`, called on this iterable.
 *
 * @param T element type of the iterable; reified so that `T::class` is available.
 * @param body column-building instructions, run with a [CreateDataFrameDsl] receiver.
 * @return the [DataFrame] built by `createDataFrameImpl`.
 */
public inline fun <reified T> Iterable<T>.toDataFrame(noinline body: CreateDataFrameDsl<T>.() -> Unit): DataFrame<T> =
    createDataFrameImpl(T::class, body)
/**
 * Converts this iterable of objects into a [DataFrame], starting from the given properties.
 *
 * Delegates to the DSL-based `toDataFrame { ... }` overload with a body that calls
 * `properties(roots = props, depth = depth)`.
 *
 * @param T element type of the iterable; reified because the DSL overload needs `T::class`.
 * @param props properties forwarded as `roots` to [CreateDataFrameDsl.properties].
 * @param depth value forwarded as `depth` to [CreateDataFrameDsl.properties]; defaults to 1.
 * @return the [DataFrame] produced by the DSL overload.
 */
public inline fun <reified T> Iterable<T>.toDataFrame(vararg props: KProperty<*>, depth: Int = 1): DataFrame<T> =
    toDataFrame {
        properties(roots = props, depth = depth)
    }
/**
 * Turns the values of this column into data frame content, depending on the column kind and type.
 *
 * - Columns of kind [ColumnKind.Group] or [ColumnKind.Frame] are returned unchanged.
 * - Columns for which `isPrimitive()` is true are returned unchanged.
 * - Columns whose `typeClass` is [File] or [URL] have their values passed to `DataFrame.read`
 *   through `mapNotNullValues`.
 * - Any other column has its values converted with `createDataFrameImpl` and
 *   `properties(depth = 1)`; the result is wrapped as a column group carrying this column's name.
 *
 * NOTE(review): the type arguments on `cast` and `CreateDataFrameDsl` were missing in the listing
 * this file came from and have been restored from context — confirm against the upstream source.
 *
 * @param T value type of the column.
 * @return this column, or a new column built from its values as described above.
 */
public inline fun <reified T> DataColumn<T>.read(): AnyCol = when (kind()) {
    ColumnKind.Group, ColumnKind.Frame -> this
    else -> when {
        isPrimitive() -> this
        typeClass == File::class -> cast<File?>().mapNotNullValues { DataFrame.read(it) }
        typeClass == URL::class -> cast<URL?>().mapNotNullValues { DataFrame.read(it) }
        else -> values().createDataFrameImpl(typeClass) {
            (this as CreateDataFrameDsl<T>).properties(depth = 1)
        }.asColumnGroup(name()).asDataColumn()
    }
}
/**
 * Replaces every column chosen by [columns] with the result of calling `read()` on it.
 *
 * NOTE(review): `ColumnsSelector<T, *>` was restored from context; the listing had lost its
 * type arguments.
 *
 * @param columns selector of the columns to replace.
 * @return the data frame returned by `replace(columns).with { ... }`.
 */
public fun <T> DataFrame<T>.read(columns: ColumnsSelector<T, *>): DataFrame<T> =
    replace(columns).with { it.read() }
/**
 * Name-based variant of `read`: converts [columns] with `toColumns()` and delegates to the
 * selector-based overload.
 *
 * @param columns names of the columns to process.
 */
public fun <T> DataFrame<T>.read(vararg columns: String): DataFrame<T> = read { columns.toColumns() }
/**
 * Property-based variant of `read`: converts [columns] with `toColumns()` and delegates to the
 * selector-based overload.
 *
 * @param columns properties identifying the columns to process.
 */
public fun <T> DataFrame<T>.read(vararg columns: KProperty<*>): DataFrame<T> = read { columns.toColumns() }
/**
 * Column-reference variant of `read`: converts [columns] with `toColumns()` and delegates to the
 * selector-based overload.
 *
 * @param columns column references identifying the columns to process.
 */
public fun <T> DataFrame<T>.read(vararg columns: Column): DataFrame<T> = read { columns.toColumns() }
/**
 * Builds a [DataFrame] from a collection of rows.
 *
 * If every row reports the very same data frame instance from `df()` (compared by reference),
 * the result is that frame indexed by the list of row indices. Otherwise, including when the
 * iterable is empty, each row is converted with `toDataFrame()` and the pieces are combined
 * with `concat()`.
 *
 * @param T schema marker type shared by the rows.
 * @return a data frame containing the given rows in iteration order.
 */
@JvmName("toDataFrameT")
public fun <T> Iterable<DataRow<T>>.toDataFrame(): DataFrame<T> {
    // Stays non-null only while all rows seen so far belong to one and the same frame instance.
    var uniqueDf: DataFrame<T>? = null
    for (row in this) {
        if (uniqueDf == null) {
            uniqueDf = row.df()
        } else if (uniqueDf !== row.df()) {
            // Rows come from different frames: give up on the shared-frame path.
            uniqueDf = null
            break
        }
    }
    return if (uniqueDf != null) {
        val permutation = map { it.index }
        uniqueDf[permutation]
    } else {
        map { it.toDataFrame() }.concat()
    }
}
/**
 * Creates a data frame from this collection of columns by passing it to `dataFrameOf`.
 *
 * NOTE(review): the receiver element type ([AnyBaseColumn]) was restored from context; the
 * listing had lost its type argument.
 */
@JvmName("toDataFrameAnyColumn")
public fun Iterable<AnyBaseColumn>.toDataFrame(): AnyFrame = dataFrameOf(this)
/**
 * Builds a (possibly nested) data frame from pairs of column path and column.
 *
 * Each pair is handled according to the size of its path:
 * - size 0: the pair is ignored;
 * - size 1: the column is renamed to a name obtained from [ColumnNameGenerator.addUnique] and
 *   placed at the top level;
 * - larger: the pair is collected into a group keyed by the first path segment. All pairs with
 *   the same first segment share one group, positioned where that segment first occurred. The
 *   group is then built recursively from the remaining path segments and stored as a column
 *   group.
 *
 * NOTE(review): all type arguments in this function were missing in the listing this file came
 * from and have been restored from context — confirm against the upstream source.
 *
 * @param T schema marker type the resulting frame is cast to.
 * @return the data frame assembled from the top-level columns and column groups.
 */
@JvmName("toDataFramePairColumnPathAnyCol")
public fun <T> Iterable<Pair<ColumnPath, AnyBaseColumn>>.toDataFrameFromPairs(): DataFrame<T> {
    val nameGenerator = ColumnNameGenerator()
    val columnNames = mutableListOf<String>()
    // Parallel to `columns`: a non-null entry collects the nested pairs of a column group.
    val columnGroups = mutableListOf<MutableList<Pair<ColumnPath, AnyBaseColumn>>?>()
    // Parallel to `columnGroups`: null marks a slot to be filled with a column group later.
    val columns = mutableListOf<AnyBaseColumn?>()
    val columnIndices = mutableMapOf<String, Int>()
    // Maps the first path segment of a group to the unique name generated for that group.
    val columnGroupName = mutableMapOf<String, String>()

    forEach { (path, col) ->
        when (path.size) {
            0 -> {
                // Empty paths are skipped.
            }
            1 -> {
                val name = path[0]
                val uniqueName = nameGenerator.addUnique(name)
                val index = columns.size
                columnNames.add(uniqueName)
                columnGroups.add(null)
                columns.add(col.rename(uniqueName))
                columnIndices[uniqueName] = index
            }
            else -> {
                val name = path[0]
                val uniqueName = columnGroupName.getOrPut(name) {
                    nameGenerator.addUnique(name)
                }
                val index = columnIndices.getOrPut(uniqueName) {
                    columnNames.add(uniqueName)
                    columnGroups.add(mutableListOf())
                    columns.add(null)
                    columns.size - 1
                }
                val list = columnGroups[index]!!
                list.add(path.drop(1) to col)
            }
        }
    }

    // Second pass: turn every collected group into a column group built from its nested pairs.
    columns.indices.forEach { index ->
        val group = columnGroups[index]
        if (group != null) {
            val nestedDf = group.toDataFrameFromPairs<Unit>()
            val col = DataColumn.createColumnGroup(columnNames[index], nestedDf)
            assert(columns[index] == null)
            columns[index] = col
        } else {
            assert(columns[index] != null)
        }
    }
    return columns.map { it!! }.toDataFrame().cast()
}
/**
 * Builds a data frame from pairs of column path and raw values.
 *
 * Each pair becomes a column created by `guessColumnType` from the last path segment and the
 * values as a list. The path/column pairs are then passed to the column-based
 * `toDataFrameFromPairs`.
 *
 * NOTE(review): the receiver type and the explicit `<Unit>` type argument were restored from
 * context; the listing had lost them.
 */
@JvmName("toDataFrameColumnPathAnyNullable")
public fun Iterable<Pair<ColumnPath, Iterable<Any?>>>.toDataFrameFromPairs(): AnyFrame {
    return map { it.first to guessColumnType(it.first.last(), it.second.asList()) }.toDataFrameFromPairs<Unit>()
}
/**
 * Builds a data frame from pairs of column name and raw values.
 *
 * Each name is wrapped into a [ColumnPath] and each value collection becomes a column created
 * by `guessColumnType`. The path/column pairs are then passed to the column-based
 * `toDataFrameFromPairs`.
 *
 * NOTE(review): the receiver type and the explicit `<Unit>` type argument were restored from
 * context; the listing had lost them.
 */
public fun Iterable<Pair<String, Iterable<Any?>>>.toDataFrameFromPairs(): AnyFrame {
    return map { ColumnPath(it.first) to guessColumnType(it.first, it.second.asList()) }.toDataFrameFromPairs<Unit>()
}
/**
 * Receiver of the optional `body` lambda accepted by `CreateDataFrameDsl.properties`.
 *
 * Behaviour is defined by implementations, which are not part of this file.
 */
public interface TraversePropertiesDsl {

    /**
     * Registers [properties] for exclusion.
     *
     * NOTE(review): presumably the given properties are left out of the traversal — confirm
     * against the implementation.
     */
    public fun exclude(vararg properties: KProperty<*>)

    /**
     * Skip instances of given [classes] from transformation into ColumnGroups and FrameColumns
     * and store them in ValueColumn.
     */
    public fun preserve(vararg classes: KClass<*>)
}
/**
 * Type-argument shorthand for [TraversePropertiesDsl.preserve]: forwards `T::class` to the
 * `KClass`-based overload.
 *
 * @param T class to preserve; reified so that `T::class` is available.
 */
public inline fun <reified T> TraversePropertiesDsl.preserve(): Unit = preserve(T::class)
/**
 * DSL receiver used to describe which columns a data frame created from [source] should have.
 *
 * NOTE(review): the type parameters of this class and its members (`T`, `R`, `InferType<T, R>`)
 * were missing in the listing this file came from and have been restored from how they are
 * used — confirm against the upstream source.
 *
 * @param T element type of [source].
 * @property source the objects that column expressions are evaluated on.
 */
public abstract class CreateDataFrameDsl<T>(public val source: Iterable<T>) {

    /** Adds [column] to the data frame being built, optionally at the given [path]. */
    public abstract fun add(column: AnyBaseColumn, path: ColumnPath? = null)

    /** Adds the receiver column under the single-segment path built from [name]. */
    public infix fun AnyBaseColumn.into(name: String): Unit = add(this, pathOf(name))

    /** Adds the receiver column under [path]. */
    public infix fun AnyBaseColumn.into(path: ColumnPath): Unit = add(this, path)

    /**
     * Adds columns derived from properties of the source objects.
     *
     * @param roots properties to start from.
     * @param depth traversal depth; defaults to 1.
     * @param body optional extra configuration, run with a [TraversePropertiesDsl] receiver.
     */
    public abstract fun properties(
        vararg roots: KProperty<*>,
        depth: Int = 1,
        body: (TraversePropertiesDsl.() -> Unit)? = null
    )

    /** Evaluates [expression] on every element of [source] and returns the results as a column. */
    public inline fun <reified R> expr(noinline expression: (T) -> R): DataColumn<R> =
        source.map { expression(it) }.toColumn()

    /**
     * Evaluates [expression] on every element of [source] and adds the results as a column
     * called [name], created with `Infer.Nulls`.
     */
    public inline fun <reified R> add(name: String, noinline expression: (T) -> R): Unit =
        add(source.map { expression(it) }.toColumn(name, Infer.Nulls))

    /** Infix form of [add]: the receiver string is the column name. */
    public inline infix fun <reified R> String.from(noinline expression: (T) -> R): Unit =
        add(this, expression)

    /** Infix form of [add]: the column name is taken from the receiver property's `columnName`. */
    public inline infix fun <reified R> KProperty<R>.from(noinline expression: (T) -> R): Unit =
        add(columnName, expression)

    /**
     * Adds a column named after the receiver property, built with
     * `DataColumn.createWithTypeInference` from the values of [inferType]'s expression.
     */
    public inline infix fun <reified R> KProperty<R>.from(inferType: InferType<T, R>): Unit =
        add(DataColumn.createWithTypeInference(columnName, source.map { inferType.expression(it) }))

    /** Wrapper that selects the type-inferring `from` overload for [expression]. */
    public data class InferType<T, R>(val expression: (T) -> R)

    /** Wraps [expression] into an [InferType]. */
    public inline fun <reified R> inferType(noinline expression: (T) -> R): InferType<T, R> = InferType(expression)

    /**
     * Runs [builder] in the context of the receiver string.
     *
     * NOTE(review): presumably this creates a column group named by the receiver — confirm
     * against the implementation.
     */
    public abstract operator fun String.invoke(builder: CreateDataFrameDsl<T>.() -> Unit)
}
// endregion
// region Create DataFrame from Map
/**
 * Creates a data frame from a map of column name to values.
 *
 * Each entry becomes a column built with `DataColumn.createWithTypeInference` from the key and
 * the values as a list; the columns are then combined with `toDataFrame()`.
 *
 * NOTE(review): the receiver type arguments were restored from context; the listing had lost
 * them.
 */
public fun Map<String, Iterable<Any?>>.toDataFrame(): AnyFrame {
    return map { DataColumn.createWithTypeInference(it.key, it.value.asList()) }.toDataFrame()
}
/**
 * Creates a (possibly nested) data frame from a map of column path to values.
 *
 * Each entry becomes a column built with `DataColumn.createWithTypeInference`, named after the
 * last path segment and paired with its full path. The pairs are then passed to
 * `toDataFrameFromPairs`.
 *
 * NOTE(review): the receiver type arguments and the explicit `<Unit>` type argument were
 * restored from context; the listing had lost them.
 */
@JvmName("toDataFrameColumnPathAnyNullable")
public fun Map<ColumnPath, Iterable<Any?>>.toDataFrame(): AnyFrame {
    return map { it.key to DataColumn.createWithTypeInference(it.key.last(), it.value.asList()) }.toDataFrameFromPairs<Unit>()
}
// endregion
// Listing footer (not Kotlin source): © 2015 - 2025 Weber Informatics LLC | Privacy Policy