All downloads are free. Search and download functionality uses the official Maven repository.

com.infobip.kafkistry.recordstructure.MergingContext.kt Maven / Gradle / Ivy

package com.infobip.kafkistry.recordstructure

import com.infobip.kafkistry.model.*
import kotlin.math.absoluteValue
import kotlin.math.min

/**
 * Processing context that supplies the `merge` operations used to combine two
 * timestamp-wrapped record-structure snapshots (fields, values, sizes, flags) into one.
 *
 * NOTE(review): generic type arguments appear to have been stripped from this copy of the file
 * (e.g. `List>?`, `Set`, bare `TimestampWrapper`); restore them from the upstream source
 * before compiling. The several `merge` overloads below are told apart by those arguments.
 */
open class MergingContext(
    properties: RecordAnalyzerProperties,
    now: Long = generateTimestamp(),
) : ProcessingContext(properties, now) {

    /**
     * Builds a records structure of payload type [PayloadType.UNKNOWN].
     *
     * The result carries the given header fields and size, has no JSON fields,
     * and its nullable flag is the plain [nullable] value wrapped via `wrapNow`.
     */
    fun timestampWrappedUnknownRecordsStructure(
        headersFields: List>?,
        nullable: Boolean,
        size: RecordTimedSize,
    ) = TimestampWrappedRecordsStructure(
        PayloadType.UNKNOWN,
        timestampWrappedHeaderFields = headersFields,
        timestampWrappedJsonFields = null,
        nullable = wrapNow(nullable),
        size = size,
    )

    /**
     * Merges [other] into this records structure and returns the combined structure.
     *
     * - If neither side has payload type [PayloadType.JSON], the result is an UNKNOWN structure
     *   holding only the merged header fields, merged nullability and merged size. Only the boolean
     *   of the merged nullable flag is kept; it is re-wrapped via `wrapNow` by
     *   [timestampWrappedUnknownRecordsStructure].
     * - Otherwise (at least one side is JSON) the result is a JSON structure whose header fields,
     *   JSON fields, nullability and size are each merged pairwise.
     */
    infix fun TimestampWrappedRecordsStructure.merge(
        other: TimestampWrappedRecordsStructure
    ): TimestampWrappedRecordsStructure {
        if (payloadType != PayloadType.JSON && other.payloadType != PayloadType.JSON) {
            return timestampWrappedUnknownRecordsStructure(
                headersFields = timestampWrappedHeaderFields merge other.timestampWrappedHeaderFields,
                nullable = (nullable mergeBoolean other.nullable).field,
                size = size merge other.size,
            )
        }
        return TimestampWrappedRecordsStructure(
            PayloadType.JSON,
            timestampWrappedHeaderFields = timestampWrappedHeaderFields merge other.timestampWrappedHeaderFields,
            timestampWrappedJsonFields = timestampWrappedJsonFields merge other.timestampWrappedJsonFields,
            nullable = nullable mergeBoolean other.nullable,
            size = size merge other.size,
        )
    }

    /**
     * Merges two size records component-wise: key size, value size and headers size
     * are each merged with their counterpart from [other].
     */
    infix fun RecordTimedSize.merge(other: RecordTimedSize): RecordTimedSize {
        return RecordTimedSize(
            keySize = keySize merge other.keySize,
            valueSize = valueSize merge other.valueSize,
            headersSize = headersSize merge other.headersSize,
        )
    }

    /**
     * Merges two timed histories by delegating to the lambda-taking `merge` overload
     * (declared outside this class). The combiner passed to it merges the two `field`s
     * and keeps the smaller of the two timestamps.
     */
    infix fun TimedHistory.merge(
        other: TimedHistory
    ): TimedHistory {
        return this.merge(other) { first, second ->
            TimestampWrapper(
                field = first.field merge second.field,
                timestamp = min(first.timestamp, second.timestamp),
            )
        }
    }

    /**
     * Merges two nullable lists of timestamp-wrapped fields.
     *
     * - If either list is `null`, or both are equal, returns [other] when it is non-null, else this list.
     * - If this list consists of a single entry whose field has no name and whose timestamp is
     *   `tooOld()`, that entry is discarded and [other] is returned as-is.
     * - Otherwise the lists are combined by [doMerge].
     */
    infix fun List>?.merge(
        other: List>?
    ): List>? {
        if (this == null || other == null || this == other) {
            return other ?: this
        }

        // a lone nameless entry is what squashAsVariable() produces for collapsed dynamic names
        val thisIsTooOldDynamicField = size == 1 && first().let {
            it.field.name == null && it.timestamp.tooOld()
        }
        if (thisIsTooOldDynamicField) {
            return other
        }
        return this doMerge other
    }

    /**
     * Combines two non-null field lists.
     *
     * When both lists are non-empty, first tries [maybeCollapseAsDynamicFieldNames]; if that yields
     * a result it is returned immediately. Otherwise entries are indexed by `(name, type)` and every
     * key present on either side is resolved as follows:
     * - a NULL-typed entry is dropped when the other side has the same name with a non-NULL type;
     * - entries present on both sides under the same key are merged;
     * - an entry present on one side only is marked nullable when the other side has a NULL-typed
     *   entry of that name or does not contain the name at all; otherwise it is kept unchanged.
     */
    private infix fun List>.doMerge(
        other: List>
    ): List> {
        val thisNames = this.map { it.field.name }.toSet()
        val otherNames = other.map { it.field.name }.toSet()

        if (isNotEmpty() && other.isNotEmpty()) {
            // non-local return: leaves doMerge with the collapsed list when collapsing applies
            maybeCollapseAsDynamicFieldNames(other, thisNames, otherNames)?.run { return this }
        }

        // NULL-typed entries looked up by name only, used for the nullable decision below
        val thisNulls = filter { it.field.type == RecordFieldType.NULL }.associateBy { it.field.name }
        val otherNulls = other.filter { it.field.type == RecordFieldType.NULL }.associateBy { it.field.name }
        // all entries looked up by (name, type): same name with different types are distinct keys
        val thisFields = associateBy { it.field.name to it.field.type }
        val otherFields = other.associateBy { it.field.name to it.field.type }

        return (thisFields.keys + otherFields.keys).distinct().mapNotNull { key ->
            val fieldName = key.first
            val thisField = thisFields[key]
            val otherField = otherFields[key]
            val thisNull = thisNulls[fieldName]
            val otherNull = otherNulls[fieldName]
            when {
                // don't output the NULL-typed entry if the other side has the same name with a non-NULL type
                thisField.isNullType() && (fieldName in otherNames) && !otherField.isNullType() -> null
                otherField.isNullType() && (fieldName in thisNames) && !thisField.isNullType() -> null
                // same (name, type) on both sides: do the actual merge
                thisField != null && otherField != null -> thisField merge otherField
                // present on one side only: mark as nullable if the other side saw NULL for it or lacks the name
                thisField != null -> if (otherNull != null || fieldName !in otherNames) thisField.asNullable() else thisField
                otherField != null -> if (thisNull != null || fieldName !in thisNames) otherField.asNullable() else otherField
                else -> null
            }
        }
    }

    /**
     * Decides whether the field names of the two lists should be treated as dynamic and, if so,
     * collapses both lists into one nameless entry per field type.
     *
     * Names are treated as dynamic when any of the following holds:
     * - some name on either side is `null` or satisfies [isDynamicName];
     * - the two name counts differ by more than `properties.cardinalityDiffThreshold` and one count
     *   exceeds `properties.cardinalityMagnitudeFactorThreshold` times the other;
     * - the two sides have no name in common.
     *
     * @return the collapsed list, or `null` when the names are not considered dynamic.
     */
    private fun List>.maybeCollapseAsDynamicFieldNames(
        other: List>,
        thisNames: Set,
        otherNames: Set,
    ): List>? {
        // true only if both the absolute difference and the ratio between the counts exceed their thresholds
        infix fun Int.differentMagnitude(other: Int): Boolean {
            val thresholdDiff = properties.cardinalityDiffThreshold
            val thresholdFactor = properties.cardinalityMagnitudeFactorThreshold
            return (this - other).absoluteValue > thresholdDiff && (this > thresholdFactor * other || thresholdFactor * this < other)
        }

        // a null name counts as dynamic
        fun Collection.hasDynamicFieldNames() = any { it?.isDynamicName() ?: true }
        fun commonNames() = thisNames.intersect(otherNames)
        fun namesCountDiffTooMuch() = thisNames.size.differentMagnitude(otherNames.size)
        fun isDynamic() = thisNames.hasDynamicFieldNames() || otherNames.hasDynamicFieldNames() ||
                namesCountDiffTooMuch() || commonNames().isEmpty()

        return when (isDynamic()) {
            // one squashed (nameless) entry per distinct field type across both lists
            true -> (this + other)
                .groupBy { it.field.type }
                .map { (_, values) ->
                    values.squashAsVariable()
                }
            false -> null
        }
    }

    /** True when the receiver is non-null and its field type is [RecordFieldType.NULL]. */
    private fun TimestampWrapper?.isNullType() = this?.field?.type == RecordFieldType.NULL

    /**
     * Heuristic for "this name looks generated rather than fixed": true when the string is blank,
     * does not start with a Java identifier start character, or contains any character that is
     * neither `-` nor a Java identifier part.
     */
    private fun String.isDynamicName(): Boolean {
        if (isBlank()) return true
        if (!get(0).isJavaIdentifierStart()) return true
        return !all { it == '-' || it.isJavaIdentifierPart() }
    }

    /**
     * Collapses all entries of the list into a single entry based on the first one:
     * its field name is cleared (`null`) and its `value` and `children` become the merge
     * of the respective properties of every entry, folded left to right.
     *
     * The list must be non-empty (`first()` and `reduce` throw on an empty receiver).
     */
    private fun List>.squashAsVariable(): TimestampWrapper {
        return first().withField { field ->
            field.copy(
                name = null,
                value = asSequence().map { it.field.value }.reduce { acc, wrappers ->
                    acc merge wrappers
                },
                children = asSequence().map { it.field.children }.reduce { acc, wrappers ->
                    acc merge wrappers
                }
            )
        }
    }

    /** Replaces the wrapped field (via `withField`) with a copy whose nullable flag is `wrapNow(true)`. */
    private fun TimestampWrapper.asNullable() =
        withField { it.copy(nullable = wrapNow(true)) }

    /**
     * Merges two wrapped record fields.
     *
     * If both wrapped fields are equal, [other] is returned unchanged. Otherwise a new field is built
     * and wrapped via `wrapNow`:
     * - `nullable` is the boolean merge of both flags;
     * - `name` is taken from [other];
     * - `type` is taken from [other], unless that is [RecordFieldType.NULL], in which case this type is kept;
     * - `children` are merged when they differ, else taken from [other];
     * - `value` is the merge of both values.
     */
    infix fun TimestampWrapper.merge(
        other: TimestampWrapper
    ): TimestampWrapper {
        if (field == other.field) {
            return other
        }
        return wrapNow(
            TimestampWrappedRecordField(
                nullable = field.nullable mergeBoolean other.field.nullable,
                name = other.field.name,
                type = when (other.field.type) {
                    RecordFieldType.NULL -> field.type
                    else -> other.field.type
                },
                children = when {
                    field.children != other.field.children -> field.children merge other.field.children
                    else -> other.field.children
                },
                value = field.value merge other.field.value,
            )
        )
    }

    /**
     * Merges two timestamp-wrapped boolean flags.
     *
     * - This flag is `true`: it is kept as-is (same wrapper, same timestamp), unless [other] is `false`
     *   and this timestamp is `tooOld()`, in which case the result is `wrapNow(false)`.
     * - This flag is `false`: the result is [other]'s flag value wrapped via `wrapNow`.
     */
    private infix fun TimestampWrapper.mergeBoolean(
        other: TimestampWrapper
    ): TimestampWrapper = when {
        field -> when (!other.field && timestamp.tooOld()) {
            true -> wrapNow(false)
            false -> this
        }
        // `field` is false on this branch, so the expression evaluates to other.field
        else -> wrapNow(field || other.field)
    }

    /**
     * Merges two nullable field-value descriptors.
     *
     * If either is `null`, or both are equal, returns [other] when it is non-null, else this.
     * Otherwise:
     * - the high-cardinality flags are merged; if the merged flag is set, no values are kept;
     * - otherwise both value lists are combined by [mergeValueSet]; if the combined list itself
     *   exceeds the cardinality limit the result is flagged high-cardinality and the values are dropped;
     * - the `tooBig` flags are merged.
     */
    infix fun TimestampWrappedFieldValue?.merge(
        other: TimestampWrappedFieldValue?
    ): TimestampWrappedFieldValue? {
        if (this == null || other == null || this == other) {
            return other ?: this
        }
        val mergedHighCardinality = highCardinality mergeBoolean other.highCardinality
        // yields an empty list when the list is over the limit or the merged flag is already set
        fun  List.takeIfNotHighCardinality(): List = when {
            isHighCardinality() -> emptyList()
            mergedHighCardinality.field -> emptyList()
            else -> this
        }

        val mergedValues = when (mergedHighCardinality.field) {
            true -> emptyList()
            false -> values mergeValueSet other.values
        }
        return TimestampWrappedFieldValue(
            // also flips to high cardinality when the freshly merged value list exceeds the limit
            highCardinality = mergedHighCardinality mergeBoolean wrapNow(mergedValues.isHighCardinality()),
            tooBig = tooBig mergeBoolean other.tooBig,
            values = mergedValues.takeIfNotHighCardinality(),
        )
    }

    /** True when the list holds more elements than `properties.valueSampling.maxCardinality`. */
    private fun List<*>.isHighCardinality() = size > properties.valueSampling.maxCardinality

    /**
     * Combines two lists of timestamp-wrapped values into one list with a single entry per
     * distinct `field`: entries whose timestamp is `tooOld()` are dropped, and among duplicates
     * the entry with the greater timestamp wins (on equal timestamps the later-encountered one).
     */
    private infix fun  List>.mergeValueSet(
        other: List>
    ): List> {
        return sequenceOf(this, other)
            .flatten()
            .filter { !it.timestamp.tooOld() }
            .groupingBy { it.field }
            .reduce { _, accumulator, element ->
                if (accumulator.timestamp > element.timestamp) accumulator else element
            }
            .values.toList()
    }


}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy