
// com.infobip.kafkistry.recordstructure.MergingContext.kt (Maven / Gradle / Ivy)
package com.infobip.kafkistry.recordstructure
import com.infobip.kafkistry.model.*
import kotlin.math.absoluteValue
import kotlin.math.min
open class MergingContext(
properties: RecordAnalyzerProperties,
now: Long = generateTimestamp(),
) : ProcessingContext(properties, now) {
/**
 * Builds a [TimestampWrappedRecordsStructure] of payload type [PayloadType.UNKNOWN].
 *
 * The resulting structure never carries JSON fields (they are set to `null`).
 *
 * @param headersFields header fields to store as-is; may be `null`
 * @param nullable plain nullability flag; it is wrapped with `wrapNow` before being stored
 * @param size size information stored unchanged
 */
fun timestampWrappedUnknownRecordsStructure(
    headersFields: List<TimestampWrapper<TimestampWrappedRecordField>>?,
    nullable: Boolean,
    size: RecordTimedSize,
) = TimestampWrappedRecordsStructure(
    PayloadType.UNKNOWN,
    timestampWrappedHeaderFields = headersFields,
    timestampWrappedJsonFields = null,
    nullable = wrapNow(nullable),
    size = size,
)
/**
 * Combines this structure with [other] into a single structure.
 *
 * The result is of type [PayloadType.JSON] as soon as at least one of the two sides is JSON;
 * in that case the JSON fields of both sides are merged as well. When neither side is JSON,
 * an UNKNOWN structure (without JSON fields) is produced instead.
 * Header fields, the nullable flag and the sizes are merged in both cases.
 */
infix fun TimestampWrappedRecordsStructure.merge(
    other: TimestampWrappedRecordsStructure
): TimestampWrappedRecordsStructure {
    val mergedHeaders = timestampWrappedHeaderFields merge other.timestampWrappedHeaderFields
    val anyJson = payloadType == PayloadType.JSON || other.payloadType == PayloadType.JSON
    return if (anyJson) {
        TimestampWrappedRecordsStructure(
            PayloadType.JSON,
            timestampWrappedHeaderFields = mergedHeaders,
            timestampWrappedJsonFields = timestampWrappedJsonFields merge other.timestampWrappedJsonFields,
            nullable = nullable mergeBoolean other.nullable,
            size = size merge other.size,
        )
    } else {
        // only the raw boolean is handed over; the factory wraps it again via wrapNow
        timestampWrappedUnknownRecordsStructure(
            headersFields = mergedHeaders,
            nullable = (nullable mergeBoolean other.nullable).field,
            size = size merge other.size,
        )
    }
}
/**
 * Merges `keySize`, `valueSize` and `headersSize` of this instance pairwise with the
 * corresponding properties of [other] and returns them as a new [RecordTimedSize].
 */
infix fun RecordTimedSize.merge(other: RecordTimedSize): RecordTimedSize {
    val mergedKeySize = keySize merge other.keySize
    val mergedValueSize = valueSize merge other.valueSize
    val mergedHeadersSize = headersSize merge other.headersSize
    return RecordTimedSize(
        keySize = mergedKeySize,
        valueSize = mergedValueSize,
        headersSize = mergedHeadersSize,
    )
}
/**
 * Merges this history with [other] by calling the two-argument `merge(other, combiner)` overload
 * (not visible in this file chunk; presumably declared on `TimedHistory` — verify).
 *
 * The combiner passed here merges the `field` values of the two wrappers and keeps the
 * smaller (earlier) of their two timestamps.
 *
 * NOTE(review): `TimedHistory` appears without type arguments in this signature; confirm
 * against its declaration whether generic arguments are missing here.
 */
infix fun TimedHistory.merge(
other: TimedHistory
): TimedHistory {
return this.merge(other) { first, second ->
TimestampWrapper(
field = first.field merge second.field,
// keep the earlier of the two timestamps
timestamp = min(first.timestamp, second.timestamp),
)
}
}
/**
 * Merges two optional lists of timestamp-wrapped record fields.
 *
 * - When either side is `null`, or both sides are equal, [other] is returned if it is
 *   non-null, otherwise this list.
 * - When this list consists of exactly one unnamed field (`name == null`) whose timestamp
 *   is reported as too old, it is dropped and [other] is returned.
 * - Otherwise both lists are merged field by field.
 */
infix fun List<TimestampWrapper<TimestampWrappedRecordField>>?.merge(
    other: List<TimestampWrapper<TimestampWrappedRecordField>>?
): List<TimestampWrapper<TimestampWrappedRecordField>>? {
    if (this == null || other == null || this == other) {
        return other ?: this
    }
    val thisIsTooOldDynamicField = size == 1 && first().let {
        it.field.name == null && it.timestamp.tooOld()
    }
    if (thisIsTooOldDynamicField) {
        return other
    }
    return this doMerge other
}
/**
 * Merges two non-null field lists.
 *
 * When both lists are non-empty, the fields are first checked for looking like dynamically
 * named ones; if so, the collapsed result is returned directly. Otherwise fields are matched
 * by `(name, type)`:
 * - a NULL-typed entry is dropped when the other side has a field of the same name but no
 *   NULL-typed entry for it;
 * - entries present on both sides are merged;
 * - an entry present on one side only is marked nullable when the other side either has a
 *   NULL-typed entry of that name or does not know the name at all.
 */
private infix fun List<TimestampWrapper<TimestampWrappedRecordField>>.doMerge(
    other: List<TimestampWrapper<TimestampWrappedRecordField>>
): List<TimestampWrapper<TimestampWrappedRecordField>> {
    val thisNames = this.map { it.field.name }.toSet()
    val otherNames = other.map { it.field.name }.toSet()
    if (isNotEmpty() && other.isNotEmpty()) {
        maybeCollapseAsDynamicFieldNames(other, thisNames, otherNames)?.let { collapsed -> return collapsed }
    }
    val thisNulls = filter { it.field.type == RecordFieldType.NULL }.associateBy { it.field.name }
    val otherNulls = other.filter { it.field.type == RecordFieldType.NULL }.associateBy { it.field.name }
    val thisFields = associateBy { it.field.name to it.field.type }
    val otherFields = other.associateBy { it.field.name to it.field.type }
    return (thisFields.keys + otherFields.keys).distinct().mapNotNull { key ->
        val fieldName = key.first
        val thisField = thisFields[key]
        val otherField = otherFields[key]
        val thisNull = thisNulls[fieldName]
        val otherNull = otherNulls[fieldName]
        when {
            // don't output NULL type if there is matching name of non-NULL type
            thisField.isNullType() && (fieldName in otherNames) && !otherField.isNullType() -> null
            otherField.isNullType() && (fieldName in thisNames) && !thisField.isNullType() -> null
            // do actual merge
            thisField != null && otherField != null -> thisField merge otherField
            // mark as nullable
            thisField != null -> if (otherNull != null || fieldName !in otherNames) thisField.asNullable() else thisField
            otherField != null -> if (thisNull != null || fieldName !in thisNames) otherField.asNullable() else otherField
            else -> null
        }
    }
}
/**
 * Collapses the fields of this list and [other] into unnamed "variable" fields, one per
 * field type, when the field names look dynamic; returns `null` otherwise.
 *
 * Names are treated as dynamic when any of these holds:
 * - either name set contains a `null` name or a name for which `isDynamicName()` is true;
 * - the two name counts differ by more than `properties.cardinalityDiffThreshold` and one
 *   is more than `properties.cardinalityMagnitudeFactorThreshold` times the other;
 * - the two name sets have no name in common.
 *
 * @param other the fields of the other side
 * @param thisNames names of the fields in this list
 * @param otherNames names of the fields in [other]
 */
private fun List<TimestampWrapper<TimestampWrappedRecordField>>.maybeCollapseAsDynamicFieldNames(
    other: List<TimestampWrapper<TimestampWrappedRecordField>>,
    thisNames: Set<String?>,
    otherNames: Set<String?>,
): List<TimestampWrapper<TimestampWrappedRecordField>>? {
    infix fun Int.differentMagnitude(other: Int): Boolean {
        val thresholdDiff = properties.cardinalityDiffThreshold
        val thresholdFactor = properties.cardinalityMagnitudeFactorThreshold
        return (this - other).absoluteValue > thresholdDiff &&
            (this > thresholdFactor * other || thresholdFactor * this < other)
    }

    // a missing (null) name counts as dynamic
    fun Collection<String?>.hasDynamicFieldNames() = any { it?.isDynamicName() ?: true }
    fun commonNames() = thisNames.intersect(otherNames)
    fun namesCountDiffTooMuch() = thisNames.size.differentMagnitude(otherNames.size)
    fun isDynamic() = thisNames.hasDynamicFieldNames() || otherNames.hasDynamicFieldNames() ||
        namesCountDiffTooMuch() || commonNames().isEmpty()

    if (!isDynamic()) {
        return null
    }
    return (this + other)
        .groupBy { it.field.type }
        .map { (_, values) -> values.squashAsVariable() }
}
/** Returns `true` only for a non-null wrapper whose field type is [RecordFieldType.NULL]. */
private fun TimestampWrapper<TimestampWrappedRecordField>?.isNullType() =
    this?.field?.type == RecordFieldType.NULL
/**
 * Tells whether this name should be treated as a dynamic (non-fixed) field name.
 *
 * A name is dynamic when it is blank, when its first character cannot start a Java
 * identifier, or when it contains any character that is neither `-` nor a valid Java
 * identifier part.
 */
private fun String.isDynamicName(): Boolean =
    isBlank() ||
        !first().isJavaIdentifierStart() ||
        any { ch -> ch != '-' && !ch.isJavaIdentifierPart() }
/**
 * Collapses all entries of this list into a single unnamed field.
 *
 * The first entry serves as the template (passed through `withField`); its name is cleared
 * and its `value` and `children` are replaced by the merge of the values / children of
 * every entry in the list.
 *
 * The list must not be empty: `first()` and `reduce` throw on an empty receiver.
 */
private fun List<TimestampWrapper<TimestampWrappedRecordField>>.squashAsVariable():
    TimestampWrapper<TimestampWrappedRecordField> {
    return first().withField { field ->
        field.copy(
            name = null,
            value = asSequence().map { it.field.value }.reduce { acc, wrappers ->
                acc merge wrappers
            },
            children = asSequence().map { it.field.children }.reduce { acc, wrappers ->
                acc merge wrappers
            }
        )
    }
}
/**
 * Produces, via `withField`, a wrapper whose field has `nullable` set to `true`,
 * wrapped with `wrapNow`.
 */
private fun TimestampWrapper<TimestampWrappedRecordField>.asNullable(): TimestampWrapper<TimestampWrappedRecordField> =
    withField { it.copy(nullable = wrapNow(true)) }
/**
 * Merges two wrapped record fields.
 *
 * When both fields are equal, [other] is returned unchanged. Otherwise a new field is built
 * and wrapped with `wrapNow`:
 * - `nullable` is the boolean merge of both flags;
 * - `name` is taken from [other];
 * - `type` is taken from [other], unless that is [RecordFieldType.NULL], in which case this
 *   field's type is kept;
 * - `children` and `value` are merged from both sides.
 */
infix fun TimestampWrapper<TimestampWrappedRecordField>.merge(
    other: TimestampWrapper<TimestampWrappedRecordField>
): TimestampWrapper<TimestampWrappedRecordField> {
    if (field == other.field) {
        return other
    }
    return wrapNow(
        TimestampWrappedRecordField(
            nullable = field.nullable mergeBoolean other.field.nullable,
            name = other.field.name,
            type = when (other.field.type) {
                // a NULL observation carries no type information, keep what is already known
                RecordFieldType.NULL -> field.type
                else -> other.field.type
            },
            children = when {
                field.children != other.field.children -> field.children merge other.field.children
                else -> other.field.children
            },
            value = field.value merge other.field.value,
        )
    )
}
/**
 * Merges two timestamped boolean flags.
 *
 * - This flag is `true`: it is kept as-is (including its timestamp), unless [other] is
 *   `false` and this flag's timestamp is too old, in which case the result is a fresh `false`.
 * - This flag is `false`: the result is a fresh wrapper (via `wrapNow`) holding the logical OR
 *   of both flags.
 */
private infix fun TimestampWrapper<Boolean>.mergeBoolean(
    other: TimestampWrapper<Boolean>
): TimestampWrapper<Boolean> = when {
    field -> if (!other.field && timestamp.tooOld()) wrapNow(false) else this
    else -> wrapNow(field || other.field)
}
/**
 * Merges two optional field value descriptors.
 *
 * When either side is `null`, or both are equal, [other] is returned if non-null, otherwise
 * this value. Otherwise:
 * - the `highCardinality` flags are merged; when the merged flag is set, no values are kept;
 * - otherwise the value lists of both sides are merged, and if the merged list exceeds the
 *   configured max cardinality it is discarded and the resulting flag reflects that;
 * - the `tooBig` flags are merged.
 */
infix fun TimestampWrappedFieldValue?.merge(
    other: TimestampWrappedFieldValue?
): TimestampWrappedFieldValue? {
    if (this == null || other == null || this == other) {
        return other ?: this
    }
    val mergedHighCardinality = highCardinality mergeBoolean other.highCardinality

    // generic on purpose: only the list size matters here, not the element type
    fun <T> List<T>.takeIfNotHighCardinality(): List<T> = when {
        isHighCardinality() -> emptyList()
        mergedHighCardinality.field -> emptyList()
        else -> this
    }

    val mergedValues = when (mergedHighCardinality.field) {
        true -> emptyList()
        false -> values mergeValueSet other.values
    }
    return TimestampWrappedFieldValue(
        highCardinality = mergedHighCardinality mergeBoolean wrapNow(mergedValues.isHighCardinality()),
        tooBig = tooBig mergeBoolean other.tooBig,
        values = mergedValues.takeIfNotHighCardinality(),
    )
}
/** Tells whether this list holds more elements than `properties.valueSampling.maxCardinality`. */
private fun List<*>.isHighCardinality(): Boolean {
    val cardinalityLimit = properties.valueSampling.maxCardinality
    return size > cardinalityLimit
}
/**
 * Merges two lists of timestamped values into one list with a single entry per distinct value.
 *
 * Entries whose timestamp is too old are dropped first; among entries holding the same value,
 * the one with the greater timestamp wins (on equal timestamps the later encountered one).
 *
 * NOTE(review): the element type is expressed as a type parameter because the concrete type
 * argument is not visible in this file; narrow it to the model's actual value type if desired.
 */
private infix fun <T : Any> List<TimestampWrapper<T>>.mergeValueSet(
    other: List<TimestampWrapper<T>>
): List<TimestampWrapper<T>> {
    return sequenceOf(this, other)
        .flatten()
        .filter { !it.timestamp.tooOld() }
        .groupingBy { it.field }
        .reduce { _, accumulator, element ->
            if (accumulator.timestamp > element.timestamp) accumulator else element
        }
        .values.toList()
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy