
// com.infobip.kafkistry.recordstructure.AnalyzeContext.kt Maven / Gradle / Ivy
package com.infobip.kafkistry.recordstructure
import com.fasterxml.jackson.core.JsonParseException
import com.fasterxml.jackson.core.JsonProcessingException
import com.fasterxml.jackson.databind.ObjectMapper
import com.google.common.base.Utf8
import com.infobip.kafkistry.model.*
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.header.Headers
import java.io.CharConversionException
import java.util.concurrent.ConcurrentHashMap
import kotlin.math.absoluteValue
/**
 * Analysis context bound to one [topic] on one [cluster].
 *
 * Turns a raw Kafka record (headers, key, value) into a structure description and merges
 * that description into a shared per-cluster map. The payload is parsed with [objectMapper];
 * payloads that cannot be parsed as JSON are recorded as an "unknown" structure instead of
 * failing the analysis.
 *
 * NOTE(review): generic type arguments in this class (`ConsumerRecord<...>`,
 * `List<Pair<...>>`, `TimestampWrapper<...>`) were reconstructed from how the values are used
 * inside this class — confirm against the declarations in the rest of the project.
 */
open class AnalyzeContext(
    properties: RecordAnalyzerProperties,
    private val analyzeFilter: AnalyzeFilter,
    private val objectMapper: ObjectMapper,
    now: Long = generateTimestamp(),
    private val topic: TopicName,
    private val cluster: ClusterRef,
) : MergingContext(properties, now) {

    // Topic-level sampling decision, asked from the filter once per context instance.
    private val shouldSampleValues = analyzeFilter.shouldSampleValues(cluster, topic)

    /**
     * Analyzes [consumerRecord] and merges its structure into [clusterRecordsStructures].
     *
     * The entry for this context's cluster is created on first use; the per-topic entry is
     * keyed by the record's own topic name and merged with any structure already present.
     */
    fun analyzeRecord(
        consumerRecord: ConsumerRecord<ByteArray?, ByteArray?>,
        clusterRecordsStructures: ClusterRecordsStructuresMap,
    ) {
        val recordsStructure = analyzeRecordStructure(
            consumerRecord.headers(), consumerRecord.key(), consumerRecord.value()
        )
        val recordsStructures = clusterRecordsStructures.computeIfAbsent(cluster.identifier) { ConcurrentHashMap() }
        recordsStructures.merge(consumerRecord.topic(), wrapNow(recordsStructure)) { old, new ->
            wrapNow(old.field merge new.field)
        }
    }

    /**
     * Builds the structure description of a single record.
     *
     * - `null` payload -> [PayloadType.NULL] with only header fields and sizes.
     * - payload parseable by [objectMapper] -> [PayloadType.JSON] with analyzed fields.
     * - payload that fails JSON parsing / character decoding -> "unknown" structure.
     *
     * Any other exception is rethrown unchanged.
     */
    private fun analyzeRecordStructure(
        headers: Headers, recordKey: ByteArray?, recordPayload: ByteArray?
    ): TimestampWrappedRecordsStructure {
        // Headers are analyzed like a nameless object whose fields are the header keys.
        val headersMap = headers.associate { it.key() to it.value() }
        val headersValue = analyzeValue(headersMap, null)
        val recordSize = RecordTimedSize(
            valueSize = timedValueOf(recordPayload?.size ?: 0),
            keySize = timedValueOf(recordKey?.size ?: 0),
            // Header size = key length (in chars) + value length (in bytes), summed over all headers.
            headersSize = timedValueOf(headers.sumOf { it.key().length + (it.value()?.size ?: 0) }),
        )
        if (recordPayload == null) {
            return TimestampWrappedRecordsStructure(
                PayloadType.NULL,
                timestampWrappedHeaderFields = listOf(headersValue),
                timestampWrappedJsonFields = null,
                nullable = wrapNow(true),
                size = recordSize,
            )
        }
        return try {
            val input = objectMapper.readValue(recordPayload, Any::class.java)
            val value = analyzeValue(input, null)
            TimestampWrappedRecordsStructure(
                PayloadType.JSON,
                timestampWrappedHeaderFields = listOf(headersValue),
                timestampWrappedJsonFields = listOf(value),
                nullable = wrapNow(false),
                size = recordSize,
            )
        } catch (e: Exception) {
            when (e) {
                // Not-JSON payloads are an expected outcome, not an analysis failure.
                is JsonParseException, is JsonProcessingException, is CharConversionException -> {
                    timestampWrappedUnknownRecordsStructure(listOf(headersValue), false, recordSize)
                }
                else -> throw e
            }
        }
    }

    // Return type is inferred from TimedHistory.of(...) so no type argument has to be guessed here.
    private fun timedValueOf(value: Int) = TimedHistory.of(wrapNow(IntNumberSummary.ofSingle(value)))

    /**
     * Recursively describes [input] as a record field named [name].
     *
     * @param input parsed value: `null`, a [Map] (object), a [List] (array) or a leaf value
     * @param name field name, or `null` for the root value and for array elements
     * @param parentPath (type, name) pairs of the enclosing fields, outermost first
     */
    private fun analyzeValue(
        input: Any?,
        name: String?,
        parentPath: List<Pair<RecordFieldType, String?>> = emptyList(),
    ): TimestampWrapper<TimestampWrappedRecordField> {
        val field = when (input) {
            null -> TimestampWrappedRecordField(
                name = name,
                type = RecordFieldType.NULL,
                nullable = wrapNow(true),
            )
            is Map<*, *> -> {
                val childPath = parentPath + (RecordFieldType.OBJECT to name)
                val objectFields = input.map { (key, value) -> analyzeValue(value, key.toString(), childPath) }
                TimestampWrappedRecordField(
                    name = name,
                    type = RecordFieldType.OBJECT,
                    nullable = wrapNow(false),
                    children = objectFields,
                )
            }
            is List<*> -> {
                val elementPath = parentPath + (RecordFieldType.ARRAY to name)
                // One merged child per distinct element type, in order of first occurrence.
                // Every group produced by groupBy is non-empty, so reduce cannot fail.
                val listElementStruct = input
                    .map { analyzeValue(it, null, elementPath) }
                    .groupBy { it.field.type }
                    .values
                    .map { sameTyped -> sameTyped.reduce { acc, next -> acc merge next } }
                TimestampWrappedRecordField(
                    name = name,
                    type = RecordFieldType.ARRAY,
                    nullable = wrapNow(false),
                    children = listElementStruct,
                )
            }
            else -> {
                val aInput = if (input is ByteArray) decodeBytes(input) else input
                val type = TypeMappings.forClass(aInput.javaClass)
                TimestampWrappedRecordField(
                    name = name,
                    type = type,
                    nullable = wrapNow(false),
                    value = if (shouldSampleValues(parentPath + (type to name))) sampleValue(aInput) else null,
                )
            }
        }
        return wrapNow(field)
    }

    /**
     * Interprets raw bytes as the most specific leaf value possible:
     * an `Int` or `Long` when the bytes are well-formed UTF-8 text holding an integer,
     * a `String` when they are well-formed UTF-8 text, otherwise the bytes themselves.
     */
    private fun decodeBytes(bytes: ByteArray): Any {
        val text = bytes.takeIf { Utf8.isWellFormed(it) }?.decodeToString() ?: return bytes
        val number = text.toLongOrNull() ?: return text
        return if (number in (Int.MIN_VALUE..Int.MAX_VALUE)) number.toInt() else number
    }

    /** Sampling applies only when enabled for the topic AND allowed by the filter for [jsonPath]. */
    private fun shouldSampleValues(jsonPath: List<Pair<RecordFieldType, String?>>): Boolean {
        return shouldSampleValues && analyzeFilter.shouldSampleValuesForPath(jsonPath)
    }

    /**
     * Wraps [value] as a sampled field value, or marks it as too big when it is a string longer
     * than `valueSampling.maxStringLength` or a number whose magnitude exceeds
     * `valueSampling.maxNumberAbs`.
     */
    private fun sampleValue(value: Any): TimestampWrappedFieldValue {
        val nowFalse = wrapNow(false)
        val tooBig = when (value) {
            is String -> value.length > properties.valueSampling.maxStringLength
            is Number -> isNumberTooBig(value)
            else -> false
        }
        if (tooBig) {
            return TimestampWrappedFieldValue(nowFalse, tooBig = wrapNow(true))
        }
        return TimestampWrappedFieldValue(
            highCardinality = nowFalse,
            tooBig = nowFalse,
            values = listOf(wrapNow(value)),
        )
    }

    /**
     * Magnitude check that does not overflow.
     *
     * `Long.MIN_VALUE.absoluteValue` is still negative, so it (and any floating point value that
     * saturates to it in `toLong()`) has to be treated as too big explicitly. A `BigInteger`
     * that does not fit into a `Long` would be truncated to its low 64 bits by `toLong()`,
     * so it is rejected by bit length before conversion.
     */
    private fun isNumberTooBig(value: Number): Boolean {
        if (value is java.math.BigInteger && value.bitLength() > 63) {
            return true
        }
        val asLong = value.toLong()
        return asLong == Long.MIN_VALUE || asLong.absoluteValue > properties.valueSampling.maxNumberAbs
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy