All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.avrokotlin.avro4k.AvroSingleObject.kt Maven / Gradle / Ivy

Go to download

Avro binary format support for kotlin, built on top of kotlinx-serialization

The newest version!
package com.github.avrokotlin.avro4k

import kotlinx.serialization.BinaryFormat
import kotlinx.serialization.DeserializationStrategy
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerializationException
import kotlinx.serialization.SerializationStrategy
import kotlinx.serialization.modules.SerializersModule
import kotlinx.serialization.serializer
import org.apache.avro.Schema
import org.apache.avro.SchemaNormalization
import java.io.ByteArrayOutputStream
import java.io.InputStream
import java.io.OutputStream
import java.nio.ByteBuffer
import java.nio.ByteOrder

/**
 * [BinaryFormat] that reads and writes Avro's single-object encoding.
 *
 * Single Avro objects are encoded as follows:
 * - A two-byte marker, C3 01, to show that the message is Avro and uses this single-record format (version 1).
 * - The 8-byte little-endian CRC-64-AVRO fingerprint of the object's schema.
 * - The Avro object encoded using Avro's binary encoding.
 *
 * [spec](https://avro.apache.org/docs/1.11.1/specification/#single-object-encoding)
 *
 * @param schemaRegistry a function to find a schema by its fingerprint, and returns null when not found. You should use [SchemaNormalization.parsingFingerprint64] to generate the fingerprint.
 * @param avro the [Avro] instance that performs the binary encoding/decoding of the payload and provides the [serializersModule].
 */
@ExperimentalSerializationApi
public class AvroSingleObject(
    private val schemaRegistry: (fingerprint: Long) -> Schema?,
    @PublishedApi
    internal val avro: Avro = Avro,
) : BinaryFormat {
    override val serializersModule: SerializersModule
        get() = avro.serializersModule

    /** Computes the CRC-64-AVRO parsing fingerprint of this schema as raw bytes, ready to be written in the header. */
    private fun Schema.crc64avro(): ByteArray = SchemaNormalization.parsingFingerprint("CRC-64-AVRO", this)

    /**
     * Reads exactly [size] bytes from this stream.
     *
     * A single [InputStream.read] call may return fewer bytes than requested, so the read is repeated
     * until the buffer is full.
     *
     * @throws SerializationException if the stream ends before [size] bytes could be read.
     */
    private fun InputStream.readExactly(size: Int): ByteArray {
        val buffer = ByteArray(size)
        var offset = 0
        while (offset < size) {
            val count = read(buffer, offset, size - offset)
            if (count < 0) {
                throw SerializationException(
                    "Not a valid single-object avro format, unexpected end of stream while reading the schema fingerprint",
                )
            }
            offset += count
        }
        return buffer
    }

    /**
     * Writes [value] to [outputStream] in the single-object format: the magic byte, the format version,
     * the fingerprint of [writerSchema], then the payload encoded by [avro].
     *
     * The stream is neither flushed nor closed by this method.
     *
     * @param writerSchema the schema used to encode [value]; its fingerprint is written in the header.
     * @param serializer the strategy used to serialize [value].
     * @param value the object to encode.
     * @param outputStream the destination of the encoded bytes.
     */
    public fun <T> encodeToStream(
        writerSchema: Schema,
        serializer: SerializationStrategy<T>,
        value: T,
        outputStream: OutputStream,
    ) {
        outputStream.write(MAGIC_BYTE)
        outputStream.write(FORMAT_VERSION)
        outputStream.write(writerSchema.crc64avro())
        avro.encodeToStream(writerSchema, serializer, value, outputStream)
    }

    /**
     * Reads a single-object encoded value from [inputStream].
     *
     * The header is validated, the writer schema is resolved through the schema registry using the
     * fingerprint found in the header, then the payload is decoded by [avro].
     *
     * @param deserializer the strategy used to deserialize the payload.
     * @param inputStream the source of the encoded bytes, positioned at the magic byte.
     * @return the decoded value.
     * @throws IllegalStateException if the magic byte or the version byte is not the expected one.
     * @throws SerializationException if the stream ends inside the fingerprint, or if no schema is
     * registered for the fingerprint.
     */
    public fun <T> decodeFromStream(
        deserializer: DeserializationStrategy<T>,
        inputStream: InputStream,
    ): T {
        check(inputStream.read() == MAGIC_BYTE) { "Not a valid single-object avro format, bad magic byte" }
        check(inputStream.read() == FORMAT_VERSION) { "Not a valid single-object avro format, bad version byte" }
        val fingerprint =
            ByteBuffer.wrap(inputStream.readExactly(FINGERPRINT_SIZE))
                .order(ByteOrder.LITTLE_ENDIAN)
                .getLong()
        // Unsigned hex rendering: Long.toString(16) would print negative fingerprints as "0x-...".
        val writerSchema =
            schemaRegistry(fingerprint)
                ?: throw SerializationException(
                    "schema not found for the given object's schema fingerprint 0x${java.lang.Long.toHexString(fingerprint)}",
                )

        return avro.decodeFromStream(writerSchema, deserializer, inputStream)
    }

    /**
     * Decodes a single-object encoded value from [bytes].
     *
     * @see decodeFromStream
     */
    public override fun <T> decodeFromByteArray(
        deserializer: DeserializationStrategy<T>,
        bytes: ByteArray,
    ): T {
        return bytes.inputStream().use {
            decodeFromStream(deserializer, it)
        }
    }

    /**
     * Encodes [value] in the single-object format, using the schema that [avro] derives from the
     * descriptor of [serializer] as the writer schema.
     */
    override fun <T> encodeToByteArray(
        serializer: SerializationStrategy<T>,
        value: T,
    ): ByteArray {
        return encodeToByteArray(avro.schema(serializer.descriptor), serializer, value)
    }

    private companion object {
        /** Size in bytes of the CRC-64-AVRO fingerprint stored in the header. */
        private const val FINGERPRINT_SIZE = 8
    }
}

// First byte of the two-byte single-object marker (C3 01); declared as Int so it can be
// compared directly with the result of InputStream.read().
private const val MAGIC_BYTE: Int = 0xC3

// Second byte of the marker: the version of the single-object format (version 1).
private const val FORMAT_VERSION: Int = 1

/**
 * Encodes [value] in the single-object format into a new byte array.
 *
 * @param writerSchema the schema used to encode [value]; its fingerprint is written in the header.
 * @param serializer the strategy used to serialize [value].
 * @param value the object to encode.
 * @return the header followed by the binary-encoded payload.
 */
public fun <T> AvroSingleObject.encodeToByteArray(
    writerSchema: Schema,
    serializer: SerializationStrategy<T>,
    value: T,
): ByteArray {
    val buffer = ByteArrayOutputStream()
    encodeToStream(writerSchema, serializer, value, buffer)
    return buffer.toByteArray()
}

/**
 * Encodes [value] in the single-object format using [writerSchema], resolving the serializer of [T]
 * from the serializers module of the underlying Avro instance.
 *
 * @param writerSchema the schema used to encode [value]; its fingerprint is written in the header.
 * @param value the object to encode.
 */
public inline fun <reified T> AvroSingleObject.encodeToByteArray(
    writerSchema: Schema,
    value: T,
): ByteArray = encodeToByteArray(writerSchema, avro.serializersModule.serializer<T>(), value)

/**
 * Encodes [value] in the single-object format, resolving both the serializer of [T] and the writer
 * schema from the underlying Avro instance.
 *
 * @param value the object to encode.
 */
public inline fun <reified T> AvroSingleObject.encodeToByteArray(value: T): ByteArray {
    // The explicit type argument is required: nothing else constrains the serializer lookup here.
    val serializer = avro.serializersModule.serializer<T>()
    return encodeToByteArray(avro.schema(serializer), serializer, value)
}

/**
 * Decodes a single-object encoded value of type [T] from [bytes], resolving the serializer of [T]
 * from the serializers module of the underlying Avro instance.
 *
 * @param bytes the encoded object, starting with the single-object header.
 */
public inline fun <reified T> AvroSingleObject.decodeFromByteArray(bytes: ByteArray): T =
    decodeFromByteArray(avro.serializersModule.serializer<T>(), bytes)




© 2015 - 2024 Weber Informatics LLC | Privacy Policy