com.github.avrokotlin.avro4k.AvroObjectContainer.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of avro4k-core Show documentation
Show all versions of avro4k-core Show documentation
Avro binary format support for kotlin, built on top of kotlinx-serialization
The newest version!
package com.github.avrokotlin.avro4k
import com.github.avrokotlin.avro4k.internal.decodeWithBinaryDecoder
import com.github.avrokotlin.avro4k.internal.encodeWithBinaryEncoder
import kotlinx.serialization.DeserializationStrategy
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerializationStrategy
import kotlinx.serialization.serializer
import org.apache.avro.Schema
import org.apache.avro.file.CodecFactory
import org.apache.avro.file.DataFileStream
import org.apache.avro.file.DataFileWriter
import org.apache.avro.io.DatumReader
import org.apache.avro.io.DatumWriter
import java.io.Closeable
import java.io.InputStream
import java.io.OutputStream
/**
 * Encode and decode values in object container files, also known as avro data file format.
 *
 * [spec](https://avro.apache.org/docs/1.11.1/specification/#object-container-files)
 *
 * @property avro the [Avro] instance every value is encoded and decoded with.
 */
@ExperimentalSerializationApi
public sealed class AvroObjectContainer(
    @PublishedApi
    internal val avro: Avro,
) {
    public companion object Default : AvroObjectContainer(Avro)

    /**
     * Opens a writer to allow encoding values to avro and writing them to the output stream.
     *
     * Note that the output stream is not closed after the operation, which means you need to handle it to avoid resource leaks.
     *
     * @param T the type of the values that will be written.
     * @param schema the schema handed to the underlying [DataFileWriter] when the container is created.
     * @param serializer the strategy used to encode each value of type [T].
     * @param outputStream the destination of the container file.
     * @param builder optional configuration (metadata, codec, sync interval); it runs before the
     * container is created on [outputStream].
     * @return a writer bound to [outputStream].
     */
    public fun <T> openWriter(
        schema: Schema,
        serializer: SerializationStrategy<T>,
        outputStream: OutputStream,
        builder: AvroObjectContainerBuilder.() -> Unit = {},
    ): AvroObjectContainerWriter<T> {
        val datumWriter: DatumWriter<T> = KotlinxSerializationDatumWriter(serializer, avro)
        val dataFileWriter = DataFileWriter(datumWriter)

        // The builder must run first: it configures the file writer before create() is called.
        builder(AvroObjectContainerBuilder(dataFileWriter))
        dataFileWriter.create(schema, outputStream)
        return AvroObjectContainerWriter(dataFileWriter)
    }

    /**
     * Lazily decodes the values stored in an object container read from [inputStream].
     *
     * The underlying [DataFileStream] is only created when the returned sequence is first iterated;
     * [metadataDumper] is invoked at that moment, before the first value is yielded.
     * The returned sequence can be iterated only once.
     *
     * Note that this function never closes [inputStream]; the caller stays responsible for it.
     *
     * @param T the type of the decoded values.
     * @param deserializer the strategy used to decode each value of type [T].
     * @param inputStream the source of the container file.
     * @param metadataDumper optional callback giving access to the container's metadata.
     * @return a single-use sequence of the decoded values.
     */
    public fun <T> decodeFromStream(
        deserializer: DeserializationStrategy<T>,
        inputStream: InputStream,
        metadataDumper: AvroObjectContainerMetadataDumper.() -> Unit = {},
    ): Sequence<T> {
        return sequence {
            val datumReader: DatumReader<T> = KotlinxSerializationDatumReader(deserializer, avro)
            val dataFileStream = DataFileStream(inputStream, datumReader)
            metadataDumper(AvroObjectContainerMetadataDumper(dataFileStream))
            yieldAll(dataFileStream.iterator())
        }.constrainOnce()
    }
}
/**
 * Appends values of type [T] to an object container through the wrapped [DataFileWriter].
 *
 * @param T the type of the values accepted by [writeValue].
 */
public class AvroObjectContainerWriter<T> internal constructor(
    private val writer: DataFileWriter<T>,
) : Closeable {
    /**
     * Appends [value] to the container.
     */
    public fun writeValue(value: T) {
        writer.append(value)
    }

    /**
     * Flushes the wrapped file writer.
     *
     * Only `flush()` is called here, not `close()` on the file writer, so this method does not
     * itself close the output stream the container is written to.
     */
    override fun close() {
        writer.flush()
    }
}
/** Concrete, file-private [AvroObjectContainer] that simply forwards the given [Avro] instance. */
private class AvroObjectContainerImpl(
    avro: Avro,
) : AvroObjectContainer(avro)
/**
 * Creates an [AvroObjectContainer] backed by a new [Avro] instance built from [from] and [builderAction].
 *
 * @param from the [Avro] instance passed as the base of the new configuration; defaults to [Avro].
 * @param builderAction the configuration block applied through [AvroBuilder].
 * @return a container using the resulting [Avro] instance.
 */
public fun AvroObjectContainer(
    from: Avro = Avro,
    builderAction: AvroBuilder.() -> Unit,
): AvroObjectContainer = AvroObjectContainerImpl(Avro(from, builderAction))
/**
 * Opens a writer for values of type [T], resolving the serializer from the serializers module of
 * this container's `avro` instance and deriving the schema from that serializer.
 *
 * @param T the type of the values that will be written; reified so the serializer can be looked up.
 * @param outputStream the destination of the container file.
 * @param builder optional configuration of the container (metadata, codec, sync interval).
 * @return a writer bound to [outputStream].
 */
@ExperimentalSerializationApi
public inline fun <reified T> AvroObjectContainer.openWriter(
    outputStream: OutputStream,
    noinline builder: AvroObjectContainerBuilder.() -> Unit = {},
): AvroObjectContainerWriter<T> {
    val serializer = avro.serializersModule.serializer<T>()
    return openWriter(avro.schema(serializer), serializer, outputStream, builder)
}
/**
 * Decodes the values of type [T] stored in an object container, resolving the deserializer from
 * the serializers module of this container's `avro` instance.
 *
 * @param T the type of the decoded values; reified so the serializer can be looked up.
 * @param inputStream the source of the container file.
 * @param metadataDumper optional callback giving access to the container's metadata.
 * @return the sequence produced by the member `decodeFromStream` for the resolved serializer.
 */
@ExperimentalSerializationApi
public inline fun <reified T> AvroObjectContainer.decodeFromStream(
    inputStream: InputStream,
    noinline metadataDumper: AvroObjectContainerMetadataDumper.() -> Unit = {},
): Sequence<T> {
    val serializer = avro.serializersModule.serializer<T>()
    return decodeFromStream(serializer, inputStream, metadataDumper)
}
/**
 * Configuration scope for an object container; every call is forwarded to the wrapped [DataFileWriter].
 */
public class AvroObjectContainerBuilder internal constructor(private val fileWriter: DataFileWriter<*>) {
    /** Stores [value] as raw bytes under the metadata entry [key]. */
    public fun metadata(key: String, value: ByteArray) {
        fileWriter.setMeta(key, value)
    }

    /** Stores the string [value] under the metadata entry [key]. */
    public fun metadata(key: String, value: String) {
        fileWriter.setMeta(key, value)
    }

    /** Stores the long [value] under the metadata entry [key]. */
    public fun metadata(key: String, value: Long) {
        fileWriter.setMeta(key, value)
    }

    /** Forwards [value] to the file writer's sync interval setting. */
    public fun syncInterval(value: Int) {
        fileWriter.setSyncInterval(value)
    }

    /** Sets the [codec] the file writer uses for the container. */
    public fun codec(codec: CodecFactory) {
        fileWriter.setCodec(codec)
    }
}
/**
 * Read-only access to the metadata entries exposed by the wrapped [DataFileStream].
 */
public class AvroObjectContainerMetadataDumper internal constructor(private val fileStream: DataFileStream<*>) {
    /**
     * Looks up the metadata entry stored under [key].
     *
     * @return an accessor over the entry's bytes, or `null` when the stream returns no value for [key].
     */
    public fun metadata(key: String): MetadataAccessor? {
        val rawValue = fileStream.getMeta(key) ?: return null
        return MetadataAccessor(rawValue)
    }

    /**
     * Typed views over the raw bytes of a single metadata entry.
     */
    public inner class MetadataAccessor(private val value: ByteArray) {
        /** Returns the entry's bytes as stored (the same array instance, not a copy). */
        public fun asBytes(): ByteArray {
            return value
        }

        /** Decodes the entry's bytes to a string with [decodeToString]. */
        public fun asString(): String {
            return value.decodeToString()
        }

        /** Parses the string form of the entry as a [Long]; fails if it is not a valid number. */
        public fun asLong(): Long {
            return asString().toLong()
        }
    }
}
/**
 * [DatumWriter] that encodes values of type [T] through [avro] with the given [serializer].
 *
 * [setSchema] must be called before [write]; until then the writer schema is uninitialized.
 */
private class KotlinxSerializationDatumWriter<T>(
    private val serializer: SerializationStrategy<T>,
    private val avro: Avro,
) : DatumWriter<T> {
    private lateinit var writerSchema: Schema

    /** Records the schema used by subsequent [write] calls. */
    override fun setSchema(schema: Schema) {
        writerSchema = schema
    }

    /** Encodes [datum] to [encoder] using the schema last passed to [setSchema]. */
    override fun write(
        datum: T,
        encoder: org.apache.avro.io.Encoder,
    ) {
        avro.encodeWithBinaryEncoder(writerSchema, serializer, datum, encoder)
    }
}
/**
 * [DatumReader] that decodes values of type [T] through [avro] with the given [deserializer].
 *
 * [setSchema] must be called before [read]; until then the writer schema is uninitialized.
 */
private class KotlinxSerializationDatumReader<T>(
    private val deserializer: DeserializationStrategy<T>,
    private val avro: Avro,
) : DatumReader<T> {
    private lateinit var writerSchema: Schema

    /** Records the schema used by subsequent [read] calls. */
    override fun setSchema(schema: Schema) {
        writerSchema = schema
    }

    /**
     * Decodes one value from [decoder] using the schema last passed to [setSchema].
     *
     * [reuse] is ignored: it is never passed on to the decoding call.
     */
    override fun read(
        reuse: T?,
        decoder: org.apache.avro.io.Decoder,
    ): T {
        return avro.decodeWithBinaryDecoder(writerSchema, deserializer, decoder)
    }
}