io.deephaven.kafka.ProtobufDeserializers

//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
package io.deephaven.kafka;

import com.google.protobuf.Descriptors.Descriptor;
import com.google.protobuf.DynamicMessage;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.Message;
import com.google.protobuf.Parser;
import io.confluent.kafka.serializers.protobuf.KafkaProtobufDeserializer;
import io.deephaven.kafka.protobuf.Protocol;
import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.utils.ByteUtils;

import java.nio.ByteBuffer;
import java.util.Objects;

class ProtobufDeserializers {

    static <T extends Message> Deserializer<T> of(Protocol protocol, Parser<T> parser) {
        if (protocol == Protocol.serdes()) {
            return new Serdes<>(ParseFrom.of(parser));
        }
        if (protocol == Protocol.raw()) {
            return new Raw<>(ParseFrom.of(parser));
        }
        throw new IllegalStateException("Unexpected protocol: " + protocol);
    }

    static Deserializer<DynamicMessage> of(Protocol protocol, Descriptor descriptor) {
        if (protocol == Protocol.serdes()) {
            return new Serdes<>(ParseFrom.of(descriptor));
        }
        if (protocol == Protocol.raw()) {
            return new Raw<>(ParseFrom.of(descriptor));
        }
        throw new IllegalStateException("Unexpected protocol: " + protocol);
    }
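
    // Illustrative usage (not in the original source); MyRecord stands in for any generated
    // protobuf message type, and someDescriptor for a com.google.protobuf Descriptors.Descriptor:
    //   Deserializer<MyRecord> fromParser = ProtobufDeserializers.of(Protocol.raw(), MyRecord.parser());
    //   Deserializer<DynamicMessage> fromDescriptor = ProtobufDeserializers.of(Protocol.serdes(), someDescriptor);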

    /**
     * A simple abstraction that lets us adapt both {@link Parser}-based and {@link Descriptor}-based parsing logic,
     * since a {@link Descriptor} is not easily adapted into a {@link Parser}.
     */
    private interface ParseFrom<T extends Message> {
        static <T extends Message> ParseFrom<T> of(Parser<T> parser) {
            return parser::parseFrom;
        }

        static ParseFrom<DynamicMessage> of(Descriptor descriptor) {
            return (data, offset, len) -> DynamicMessage.newBuilder(descriptor).mergeFrom(data, offset, len).build();
        }

        T parseFrom(byte[] data, int offset, int len) throws InvalidProtocolBufferException;
    }
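
    // For example (illustrative, not in the original source; someDescriptor is hypothetical),
    // both factories yield the same functional shape:
    //   ParseFrom<DynamicMessage> dynamic = ParseFrom.of(someDescriptor);
    //   DynamicMessage message = dynamic.parseFrom(bytes, 0, bytes.length);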

    private static final class Raw<T extends Message> implements Deserializer<T> {
        private final ParseFrom<T> parser;

        Raw(ParseFrom<T> parser) {
            this.parser = Objects.requireNonNull(parser);
        }

        @Override
        public T deserialize(String topic, byte[] data) {
            if (data == null) {
                return null;
            }
            try {
                return parser.parseFrom(data, 0, data.length);
            } catch (InvalidProtocolBufferException e) {
                throw new SerializationException(e);
            }
        }
    }

    /**
     * An implementation of the kafka serdes protocol that uses a fixed Protobuf {@link Descriptor} or
     * {@link Parser}. This guarantees that all parsed messages have exactly the same
     * {@link Message#getDescriptorForType() Message Descriptor}. This is in contrast with
     * {@link KafkaProtobufDeserializer}, whose parsing logic is dynamic depending on the schema ID from {@link Message}
     * to {@link Message}. For {@link KafkaProtobufDeserializer}, only messages with the same schema ID will have the
     * same {@link Message#getDescriptorForType() Message Descriptor}.
     *
     * <p>
     * The benefit of guaranteeing a fixed {@link Descriptor} is that downstream consumer logic can be greatly
     * simplified.
     *
     * <p>
     * The potential drawback is that downstream consumers are no longer aware of schema changes.
     *
     * <p>
     * In the context of Deephaven, this means that we don't need to be responsible for handling schema-change logic and
     * can instead rely on the underlying compatibility guarantees that the Protobuf encoding format provides (and any
     * additional guarantees that the schema registry may enforce). Given that our underlying Deephaven table
     * definitions are immutable and we don't have a notion of "table definition changes", in the current kafka
     * architecture, we can't pass schema change information through a {@link io.deephaven.engine.table.Table} anyways.
     *
     * @param <T> the message type.
     */
    private static final class Serdes<T extends Message> implements Deserializer<T> {
        private static final byte MAGIC_BYTE = 0x0;
        private static final int MAGIC_SIZE = Byte.BYTES;
        private static final int SCHEMA_ID_SIZE = Integer.BYTES;
        private static final int MESSAGE_IX_MIN_SIZE = Byte.BYTES;
        private static final int MESSAGE_IX_OFFSET = MAGIC_SIZE + SCHEMA_ID_SIZE;
        private static final int SERDES_MIN_SIZE = MAGIC_SIZE + SCHEMA_ID_SIZE + MESSAGE_IX_MIN_SIZE;

        private final ParseFrom<T> parser;

        Serdes(ParseFrom<T> parser) {
            this.parser = Objects.requireNonNull(parser);
        }

        @Override
        public T deserialize(String topic, byte[] data) {
            if (data == null) {
                return null;
            }
            if (data.length < SERDES_MIN_SIZE) {
                throw new SerializationException(
                        String.format("Not serdes protocol, expected at least %d bytes", SERDES_MIN_SIZE));
            }
            if (data[0] != MAGIC_BYTE) {
                throw new SerializationException("Not serdes protocol, magic byte is incorrect");
            }
            // No need to parse schema id
            // No need to parse io.confluent.kafka.schemaregistry.protobuf.MessageIndexes
            final int protobufOffset = protobufOffset(data);
            try {
                return parser.parseFrom(data, protobufOffset, data.length - protobufOffset);
            } catch (InvalidProtocolBufferException e) {
                throw new SerializationException(e);
            }
        }

        private static int protobufOffset(byte[] data) {
            final ByteBuffer bb = ByteBuffer.wrap(data, MESSAGE_IX_OFFSET, data.length - MESSAGE_IX_OFFSET);
            // We need to go through the motions of parsing io.confluent.kafka.schemaregistry.protobuf.MessageIndexes;
            // it's variable-sized. See io.confluent.kafka.schemaregistry.protobuf.MessageIndexes.readFrom, impl driven
            // by org.apache.kafka.common.utils.ByteUtils.readVarint.
            final int numMessageIxs = ByteUtils.readVarint(bb);
            for (int i = 0; i < numMessageIxs; ++i) {
                ByteUtils.readVarint(bb);
            }
            return bb.position();
        }
    }
}
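
For reference, below is a minimal sketch (not part of the original source) of the wire framing that the Serdes deserializer expects: the magic byte 0x0, a 4-byte big-endian schema ID, the varint-encoded Confluent message indexes (a single varint 0 denotes the default first-message index list), and finally the protobuf payload. The helper class and method names are hypothetical; schemaId and protobufPayload are placeholders.

import java.nio.ByteBuffer;
import org.apache.kafka.common.utils.ByteUtils;

// Hypothetical helper that produces bytes in the layout Serdes.deserialize consumes.
class SerdesFramingSketch {
    static byte[] frame(int schemaId, byte[] protobufPayload) {
        final ByteBuffer bb = ByteBuffer.allocate(
                Byte.BYTES + Integer.BYTES + ByteUtils.sizeOfVarint(0) + protobufPayload.length);
        bb.put((byte) 0x0);           // MAGIC_BYTE
        bb.putInt(schemaId);          // 4-byte schema ID, big-endian (ByteBuffer default order)
        ByteUtils.writeVarint(0, bb); // message indexes: varint 0 encodes the default index list
        bb.put(protobufPayload);      // the protobuf-encoded message itself
        return bb.array();
    }
}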