
/*
 * Copyright Debezium Authors.
 *
 * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
 */
package io.debezium.converters;

import static io.debezium.converters.SerializerType.withName;
import static org.apache.kafka.connect.data.Schema.Type.STRUCT;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Schema.Type;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.json.JsonConverterConfig;
import org.apache.kafka.connect.json.JsonDeserializer;
import org.apache.kafka.connect.storage.Converter;
import org.apache.kafka.connect.storage.ConverterConfig;
import org.apache.kafka.connect.storage.ConverterType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;

import io.debezium.annotation.VisibleForTesting;
import io.debezium.config.Configuration;
import io.debezium.config.Instantiator;
import io.debezium.data.Envelope;
import io.debezium.pipeline.txmetadata.TransactionMonitor;
import io.debezium.util.SchemaNameAdjuster;

/**
 * Implementation of {@link Converter} that expresses schemas and objects with the CloudEvents specification.
 * The serialization format can be JSON or Avro.
 * <p>
 * The serialization format of CloudEvents is configured with the
 * {@link CloudEventsConverterConfig#CLOUDEVENTS_SERIALIZER_TYPE_CONFIG cloudevents.serializer.type} option.
 * <p>
 * The serialization format of the data attribute in CloudEvents is configured with the
 * {@link CloudEventsConverterConfig#CLOUDEVENTS_DATA_SERIALIZER_TYPE_CONFIG cloudevents.data.serializer.type} option.
 * <p>
 * Configuration options of the underlying converters can be passed through using the {@code json} and {@code avro}
 * prefixes, respectively.
 * <p>
 * There are two modes for transferring CloudEvents as Kafka messages: structured and binary. In the structured content
 * mode, event metadata attributes and event data are placed into the Kafka message value section using an event format.
 * In the binary content mode, the value of the event data is placed into the Kafka message's value section as-is,
 * with the content-type header value declaring its media type; all other event attributes are mapped to the Kafka
 * message's header section.
 * <p>
 * Since Kafka converters do not support headers yet, this converter currently uses the structured mode as the default.
 */
public class CloudEventsConverter implements Converter {

    private static final String EXTENSION_NAME_PREFIX = "iodebezium";
    private static final String TX_ATTRIBUTE_PREFIX = "tx";

    /**
     * Instantiated reflectively to avoid a hard dependency on the Avro converter.
     */
    private static final String CONFLUENT_AVRO_CONVERTER_CLASS = "io.confluent.connect.avro.AvroConverter";
    private static final String CONFLUENT_SCHEMA_REGISTRY_URL_CONFIG = "schema.registry.url";

    private static final String APICURIO_AVRO_CONVERTER_CLASS = "io.apicurio.registry.utils.converter.AvroConverter";
    private static final String APICURIO_SCHEMA_REGISTRY_URL_CONFIG = "apicurio.registry.url";

    /**
     * Suffix appended to schema names of the data schema in case of Avro/Avro, to keep
     * the data schema and the CE schema apart from each other.
     */
    private static final String DATA_SCHEMA_SUFFIX = "-data";

    private static final Logger LOGGER = LoggerFactory.getLogger(CloudEventsConverter.class);

    private static Method CONVERT_TO_CONNECT_METHOD;

    static {
        try {
            CONVERT_TO_CONNECT_METHOD = JsonConverter.class.getDeclaredMethod("convertToConnect", Schema.class, JsonNode.class);
            CONVERT_TO_CONNECT_METHOD.setAccessible(true);
        }
        catch (NoSuchMethodException e) {
            // NoSuchMethodException has no cause; wrap the exception itself
            throw new DataException(e);
        }
    }

    private SerializerType ceSerializerType = withName(CloudEventsConverterConfig.CLOUDEVENTS_SERIALIZER_TYPE_DEFAULT);
    private SerializerType dataSerializerType = withName(CloudEventsConverterConfig.CLOUDEVENTS_DATA_SERIALIZER_TYPE_DEFAULT);

    private final JsonConverter jsonCloudEventsConverter = new JsonConverter();
    private final JsonConverter jsonDataConverter = new JsonConverter();
    private boolean enableJsonSchemas;
    private final JsonDeserializer jsonDeserializer = new JsonDeserializer();

    private Converter avroConverter;
    private List<String> schemaRegistryUrls;

    public CloudEventsConverter() {
        this(null);
    }

    public CloudEventsConverter(Converter avroConverter) {
        this.avroConverter = avroConverter;
    }

    @Override
    public void configure(Map<String, ?> configs, boolean isKey) {
        Map<String, Object> conf = new HashMap<>(configs);
        Configuration jsonConfig = Configuration.from(configs).subset("json", true);

        conf.put(ConverterConfig.TYPE_CONFIG, ConverterType.VALUE.getName());
        CloudEventsConverterConfig ceConfig = new CloudEventsConverterConfig(conf);
        ceSerializerType = ceConfig.cloudeventsSerializerType();
        dataSerializerType = ceConfig.cloudeventsDataSerializerTypeConfig();

        boolean usingAvro = false;

        if (ceSerializerType == SerializerType.JSON) {
            Map<String, String> ceJsonConfig = jsonConfig.asMap();
            ceJsonConfig.put(JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, "false");
            jsonCloudEventsConverter.configure(ceJsonConfig, isKey);
        }
        else {
            usingAvro = true;

            if (dataSerializerType == SerializerType.JSON) {
                throw new IllegalStateException("Cannot use 'application/json' data content type within Avro events");
            }
        }

        if (dataSerializerType == SerializerType.JSON) {
            enableJsonSchemas = jsonConfig.getBoolean(JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, JsonConverterConfig.SCHEMAS_ENABLE_DEFAULT);
            jsonDataConverter.configure(jsonConfig.asMap(), true);
        }
        else {
            usingAvro = true;
        }

        if (usingAvro) {
            Configuration avroConfig = Configuration.from(configs).subset("avro", true);

            boolean useApicurio = true;
            if (avroConfig.hasKey(APICURIO_SCHEMA_REGISTRY_URL_CONFIG)) {
                schemaRegistryUrls = avroConfig.getStrings(APICURIO_SCHEMA_REGISTRY_URL_CONFIG, ",");
            }
            else if (avroConfig.hasKey(CONFLUENT_SCHEMA_REGISTRY_URL_CONFIG)) {
                schemaRegistryUrls = avroConfig.getStrings(CONFLUENT_SCHEMA_REGISTRY_URL_CONFIG, ",");
                useApicurio = false;
            }

            if (schemaRegistryUrls == null || schemaRegistryUrls.isEmpty()) {
                throw new DataException("Need URL(s) for schema registry instances for CloudEvents when using Apache Avro");
            }

            if (avroConverter == null) {
                avroConverter = Instantiator.getInstance(useApicurio ? APICURIO_AVRO_CONVERTER_CLASS : CONFLUENT_AVRO_CONVERTER_CLASS, null, null);
                LOGGER.info("Using Avro converter {}", avroConverter.getClass().getName());
                avroConverter.configure(avroConfig.asMap(), false);
            }
        }
    }
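    // A minimal usage sketch (not part of the original class): configuring the
    // converter programmatically for a JSON envelope carrying Avro "data". The
    // option keys come from the CloudEventsConverterConfig constants referenced
    // in the class Javadoc; the serializer values and the registry URL are
    // assumptions for illustration only.
    //
    //   Map<String, String> props = new HashMap<>();
    //   props.put(CloudEventsConverterConfig.CLOUDEVENTS_SERIALIZER_TYPE_CONFIG, "json");
    //   props.put(CloudEventsConverterConfig.CLOUDEVENTS_DATA_SERIALIZER_TYPE_CONFIG, "avro");
    //   props.put("avro." + CONFLUENT_SCHEMA_REGISTRY_URL_CONFIG, "http://localhost:8081"); // placeholder URL
    //   new CloudEventsConverter().configure(props, false);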
    @Override
    public byte[] fromConnectData(String topic, Schema schema, Object value) {
        if (schema == null || value == null) {
            return null;
        }
        if (!Envelope.isEnvelopeSchema(schema)) {
            // TODO Handling of non-data messages like schema change or transaction metadata
            return null;
        }
        if (schema.type() != STRUCT) {
            throw new DataException("Mismatching schema");
        }

        RecordParser parser = RecordParser.create(schema, value);
        CloudEventsMaker maker = CloudEventsMaker.create(parser, dataSerializerType,
                (schemaRegistryUrls == null) ? null : String.join(",", schemaRegistryUrls));

        if (ceSerializerType == SerializerType.JSON) {
            if (dataSerializerType == SerializerType.JSON) {
                // JSON - JSON (with schema in data)
                if (enableJsonSchemas) {
                    SchemaBuilder dummy = SchemaBuilder.struct();
                    SchemaAndValue cloudEvent = convertToCloudEventsFormat(parser, maker, dummy, null, new Struct(dummy));

                    // need to create a JSON node with schema + payload first
                    byte[] data = jsonDataConverter.fromConnectData(topic, maker.ceDataAttributeSchema(), maker.ceDataAttribute());

                    // replace the dummy '{}' in '"data" : {}' with the schema + payload JSON node;
                    // the event itself must not have schema enabled, so as to be a proper CloudEvent
                    byte[] cloudEventJson = jsonCloudEventsConverter.fromConnectData(topic, cloudEvent.schema(), cloudEvent.value());

                    ByteBuffer cloudEventWithData = ByteBuffer.allocate(cloudEventJson.length + data.length - 2);
                    cloudEventWithData.put(cloudEventJson, 0, cloudEventJson.length - 3);
                    cloudEventWithData.put(data);
                    cloudEventWithData.put((byte) '}');
                    return cloudEventWithData.array();
                }
                // JSON - JSON (without schema); can just use the regular JSON converter for the entire event
                else {
                    SchemaAndValue cloudEvent = convertToCloudEventsFormat(parser, maker, maker.ceDataAttributeSchema(), null, maker.ceDataAttribute());
                    return jsonCloudEventsConverter.fromConnectData(topic, cloudEvent.schema(), cloudEvent.value());
                }
            }
            // JSON - Avro; need to convert "data" to Avro first
            else {
                SchemaAndValue cloudEvent = convertToCloudEventsFormatWithDataAsAvro(topic, parser, maker);
                return jsonCloudEventsConverter.fromConnectData(topic, cloudEvent.schema(), cloudEvent.value());
            }
        }
        // Avro - Avro; need to convert "data" to Avro first
        else {
            SchemaAndValue cloudEvent = convertToCloudEventsFormatWithDataAsAvro(topic + DATA_SCHEMA_SUFFIX, parser, maker);
            return avroConverter.fromConnectData(topic, cloudEvent.schema(), cloudEvent.value());
        }
    }
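    // Worked example of the "data" splice performed in fromConnectData above
    // (values illustrative): if the envelope serializes to {"id":"x","data":{}}
    // (length L) and the schema-enabled data node to {"schema":{...},"payload":{...}}
    // (length D), then copying the first L - 3 bytes drops the trailing '{}}',
    // and appending the D data bytes plus a single '}' yields L + D - 2 bytes,
    // which is exactly the capacity allocated for the target buffer.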
    /**
     * Creates a CloudEvents wrapper, converting the "data" to Avro.
     */
    private SchemaAndValue convertToCloudEventsFormatWithDataAsAvro(String topic, RecordParser parser, CloudEventsMaker maker) {
        Schema dataSchemaType = Schema.BYTES_SCHEMA;
        byte[] serializedData = avroConverter.fromConnectData(topic, maker.ceDataAttributeSchema(), maker.ceDataAttribute());
        String dataSchemaUri = maker.ceDataschemaUri(getSchemaIdFromAvroMessage(serializedData));

        return convertToCloudEventsFormat(parser, maker, dataSchemaType, dataSchemaUri, serializedData);
    }

    /**
     * Obtains the schema id from the given Avro record. Avro records are prefixed by one magic byte,
     * followed by an int for the schema id.
     */
    private String getSchemaIdFromAvroMessage(byte[] serializedData) {
        return String.valueOf(ByteBuffer.wrap(serializedData, 1, 5).getInt());
    }

    @Override
    public SchemaAndValue toConnectData(String topic, byte[] value) {
        switch (ceSerializerType) {
            case JSON:
                JsonNode jsonValue;

                try {
                    jsonValue = jsonDeserializer.deserialize(topic, value);
                    byte[] data = jsonValue.get(CloudEventsMaker.FieldName.DATA).binaryValue();
                    SchemaAndValue dataField = reconvertData(topic, data, dataSerializerType, enableJsonSchemas);

                    Schema incompleteSchema = jsonCloudEventsConverter.asConnectSchema(jsonValue);
                    SchemaBuilder builder = SchemaBuilder.struct();

                    for (Field ceField : incompleteSchema.fields()) {
                        if (ceField.name().equals(CloudEventsMaker.FieldName.DATA)) {
                            builder.field(ceField.name(), dataField.schema());
                        }
                        else {
                            builder.field(ceField.name(), ceField.schema());
                        }
                    }
                    builder.name(incompleteSchema.name());
                    builder.version(incompleteSchema.version());
                    builder.doc(incompleteSchema.doc());
                    if (incompleteSchema.parameters() != null) {
                        for (Map.Entry<String, String> entry : incompleteSchema.parameters().entrySet()) {
                            builder.parameter(entry.getKey(), entry.getValue());
                        }
                    }
                    Schema schema = builder.build();

                    Struct incompleteStruct = (Struct) CONVERT_TO_CONNECT_METHOD.invoke(jsonCloudEventsConverter, incompleteSchema, jsonValue);
                    Struct struct = new Struct(schema);
                    for (Field ceField : incompleteSchema.fields()) {
                        if (ceField.name().equals(CloudEventsMaker.FieldName.DATA)) {
                            struct.put(ceField.name(), dataField.value());
                        }
                        else {
                            struct.put(ceField.name(), incompleteStruct.get(ceField));
                        }
                    }

                    return new SchemaAndValue(schema, struct);
                }
                catch (SerializationException | IOException | IllegalAccessException | InvocationTargetException e) {
                    throw new DataException("Converting byte[] to Kafka Connect data failed due to serialization error: ", e);
                }
            case AVRO:
                // First reconvert the whole CloudEvents envelope,
                // then reconvert the "data" field
                SchemaAndValue ceSchemaAndValue = avroConverter.toConnectData(topic, value);
                Schema incompleteSchema = ceSchemaAndValue.schema();
                Struct ceValue = (Struct) ceSchemaAndValue.value();
                byte[] data = ceValue.getBytes(CloudEventsMaker.FieldName.DATA);
                SchemaAndValue dataSchemaAndValue = avroConverter.toConnectData(topic + DATA_SCHEMA_SUFFIX, data);
                SchemaBuilder builder = SchemaBuilder.struct();

                for (Field ceField : incompleteSchema.fields()) {
                    if (ceField.name().equals(CloudEventsMaker.FieldName.DATA)) {
                        builder.field(ceField.name(), dataSchemaAndValue.schema());
                    }
                    else {
                        builder.field(ceField.name(), ceField.schema());
                    }
                }
                builder.name(incompleteSchema.name());
                builder.version(incompleteSchema.version());
                builder.doc(incompleteSchema.doc());
                if (incompleteSchema.parameters() != null) {
                    for (Map.Entry<String, String> entry : incompleteSchema.parameters().entrySet()) {
                        builder.parameter(entry.getKey(), entry.getValue());
                    }
                }
                Schema schema = builder.build();

                Struct struct = new Struct(schema);
                for (Field field : schema.fields()) {
                    if (field.name().equals(CloudEventsMaker.FieldName.DATA)) {
                        struct.put(field, dataSchemaAndValue.value());
                    }
                    else {
                        struct.put(field, ceValue.get(field));
                    }
                }

                return new SchemaAndValue(schema, struct);
        }
        return SchemaAndValue.NULL;
    }
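    // The Avro records consumed and produced above follow the Confluent-style wire
    // format that getSchemaIdFromAvroMessage assumes (an assumption worth noting
    // for the Apicurio converter, which must be configured compatibly):
    //
    //   byte  0     magic byte (0x00)
    //   bytes 1..4  schema id, big-endian int32
    //   bytes 5..   Avro binary payload
    //
    // e.g. a record starting with {0x00, 0x00, 0x00, 0x00, 0x2A, ...} carries schema id 42.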
    private SchemaAndValue reconvertData(String topic, byte[] serializedData, SerializerType dataType, Boolean enableSchemas) {
        switch (dataType) {
            case JSON:
                JsonNode jsonValue;

                try {
                    jsonValue = jsonDeserializer.deserialize(topic, serializedData);
                }
                catch (SerializationException e) {
                    throw new DataException("Converting byte[] to Kafka Connect data failed due to serialization error: ", e);
                }
                if (!enableSchemas) {
                    ObjectNode envelope = JsonNodeFactory.instance.objectNode();
                    envelope.set(CloudEventsMaker.FieldName.SCHEMA_FIELD_NAME, null);
                    envelope.set(CloudEventsMaker.FieldName.PAYLOAD_FIELD_NAME, jsonValue);
                    jsonValue = envelope;
                }

                Schema schema = jsonCloudEventsConverter.asConnectSchema(jsonValue.get(CloudEventsMaker.FieldName.SCHEMA_FIELD_NAME));

                try {
                    return new SchemaAndValue(
                            schema,
                            CONVERT_TO_CONNECT_METHOD.invoke(jsonCloudEventsConverter, schema, jsonValue.get(CloudEventsMaker.FieldName.PAYLOAD_FIELD_NAME)));
                }
                catch (IllegalAccessException | InvocationTargetException e) {
                    throw new DataException(e.getCause());
                }
            case AVRO:
                return avroConverter.toConnectData(topic, serializedData);
            default:
                throw new DataException("No such serializer for \"" + dataSerializerType + "\" format");
        }
    }

    private SchemaAndValue convertToCloudEventsFormat(RecordParser parser, CloudEventsMaker maker, Schema dataSchemaType, String dataSchema, Object serializedData) {
        SchemaNameAdjuster schemaNameAdjuster = SchemaNameAdjuster.create();
        Struct source = parser.source();
        Schema sourceSchema = parser.source().schema();
        final Struct transaction = parser.transaction();

        // construct schema of CloudEvents envelope
        CESchemaBuilder ceSchemaBuilder = defineSchema()
                .withName(schemaNameAdjuster.adjust(maker.ceEnvelopeSchemaName()))
                .withSchema(CloudEventsMaker.FieldName.ID, Schema.STRING_SCHEMA)
                .withSchema(CloudEventsMaker.FieldName.SOURCE, Schema.STRING_SCHEMA)
                .withSchema(CloudEventsMaker.FieldName.SPECVERSION, Schema.STRING_SCHEMA)
                .withSchema(CloudEventsMaker.FieldName.TYPE, Schema.STRING_SCHEMA)
                .withSchema(CloudEventsMaker.FieldName.TIME, Schema.STRING_SCHEMA)
                .withSchema(CloudEventsMaker.FieldName.DATACONTENTTYPE, Schema.STRING_SCHEMA);

        if (dataSchema != null) {
            ceSchemaBuilder.withSchema(CloudEventsMaker.FieldName.DATASCHEMA, Schema.STRING_SCHEMA);
        }
        ceSchemaBuilder.withSchema(adjustExtensionName(Envelope.FieldName.OPERATION), Schema.STRING_SCHEMA);

        ceSchemaFromSchema(sourceSchema, ceSchemaBuilder, CloudEventsConverter::adjustExtensionName, false);

        // transaction attributes
        ceSchemaFromSchema(TransactionMonitor.TRANSACTION_BLOCK_SCHEMA, ceSchemaBuilder, CloudEventsConverter::txExtensionName, true);

        ceSchemaBuilder.withSchema(CloudEventsMaker.FieldName.DATA, dataSchemaType);
        Schema ceSchema = ceSchemaBuilder.build();

        // construct value of CloudEvents envelope
        CEValueBuilder ceValueBuilder = withValue(ceSchema)
                .withValue(CloudEventsMaker.FieldName.ID, maker.ceId())
                .withValue(CloudEventsMaker.FieldName.SOURCE, maker.ceSource(source.getString("name")))
                .withValue(CloudEventsMaker.FieldName.SPECVERSION, maker.ceSpecversion())
                .withValue(CloudEventsMaker.FieldName.TYPE, maker.ceType())
                .withValue(CloudEventsMaker.FieldName.TIME, maker.ceTime())
                .withValue(CloudEventsMaker.FieldName.DATACONTENTTYPE, maker.ceDatacontenttype());

        if (dataSchema != null) {
            ceValueBuilder.withValue(CloudEventsMaker.FieldName.DATASCHEMA, dataSchema);
        }
        ceValueBuilder.withValue(adjustExtensionName(Envelope.FieldName.OPERATION), parser.op());
        ceValueFromStruct(source, sourceSchema, ceValueBuilder, CloudEventsConverter::adjustExtensionName);
        if (transaction != null) {
            ceValueFromStruct(transaction, TransactionMonitor.TRANSACTION_BLOCK_SCHEMA, ceValueBuilder, CloudEventsConverter::txExtensionName);
        }
        ceValueBuilder.withValue(CloudEventsMaker.FieldName.DATA, serializedData);

        return new SchemaAndValue(ceSchema, ceValueBuilder.build());
    }
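    // For illustration, the envelope assembled by convertToCloudEventsFormat has
    // roughly this shape once serialized as JSON (all values hypothetical):
    //
    //   {
    //     "id": "...",
    //     "source": "...",
    //     "specversion": "...",
    //     "type": "...",
    //     "time": "...",
    //     "datacontenttype": "...",
    //     "iodebeziumop": "c",
    //     ... further "iodebezium..." source attributes and "iodebeziumtx..." transaction attributes ...
    //     "data": { ... }
    //   }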
    private void ceValueFromStruct(Struct struct, Schema schema, CEValueBuilder ceValueBuilder, Function<String, String> nameMapper) {
        for (Field field : schema.fields()) {
            Object value = struct.get(field);
            if (field.schema().type() == Type.INT64 && value != null) {
                value = String.valueOf((long) value);
            }
            ceValueBuilder.withValue(nameMapper.apply(field.name()), value);
        }
    }

    private void ceSchemaFromSchema(Schema schema, CESchemaBuilder ceSchemaBuilder, Function<String, String> nameMapper, boolean alwaysOptional) {
        for (Field field : schema.fields()) {
            ceSchemaBuilder.withSchema(nameMapper.apply(field.name()), convertToCeExtensionSchema(field.schema(), alwaysOptional));
        }
    }

    /**
     * Converts the given source attribute schema into a corresponding CE extension schema.
     * The types supported there are limited; e.g. int64 can only be represented as string.
     */
    private Schema convertToCeExtensionSchema(Schema schema, boolean alwaysOptional) {
        SchemaBuilder ceExtensionSchema;

        if (schema.type() == Type.BOOLEAN) {
            ceExtensionSchema = SchemaBuilder.bool();
        }
        // all numbers up to int32 go as int32
        else if (schema.type() == Type.INT8 || schema.type() == Type.INT16 || schema.type() == Type.INT32) {
            ceExtensionSchema = SchemaBuilder.int32();
        }
        // int64 isn't supported as per CE spec
        else if (schema.type() == Type.STRING || schema.type() == Type.INT64) {
            ceExtensionSchema = SchemaBuilder.string();
        }
        // further attribute types may be supported in the future, but the ones above are the ones
        // currently used in the "source" block of Debezium events
        else {
            throw new IllegalArgumentException("Source field of type " + schema.type() + " cannot be converted into CloudEvents extension attribute.");
        }

        if (alwaysOptional || schema.isOptional()) {
            ceExtensionSchema.optional();
        }

        return ceExtensionSchema.build();
    }

    private Schema convertToCeExtensionSchema(Schema schema) {
        return convertToCeExtensionSchema(schema, false);
    }

    private static CESchemaBuilder defineSchema() {
        return new CESchemaBuilder() {
            private final SchemaBuilder builder = SchemaBuilder.struct();

            @Override
            public CESchemaBuilder withName(String name) {
                builder.name(name);
                return this;
            }

            @Override
            public CESchemaBuilder withSchema(String fieldName, Schema fieldSchema) {
                builder.field(fieldName, fieldSchema);
                return this;
            }

            @Override
            public Schema build() {
                return builder.build();
            }
        };
    }

    private static CEValueBuilder withValue(Schema schema) {
        return new CEValueBuilder() {
            private final Schema ceSchema = schema;
            private final Struct ceValue = new Struct(ceSchema);

            @Override
            public CEValueBuilder withValue(String fieldName, Object value) {
                if (ceSchema.field(fieldName) == null) {
                    throw new DataException(fieldName + " is not a valid field name");
                }

                ceValue.put(fieldName, value);
                return this;
            }

            @Override
            public Struct build() {
                return ceValue;
            }
        };
    }
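    // Worked example of the extension naming applied by ceSchemaFromSchema and
    // ceValueFromStruct above (field names illustrative): a source field "ts_ms"
    // of type int64 becomes the string-typed attribute "iodebeziumtsms" --
    // prefixed, underscore stripped, value rendered via String.valueOf -- while a
    // transaction field "id" becomes "iodebeziumtxid" through the txExtensionName
    // helper defined below.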
    /**
     * Builder of a CloudEvents envelope schema.
     */
    public interface CESchemaBuilder {

        CESchemaBuilder withName(String name);

        CESchemaBuilder withSchema(String fieldName, Schema fieldSchema);

        Schema build();
    }

    /**
     * Builder of a CloudEvents value.
     */
    public interface CEValueBuilder {

        CEValueBuilder withValue(String fieldName, Object value);

        Struct build();
    }

    /**
     * Adjusts the name of CloudEvents attributes for Debezium events, following the CloudEvents
     * attribute naming convention as follows:
     *
     * <ul>
     * <li>prefixed with {@link #EXTENSION_NAME_PREFIX}</li>
     * <li>CloudEvents attribute names MUST consist of lower-case letters ('a' to 'z') or digits ('0' to '9') from the
     * ASCII character set, so any other characters are removed</li>
     * </ul>
     *
     * @param original the original field name
     * @return the valid extension attribute name
     */
    @VisibleForTesting
    static String adjustExtensionName(String original) {
        StringBuilder sb = new StringBuilder(EXTENSION_NAME_PREFIX);

        char c;
        for (int i = 0; i != original.length(); ++i) {
            c = original.charAt(i);
            if (isValidExtensionNameCharacter(c)) {
                sb.append(c);
            }
        }

        return sb.toString();
    }

    private static String txExtensionName(String name) {
        return adjustExtensionName(TX_ATTRIBUTE_PREFIX + name);
    }

    private static boolean isValidExtensionNameCharacter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
    }
}



