/*
 * Copyright 2019 Confluent Inc.
 * Portions Copyright 2020 Amazon.com, Inc. or its affiliates.
 * All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.amazonaws.services.schemaregistry.kafkaconnect.avrodata;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.IntNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.avro.JsonProperties;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericEnumSymbol;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.util.internal.JacksonUtils;
import org.apache.kafka.common.cache.Cache;
import org.apache.kafka.common.cache.LRUCache;
import org.apache.kafka.common.cache.SynchronizedCache;
import org.apache.kafka.connect.data.ConnectSchema;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.data.Time;
import org.apache.kafka.connect.data.Timestamp;
import org.apache.kafka.connect.errors.DataException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * Utilities for converting between our runtime data format and Avro, and (de)serializing that data.
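 *
 * <p>A minimal usage sketch (the schema name, field, and values below are hypothetical and only
 * illustrate the round trip; they do not appear elsewhere in this class):
 * <pre>{@code
 * AvroData avroData = new AvroData(20);
 * Schema connectSchema = SchemaBuilder.struct().name("com.example.User")
 *     .field("name", Schema.STRING_SCHEMA)
 *     .build();
 * Struct connectValue = new Struct(connectSchema).put("name", "alice");
 *
 * org.apache.avro.Schema avroSchema = avroData.fromConnectSchema(connectSchema);
 * Object avroRecord = avroData.fromConnectData(connectSchema, connectValue);
 * SchemaAndValue roundTripped = avroData.toConnectData(avroSchema, avroRecord);
 * }</pre>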
 */
public class AvroData {

    private static final Logger log = LoggerFactory.getLogger(AvroData.class);

    public static final String NAMESPACE = "com.amazonaws.services.schemaregistry.kafkaconnect.avrodata";
    // Avro does not permit empty schema names, which might be the ideal default since we also are
    // not permitted to simply omit the name. Instead, make it very clear where the default is
    // coming from.
    public static final String DEFAULT_SCHEMA_NAME = "ConnectDefault";
    public static final String DEFAULT_SCHEMA_FULL_NAME = NAMESPACE + "." + DEFAULT_SCHEMA_NAME;
    public static final String MAP_ENTRY_TYPE_NAME = "MapEntry";
    public static final String KEY_FIELD = "key";
    public static final String VALUE_FIELD = "value";

    public static final String CONNECT_NAME_PROP = "connect.name";
    public static final String CONNECT_DOC_PROP = "connect.doc";
    public static final String CONNECT_VERSION_PROP = "connect.version";
    public static final String CONNECT_DEFAULT_VALUE_PROP = "connect.default";
    public static final String CONNECT_PARAMETERS_PROP = "connect.parameters";
    public static final String CONNECT_INTERNAL_TYPE_NAME = "connect.internal.type";
    public static final String AVRO_RECORD_DOC_PROP = NAMESPACE + ".record.doc";
    public static final String AVRO_ENUM_DOC_PREFIX_PROP = NAMESPACE + ".enum.doc.";
    public static final String AVRO_FIELD_DOC_PREFIX_PROP = NAMESPACE + ".field.doc.";
    //This property is used to determine whether a default value in the Connect schema originated
    //from an Avro field default
    public static final String AVRO_FIELD_DEFAULT_FLAG_PROP = NAMESPACE + ".field.default";
    public static final String AVRO_ENUM_DEFAULT_PREFIX_PROP = NAMESPACE + ".enum.default.";

    public static final String CONNECT_TYPE_PROP = "connect.type";

    public static final String CONNECT_TYPE_INT8 = "int8";
    public static final String CONNECT_TYPE_INT16 = "int16";

    public static final String AVRO_TYPE_UNION = NAMESPACE + ".Union";
    public static final String AVRO_TYPE_ENUM = NAMESPACE + ".Enum";

    public static final String AVRO_TYPE_ANYTHING = NAMESPACE + ".Anything";

    private static final Map<String, Schema.Type> NON_AVRO_TYPES_BY_TYPE_CODE = new HashMap<>();

    static {
        NON_AVRO_TYPES_BY_TYPE_CODE.put(CONNECT_TYPE_INT8, Schema.Type.INT8);
        NON_AVRO_TYPES_BY_TYPE_CODE.put(CONNECT_TYPE_INT16, Schema.Type.INT16);
    }

    // Avro Java object types used by Connect schema types
    private static final Map<Schema.Type, List<Class>> SIMPLE_AVRO_SCHEMA_TYPES = new HashMap<>();

    static {
        SIMPLE_AVRO_SCHEMA_TYPES.put(Schema.Type.INT32, Arrays.asList((Class) Integer.class));
        SIMPLE_AVRO_SCHEMA_TYPES.put(Schema.Type.INT64, Arrays.asList((Class) Long.class));
        SIMPLE_AVRO_SCHEMA_TYPES.put(Schema.Type.FLOAT32, Arrays.asList((Class) Float.class));
        SIMPLE_AVRO_SCHEMA_TYPES.put(Schema.Type.FLOAT64, Arrays.asList((Class) Double.class));
        SIMPLE_AVRO_SCHEMA_TYPES.put(Schema.Type.BOOLEAN, Arrays.asList((Class) Boolean.class));
        SIMPLE_AVRO_SCHEMA_TYPES.put(Schema.Type.STRING, Arrays.asList((Class) CharSequence.class));
        SIMPLE_AVRO_SCHEMA_TYPES.put(
                Schema.Type.BYTES,
                Arrays.asList((Class) ByteBuffer.class, (Class) byte[].class, (Class) GenericFixed.class));
        SIMPLE_AVRO_SCHEMA_TYPES.put(Schema.Type.ARRAY, Arrays.asList((Class) Collection.class));
        SIMPLE_AVRO_SCHEMA_TYPES.put(Schema.Type.MAP, Arrays.asList((Class) Map.class));
    }

    private static final Map<Schema.Type, org.apache.avro.Schema.Type> CONNECT_TYPES_TO_AVRO_TYPES
            = new HashMap<>();

    static {
        CONNECT_TYPES_TO_AVRO_TYPES.put(Schema.Type.INT32, org.apache.avro.Schema.Type.INT);
        CONNECT_TYPES_TO_AVRO_TYPES.put(Schema.Type.INT64, org.apache.avro.Schema.Type.LONG);
        CONNECT_TYPES_TO_AVRO_TYPES.put(Schema.Type.FLOAT32, org.apache.avro.Schema.Type.FLOAT);
        CONNECT_TYPES_TO_AVRO_TYPES.put(Schema.Type.FLOAT64, org.apache.avro.Schema.Type.DOUBLE);
        CONNECT_TYPES_TO_AVRO_TYPES.put(Schema.Type.BOOLEAN, org.apache.avro.Schema.Type.BOOLEAN);
        CONNECT_TYPES_TO_AVRO_TYPES.put(Schema.Type.STRING, org.apache.avro.Schema.Type.STRING);
        CONNECT_TYPES_TO_AVRO_TYPES.put(Schema.Type.BYTES, org.apache.avro.Schema.Type.BYTES);
        CONNECT_TYPES_TO_AVRO_TYPES.put(Schema.Type.ARRAY, org.apache.avro.Schema.Type.ARRAY);
        CONNECT_TYPES_TO_AVRO_TYPES.put(Schema.Type.MAP, org.apache.avro.Schema.Type.MAP);
    }


    private static final String ANYTHING_SCHEMA_BOOLEAN_FIELD = "boolean";
    private static final String ANYTHING_SCHEMA_BYTES_FIELD = "bytes";
    private static final String ANYTHING_SCHEMA_DOUBLE_FIELD = "double";
    private static final String ANYTHING_SCHEMA_FLOAT_FIELD = "float";
    private static final String ANYTHING_SCHEMA_INT_FIELD = "int";
    private static final String ANYTHING_SCHEMA_LONG_FIELD = "long";
    private static final String ANYTHING_SCHEMA_STRING_FIELD = "string";
    private static final String ANYTHING_SCHEMA_ARRAY_FIELD = "array";
    private static final String ANYTHING_SCHEMA_MAP_FIELD = "map";

    public static final org.apache.avro.Schema ANYTHING_SCHEMA_MAP_ELEMENT;
    public static final org.apache.avro.Schema ANYTHING_SCHEMA;

    private static final org.apache.avro.Schema
            NULL_AVRO_SCHEMA =
            org.apache.avro.Schema.create(org.apache.avro.Schema.Type.NULL);

    static {
        // Intuitively this should be a union schema. However, unions can't be named in Avro and this
        // is a self-referencing type, so we need to use a format in which we can name the entire schema

        ANYTHING_SCHEMA =
                org.apache.avro.SchemaBuilder.record(AVRO_TYPE_ANYTHING).namespace(NAMESPACE).fields()
                        .optionalBoolean(ANYTHING_SCHEMA_BOOLEAN_FIELD)
                        .optionalBytes(ANYTHING_SCHEMA_BYTES_FIELD)
                        .optionalDouble(ANYTHING_SCHEMA_DOUBLE_FIELD)
                        .optionalFloat(ANYTHING_SCHEMA_FLOAT_FIELD)
                        .optionalInt(ANYTHING_SCHEMA_INT_FIELD)
                        .optionalLong(ANYTHING_SCHEMA_LONG_FIELD)
                        .optionalString(ANYTHING_SCHEMA_STRING_FIELD)
                        .name(ANYTHING_SCHEMA_ARRAY_FIELD).type().optional().array()
                        .items().type(AVRO_TYPE_ANYTHING)
                        .name(ANYTHING_SCHEMA_MAP_FIELD).type().optional().array()
                        .items().record(MAP_ENTRY_TYPE_NAME).namespace(NAMESPACE).fields()
                        .name(KEY_FIELD).type(AVRO_TYPE_ANYTHING).noDefault()
                        .name(VALUE_FIELD).type(AVRO_TYPE_ANYTHING).noDefault()
                        .endRecord()
                        .endRecord();
        // This is convenient to have extracted; we can't define it before ANYTHING_SCHEMA because it
        // uses ANYTHING_SCHEMA in its definition.
        ANYTHING_SCHEMA_MAP_ELEMENT = ANYTHING_SCHEMA.getField("map").schema()
                .getTypes().get(1) // The "map" field is optional, get the schema from the union type
                .getElementType();
    }
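
    // Illustration of the Anything container above (a sketch; the local variable name is
    // hypothetical): a schemaless integer is carried as an Anything record with only its "int"
    // field set, which is exactly what maybeWrapSchemaless(...) below produces.
    //
    //     GenericRecord wrappedInt = new GenericRecordBuilder(ANYTHING_SCHEMA)
    //             .set(ANYTHING_SCHEMA_INT_FIELD, 42)
    //             .build();
    //
    // A schemaless map instead populates the "map" field with a list of MapEntry records whose
    // key and value fields are themselves Anything records.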


    // Convert values in Connect form into their logical types. These logical converters are
    // discovered by logical type names specified in the field schema.
    private static final HashMap<String, LogicalTypeConverter> TO_CONNECT_LOGICAL_CONVERTERS
            = new HashMap<>();

    static {
        TO_CONNECT_LOGICAL_CONVERTERS.put(Decimal.LOGICAL_NAME, new LogicalTypeConverter() {
            @Override
            public Object convert(Schema schema, Object value) {
                if (value instanceof byte[]) {
                    return Decimal.toLogical(schema, (byte[]) value);
                } else if (value instanceof ByteBuffer) {
                    return Decimal.toLogical(schema, ((ByteBuffer) value).array());
                }
                throw new DataException(
                        "Invalid type for Decimal, underlying representation should be bytes but was "
                                + value.getClass());
            }
        });

        TO_CONNECT_LOGICAL_CONVERTERS.put(Date.LOGICAL_NAME, new LogicalTypeConverter() {
            @Override
            public Object convert(Schema schema, Object value) {
                if (!(value instanceof Integer)) {
                    throw new DataException(
                            "Invalid type for Date, underlying representation should be int32 but was "
                                    + value.getClass());
                }
                return Date.toLogical(schema, (int) value);
            }
        });

        TO_CONNECT_LOGICAL_CONVERTERS.put(Time.LOGICAL_NAME, new LogicalTypeConverter() {
            @Override
            public Object convert(Schema schema, Object value) {
                if (!(value instanceof Integer)) {
                    throw new DataException(
                            "Invalid type for Time, underlying representation should be int32 but was "
                                    + value.getClass());
                }
                return Time.toLogical(schema, (int) value);
            }
        });

        TO_CONNECT_LOGICAL_CONVERTERS.put(Timestamp.LOGICAL_NAME, new LogicalTypeConverter() {
            @Override
            public Object convert(Schema schema, Object value) {
                if (!(value instanceof Long)) {
                    throw new DataException(
                            "Invalid type for Timestamp, underlying representation should be int64 but was "
                                    + value.getClass());
                }
                return Timestamp.toLogical(schema, (long) value);
            }
        });
    }
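
    // Worked example for the converters above (a sketch; the literal values are hypothetical):
    // with a Connect Decimal schema of scale 2, the big-endian unscaled bytes {0x04, 0xD2}
    // (i.e. 1234) convert back to the logical value 12.34.
    //
    //     Schema decimalSchema = Decimal.schema(2);
    //     Object logical = TO_CONNECT_LOGICAL_CONVERTERS.get(Decimal.LOGICAL_NAME)
    //             .convert(decimalSchema, new byte[]{0x04, (byte) 0xD2});  // BigDecimal("12.34")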

    static final String AVRO_PROP = "avro";
    static final String AVRO_LOGICAL_TYPE_PROP = "logicalType";
    static final String AVRO_LOGICAL_TIMESTAMP_MILLIS = "timestamp-millis";
    static final String AVRO_LOGICAL_TIME_MILLIS = "time-millis";
    static final String AVRO_LOGICAL_DATE = "date";
    static final String AVRO_LOGICAL_DECIMAL = "decimal";
    static final String AVRO_LOGICAL_DECIMAL_SCALE_PROP = "scale";
    static final String AVRO_LOGICAL_DECIMAL_PRECISION_PROP = "precision";
    static final String CONNECT_AVRO_DECIMAL_PRECISION_PROP = "connect.decimal.precision";
    static final Integer CONNECT_AVRO_DECIMAL_PRECISION_DEFAULT = 64;

    private static final HashMap<String, LogicalTypeConverter> TO_AVRO_LOGICAL_CONVERTERS
            = new HashMap<>();

    static {
        TO_AVRO_LOGICAL_CONVERTERS.put(Decimal.LOGICAL_NAME, new LogicalTypeConverter() {
            @Override
            public Object convert(Schema schema, Object value) {
                if (!(value instanceof BigDecimal)) {
                    throw new DataException(
                            "Invalid type for Decimal, expected BigDecimal but was " + value.getClass());
                }
                return Decimal.fromLogical(schema, (BigDecimal) value);
            }
        });

        TO_AVRO_LOGICAL_CONVERTERS.put(Date.LOGICAL_NAME, new LogicalTypeConverter() {
            @Override
            public Object convert(Schema schema, Object value) {
                if (!(value instanceof java.util.Date)) {
                    throw new DataException(
                            "Invalid type for Date, expected Date but was " + value.getClass());
                }
                return Date.fromLogical(schema, (java.util.Date) value);
            }
        });

        TO_AVRO_LOGICAL_CONVERTERS.put(Time.LOGICAL_NAME, new LogicalTypeConverter() {
            @Override
            public Object convert(Schema schema, Object value) {
                if (!(value instanceof java.util.Date)) {
                    throw new DataException(
                            "Invalid type for Time, expected Date but was " + value.getClass());
                }
                return Time.fromLogical(schema, (java.util.Date) value);
            }
        });

        TO_AVRO_LOGICAL_CONVERTERS.put(Timestamp.LOGICAL_NAME, new LogicalTypeConverter() {
            @Override
            public Object convert(Schema schema, Object value) {
                if (!(value instanceof java.util.Date)) {
                    throw new DataException(
                            "Invalid type for Timestamp, expected Date but was " + value.getClass());
                }
                return Timestamp.fromLogical(schema, (java.util.Date) value);
            }
        });
    }

    private Cache<Schema, org.apache.avro.Schema> fromConnectSchemaCache;
    private Cache<AvroSchema, Schema> toConnectSchemaCache;
    private boolean connectMetaData;
    private boolean enhancedSchemaSupport;

    public AvroData(int cacheSize) {
        this(new AvroDataConfig.Builder()
                .with(AvroDataConfig.SCHEMAS_CACHE_SIZE_CONFIG, cacheSize)
                .build());
    }

    public AvroData(AvroDataConfig avroDataConfig) {
        fromConnectSchemaCache =
                new SynchronizedCache<>(new LRUCache<>(
                        avroDataConfig.getSchemasCacheSize()));
        toConnectSchemaCache =
                new SynchronizedCache<>(new LRUCache<>(
                        avroDataConfig.getSchemasCacheSize()));
        this.connectMetaData = avroDataConfig.isConnectMetaData();
        this.enhancedSchemaSupport = avroDataConfig.isEnhancedAvroSchemaSupport();
    }

    /**
     * Convert this object, in Connect data format, into an Avro object.
     */
    public Object fromConnectData(Schema schema, Object value) {
        org.apache.avro.Schema avroSchema = fromConnectSchema(schema);
        return fromConnectData(schema, avroSchema, value);
    }

    protected Object fromConnectData(Schema schema, org.apache.avro.Schema avroSchema, Object value) {
        return fromConnectData(schema, avroSchema, value, true, false, enhancedSchemaSupport);
    }

    /**
     * Convert from Connect data format to Avro. This version assumes the Avro schema has already
     * been converted and makes the use of NonRecordContainer optional
     *
     * @param schema                         the Connect schema
     * @param avroSchema                     the corresponding Avro schema
     * @param logicalValue                   the Connect data to convert, which may be a value for
     *                                       a logical type
     * @param requireContainer               if true, wrap primitives, maps, and arrays in a
     *                                       NonRecordContainer before returning them
     * @param requireSchemalessContainerNull if true, use a container representation of null because
     *                                       this is part of struct/array/map and we cannot represent
     *                                       nulls as true null because Anything cannot be a union
     *                                       type; otherwise, this is a top-level value and can return
     *                                       null
     * @return the converted data
     */
    private static Object fromConnectData(
            Schema schema, org.apache.avro.Schema avroSchema,
            Object logicalValue, boolean requireContainer,
            boolean requireSchemalessContainerNull, boolean enhancedSchemaSupport
    ) {
        Schema.Type schemaType = schema != null
                ? schema.type()
                : schemaTypeForSchemalessJavaType(logicalValue);
        if (schemaType == null) {
            // Schemaless null data since schema is null and we got a null schema type from the value
            if (requireSchemalessContainerNull) {
                return new GenericRecordBuilder(ANYTHING_SCHEMA).build();
            } else {
                return null;
            }
        }

        validateSchemaValue(schema, logicalValue);

        if (logicalValue == null) {
            // But if this is schemaless, we may not be able to return null directly
            if (schema == null && requireSchemalessContainerNull) {
                return new GenericRecordBuilder(ANYTHING_SCHEMA).build();
            } else {
                return null;
            }
        }

        // If this is a logical type, convert it from the convenient Java type to the underlying
        // serializable format
        Object value = logicalValue;
        if (schema != null && schema.name() != null) {
            LogicalTypeConverter logicalConverter = TO_AVRO_LOGICAL_CONVERTERS.get(schema.name());
            if (logicalConverter != null) {
                value = logicalConverter.convert(schema, logicalValue);
            }
        }

        try {
            switch (schemaType) {
                case INT8: {
                    Byte byteValue = (Byte) value; // Check for correct type
                    Integer convertedByteValue = byteValue == null ? null : byteValue.intValue();
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, convertedByteValue, ANYTHING_SCHEMA_INT_FIELD),
                            requireContainer);
                }
                case INT16: {
                    Short shortValue = (Short) value; // Check for correct type
                    Integer convertedShortValue = shortValue == null ? null : shortValue.intValue();
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, convertedShortValue, ANYTHING_SCHEMA_INT_FIELD),
                            requireContainer);
                }

                case INT32:
                    Integer intValue = (Integer) value; // Check for correct type
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, value, ANYTHING_SCHEMA_INT_FIELD),
                            requireContainer);
                case INT64:
                    Long longValue = (Long) value; // Check for correct type
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, value, ANYTHING_SCHEMA_LONG_FIELD),
                            requireContainer);
                case FLOAT32:
                    Float floatValue = (Float) value; // Check for correct type
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, value, ANYTHING_SCHEMA_FLOAT_FIELD),
                            requireContainer);
                case FLOAT64:
                    Double doubleValue = (Double) value; // Check for correct type
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, value, ANYTHING_SCHEMA_DOUBLE_FIELD),
                            requireContainer);
                case BOOLEAN:
                    Boolean boolValue = (Boolean) value; // Check for correct type
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, value, ANYTHING_SCHEMA_BOOLEAN_FIELD),
                            requireContainer);
                case STRING:
                    if (enhancedSchemaSupport && schema != null && schema.parameters() != null
                            && schema.parameters().containsKey(AVRO_TYPE_ENUM)) {
                        String enumSchemaName = schema.parameters().get(AVRO_TYPE_ENUM);
                        org.apache.avro.Schema enumSchema;
                        if (avroSchema.getType() == org.apache.avro.Schema.Type.UNION) {
                            int enumIndex = avroSchema.getIndexNamed(enumSchemaName);
                            enumSchema = avroSchema.getTypes().get(enumIndex);
                        } else {
                            enumSchema = avroSchema;
                        }
                        value = new GenericData.EnumSymbol(enumSchema, (String) value);
                    } else {
                        String stringValue = (String) value; // Check for correct type
                    }
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, value, ANYTHING_SCHEMA_STRING_FIELD),
                            requireContainer);

                case BYTES: {
                    ByteBuffer bytesValue = value instanceof byte[] ? ByteBuffer.wrap((byte[]) value) :
                            (ByteBuffer) value;
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, bytesValue, ANYTHING_SCHEMA_BYTES_FIELD),
                            requireContainer);
                }

                case ARRAY: {
                    Collection list = (Collection) value;
                    List<Object> converted = new ArrayList<>(list.size());
                    Schema elementSchema = schema != null ? schema.valueSchema() : null;
                    org.apache.avro.Schema underlyingAvroSchema = avroSchemaForUnderlyingTypeIfOptional(
                            schema, avroSchema);
                    org.apache.avro.Schema elementAvroSchema =
                            schema != null ? underlyingAvroSchema.getElementType() : ANYTHING_SCHEMA;
                    for (Object val : list) {
                        converted.add(
                                fromConnectData(
                                        elementSchema,
                                        elementAvroSchema,
                                        val,
                                        false,
                                        true,
                                        enhancedSchemaSupport
                                )
                        );
                    }
                    return maybeAddContainer(
                            avroSchema,
                            maybeWrapSchemaless(schema, converted, ANYTHING_SCHEMA_ARRAY_FIELD),
                            requireContainer);
                }

                case MAP: {
                    Map<Object, Object> map = (Map<Object, Object>) value;
                    org.apache.avro.Schema underlyingAvroSchema;
                    if (schema != null && schema.keySchema().type() == Schema.Type.STRING
                            && !schema.keySchema().isOptional()) {

                        underlyingAvroSchema = avroSchemaForUnderlyingTypeIfOptional(schema, avroSchema);
                        Map<String, Object> converted = new HashMap<>();
                        for (Map.Entry<Object, Object> entry : map.entrySet()) {
                            // Key is a String, no conversion needed
                            Object convertedValue = fromConnectData(schema.valueSchema(),
                                    underlyingAvroSchema.getValueType(),
                                    entry.getValue(), false, true, enhancedSchemaSupport
                            );
                            converted.put((String) entry.getKey(), convertedValue);
                        }
                        return maybeAddContainer(avroSchema, converted, requireContainer);
                    } else {
                        List<GenericRecord> converted = new ArrayList<>(map.size());
                        underlyingAvroSchema = avroSchemaForUnderlyingMapEntryType(schema, avroSchema);
                        org.apache.avro.Schema elementSchema =
                                schema != null
                                        ? underlyingAvroSchema.getElementType()
                                        : ANYTHING_SCHEMA_MAP_ELEMENT;
                        org.apache.avro.Schema avroKeySchema = elementSchema.getField(KEY_FIELD).schema();
                        org.apache.avro.Schema avroValueSchema = elementSchema.getField(VALUE_FIELD).schema();
                        for (Map.Entry<Object, Object> entry : map.entrySet()) {
                            Object keyConverted = fromConnectData(schema != null ? schema.keySchema() : null,
                                    avroKeySchema, entry.getKey(), false, true,
                                    enhancedSchemaSupport);
                            Object valueConverted = fromConnectData(schema != null ? schema.valueSchema() : null,
                                    avroValueSchema, entry.getValue(), false,
                                    true, enhancedSchemaSupport);
                            converted.add(
                                    new GenericRecordBuilder(elementSchema)
                                            .set(KEY_FIELD, keyConverted)
                                            .set(VALUE_FIELD, valueConverted)
                                            .build()
                            );
                        }
                        return maybeAddContainer(
                                avroSchema, maybeWrapSchemaless(schema, converted, ANYTHING_SCHEMA_MAP_FIELD),
                                requireContainer);
                    }
                }

                case STRUCT: {
                    Struct struct = (Struct) value;
                    if (!struct.schema().equals(schema)) {
                        throw new DataException("Mismatching struct schema");
                    }
                    //This handles the inverting of a union which is held as a struct, where each field is
                    // one of the union types.
                    if (AVRO_TYPE_UNION.equals(schema.name())) {
                        for (Field field : schema.fields()) {
                            Object object = struct.get(field);
                            if (object != null) {
                                return fromConnectData(
                                        field.schema(),
                                        avroSchema,
                                        object,
                                        false,
                                        true,
                                        enhancedSchemaSupport
                                );
                            }
                        }
                        return fromConnectData(schema, avroSchema, null, false, true, enhancedSchemaSupport);
                    } else {
                        org.apache.avro.Schema underlyingAvroSchema = avroSchemaForUnderlyingTypeIfOptional(
                                schema, avroSchema);
                        GenericRecordBuilder convertedBuilder = new GenericRecordBuilder(underlyingAvroSchema);
                        for (Field field : schema.fields()) {
                            org.apache.avro.Schema.Field theField = underlyingAvroSchema.getField(field.name());
                            org.apache.avro.Schema fieldAvroSchema = theField.schema();
                            convertedBuilder.set(
                                    field.name(),
                                    fromConnectData(field.schema(), fieldAvroSchema, struct.get(field), false,
                                            true, enhancedSchemaSupport)
                            );
                        }
                        return convertedBuilder.build();
                    }
                }

                default:
                    throw new DataException("Unknown schema type: " + schema.type());
            }
        } catch (ClassCastException e) {
            throw new DataException("Invalid type for " + schema.type() + ": " + value.getClass());
        }
    }

    /**
     * MapEntry types in Connect schemas are represented as arrays of records.
     * Return the array type from the union instead of the union itself.
     */
    private static org.apache.avro.Schema avroSchemaForUnderlyingMapEntryType(
            Schema schema,
            org.apache.avro.Schema avroSchema) {

        if (schema != null && schema.isOptional()) {
            if (avroSchema.getType() == org.apache.avro.Schema.Type.UNION) {
                for (org.apache.avro.Schema typeSchema : avroSchema.getTypes()) {
                    if (!typeSchema.getType().equals(org.apache.avro.Schema.Type.NULL)
                            && Schema.Type.ARRAY.getName().equals(typeSchema.getType().getName())) {
                        return typeSchema;
                    }
                }
            } else {
                throw new DataException(
                        "An optional schema should have an Avro Union type, not "
                                + schema.type());
            }
        }
        return avroSchema;
    }
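
    // For example (sketch): an optional Connect map with non-string keys converts to the Avro
    // union ["null", {"type": "array", "items": MapEntry}]; the helper above returns the array
    // branch so callers can read its MapEntry element schema directly.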

    private static boolean crossReferenceSchemaNames(final Schema schema,
                                                     final org.apache.avro.Schema avroSchema) {
        return Objects.equals(avroSchema.getFullName(), schema.name())
                || Objects.equals(avroSchema.getType().getName(), schema.type().getName())
                || (schema.name() == null && avroSchema.getFullName().equals(DEFAULT_SCHEMA_FULL_NAME));
    }

    /**
     * Connect optional fields are represented as a union (null and the underlying type) in Avro.
     * Return the Avro schema of the actual type in the Union (instead of the union itself)
     */
    private static org.apache.avro.Schema avroSchemaForUnderlyingTypeIfOptional(
            Schema schema, org.apache.avro.Schema avroSchema) {

        if (schema != null && schema.isOptional()) {
            if (avroSchema.getType() == org.apache.avro.Schema.Type.UNION) {
                for (org.apache.avro.Schema typeSchema : avroSchema
                        .getTypes()) {
                    if (!typeSchema.getType().equals(org.apache.avro.Schema.Type.NULL)
                            && crossReferenceSchemaNames(schema, typeSchema)) {
                        return typeSchema;
                    }
                }
            } else {
                throw new DataException(
                        "An optinal schema should have an Avro Union type, not "
                                + schema.type());
            }
        }
        return avroSchema;
    }
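
    // For example (sketch, with a hypothetical record name): an optional Connect struct named
    // "com.example.User" maps to the Avro union ["null", "com.example.User"]; the helper above
    // returns the record branch so that field lookups and GenericRecordBuilder calls operate on
    // the concrete type rather than on the union.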

    private static Schema.Type schemaTypeForSchemalessJavaType(Object value) {
        if (value == null) {
            return null;
        } else if (value instanceof Byte) {
            return Schema.Type.INT8;
        } else if (value instanceof Short) {
            return Schema.Type.INT16;
        } else if (value instanceof Integer) {
            return Schema.Type.INT32;
        } else if (value instanceof Long) {
            return Schema.Type.INT64;
        } else if (value instanceof Float) {
            return Schema.Type.FLOAT32;
        } else if (value instanceof Double) {
            return Schema.Type.FLOAT64;
        } else if (value instanceof Boolean) {
            return Schema.Type.BOOLEAN;
        } else if (value instanceof String) {
            return Schema.Type.STRING;
        } else if (value instanceof Collection) {
            return Schema.Type.ARRAY;
        } else if (value instanceof Map) {
            return Schema.Type.MAP;
        } else {
            throw new DataException("Unknown Java type for schemaless data: " + value.getClass());
        }
    }

    private static Object maybeAddContainer(org.apache.avro.Schema avroSchema, Object value,
                                            boolean wrap) {
        return wrap ? new NonRecordContainer(avroSchema, value) : value;
    }

    private static Object maybeWrapSchemaless(Schema schema, Object value, String typeField) {
        if (schema != null) {
            return value;
        }

        GenericRecordBuilder builder = new GenericRecordBuilder(ANYTHING_SCHEMA);
        if (value != null) {
            builder.set(typeField, value);
        }
        return builder.build();
    }
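
    // For example (sketch): maybeWrapSchemaless(null, "hello", ANYTHING_SCHEMA_STRING_FIELD)
    // returns an Anything record whose "string" field is "hello", while any call with a non-null
    // schema simply returns the value unchanged.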

    public org.apache.avro.Schema fromConnectSchema(Schema schema) {
        return fromConnectSchema(schema, new HashMap<Schema, org.apache.avro.Schema>());
    }

    public org.apache.avro.Schema fromConnectSchema(Schema schema,
                                                    Map<Schema, org.apache.avro.Schema> schemaMap) {
        FromConnectContext fromConnectContext = new FromConnectContext(schemaMap);
        return fromConnectSchema(schema, fromConnectContext, false);
    }

    /**
     * SchemaMap is a map of already resolved internal schemas; it avoids re-declaring a type when
     * that type is reused, which would otherwise fail because a type used in multiple places would
     * be defined more than once.
     *
     * <p>Also it only holds references to the non-optional schemas, as technically an optional is
     * actually a union of null and the non-optional type; if that union were registered here, a mix
     * of optional and non-optional uses would redefine the type and cause an error.
     *
     * <p>This is different to the global schema cache, which is used to hold/cache fully resolved
     * schemas and avoid re-resolving when presented with the same source schema.
     */
    public org.apache.avro.Schema fromConnectSchema(Schema schema,
                                                    FromConnectContext fromConnectContext,
                                                    boolean ignoreOptional) {
        if (schema == null) {
            return ANYTHING_SCHEMA;
        }

        org.apache.avro.Schema cached = fromConnectSchemaCache.get(schema);

        if (cached == null && !AVRO_TYPE_UNION.equals(schema.name()) && !schema.isOptional()) {
            cached = fromConnectContext.schemaMap.get(schema);
        }
        if (cached != null) {
            return cached;
        }

        String namespace = NAMESPACE;
        String name = DEFAULT_SCHEMA_NAME;
        if (schema.name() != null) {
            String[] split = splitName(schema.name());
            namespace = split[0];
            name = split[1];
        }

        // Extra type annotation information for otherwise lossy conversions
        String connectType = null;

        final org.apache.avro.Schema baseSchema;
        switch (schema.type()) {
            case INT8:
                connectType = CONNECT_TYPE_INT8;
                baseSchema = org.apache.avro.SchemaBuilder.builder().intType();
                break;
            case INT16:
                connectType = CONNECT_TYPE_INT16;
                baseSchema = org.apache.avro.SchemaBuilder.builder().intType();
                break;
            case INT32:
                baseSchema = org.apache.avro.SchemaBuilder.builder().intType();
                break;
            case INT64:
                baseSchema = org.apache.avro.SchemaBuilder.builder().longType();
                break;
            case FLOAT32:
                baseSchema = org.apache.avro.SchemaBuilder.builder().floatType();
                break;
            case FLOAT64:
                baseSchema = org.apache.avro.SchemaBuilder.builder().doubleType();
                break;
            case BOOLEAN:
                baseSchema = org.apache.avro.SchemaBuilder.builder().booleanType();
                break;
            case STRING:
                if (enhancedSchemaSupport && schema.parameters() != null
                        && schema.parameters().containsKey(AVRO_TYPE_ENUM)) {
                    List<String> symbols = new ArrayList<>();
                    for (Map.Entry<String, String> entry : schema.parameters().entrySet()) {
                        if (entry.getKey().startsWith(AVRO_TYPE_ENUM + ".")) {
                            symbols.add(entry.getValue());
                        }
                    }
                    String enumDoc = schema.parameters().get(AVRO_ENUM_DOC_PREFIX_PROP + name);
                    String enumDefault = schema.parameters().get(AVRO_ENUM_DEFAULT_PREFIX_PROP + name);
                    baseSchema = org.apache.avro.SchemaBuilder.builder().enumeration(
                            schema.parameters().get(AVRO_TYPE_ENUM))
                            .doc(enumDoc)
                            .defaultSymbol(enumDefault)
                            .symbols(symbols.toArray(new String[symbols.size()]));
                } else {
                    baseSchema = org.apache.avro.SchemaBuilder.builder().stringType();
                }
                break;
            case BYTES:
                baseSchema = org.apache.avro.SchemaBuilder.builder().bytesType();
                if (Decimal.LOGICAL_NAME.equalsIgnoreCase(schema.name())) {
                    int scale = Integer.parseInt(schema.parameters().get(Decimal.SCALE_FIELD));
                    baseSchema.addProp(AVRO_LOGICAL_DECIMAL_SCALE_PROP, new IntNode(scale));
                    if (schema.parameters().containsKey(CONNECT_AVRO_DECIMAL_PRECISION_PROP)) {
                        String precisionValue = schema.parameters().get(CONNECT_AVRO_DECIMAL_PRECISION_PROP);
                        int precision = Integer.parseInt(precisionValue);
                        baseSchema.addProp(AVRO_LOGICAL_DECIMAL_PRECISION_PROP, new IntNode(precision));
                    } else {
                        baseSchema.addProp(AVRO_LOGICAL_DECIMAL_PRECISION_PROP,
                                new IntNode(CONNECT_AVRO_DECIMAL_PRECISION_DEFAULT));
                    }
                }
                break;
            case ARRAY:
                baseSchema = org.apache.avro.SchemaBuilder.builder().array()
                        .items(fromConnectSchemaWithCycle(schema.valueSchema(), fromConnectContext, false));
                break;
            case MAP:
                // Avro only supports string keys, so we match the representation when possible, but
                // otherwise fall back on a record representation
                if (schema.keySchema().type() == Schema.Type.STRING && !schema.keySchema().isOptional()) {
                    baseSchema = org.apache.avro.SchemaBuilder.builder().map().values(
                            fromConnectSchemaWithCycle(schema.valueSchema(), fromConnectContext, false));
                } else {
                    // Special record name indicates format
                    List<org.apache.avro.Schema.Field> fields = new ArrayList<>();
                    final org.apache.avro.Schema mapSchema;
                    if (schema.name() == null) {
                        mapSchema = org.apache.avro.Schema.createRecord(
                                MAP_ENTRY_TYPE_NAME,
                                null,
                                namespace,
                                false
                        );
                    } else {
                        mapSchema = org.apache.avro.Schema.createRecord(name, null, namespace, false);
                        mapSchema.addProp(CONNECT_INTERNAL_TYPE_NAME, MAP_ENTRY_TYPE_NAME);
                    }
                    addAvroRecordField(
                            fields, KEY_FIELD, schema.keySchema(), null, fromConnectContext);
                    addAvroRecordField(
                            fields, VALUE_FIELD, schema.valueSchema(), null, fromConnectContext);
                    mapSchema.setFields(fields);
                    baseSchema = org.apache.avro.Schema.createArray(mapSchema);
                }
                break;
            case STRUCT:
                if (AVRO_TYPE_UNION.equals(schema.name())) {
                    List<org.apache.avro.Schema> unionSchemas = new ArrayList<>();
                    if (schema.isOptional()) {
                        unionSchemas.add(org.apache.avro.SchemaBuilder.builder().nullType());
                    }
                    for (Field field : schema.fields()) {
                        unionSchemas.add(
                                fromConnectSchemaWithCycle(nonOptional(field.schema()), fromConnectContext, true));
                    }
                    baseSchema = org.apache.avro.Schema.createUnion(unionSchemas);
                } else if (schema.isOptional()) {
                    List<org.apache.avro.Schema> unionSchemas = new ArrayList<>();
                    unionSchemas.add(org.apache.avro.SchemaBuilder.builder().nullType());
                    unionSchemas.add(
                            fromConnectSchemaWithCycle(nonOptional(schema), fromConnectContext, false));
                    baseSchema = org.apache.avro.Schema.createUnion(unionSchemas);
                } else {
                    String doc = schema.parameters() != null
                            ? schema.parameters().get(AVRO_RECORD_DOC_PROP)
                            : null;
                    baseSchema = org.apache.avro.Schema.createRecord(
                            name != null ? name : DEFAULT_SCHEMA_NAME, doc, namespace, false);
                    if (schema.name() != null) {
                        fromConnectContext.cycleReferences.put(schema.name(), baseSchema);
                    }
                    List<org.apache.avro.Schema.Field> fields = new ArrayList<>();
                    for (Field field : schema.fields()) {
                        String fieldDoc = schema.parameters() != null
                                ? schema.parameters().get(AVRO_FIELD_DOC_PREFIX_PROP + field.name())
                                : null;
                        addAvroRecordField(fields, field.name(), field.schema(), fieldDoc, fromConnectContext);
                    }
                    baseSchema.setFields(fields);
                }
                break;
            default:
                throw new DataException("Unknown schema type: " + schema.type());
        }

        org.apache.avro.Schema finalSchema = baseSchema;
        if (!baseSchema.getType().equals(org.apache.avro.Schema.Type.UNION)) {
            if (connectMetaData) {
                if (schema.doc() != null) {
                    baseSchema.addProp(CONNECT_DOC_PROP, schema.doc());
                }
                if (schema.version() != null) {
                    baseSchema.addProp(CONNECT_VERSION_PROP,
                            JsonNodeFactory.instance.numberNode(schema.version()));
                }
                if (schema.parameters() != null) {
                    JsonNode params = parametersFromConnect(schema.parameters());
                    if (!params.isEmpty()) {
                        baseSchema.addProp(CONNECT_PARAMETERS_PROP, params);
                    }
                }
                if (schema.defaultValue() != null) {
                    if (schema.parameters() == null
                            || !schema.parameters().containsKey(AVRO_FIELD_DEFAULT_FLAG_PROP)) {
                        baseSchema.addProp(CONNECT_DEFAULT_VALUE_PROP,
                                defaultValueFromConnect(schema, schema.defaultValue()));
                    }
                }
                if (schema.name() != null) {
                    baseSchema.addProp(CONNECT_NAME_PROP, schema.name());
                }
                // Some Connect types need special annotations to preserve the types accurately due to
                // limitations in Avro. These types get an extra annotation with their Connect type
                if (connectType != null) {
                    baseSchema.addProp(CONNECT_TYPE_PROP, connectType);
                }
            }

            boolean forceLegacyDecimal = false;
            // the new and correct way to handle logical types
            if (schema.name() != null) {
                if (Decimal.LOGICAL_NAME.equalsIgnoreCase(schema.name())) {
                    String precisionString = schema.parameters().get(CONNECT_AVRO_DECIMAL_PRECISION_PROP);
                    String scaleString = schema.parameters().get(Decimal.SCALE_FIELD);
                    int precision = precisionString == null ? CONNECT_AVRO_DECIMAL_PRECISION_DEFAULT :
                            Integer.parseInt(precisionString);
                    int scale = scaleString == null ? 0 : Integer.parseInt(scaleString);
                    if (scale < 0 || scale > precision) {
                        log.trace(
                                "Scale and precision of {} and {} cannot be serialized as native Avro logical "
                                        + "decimal type; reverting to legacy serialization method",
                                scale,
                                precision
                        );
                        // We cannot use the Avro Java library's support for the decimal logical type when the
                        // scale is either negative or greater than the precision as this violates the Avro spec
                        // and causes the Avro library to throw an exception, so we fall back in this case to
                        // using the legacy method for encoding decimal logical type information.
                        // Can't add a key/value pair with the CONNECT_AVRO_DECIMAL_PRECISION_PROP key to the
                        // schema's parameters since the parameters for Connect schemas are immutable, so we
                        // just track this in a local boolean variable instead.
                        forceLegacyDecimal = true;
                    } else {
                        org.apache.avro.LogicalTypes.decimal(precision, scale).addToSchema(baseSchema);
                    }
                } else if (Time.LOGICAL_NAME.equalsIgnoreCase(schema.name())) {
                    org.apache.avro.LogicalTypes.timeMillis().addToSchema(baseSchema);
                } else if (Timestamp.LOGICAL_NAME.equalsIgnoreCase(schema.name())) {
                    org.apache.avro.LogicalTypes.timestampMillis().addToSchema(baseSchema);
                } else if (Date.LOGICAL_NAME.equalsIgnoreCase(schema.name())) {
                    org.apache.avro.LogicalTypes.date().addToSchema(baseSchema);
                }
            }

            // Initially, to add support for logical types a new property was added with the key
            // `logicalType`. This enabled logical types for Avro schemas but not for others, such as
            // Parquet. The use of `addToSchema` above supersedes this method, which is kept below for
            // backwards compatibility until a major version upgrade happens and will be deprecated
            // eventually.

            // Only Avro named types (record, enum, fixed) may contain namespace + name. Only Connect's
            // struct converts to one of those (record), so for everything else that has a name we store
            // the full name into a special property. For uniformity, we also duplicate this info into
            // the same field in records as well even though it will also be available in the namespace()
            // and name().
            if (schema.name() != null) {
                if (Decimal.LOGICAL_NAME.equalsIgnoreCase(schema.name())
                        && (schema.parameters().containsKey(CONNECT_AVRO_DECIMAL_PRECISION_PROP)
                        || forceLegacyDecimal)) {
                    baseSchema.addProp(AVRO_LOGICAL_TYPE_PROP, AVRO_LOGICAL_DECIMAL);
                } else if (Time.LOGICAL_NAME.equalsIgnoreCase(schema.name())) {
                    baseSchema.addProp(AVRO_LOGICAL_TYPE_PROP, AVRO_LOGICAL_TIME_MILLIS);
                } else if (Timestamp.LOGICAL_NAME.equalsIgnoreCase(schema.name())) {
                    baseSchema.addProp(AVRO_LOGICAL_TYPE_PROP, AVRO_LOGICAL_TIMESTAMP_MILLIS);
                } else if (Date.LOGICAL_NAME.equalsIgnoreCase(schema.name())) {
                    baseSchema.addProp(AVRO_LOGICAL_TYPE_PROP, AVRO_LOGICAL_DATE);
                }
            }

            if (schema.parameters() != null) {
                for (Map.Entry<String, String> entry : schema.parameters().entrySet()) {
                    if (entry.getKey().startsWith(AVRO_PROP)) {
                        baseSchema.addProp(entry.getKey(), entry.getValue());
                    }
                }
            }

            // Note that all metadata has already been processed and placed on the baseSchema because we
            // can't store any metadata on the actual top-level schema when it's a union because of Avro
            // constraints on the format of schemas.
            if (!ignoreOptional) {
                if (schema.isOptional()) {
                    if (schema.defaultValue() != null) {
                        finalSchema = org.apache.avro.SchemaBuilder.builder().unionOf()
                                .type(baseSchema).and()
                                .nullType()
                                .endUnion();
                    } else {
                        finalSchema = org.apache.avro.SchemaBuilder.builder().unionOf()
                                .nullType().and()
                                .type(baseSchema)
                                .endUnion();
                    }
                }
            }
        }

        if (!schema.isOptional()) {
            fromConnectContext.schemaMap.put(schema, finalSchema);
        }
        fromConnectSchemaCache.put(schema, finalSchema);
        return finalSchema;
    }

    public org.apache.avro.Schema fromConnectSchemaWithCycle(
            Schema schema,
            FromConnectContext fromConnectContext, boolean ignoreOptional) {
        org.apache.avro.Schema resolvedSchema;
        if (fromConnectContext.cycleReferences.containsKey(schema.name())) {
            resolvedSchema = fromConnectContext.cycleReferences.get(schema.name());
        } else {
            resolvedSchema = fromConnectSchema(schema, fromConnectContext, ignoreOptional);
        }
        return resolvedSchema;
    }

    private void addAvroRecordField(
            List<org.apache.avro.Schema.Field> fields,
            String fieldName, Schema fieldSchema, String fieldDoc,
            FromConnectContext fromConnectContext) {

        Object defaultVal = null;
        if (fieldSchema.defaultValue() != null) {
            defaultVal = JacksonUtils.toObject(
                    defaultValueFromConnect(fieldSchema, fieldSchema.defaultValue()));
        } else if (fieldSchema.isOptional()) {
            defaultVal = JsonProperties.NULL_VALUE;
        }
        org.apache.avro.Schema.Field field = new org.apache.avro.Schema.Field(
                fieldName,
                fromConnectSchema(fieldSchema, fromConnectContext, false),
                fieldDoc,
                defaultVal);
        fields.add(field);
    }

    private static Object toAvroLogical(Schema schema, Object value) {
        if (schema != null && schema.name() != null) {
            LogicalTypeConverter logicalConverter = TO_AVRO_LOGICAL_CONVERTERS.get(schema.name());
            if (logicalConverter != null && value != null) {
                return logicalConverter.convert(schema, value);
            }
        }
        return value;
    }

    private static Object toConnectLogical(Schema schema, Object value) {
        if (schema != null && schema.name() != null) {
            LogicalTypeConverter logicalConverter = TO_CONNECT_LOGICAL_CONVERTERS.get(schema.name());
            if (logicalConverter != null && value != null) {
                return logicalConverter.convert(schema, value);
            }
        }
        return value;
    }

    // Convert default values from Connect data format to Avro's format, which is a Jackson
    // JsonNode. The default value is provided as an argument because even though you can get a
    // default value from the schema, default values for complex structures need to perform the
    // same translation, but those defaults will be part of the original top-level (complex type)
    // default value, not part of the child schema.
    private static JsonNode defaultValueFromConnect(Schema schema, Object value) {
        try {
            // If this is a logical type, convert it from the convenient Java type to the underlying
            // serializable format
            Object defaultVal = toAvroLogical(schema, value);

            switch (schema.type()) {
                case INT8:
                    return JsonNodeFactory.instance.numberNode(((Byte) defaultVal).intValue());
                case INT16:
                    return JsonNodeFactory.instance.numberNode(((Short) defaultVal).intValue());
                case INT32:
                    return JsonNodeFactory.instance.numberNode((Integer) defaultVal);
                case INT64:
                    return JsonNodeFactory.instance.numberNode((Long) defaultVal);
                case FLOAT32:
                    return JsonNodeFactory.instance.numberNode((Float) defaultVal);
                case FLOAT64:
                    return JsonNodeFactory.instance.numberNode((Double) defaultVal);
                case BOOLEAN:
                    return JsonNodeFactory.instance.booleanNode((Boolean) defaultVal);
                case STRING:
                    return JsonNodeFactory.instance.textNode((String) defaultVal);
                case BYTES:
                    if (defaultVal instanceof byte[]) {
                        return JsonNodeFactory.instance.textNode(new String((byte[]) defaultVal,
                                StandardCharsets.ISO_8859_1));
                    } else {
                        return JsonNodeFactory.instance.textNode(new String(((ByteBuffer) defaultVal).array(),
                                StandardCharsets.ISO_8859_1));
                    }
                case ARRAY: {
                    ArrayNode array = JsonNodeFactory.instance.arrayNode();
                    for (Object elem : (Collection) defaultVal) {
                        array.add(defaultValueFromConnect(schema.valueSchema(), elem));
                    }
                    return array;
                }
                case MAP:
                    if (schema.keySchema().type() == Schema.Type.STRING && !schema.keySchema().isOptional()) {
                        ObjectNode node = JsonNodeFactory.instance.objectNode();
                        for (Map.Entry<String, Object> entry : ((Map<String, Object>) defaultVal).entrySet()) {
                            JsonNode entryDef = defaultValueFromConnect(schema.valueSchema(), entry.getValue());
                            node.put(entry.getKey(), entryDef);
                        }
                        return node;
                    } else {
                        ArrayNode array = JsonNodeFactory.instance.arrayNode();
                        for (Map.Entry<Object, Object> entry : ((Map<Object, Object>) defaultVal).entrySet()) {
                            JsonNode keyDefault = defaultValueFromConnect(schema.keySchema(), entry.getKey());
                            JsonNode valDefault = defaultValueFromConnect(schema.valueSchema(), entry.getValue());
                            ArrayNode jsonEntry = JsonNodeFactory.instance.arrayNode();
                            jsonEntry.add(keyDefault);
                            jsonEntry.add(valDefault);
                            array.add(jsonEntry);
                        }
                        return array;
                    }
                case STRUCT: {
                    ObjectNode node = JsonNodeFactory.instance.objectNode();
                    Struct struct = ((Struct) defaultVal);
                    for (Field field : (schema.fields())) {
                        JsonNode fieldDef = defaultValueFromConnect(field.schema(), struct.get(field));
                        node.put(field.name(), fieldDef);
                    }
                    return node;
                }
                default:
                    throw new DataException("Unknown schema type:" + schema.type());
            }
        } catch (ClassCastException e) {
            throw new DataException("Invalid type used for default value of "
                    + schema.type()
                    + " field: " + schema.defaultValue().getClass());
        }
    }

    private static JsonNode parametersFromConnect(Map<String, String> params) {
        ObjectNode result = JsonNodeFactory.instance.objectNode();
        for (Map.Entry<String, String> entry : params.entrySet()) {
            if (!entry.getKey().equals(AVRO_FIELD_DEFAULT_FLAG_PROP)) {
                result.put(entry.getKey(), entry.getValue());
            }
        }
        return result;
    }

    private static void validateSchemaValue(Schema schema, Object value) throws DataException {
        if (value == null && schema != null && !schema.isOptional()) {
            throw new DataException("Found null value for non-optional schema");
        }
    }

    private boolean isMapEntry(final org.apache.avro.Schema elemSchema) {
        if (!elemSchema.getType().equals(org.apache.avro.Schema.Type.RECORD)) {
            return false;
        }
        if (NAMESPACE.equals(elemSchema.getNamespace())
                && MAP_ENTRY_TYPE_NAME.equals(elemSchema.getName())) {
            return true;
        }
        if (Objects.equals(elemSchema.getProp(CONNECT_INTERNAL_TYPE_NAME), MAP_ENTRY_TYPE_NAME)) {
            return true;
        }
        return false;
    }

    /**
     * Convert the given object, in Avro format, into a Connect data object.
     *
     * @param avroSchema the Avro schema
     * @param value      the value to convert into a Connect data object
     * @return the Connect schema and value
     */
    public SchemaAndValue toConnectData(org.apache.avro.Schema avroSchema, Object value) {
        return toConnectData(avroSchema, value, null);
    }

    /**
     * Convert the given object, in Avro format, into a Connect data object.
     *
     * @param avroSchema the Avro schema
     * @param value      the value to convert into a Connect data object
     * @param version    the version to set on the Connect schema if the avroSchema does not have a
     *                   property named "connect.version", may be null
     * @return the Connect schema and value
     */
    public SchemaAndValue toConnectData(org.apache.avro.Schema avroSchema, Object value, Integer version) {
        if (value == null) {
            return null;
        }
        ToConnectContext toConnectContext = new ToConnectContext();
        Schema schema = (avroSchema.equals(ANYTHING_SCHEMA))
                ? null
                : toConnectSchema(avroSchema, version, toConnectContext);
        return new SchemaAndValue(schema, toConnectData(schema, value, toConnectContext));
    }

    private Object toConnectData(Schema schema, Object value, ToConnectContext toConnectContext) {
        return toConnectData(schema, value, toConnectContext, true);
    }

    private Object toConnectData(Schema schema, Object value, ToConnectContext toConnectContext,
                                 boolean doLogicalConversion) {
        validateSchemaValue(schema, value);
        if (value == null) {
            return null;
        }
        try {
            // If we're decoding schemaless data, we need to unwrap it into just the single value
            if (schema == null) {
                if (!(value instanceof IndexedRecord)) {
                    throw new DataException("Invalid Avro data for schemaless Connect data");
                }
                IndexedRecord recordValue = (IndexedRecord) value;

                Object boolVal = recordValue.get(
                        ANYTHING_SCHEMA.getField(ANYTHING_SCHEMA_BOOLEAN_FIELD).pos());
                if (boolVal != null) {
                    return toConnectData(Schema.BOOLEAN_SCHEMA, boolVal, toConnectContext);
                }

                Object bytesVal = recordValue.get(
                        ANYTHING_SCHEMA.getField(ANYTHING_SCHEMA_BYTES_FIELD).pos());
                if (bytesVal != null) {
                    return toConnectData(Schema.BYTES_SCHEMA, bytesVal, toConnectContext);
                }

                Object dblVal = recordValue.get(
                        ANYTHING_SCHEMA.getField(ANYTHING_SCHEMA_DOUBLE_FIELD).pos());
                if (dblVal != null) {
                    return toConnectData(Schema.FLOAT64_SCHEMA, dblVal, toConnectContext);
                }

                Object fltVal = recordValue.get(
                        ANYTHING_SCHEMA.getField(ANYTHING_SCHEMA_FLOAT_FIELD).pos());
                if (fltVal != null) {
                    return toConnectData(Schema.FLOAT32_SCHEMA, fltVal, toConnectContext);
                }

                Object intVal = recordValue.get(
                        ANYTHING_SCHEMA.getField(ANYTHING_SCHEMA_INT_FIELD).pos());
                if (intVal != null) {
                    return toConnectData(Schema.INT32_SCHEMA, intVal, toConnectContext);
                }

                Object longVal = recordValue.get(
                        ANYTHING_SCHEMA.getField(ANYTHING_SCHEMA_LONG_FIELD).pos());
                if (longVal != null) {
                    return toConnectData(Schema.INT64_SCHEMA, longVal, toConnectContext);
                }

                Object stringVal = recordValue.get(
                        ANYTHING_SCHEMA.getField(ANYTHING_SCHEMA_STRING_FIELD).pos());
                if (stringVal != null) {
                    return toConnectData(Schema.STRING_SCHEMA, stringVal, toConnectContext);
                }

                Object arrayVal = recordValue.get(
                        ANYTHING_SCHEMA.getField(ANYTHING_SCHEMA_ARRAY_FIELD).pos());
                if (arrayVal != null) {
                    // We cannot reuse the logic like we do in other cases because it is not possible to
                    // construct an array schema with a null item schema, but the items have no schema.
                    if (!(arrayVal instanceof Collection)) {
                        throw new DataException(
                                "Expected a Collection for schemaless array field but found a "
                                        + arrayVal.getClass().getName()
                        );
                    }
                    Collection<Object> original = (Collection<Object>) arrayVal;
                    List<Object> result = new ArrayList<>(original.size());
                    for (Object elem : original) {
                        result.add(toConnectData((Schema) null, elem, toConnectContext));
                    }
                    return result;
                }

                Object mapVal = recordValue.get(
                        ANYTHING_SCHEMA.getField(ANYTHING_SCHEMA_MAP_FIELD).pos());
                if (mapVal != null) {
                    // We cannot reuse the logic like we do in other cases because it is not possible to
                    // construct a map schema with a null item schema, but the items have no schema.
                    if (!(mapVal instanceof Collection)) {
                        throw new DataException(
                                "Expected a List for schemaless map field but found a "
                                        + mapVal.getClass().getName()
                        );
                    }
                    Collection<IndexedRecord> original = (Collection<IndexedRecord>) mapVal;
                    Map<Object, Object> result = new HashMap<>(original.size());
                    for (IndexedRecord entry : original) {
                        int avroKeyFieldIndex = entry.getSchema().getField(KEY_FIELD).pos();
                        int avroValueFieldIndex = entry.getSchema().getField(VALUE_FIELD).pos();
                        Object convertedKey = toConnectData(
                                null, entry.get(avroKeyFieldIndex), toConnectContext);
                        Object convertedValue = toConnectData(
                                null, entry.get(avroValueFieldIndex), toConnectContext);
                        result.put(convertedKey, convertedValue);
                    }
                    return result;
                }

                // If nothing was set, it's null
                return null;
            }

            Object converted = null;
            switch (schema.type()) {
                // Pass through types
                case INT32: {
                    Integer intValue = (Integer) value; // Validate type
                    converted = value;
                    break;
                }
                case INT64: {
                    Long longValue = (Long) value; // Validate type
                    converted = value;
                    break;
                }
                case FLOAT32: {
                    Float floatValue = (Float) value; // Validate type
                    converted = value;
                    break;
                }
                case FLOAT64: {
                    Double doubleValue = (Double) value; // Validate type
                    converted = value;
                    break;
                }
                case BOOLEAN: {
                    Boolean boolValue = (Boolean) value; // Validate type
                    converted = value;
                    break;
                }

                case INT8:
                    // Encoded as an Integer
                    converted = ((Integer) value).byteValue();
                    break;
                case INT16:
                    // Encoded as an Integer
                    converted = ((Integer) value).shortValue();
                    break;

                case STRING:
                    if (value instanceof String) {
                        converted = value;
                    } else if (value instanceof CharSequence
                            || value instanceof GenericEnumSymbol
                            || value instanceof Enum) {
                        converted = value.toString();
                    } else {
                        throw new DataException("Invalid class for string type, expecting String or "
                                + "CharSequence but found " + value.getClass());
                    }
                    break;

                case BYTES:
                    if (value instanceof byte[]) {
                        converted = ByteBuffer.wrap((byte[]) value);
                    } else if (value instanceof ByteBuffer) {
                        converted = value;
                    } else if (value instanceof GenericFixed) {
                        converted = ByteBuffer.wrap(((GenericFixed) value).bytes());
                    } else {
                        throw new DataException("Invalid class for bytes type, expecting byte[] or ByteBuffer "
                                + "but found " + value.getClass());
                    }
                    break;

                case ARRAY: {
                    Schema valueSchema = schema.valueSchema();
                    Collection<Object> original = (Collection<Object>) value;
                    List<Object> result = new ArrayList<>(original.size());
                    for (Object elem : original) {
                        result.add(toConnectData(valueSchema, elem, toConnectContext));
                    }
                    converted = result;
                    break;
                }

                case MAP: {
                    Schema keySchema = schema.keySchema();
                    Schema valueSchema = schema.valueSchema();
                    if (keySchema != null && keySchema.type() == Schema.Type.STRING
                            && !keySchema.isOptional()) {
                        // Non-optional string keys
                        Map<Object, Object> original = (Map<Object, Object>) value;
                        Map<String, Object> result = new HashMap<>(original.size());
                        for (Map.Entry<Object, Object> entry : original.entrySet()) {
                            result.put(entry.getKey().toString(),
                                    toConnectData(valueSchema, entry.getValue(), toConnectContext));
                        }
                        converted = result;
                    } else {
                        // Arbitrary keys
                        Collection<IndexedRecord> original = (Collection<IndexedRecord>) value;
                        Map<Object, Object> result = new HashMap<>(original.size());
                        for (IndexedRecord entry : original) {
                            int avroKeyFieldIndex = entry.getSchema().getField(KEY_FIELD).pos();
                            int avroValueFieldIndex = entry.getSchema().getField(VALUE_FIELD).pos();
                            Object convertedKey = toConnectData(
                                    keySchema, entry.get(avroKeyFieldIndex), toConnectContext);
                            Object convertedValue = toConnectData(
                                    valueSchema, entry.get(avroValueFieldIndex), toConnectContext);
                            result.put(convertedKey, convertedValue);
                        }
                        converted = result;
                    }
                    break;
                }

                case STRUCT: {
                    // Special case support for union types
                    if (schema.name() != null && schema.name().equals(AVRO_TYPE_UNION)) {
                        Schema valueRecordSchema = null;
                        if (value instanceof IndexedRecord) {
                            IndexedRecord valueRecord = ((IndexedRecord) value);
                            valueRecordSchema = toConnectSchemaWithCycles(
                                    valueRecord.getSchema(), true, null, null, toConnectContext);
                        }
                        for (Field field : schema.fields()) {
                            Schema fieldSchema = field.schema();
                            if (isInstanceOfAvroSchemaTypeForSimpleSchema(fieldSchema, value)
                                    || (valueRecordSchema != null
                                    && schemaEquals(valueRecordSchema, fieldSchema))) {
                                converted = new Struct(schema).put(
                                        unionMemberFieldName(fieldSchema),
                                        toConnectData(fieldSchema, value, toConnectContext));
                                break;
                            }
                        }
                        if (converted == null) {
                            throw new DataException(
                                    "Did not find matching union field for data: " + value.toString());
                        }
                    } else if (value instanceof Map) {
                        // Default values from Avro are returned as a Map
                        Map<CharSequence, Object> original = (Map<CharSequence, Object>) value;
                        Struct result = new Struct(schema);
                        for (Field field : schema.fields()) {
                            Object convertedFieldValue = toConnectData(field.schema(),
                                    original.get(field.name()), toConnectContext);
                            result.put(field, convertedFieldValue);
                        }
                        return result;
                    } else {
                        IndexedRecord original = (IndexedRecord) value;
                        Struct result = new Struct(schema);
                        for (Field field : schema.fields()) {
                            int avroFieldIndex = original.getSchema().getField(field.name()).pos();
                            Object convertedFieldValue = toConnectData(field.schema(),
                                    original.get(avroFieldIndex), toConnectContext);
                            result.put(field, convertedFieldValue);
                        }
                        converted = result;
                    }
                    break;
                }

                default:
                    throw new DataException("Unknown Connect schema type: " + schema.type());
            }

            if (schema.name() != null && doLogicalConversion) {
                LogicalTypeConverter logicalConverter = TO_CONNECT_LOGICAL_CONVERTERS.get(schema.name());
                if (logicalConverter != null) {
                    converted = logicalConverter.convert(schema, converted);
                }
            }

            return converted;
        } catch (ClassCastException e) {
            String schemaType = schema != null ? schema.type().toString() : "null";
            throw new DataException("Invalid type for " + schemaType + ": " + value.getClass());
        }
    }

    protected boolean getForceOptionalDefault() {
        return false;
    }

    public Schema toConnectSchema(org.apache.avro.Schema schema) {
        return toConnectSchema(schema, null, new ToConnectContext());
    }

    private Schema toConnectSchema(org.apache.avro.Schema schema,
                                   Integer version,
                                   ToConnectContext toConnectContext) {

        // We perform caching only at this top level. While it might be helpful to cache some more of
        // the internal conversions, this is the safest place to add caching since some of the internal
        // conversions take extra flags (like forceOptional) which means the resulting schema might not
        // exactly match the Avro schema.
        AvroSchema schemaAndVersion = new AvroSchema(schema, version);
        Schema cachedSchema = toConnectSchemaCache.get(schemaAndVersion);
        if (cachedSchema != null) {
            if (schema.getType() == org.apache.avro.Schema.Type.RECORD) {
                // cycleReferences is only populated with record type schemas. We need to initialize it here
                // with the top-level record schema, as would happen if we did not hit the cache. This
                // schema has the version information set, thus it properly works with schemaEquals.
                toConnectContext.cycleReferences.put(schema, new CyclicSchemaWrapper(cachedSchema));
            }
            return cachedSchema;
        }
        Schema resultSchema =
                toConnectSchema(schema, getForceOptionalDefault(), null, null, version, toConnectContext);
        toConnectSchemaCache.put(schemaAndVersion, resultSchema);
        return resultSchema;
    }

    /**
     * @param schema           schema to convert
     * @param forceOptional    make the resulting schema optional, for converting Avro unions to a
     *                         record format and simple Avro unions of null + type to optional schemas
     * @param fieldDefaultVal  if non-null, override any connect-annotated default values with this
     *                         one; used when converting Avro record fields since they define default
     *                         values with the field spec, but Connect specifies them with the field's
     *                         schema
     * @param docDefaultVal    if non-null, override any connect-annotated documentation with this
     *                         one; used when converting Avro record fields since they define doc values
     * @param toConnectContext context object that holds state while doing the conversion
     */
    private Schema toConnectSchema(org.apache.avro.Schema schema,
                                   boolean forceOptional,
                                   Object fieldDefaultVal,
                                   String docDefaultVal,
                                   ToConnectContext toConnectContext) {
        return toConnectSchema(
                schema, forceOptional, fieldDefaultVal, docDefaultVal, null, toConnectContext);
    }

    private Schema toConnectSchema(org.apache.avro.Schema schema,
                                   boolean forceOptional,
                                   Object fieldDefaultVal,
                                   String docDefaultVal,
                                   Integer version,
                                   ToConnectContext toConnectContext) {

        String type = schema.getProp(CONNECT_TYPE_PROP);
        String logicalType = schema.getProp(AVRO_LOGICAL_TYPE_PROP);

        final SchemaBuilder builder;

        switch (schema.getType()) {
            case BOOLEAN:
                builder = SchemaBuilder.bool();
                break;
            case BYTES:
            case FIXED:
                if (AVRO_LOGICAL_DECIMAL.equalsIgnoreCase(logicalType)) {
                    Object scaleNode = schema.getObjectProp(AVRO_LOGICAL_DECIMAL_SCALE_PROP);
                    if (null == scaleNode || !(scaleNode instanceof Number)) {
                        throw new DataException("scale must be specified and must be a number.");
                    }
                    Number scale = (Number) scaleNode;
                    builder = Decimal.builder(scale.intValue());

                    Object precisionNode = schema.getObjectProp(AVRO_LOGICAL_DECIMAL_PRECISION_PROP);
                    if (null != precisionNode) {
                        if (!(precisionNode instanceof Number)) {
                            throw new DataException(AVRO_LOGICAL_DECIMAL_PRECISION_PROP
                                    + " property must be a JSON Integer."
+ " https://avro.apache.org/docs/1.9.1/spec.html#Decimal"); } // Capture the precision as a parameter only if it is not the default Integer precision = ((Number) precisionNode).intValue(); if (precision != CONNECT_AVRO_DECIMAL_PRECISION_DEFAULT) { builder.parameter(CONNECT_AVRO_DECIMAL_PRECISION_PROP, precision.toString()); } } } else { builder = SchemaBuilder.bytes(); } break; case DOUBLE: builder = SchemaBuilder.float64(); break; case FLOAT: builder = SchemaBuilder.float32(); break; case INT: // INT is used for Connect's INT8, INT16, and INT32 if (type == null && logicalType == null) { builder = SchemaBuilder.int32(); } else if (logicalType != null) { if (AVRO_LOGICAL_DATE.equalsIgnoreCase(logicalType)) { builder = Date.builder(); } else if (AVRO_LOGICAL_TIME_MILLIS.equalsIgnoreCase(logicalType)) { builder = Time.builder(); } else { builder = SchemaBuilder.int32(); } } else { Schema.Type connectType = NON_AVRO_TYPES_BY_TYPE_CODE.get(type); if (connectType == null) { throw new DataException("Connect type annotation for Avro int field is null"); } builder = SchemaBuilder.type(connectType); } break; case LONG: if (AVRO_LOGICAL_TIMESTAMP_MILLIS.equalsIgnoreCase(logicalType)) { builder = Timestamp.builder(); } else { builder = SchemaBuilder.int64(); } break; case STRING: builder = SchemaBuilder.string(); break; case ARRAY: org.apache.avro.Schema elemSchema = schema.getElementType(); // Special case for custom encoding of non-string maps as list of key-value records if (isMapEntry(elemSchema)) { if (elemSchema.getFields().size() != 2 || elemSchema.getField(KEY_FIELD) == null || elemSchema.getField(VALUE_FIELD) == null) { throw new DataException("Found map encoded as array of key-value pairs, but array " + "elements do not match the expected format."); } builder = SchemaBuilder.map( toConnectSchema(elemSchema.getField(KEY_FIELD).schema()), toConnectSchema(elemSchema.getField(VALUE_FIELD).schema()) ); } else { Schema arraySchema = toConnectSchemaWithCycles( schema.getElementType(), getForceOptionalDefault(), null, null, toConnectContext); builder = SchemaBuilder.array(arraySchema); } break; case MAP: builder = SchemaBuilder.map( Schema.STRING_SCHEMA, toConnectSchemaWithCycles(schema.getValueType(), getForceOptionalDefault(), null, null, toConnectContext) ); break; case RECORD: { builder = SchemaBuilder.struct(); toConnectContext.cycleReferences.put(schema, new CyclicSchemaWrapper(builder)); if (connectMetaData && schema.getDoc() != null) { builder.parameter(AVRO_RECORD_DOC_PROP, schema.getDoc()); } for (org.apache.avro.Schema.Field field : schema.getFields()) { if (connectMetaData && field.doc() != null) { builder.parameter(AVRO_FIELD_DOC_PREFIX_PROP + field.name(), field.doc()); } Schema fieldSchema = toConnectSchema(field.schema(), getForceOptionalDefault(), field.defaultVal(), field.doc(), toConnectContext); builder.field(field.name(), fieldSchema); } break; } case ENUM: // enums are unwrapped to strings and the original enum is not preserved builder = SchemaBuilder.string(); if (connectMetaData) { if (schema.getDoc() != null) { builder.parameter(AVRO_ENUM_DOC_PREFIX_PROP + schema.getName(), schema.getDoc()); } if (schema.getEnumDefault() != null) { builder.parameter(AVRO_ENUM_DEFAULT_PREFIX_PROP + schema.getName(), schema.getEnumDefault()); } } builder.parameter(AVRO_TYPE_ENUM, schema.getFullName()); for (String enumSymbol : schema.getEnumSymbols()) { builder.parameter(AVRO_TYPE_ENUM + "." 
+ enumSymbol, enumSymbol); } break; case UNION: { if (schema.getTypes().size() == 2) { if (schema.getTypes().contains(NULL_AVRO_SCHEMA)) { for (org.apache.avro.Schema memberSchema : schema.getTypes()) { if (!memberSchema.equals(NULL_AVRO_SCHEMA)) { return toConnectSchemaWithCycles( memberSchema, true, null, docDefaultVal, toConnectContext); } } } } builder = SchemaBuilder.struct().name(AVRO_TYPE_UNION); Set fieldNames = new HashSet<>(); for (org.apache.avro.Schema memberSchema : schema.getTypes()) { if (memberSchema.getType() == org.apache.avro.Schema.Type.NULL) { builder.optional(); } else { String fieldName = unionMemberFieldName(memberSchema); if (fieldNames.contains(fieldName)) { throw new DataException("Multiple union schemas map to the Connect union field name"); } fieldNames.add(fieldName); builder.field( fieldName, toConnectSchemaWithCycles(memberSchema, true, null, null, toConnectContext) ); } } break; } case NULL: // There's no dedicated null type in Connect. However, it also doesn't make sense to have a // standalone null type -- it wouldn't provide any useful information. Instead, it should // only be used in union types. throw new DataException("Standalone null schemas are not supported by this converter"); default: throw new DataException("Couldn't translate unsupported schema type " + schema.getType().getName() + "."); } String docVal = schema.getProp(CONNECT_DOC_PROP); if (connectMetaData && docVal != null) { builder.doc(docVal); } // Included Kafka Connect version takes priority, fall back to schema registry version int versionInt = -1; // A valid version must be a positive integer (assumed throughout SR) Object versionNode = schema.getObjectProp(CONNECT_VERSION_PROP); if (versionNode != null) { if (!(versionNode instanceof Number)) { throw new DataException("Invalid Connect version found: " + versionNode.toString()); } versionInt = ((Number) versionNode).intValue(); } else if (version != null) { versionInt = version.intValue(); } if (versionInt >= 0) { if (builder.version() != null) { if (versionInt != builder.version()) { throw new DataException("Mismatched versions: version already added to SchemaBuilder " + "(" + builder.version() + ") differs from version in source schema (" + versionInt + ")"); } } else { builder.version(versionInt); } } Object parameters = schema.getObjectProp(CONNECT_PARAMETERS_PROP); if (connectMetaData && parameters != null) { if (!(parameters instanceof Map)) { throw new DataException("Expected JSON object for schema parameters but found: " + parameters); } Iterator> paramIt = ((Map) parameters).entrySet().iterator(); while (paramIt.hasNext()) { Map.Entry field = paramIt.next(); Object jsonValue = field.getValue(); if (!(jsonValue instanceof String)) { throw new DataException("Expected schema parameter values to be strings but found: " + jsonValue); } builder.parameter(field.getKey(), (String) jsonValue); } } for (Map.Entry entry : schema.getObjectProps().entrySet()) { if (entry.getKey().startsWith(AVRO_PROP)) { builder.parameter(entry.getKey(), entry.getValue().toString()); } } Object connectDefault = schema.getObjectProp(CONNECT_DEFAULT_VALUE_PROP); if (fieldDefaultVal == null) { fieldDefaultVal = JacksonUtils.toJsonNode(connectDefault); } else if (connectDefault == null) { builder.parameter(AVRO_FIELD_DEFAULT_FLAG_PROP, "true"); } if (fieldDefaultVal != null) { builder.defaultValue( defaultValueFromAvro(builder, schema, fieldDefaultVal, toConnectContext)); } Object connectNameJson = schema.getObjectProp(CONNECT_NAME_PROP); String name = null; 
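        // For example (illustrative schema, not from this source): when no "connect.name"
        // property is present, records and enums fall back to their Avro full name below, so an
        // Avro record declared as {"type":"record","name":"User","namespace":"com.example",...}
        // yields a Connect struct schema named "com.example.User".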
        if (connectNameJson != null) {
            if (!(connectNameJson instanceof String)) {
                throw new DataException("Invalid schema name: " + connectNameJson.toString());
            }
            name = (String) connectNameJson;
        } else if (schema.getType() == org.apache.avro.Schema.Type.RECORD
                || schema.getType() == org.apache.avro.Schema.Type.ENUM) {
            name = schema.getFullName();
        }
        if (name != null && !name.equals(DEFAULT_SCHEMA_FULL_NAME)) {
            if (builder.name() != null) {
                if (!name.equals(builder.name())) {
                    throw new DataException("Mismatched names: name already added to SchemaBuilder ("
                        + builder.name() + ") differs from name in source schema (" + name + ")");
                }
            } else {
                builder.name(name);
            }
        }

        if (forceOptional) {
            builder.optional();
        }

        if (!toConnectContext.detectedCycles.contains(schema)
                && toConnectContext.cycleReferences.containsKey(schema)) {
            toConnectContext.cycleReferences.remove(schema);
        }

        return builder.build();
    }

    private Schema toConnectSchemaWithCycles(org.apache.avro.Schema schema,
                                             boolean forceOptional,
                                             Object fieldDefaultVal,
                                             String docDefaultVal,
                                             ToConnectContext toConnectContext) {
        Schema resolvedSchema;
        if (toConnectContext.cycleReferences.containsKey(schema)) {
            toConnectContext.detectedCycles.add(schema);
            resolvedSchema = cyclicSchemaWrapper(toConnectContext.cycleReferences, schema, forceOptional);
        } else {
            resolvedSchema = toConnectSchema(
                schema, forceOptional, fieldDefaultVal, docDefaultVal, toConnectContext);
        }
        return resolvedSchema;
    }

    private CyclicSchemaWrapper cyclicSchemaWrapper(
            Map<org.apache.avro.Schema, CyclicSchemaWrapper> toConnectCycles,
            org.apache.avro.Schema memberSchema,
            boolean optional) {
        return new CyclicSchemaWrapper(toConnectCycles.get(memberSchema).schema(), optional);
    }

    private Object defaultValueFromAvro(Schema schema,
                                        org.apache.avro.Schema avroSchema,
                                        Object value,
                                        ToConnectContext toConnectContext) {
        Object result = defaultValueFromAvroWithoutLogical(schema, avroSchema, value, toConnectContext);
        // If the schema is a logical type, convert the primitive Avro default into the logical form
        return toConnectLogical(schema, result);
    }

    private Object defaultValueFromAvroWithoutLogical(Schema schema,
                                                      org.apache.avro.Schema avroSchema,
                                                      Object value,
                                                      ToConnectContext toConnectContext) {
        if (value == null || value == JsonProperties.NULL_VALUE) {
            return null;
        }

        // The type will be JsonNode if this default was pulled from a Connect default field, or an
        // Object if it's the actual Avro-specified default. If it's a regular Java object, we can
        // use our existing conversion tools.
        if (!(value instanceof JsonNode)) {
            return toConnectData(schema, value, toConnectContext, false);
        }

        JsonNode jsonValue = (JsonNode) value;
        switch (avroSchema.getType()) {
            case INT:
                if (schema.type() == Schema.Type.INT8) {
                    return (byte) jsonValue.intValue();
                } else if (schema.type() == Schema.Type.INT16) {
                    return jsonValue.shortValue();
                } else if (schema.type() == Schema.Type.INT32) {
                    return jsonValue.intValue();
                } else {
                    break;
                }
            case LONG:
                return jsonValue.longValue();
            case FLOAT:
                return (float) jsonValue.doubleValue();
            case DOUBLE:
                return jsonValue.doubleValue();
            case BOOLEAN:
                return jsonValue.asBoolean();
            case NULL:
                return null;
            case STRING:
            case ENUM:
                return jsonValue.asText();
            case BYTES:
            case FIXED:
                try {
                    byte[] bytes;
                    if (jsonValue.isTextual()) {
                        // Avro's JSON form may be a quoted string, so decode the binary value
                        String encoded = jsonValue.textValue();
                        bytes = encoded.getBytes(StandardCharsets.ISO_8859_1);
                    } else {
                        bytes = jsonValue.binaryValue();
                    }
                    return bytes == null ? null : ByteBuffer.wrap(bytes);
                } catch (IOException e) {
                    throw new DataException("Invalid binary data in default value", e);
                }
            case ARRAY: {
                if (!jsonValue.isArray()) {
                    throw new DataException("Invalid JSON for array default value: " + jsonValue.toString());
                }
                List<Object> result = new ArrayList<>(jsonValue.size());
                for (JsonNode elem : jsonValue) {
                    result.add(
                        defaultValueFromAvro(schema, avroSchema.getElementType(), elem, toConnectContext));
                }
                return result;
            }
            case MAP: {
                if (!jsonValue.isObject()) {
                    throw new DataException("Invalid JSON for map default value: " + jsonValue.toString());
                }
                Map<String, Object> result = new HashMap<>(jsonValue.size());
                Iterator<Map.Entry<String, JsonNode>> fieldIt = jsonValue.fields();
                while (fieldIt.hasNext()) {
                    Map.Entry<String, JsonNode> field = fieldIt.next();
                    Object converted = defaultValueFromAvro(
                        schema.valueSchema(), avroSchema.getValueType(), field.getValue(), toConnectContext);
                    result.put(field.getKey(), converted);
                }
                return result;
            }
            case RECORD: {
                if (!jsonValue.isObject()) {
                    throw new DataException("Invalid JSON for record default value: " + jsonValue.toString());
                }
                Struct result = new Struct(schema);
                for (org.apache.avro.Schema.Field avroField : avroSchema.getFields()) {
                    Field field = schema.field(avroField.name());
                    JsonNode fieldJson = ((JsonNode) value).get(field.name());
                    Object converted = defaultValueFromAvro(
                        field.schema(), avroField.schema(), fieldJson, toConnectContext);
                    result.put(avroField.name(), converted);
                }
                return result;
            }
            case UNION: {
                // Defaults must match first type
                org.apache.avro.Schema memberAvroSchema = avroSchema.getTypes().get(0);
                if (memberAvroSchema.getType() == org.apache.avro.Schema.Type.NULL) {
                    return null;
                } else {
                    return defaultValueFromAvro(
                        schema.field(unionMemberFieldName(memberAvroSchema)).schema(),
                        memberAvroSchema,
                        value,
                        toConnectContext);
                }
            }
            default: {
                return null;
            }
        }
        return null;
    }

    private String unionMemberFieldName(org.apache.avro.Schema schema) {
        if (schema.getType() == org.apache.avro.Schema.Type.RECORD
                || schema.getType() == org.apache.avro.Schema.Type.ENUM) {
            if (enhancedSchemaSupport) {
                return schema.getFullName();
            } else {
                return splitName(schema.getName())[1];
            }
        }
        return schema.getType().getName();
    }

    private String unionMemberFieldName(Schema schema) {
        if (schema.type() == Schema.Type.STRUCT || isEnumSchema(schema)) {
            if (enhancedSchemaSupport) {
                return schema.name();
            } else {
                return splitName(schema.name())[1];
            }
        }
        return CONNECT_TYPES_TO_AVRO_TYPES.get(schema.type()).getName();
    }

    private static boolean isEnumSchema(Schema schema) {
        return schema.type() == Schema.Type.STRING
            && schema.parameters() != null
            && schema.parameters().containsKey(AVRO_TYPE_ENUM);
    }

    private static boolean isInstanceOfAvroSchemaTypeForSimpleSchema(Schema fieldSchema, Object value) {
        List<Class> classes = SIMPLE_AVRO_SCHEMA_TYPES.get(fieldSchema.type());
        if (classes == null) {
            return false;
        }
        for (Class type : classes) {
            if (type.isInstance(value)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Split a full dotted-syntax name into a namespace and a single-component name.
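     * For example (illustrative values): {@code splitName("com.example.Widget")} yields
     * {@code {"com.example", "Widget"}}, while a name with no dots, such as {@code "Widget"},
     * yields {@code {null, "Widget"}}.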
     */
    private static String[] splitName(String fullName) {
        String[] result = new String[2];
        int indexLastDot = fullName.lastIndexOf('.');
        if (indexLastDot >= 0) {
            result[0] = fullName.substring(0, indexLastDot);
            result[1] = fullName.substring(indexLastDot + 1);
        } else {
            result[0] = null;
            result[1] = fullName;
        }
        return result;
    }

    private interface LogicalTypeConverter {
        Object convert(Schema schema, Object value);
    }

    public static Schema nonOptional(Schema schema) {
        return new ConnectSchema(schema.type(), false, schema.defaultValue(), schema.name(),
            schema.version(), schema.doc(), schema.parameters(),
            fields(schema), keySchema(schema), valueSchema(schema));
    }

    public static List<Field> fields(Schema schema) {
        Schema.Type type = schema.type();
        if (Schema.Type.STRUCT.equals(type)) {
            return schema.fields();
        } else {
            return null;
        }
    }

    public static Schema keySchema(Schema schema) {
        Schema.Type type = schema.type();
        if (Schema.Type.MAP.equals(type)) {
            return schema.keySchema();
        } else {
            return null;
        }
    }

    public static Schema valueSchema(Schema schema) {
        Schema.Type type = schema.type();
        if (Schema.Type.MAP.equals(type) || Schema.Type.ARRAY.equals(type)) {
            return schema.valueSchema();
        } else {
            return null;
        }
    }

    private static boolean fieldListEquals(List<Field> one, List<Field> two,
                                           Map<SchemaPair, Boolean> cache) {
        if (one == two) {
            return true;
        } else if (one == null || two == null) {
            return false;
        } else {
            ListIterator<Field> itOne = one.listIterator();
            ListIterator<Field> itTwo = two.listIterator();
            while (itOne.hasNext() && itTwo.hasNext()) {
                if (!fieldEquals(itOne.next(), itTwo.next(), cache)) {
                    return false;
                }
            }
            return itOne.hasNext() == itTwo.hasNext();
        }
    }

    private static boolean fieldEquals(Field one, Field two, Map<SchemaPair, Boolean> cache) {
        if (one == two) {
            return true;
        } else if (one == null || two == null) {
            return false;
        } else {
            return one.getClass() == two.getClass()
                && Objects.equals(one.index(), two.index())
                && Objects.equals(one.name(), two.name())
                && schemaEquals(one.schema(), two.schema(), cache);
        }
    }

    private static class SchemaPair {
        public Schema one;
        public Schema two;

        public SchemaPair(Schema one, Schema two) {
            this.one = one;
            this.two = two;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            SchemaPair that = (SchemaPair) o;
            return Objects.equals(one, that.one)
                && Objects.equals(two, that.two);
        }

        @Override
        public int hashCode() {
            return Objects.hash(one, two);
        }
    }

    private static boolean schemaEquals(Schema src, Schema that) {
        return schemaEquals(src, that, new HashMap<>());
    }

    private static boolean schemaEquals(Schema src, Schema that, Map<SchemaPair, Boolean> cache) {
        if (src == that) {
            return true;
        } else if (src == null || that == null) {
            return false;
        }
        // Add a temporary value to the cache to avoid cycles. As long as we recurse only at the
        // end of the method, we can safely default to true here. The cache is updated at the end
        // of the method with the actual comparison result.
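        // For instance, comparing two mutually recursive record schemas (A has a field of type B,
        // and B has a field of type A) would otherwise recurse without terminating.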
        SchemaPair sp = new SchemaPair(src, that);
        Boolean cacheHit = cache.putIfAbsent(sp, true);
        if (cacheHit != null) {
            return cacheHit;
        }

        boolean equals = Objects.equals(src.isOptional(), that.isOptional())
            && Objects.equals(src.version(), that.version())
            && Objects.equals(src.name(), that.name())
            && Objects.equals(src.doc(), that.doc())
            && Objects.equals(src.type(), that.type())
            && Objects.deepEquals(src.defaultValue(), that.defaultValue())
            && Objects.equals(src.parameters(), that.parameters());

        switch (src.type()) {
            case STRUCT:
                equals = equals && fieldListEquals(src.fields(), that.fields(), cache);
                break;
            case ARRAY:
                equals = equals && schemaEquals(src.valueSchema(), that.valueSchema(), cache);
                break;
            case MAP:
                equals = equals
                    && schemaEquals(src.valueSchema(), that.valueSchema(), cache)
                    && schemaEquals(src.keySchema(), that.keySchema(), cache);
                break;
            default:
                break;
        }
        cache.put(sp, equals);
        return equals;
    }

    private static class CyclicSchemaWrapper implements Schema {

        private final Schema schema;
        private final boolean optional;

        public CyclicSchemaWrapper(Schema schema) {
            this(schema, schema.isOptional());
        }

        public CyclicSchemaWrapper(Schema schema, boolean optional) {
            this.schema = schema;
            this.optional = optional;
        }

        @Override
        public Type type() {
            return schema.type();
        }

        @Override
        public boolean isOptional() {
            return optional;
        }

        @Override
        public Object defaultValue() {
            return schema.defaultValue();
        }

        @Override
        public String name() {
            return schema.name();
        }

        @Override
        public Integer version() {
            return schema.version();
        }

        @Override
        public String doc() {
            return schema.doc();
        }

        @Override
        public Map<String, String> parameters() {
            return schema.parameters();
        }

        @Override
        public Schema keySchema() {
            return schema.keySchema();
        }

        @Override
        public Schema valueSchema() {
            return schema.valueSchema();
        }

        @Override
        public List<Field> fields() {
            return schema.fields();
        }

        @Override
        public Field field(String s) {
            return schema.field(s);
        }

        @Override
        public Schema schema() {
            return schema;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            CyclicSchemaWrapper other = (CyclicSchemaWrapper) o;
            return Objects.equals(optional, other.optional) && Objects.equals(schema, other.schema);
        }

        @Override
        public int hashCode() {
            return Objects.hashCode(optional) + Objects.hashCode(schema);
        }
    }

    /**
     * Class that holds the context for performing {@code toConnectSchema}
     */
    private static class ToConnectContext {
        private final Map<org.apache.avro.Schema, CyclicSchemaWrapper> cycleReferences;
        private final Set<org.apache.avro.Schema> detectedCycles;

        /**
         * cycleReferences - map that holds connect Schema references to resolve cycles
         * detectedCycles - avro schemas that have been detected to have cycles
         */
        private ToConnectContext() {
            this.cycleReferences = new IdentityHashMap<>();
            this.detectedCycles = new HashSet<>();
        }
    }

    /**
     * Class that holds the context for performing {@code fromConnectSchema}
     */
    private static class FromConnectContext {
        //SchemaMap is used to resolve references that need to mapped as types
        private final Map<Schema, org.apache.avro.Schema> schemaMap;
        //schema name to Schema reference to resolve cycles
        private final Map<String, org.apache.avro.Schema> cycleReferences;

        private FromConnectContext(Map<Schema, org.apache.avro.Schema> schemaMap) {
            this.schemaMap = schemaMap;
            this.cycleReferences = new IdentityHashMap<>();
        }
    }
}
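// Illustrative usage sketch for toConnectData. The schema literal, record contents, and the
// int-argument AvroData constructor used here are assumptions made for the example; consult the
// constructors declared earlier in this class for the exact signatures available.
//
//   org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(
//       "{\"type\":\"record\",\"name\":\"User\",\"namespace\":\"com.example\","
//           + "\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");
//   GenericRecord record = new GenericRecordBuilder(avroSchema).set("name", "alice").build();
//
//   AvroData avroData = new AvroData(100);                        // assumed cache-size constructor
//   SchemaAndValue connectData = avroData.toConnectData(avroSchema, record);
//   Struct struct = (Struct) connectData.value();
//   String name = struct.getString("name");                       // "alice"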