All Downloads are FREE. Search and download functionality uses the official Maven repository.

io.streamnative.pulsar.handlers.kop.schemaregistry.providers.avro.AvroSchema Maven / Gradle / Ivy

The newest version!
/**
 * Copyright (c) 2019 - 2024 StreamNative, Inc.. All Rights Reserved.
 */
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.streamnative.pulsar.handlers.kop.schemaregistry.providers.avro;

import com.google.common.collect.EnumHashBiMap;
import io.streamnative.pulsar.handlers.kop.schemaregistry.model.ParsedSchema;
import io.streamnative.pulsar.handlers.kop.schemaregistry.model.rest.SchemaReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.SchemaCompatibility;

@Slf4j
public class AvroSchema implements ParsedSchema {

    public static final String TYPE = "AVRO";

    private final org.apache.avro.Schema schemaObj;
    private String canonicalString;
    private final Integer version;
    private final List references;
    private final Map resolvedReferences;
    private final boolean isNew;

    private transient int hashCode = NO_HASHCODE;

    private static final int NO_HASHCODE = Integer.MIN_VALUE;

    public AvroSchema(String schemaString,
                      List references,
                      Map resolvedReferences,
                      Integer version,
                      boolean isNew) {
        this.isNew = isNew;
        org.apache.avro.Schema.Parser parser = getParser();
        for (String schema : resolvedReferences.values()) {
            parser.parse(schema);
        }
        this.schemaObj = schemaString != null ? parser.parse(schemaString) : null;
        this.references = Collections.unmodifiableList(references);
        this.resolvedReferences = Collections.unmodifiableMap(resolvedReferences);
        this.version = version;
    }

    private AvroSchema(
        org.apache.avro.Schema schemaObj,
        String canonicalString,
        List references,
        Map resolvedReferences,
        Integer version,
        boolean isNew
    ) {
        this.isNew = isNew;
        this.schemaObj = schemaObj;
        this.canonicalString = canonicalString;
        this.references = references;
        this.resolvedReferences = resolvedReferences;
        this.version = version;
    }

    @Override
    public AvroSchema copy() {
        return new AvroSchema(
            this.schemaObj,
            this.canonicalString,
            this.references,
            this.resolvedReferences,
            this.version,
            this.isNew
        );
    }

    @Override
    public AvroSchema copy(Integer version) {
        return new AvroSchema(
            this.schemaObj,
            this.canonicalString,
            this.references,
            this.resolvedReferences,
            version,
            this.isNew
        );
    }

    protected org.apache.avro.Schema.Parser getParser() {
        org.apache.avro.Schema.Parser parser = new org.apache.avro.Schema.Parser();
        parser.setValidateDefaults(isNew());
        return parser;
    }

    @Override
    public org.apache.avro.Schema rawSchema() {
        return schemaObj;
    }

    @Override
    public String schemaType() {
        return TYPE;
    }

    @Override
    public String name() {
        if (schemaObj != null && schemaObj.getType() == org.apache.avro.Schema.Type.RECORD) {
            return schemaObj.getFullName();
        }
        return null;
    }

    @Override
    public String canonicalString() {
        if (schemaObj == null) {
            return null;
        }
        if (canonicalString == null) {
            org.apache.avro.Schema.Parser parser = getParser();
            List schemaRefs = new ArrayList<>();
            for (String schema : resolvedReferences.values()) {
                org.apache.avro.Schema schemaRef = parser.parse(schema);
                schemaRefs.add(schemaRef);
            }
            canonicalString = schemaObj.toString(schemaRefs, false);
        }
        return canonicalString;
    }

    @Override
    public String formattedString(String format) {
        if (format == null || format.trim().isEmpty()) {
            return canonicalString();
        }
        Format formatEnum = Format.get(format);
        switch (formatEnum) {
            case DEFAULT:
                return canonicalString();
            case RESOLVED:
                return schemaObj != null ? schemaObj.toString() : null;
            default:
                // Don't throw an exception for forward compatibility of formats
                log.warn("Unsupported format {}", format);
                return canonicalString();
        }
    }

    public Integer version() {
        return version;
    }

    @Override
    public List references() {
        return references;
    }

    public Map resolvedReferences() {
        return resolvedReferences;
    }

    public boolean isNew() {
        return isNew;
    }

    @Override
    public AvroSchema normalize() {
        String normalized = AvroSchemaUtils.toNormalizedString(this);
        return new AvroSchema(
            normalized,
            this.references.stream().sorted().distinct().collect(Collectors.toList()),
            this.resolvedReferences,
            this.version,
            this.isNew
        );
    }

    @Override
    public List isBackwardCompatible(ParsedSchema previousSchema) {
        if (!schemaType().equals(previousSchema.schemaType())) {
            return Collections.singletonList("Incompatible because of different schema type");
        }
        try {
            SchemaCompatibility.SchemaPairCompatibility result =
                SchemaCompatibility.checkReaderWriterCompatibility(
                    this.schemaObj,
                    ((AvroSchema) previousSchema).schemaObj);
            return result.getResult().getIncompatibilities().stream()
                .map(Difference::new)
                .map(Difference::toString)
                .collect(Collectors.toCollection(ArrayList::new));
        } catch (Exception e) {
            log.error("Unexpected exception during compatibility check", e);
            return Collections.singletonList(
                "Unexpected exception during compatibility check: " + e.getMessage());
        }
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        AvroSchema that = (AvroSchema) o;
        return Objects.equals(version, that.version)
            && Objects.equals(references, that.references)
            && Objects.equals(schemaObj, that.schemaObj)
            && metaEqual(schemaObj, that.schemaObj, new HashMap<>());
    }

    private boolean metaEqual(
        org.apache.avro.Schema schema1,
        org.apache.avro.Schema schema2,
        Map, Boolean> cache) {
        if (schema1 == schema2) {
            return true;
        }

        if (schema1 == null || schema2 == null) {
            return false;
        }

        org.apache.avro.Schema.Type type1 = schema1.getType();
        org.apache.avro.Schema.Type type2 = schema2.getType();
        if (type1 != type2) {
            return false;
        }

        switch (type1) {
            case RECORD:
                // Add a temporary value to the cache to avoid cycles.
                // As long as we recurse only at the end of the method, we can safely default to true here.
                // The cache is updated at the end of the method with the actual comparison result.
                IdentityPair sp = new IdentityPair<>(schema1, schema2);
                Boolean cacheHit = cache.putIfAbsent(sp, true);
                if (cacheHit != null) {
                    return cacheHit;
                }

                boolean equals = Objects.equals(schema1.getAliases(), schema2.getAliases())
                    && Objects.equals(schema1.getDoc(), schema2.getDoc())
                    && fieldMetaEqual(schema1.getFields(), schema2.getFields(), cache);

                cache.put(sp, equals);
                return equals;
            case ENUM:
                return Objects.equals(schema1.getAliases(), schema2.getAliases())
                    && Objects.equals(schema1.getDoc(), schema2.getDoc())
                    && Objects.equals(schema1.getEnumDefault(), schema2.getEnumDefault());
            case FIXED:
                return Objects.equals(schema1.getAliases(), schema2.getAliases())
                    && Objects.equals(schema1.getDoc(), schema2.getDoc());
            case UNION:
                List types1 = schema1.getTypes();
                List types2 = schema2.getTypes();
                if (types1.size() != types2.size()) {
                    return false;
                }
                for (int i = 0; i < types1.size(); i++) {
                    if (!metaEqual(types1.get(i), types2.get(i), cache)) {
                        return false;
                    }
                }
                return true;
            default:
                return true;
        }
    }

    private boolean fieldMetaEqual(
        List fields1,
        List fields2,
        Map, Boolean> cache) {
        if (fields1.size() != fields2.size()) {
            return false;
        }
        for (int i = 0; i < fields1.size(); i++) {
            org.apache.avro.Schema.Field field1 = fields1.get(i);
            org.apache.avro.Schema.Field field2 = fields2.get(i);
            if (field1 == field2) {
                continue;
            }
            if (!Objects.equals(field1.aliases(), field2.aliases())
                || !Objects.equals(field1.doc(), field2.doc())) {
                return false;
            }
            boolean fieldSchemaMetaEqual = metaEqual(field1.schema(), field2.schema(), cache);
            if (!fieldSchemaMetaEqual) {
                return false;
            }
        }
        return true;
    }

    @Override
    public int hashCode() {
        if (hashCode == NO_HASHCODE) {
            hashCode = Objects.hash(schemaObj, references, version)
                + metaHash(schemaObj, new IdentityHashMap<>());
        }
        return hashCode;
    }

    private int metaHash(org.apache.avro.Schema schema, Map cache) {
        if (schema == null) {
            return 0;
        }
        switch (schema.getType()) {
            case RECORD:
                // Add a temporary value to the cache to avoid cycles.
                // As long as we recurse only at the end of the method, we can safely default to 0 here.
                // The cache is updated at the end of the method with the actual comparison result.
                Integer cacheHit = cache.putIfAbsent(schema, 0);
                if (cacheHit != null) {
                    return cacheHit;
                }

                int result = Objects.hash(schema.getAliases(), schema.getDoc())
                    + fieldMetaHash(schema.getFields(), cache);

                cache.put(schema, result);
                return result;
            case ENUM:
                return Objects.hash(schema.getAliases(), schema.getDoc(), schema.getEnumDefault());
            case FIXED:
                return Objects.hash(schema.getAliases(), schema.getDoc());
            case UNION:
                int hash = 0;
                List types = schema.getTypes();
                for (org.apache.avro.Schema type : types) {
                    hash += metaHash(type, cache);
                }
                return hash;
            default:
                return 0;
        }
    }

    private int fieldMetaHash(List fields, Map cache) {
        int hash = 0;
        for (org.apache.avro.Schema.Field field : fields) {
            hash += Objects.hash(field.aliases(), field.doc()) + metaHash(field.schema(), cache);
        }
        return hash;
    }

    @Override
    public String toString() {
        return canonicalString();
    }

    @Getter
    static class IdentityPair {
        private final K key;
        private final V value;

        public IdentityPair(K key, V value) {
            this.key = key;
            this.value = value;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            IdentityPair pair = (IdentityPair) o;
            // Only perform identity check
            return key == pair.key && value == pair.value;
        }

        @Override
        public int hashCode() {
            return System.identityHashCode(key) + System.identityHashCode(value);
        }

        @Override
        public String toString() {
            return "IdentityPair{"
                + "key=" + key
                + ", value=" + value
                + '}';
        }
    }

    public enum Format {
        DEFAULT("default"),
        RESOLVED("resolved");

        private static final EnumHashBiMap lookup =
            EnumHashBiMap.create(Format.class);

        static {
            for (Format type : Format.values()) {
                lookup.put(type, type.symbol());
            }
        }

        private final String symbol;

        Format(String symbol) {
            this.symbol = symbol;
        }

        public String symbol() {
            return symbol;
        }

        public static Format get(String symbol) {
            return lookup.inverse().get(symbol);
        }

        public static Set symbols() {
            return lookup.inverse().keySet();
        }

        @Override
        public String toString() {
            return symbol();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy