// io.streamnative.pulsar.handlers.kop.schemaregistry.providers.avro.AvroSchema (Maven / Gradle / Ivy)
// From artifact pulsar-kafka-schema-registry: Kafka Compatible Schema Registry (the newest version).
/**
* Copyright (c) 2019 - 2024 StreamNative, Inc.. All Rights Reserved.
*/
/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.streamnative.pulsar.handlers.kop.schemaregistry.providers.avro;
import com.google.common.collect.EnumHashBiMap;
import io.streamnative.pulsar.handlers.kop.schemaregistry.model.ParsedSchema;
import io.streamnative.pulsar.handlers.kop.schemaregistry.model.rest.SchemaReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.SchemaCompatibility;
@Slf4j
public class AvroSchema implements ParsedSchema {
public static final String TYPE = "AVRO";
private final org.apache.avro.Schema schemaObj;
private String canonicalString;
private final Integer version;
private final List references;
private final Map resolvedReferences;
private final boolean isNew;
private transient int hashCode = NO_HASHCODE;
private static final int NO_HASHCODE = Integer.MIN_VALUE;
public AvroSchema(String schemaString,
List references,
Map resolvedReferences,
Integer version,
boolean isNew) {
this.isNew = isNew;
org.apache.avro.Schema.Parser parser = getParser();
for (String schema : resolvedReferences.values()) {
parser.parse(schema);
}
this.schemaObj = schemaString != null ? parser.parse(schemaString) : null;
this.references = Collections.unmodifiableList(references);
this.resolvedReferences = Collections.unmodifiableMap(resolvedReferences);
this.version = version;
}
private AvroSchema(
org.apache.avro.Schema schemaObj,
String canonicalString,
List references,
Map resolvedReferences,
Integer version,
boolean isNew
) {
this.isNew = isNew;
this.schemaObj = schemaObj;
this.canonicalString = canonicalString;
this.references = references;
this.resolvedReferences = resolvedReferences;
this.version = version;
}
@Override
public AvroSchema copy() {
return new AvroSchema(
this.schemaObj,
this.canonicalString,
this.references,
this.resolvedReferences,
this.version,
this.isNew
);
}
@Override
public AvroSchema copy(Integer version) {
return new AvroSchema(
this.schemaObj,
this.canonicalString,
this.references,
this.resolvedReferences,
version,
this.isNew
);
}
protected org.apache.avro.Schema.Parser getParser() {
org.apache.avro.Schema.Parser parser = new org.apache.avro.Schema.Parser();
parser.setValidateDefaults(isNew());
return parser;
}
@Override
public org.apache.avro.Schema rawSchema() {
return schemaObj;
}
@Override
public String schemaType() {
return TYPE;
}
@Override
public String name() {
if (schemaObj != null && schemaObj.getType() == org.apache.avro.Schema.Type.RECORD) {
return schemaObj.getFullName();
}
return null;
}
@Override
public String canonicalString() {
if (schemaObj == null) {
return null;
}
if (canonicalString == null) {
org.apache.avro.Schema.Parser parser = getParser();
List schemaRefs = new ArrayList<>();
for (String schema : resolvedReferences.values()) {
org.apache.avro.Schema schemaRef = parser.parse(schema);
schemaRefs.add(schemaRef);
}
canonicalString = schemaObj.toString(schemaRefs, false);
}
return canonicalString;
}
@Override
public String formattedString(String format) {
if (format == null || format.trim().isEmpty()) {
return canonicalString();
}
Format formatEnum = Format.get(format);
switch (formatEnum) {
case DEFAULT:
return canonicalString();
case RESOLVED:
return schemaObj != null ? schemaObj.toString() : null;
default:
// Don't throw an exception for forward compatibility of formats
log.warn("Unsupported format {}", format);
return canonicalString();
}
}
public Integer version() {
return version;
}
@Override
public List references() {
return references;
}
public Map resolvedReferences() {
return resolvedReferences;
}
public boolean isNew() {
return isNew;
}
@Override
public AvroSchema normalize() {
String normalized = AvroSchemaUtils.toNormalizedString(this);
return new AvroSchema(
normalized,
this.references.stream().sorted().distinct().collect(Collectors.toList()),
this.resolvedReferences,
this.version,
this.isNew
);
}
@Override
public List isBackwardCompatible(ParsedSchema previousSchema) {
if (!schemaType().equals(previousSchema.schemaType())) {
return Collections.singletonList("Incompatible because of different schema type");
}
try {
SchemaCompatibility.SchemaPairCompatibility result =
SchemaCompatibility.checkReaderWriterCompatibility(
this.schemaObj,
((AvroSchema) previousSchema).schemaObj);
return result.getResult().getIncompatibilities().stream()
.map(Difference::new)
.map(Difference::toString)
.collect(Collectors.toCollection(ArrayList::new));
} catch (Exception e) {
log.error("Unexpected exception during compatibility check", e);
return Collections.singletonList(
"Unexpected exception during compatibility check: " + e.getMessage());
}
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
AvroSchema that = (AvroSchema) o;
return Objects.equals(version, that.version)
&& Objects.equals(references, that.references)
&& Objects.equals(schemaObj, that.schemaObj)
&& metaEqual(schemaObj, that.schemaObj, new HashMap<>());
}
private boolean metaEqual(
org.apache.avro.Schema schema1,
org.apache.avro.Schema schema2,
Map, Boolean> cache) {
if (schema1 == schema2) {
return true;
}
if (schema1 == null || schema2 == null) {
return false;
}
org.apache.avro.Schema.Type type1 = schema1.getType();
org.apache.avro.Schema.Type type2 = schema2.getType();
if (type1 != type2) {
return false;
}
switch (type1) {
case RECORD:
// Add a temporary value to the cache to avoid cycles.
// As long as we recurse only at the end of the method, we can safely default to true here.
// The cache is updated at the end of the method with the actual comparison result.
IdentityPair sp = new IdentityPair<>(schema1, schema2);
Boolean cacheHit = cache.putIfAbsent(sp, true);
if (cacheHit != null) {
return cacheHit;
}
boolean equals = Objects.equals(schema1.getAliases(), schema2.getAliases())
&& Objects.equals(schema1.getDoc(), schema2.getDoc())
&& fieldMetaEqual(schema1.getFields(), schema2.getFields(), cache);
cache.put(sp, equals);
return equals;
case ENUM:
return Objects.equals(schema1.getAliases(), schema2.getAliases())
&& Objects.equals(schema1.getDoc(), schema2.getDoc())
&& Objects.equals(schema1.getEnumDefault(), schema2.getEnumDefault());
case FIXED:
return Objects.equals(schema1.getAliases(), schema2.getAliases())
&& Objects.equals(schema1.getDoc(), schema2.getDoc());
case UNION:
List types1 = schema1.getTypes();
List types2 = schema2.getTypes();
if (types1.size() != types2.size()) {
return false;
}
for (int i = 0; i < types1.size(); i++) {
if (!metaEqual(types1.get(i), types2.get(i), cache)) {
return false;
}
}
return true;
default:
return true;
}
}
private boolean fieldMetaEqual(
List fields1,
List fields2,
Map, Boolean> cache) {
if (fields1.size() != fields2.size()) {
return false;
}
for (int i = 0; i < fields1.size(); i++) {
org.apache.avro.Schema.Field field1 = fields1.get(i);
org.apache.avro.Schema.Field field2 = fields2.get(i);
if (field1 == field2) {
continue;
}
if (!Objects.equals(field1.aliases(), field2.aliases())
|| !Objects.equals(field1.doc(), field2.doc())) {
return false;
}
boolean fieldSchemaMetaEqual = metaEqual(field1.schema(), field2.schema(), cache);
if (!fieldSchemaMetaEqual) {
return false;
}
}
return true;
}
@Override
public int hashCode() {
if (hashCode == NO_HASHCODE) {
hashCode = Objects.hash(schemaObj, references, version)
+ metaHash(schemaObj, new IdentityHashMap<>());
}
return hashCode;
}
private int metaHash(org.apache.avro.Schema schema, Map cache) {
if (schema == null) {
return 0;
}
switch (schema.getType()) {
case RECORD:
// Add a temporary value to the cache to avoid cycles.
// As long as we recurse only at the end of the method, we can safely default to 0 here.
// The cache is updated at the end of the method with the actual comparison result.
Integer cacheHit = cache.putIfAbsent(schema, 0);
if (cacheHit != null) {
return cacheHit;
}
int result = Objects.hash(schema.getAliases(), schema.getDoc())
+ fieldMetaHash(schema.getFields(), cache);
cache.put(schema, result);
return result;
case ENUM:
return Objects.hash(schema.getAliases(), schema.getDoc(), schema.getEnumDefault());
case FIXED:
return Objects.hash(schema.getAliases(), schema.getDoc());
case UNION:
int hash = 0;
List types = schema.getTypes();
for (org.apache.avro.Schema type : types) {
hash += metaHash(type, cache);
}
return hash;
default:
return 0;
}
}
private int fieldMetaHash(List fields, Map cache) {
int hash = 0;
for (org.apache.avro.Schema.Field field : fields) {
hash += Objects.hash(field.aliases(), field.doc()) + metaHash(field.schema(), cache);
}
return hash;
}
@Override
public String toString() {
return canonicalString();
}
@Getter
static class IdentityPair {
private final K key;
private final V value;
public IdentityPair(K key, V value) {
this.key = key;
this.value = value;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
IdentityPair, ?> pair = (IdentityPair, ?>) o;
// Only perform identity check
return key == pair.key && value == pair.value;
}
@Override
public int hashCode() {
return System.identityHashCode(key) + System.identityHashCode(value);
}
@Override
public String toString() {
return "IdentityPair{"
+ "key=" + key
+ ", value=" + value
+ '}';
}
}
public enum Format {
DEFAULT("default"),
RESOLVED("resolved");
private static final EnumHashBiMap lookup =
EnumHashBiMap.create(Format.class);
static {
for (Format type : Format.values()) {
lookup.put(type, type.symbol());
}
}
private final String symbol;
Format(String symbol) {
this.symbol = symbol;
}
public String symbol() {
return symbol;
}
public static Format get(String symbol) {
return lookup.inverse().get(symbol);
}
public static Set symbols() {
return lookup.inverse().keySet();
}
@Override
public String toString() {
return symbol();
}
}
}