All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.atleon.avro.AvroDeserializer Maven / Gradle / Ivy

The newest version!
package io.atleon.avro;

import io.atleon.schema.KeyableSchema;
import io.atleon.schema.SchematicDeserializer;
import io.atleon.util.Throwing;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.specific.SpecificData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Type;
import java.nio.ByteBuffer;
import java.util.function.Function;

/**
 * This deserializer makes a best effort to take advantage of Avro Compatibility rules such that
 * deserialization does not break over time as writers may update their schemas. In other words, as
 * long as Avro writers make forward compatible changes to their schemas, this deserialization
 * should not break, even once writers take advantage of those changes (i.e. by populating data for
 * newly-added fields). This is accomplished by attempting to load appropriate reader Schemas to
 * match with any given writer Schema. Doing so requires not just deduction of the runtime types
 * being deserialized, but also instantiation of those types to cover cases when a data type may be
 * able to explicitly say what its Schema is (i.e. in the case of
 * {@link org.apache.avro.generic.GenericContainer GenericContainer}).
 *
 * 

This "reference data instantiation" indirectly allows this deserializer to also handle * generic deserialization types (however inadvisable that may be). Generic data fields are * recursively instantiated based on writer schema-specified type information, and when coupled * with reader schema generation based on that instantiated reference data, continues to allow * backward compatible deserialization of those generic data types. */ public final class AvroDeserializer implements SchematicDeserializer { private static final Logger LOGGER = LoggerFactory.getLogger(AvroDeserializer.class); private final AvroSchemaCache readerSchemasByWriterKey = new AvroSchemaCache<>(); private final GenericData genericData; private final Function typeSchemaLoader; private final boolean readerSchemaLoadingEnabled; private final boolean readerReferenceSchemaGenerationEnabled; private AvroDeserializer(GenericData genericData, Function typeSchemaLoader) { this(genericData, typeSchemaLoader, genericData instanceof SpecificData, false); } private AvroDeserializer( GenericData genericData, Function typeSchemaLoader, boolean readerSchemaLoadingEnabled, boolean readerReferenceSchemaGenerationEnabled ) { this.genericData = genericData; this.typeSchemaLoader = typeSchemaLoader; this.readerSchemaLoadingEnabled = readerSchemaLoadingEnabled; this.readerReferenceSchemaGenerationEnabled = readerReferenceSchemaGenerationEnabled; } public static AvroDeserializer generic() { return create(GenericData.get()); } public static AvroDeserializer specific() { return create(SpecificData.get()); } public static AvroDeserializer reflect() { return create(AtleonReflectData.get()); } public static AvroDeserializer create(GenericData genericData) { return new AvroDeserializer<>(genericData, GenericDatas.createTypeSchemaLoader(genericData)); } public AvroDeserializer withReaderSchemaLoadingEnabled(boolean readerSchemaLoadingEnabled) { return new AvroDeserializer<>( genericData, typeSchemaLoader, readerSchemaLoadingEnabled, readerReferenceSchemaGenerationEnabled ); } public AvroDeserializer withReaderReferenceSchemaGenerationEnabled(boolean readerReferenceSchemaGenerationEnabled) { return new AvroDeserializer<>( genericData, typeSchemaLoader, readerSchemaLoadingEnabled, readerReferenceSchemaGenerationEnabled ); } @Override public T deserialize(byte[] data, Function> dataBufferToWriterSchema) { try { return deserializeUnsafe(data, dataBufferToWriterSchema); } catch (IOException e) { throw Throwing.propagate(e); } } private T deserializeUnsafe( byte[] data, Function> dataBufferToWriterSchema ) throws IOException { ByteBuffer buffer = ByteBuffer.wrap(data); KeyableSchema keyableWriterSchema = dataBufferToWriterSchema.apply(buffer); Schema readerSchema = keyableWriterSchema.key() .map(writerKey -> readerSchemasByWriterKey.load(writerKey, __ -> loadReaderSchema(keyableWriterSchema.schema()))) .orElseGet(() -> loadReaderSchema(keyableWriterSchema.schema())); Decoder decoder = DecoderFactory.get() .binaryDecoder(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining(), null); return createDatumReader(keyableWriterSchema.schema(), readerSchema).read(null, decoder); } private Schema loadReaderSchema(Schema writerSchema) { try { return readerSchemaLoadingEnabled ? loadReaderSchemaUnsafe(writerSchema) : writerSchema; } catch (Exception e) { LOGGER.error("Failed to load readerSchema. Defaulting to writerSchema={}", writerSchema, e); return writerSchema; } } private Schema loadReaderSchemaUnsafe(Schema writerSchema) { // Note: If deserialization type is a SpecificRecord, it conventionally has a no-arg public constructor Object referenceData = AvroDeserialization.instantiateReferenceData(writerSchema); return AvroSchemas.getOrSupply(referenceData, () -> loadReaderSchemaUnsafe(writerSchema, referenceData)); } private Schema loadReaderSchemaUnsafe(Schema writerSchema, Object referenceData) { return readerReferenceSchemaGenerationEnabled ? AvroDeserialization.generateReaderReferenceSchema(referenceData, writerSchema, typeSchemaLoader) : typeSchemaLoader.apply(referenceData.getClass()); } private DatumReader createDatumReader(Schema writerSchema, Schema readerSchema) { return (DatumReader) genericData.createDatumReader(writerSchema, readerSchema); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy