All downloads are free. Search and download functionality uses the official Maven repository.

io.atleon.avro.AvroSerializer Maven / Gradle / Ivy

There is a newer version: 0.28.3
Show newest version
package io.atleon.avro;

import io.atleon.schema.SchemaBytes;
import io.atleon.schema.SchematicPreSerializer;
import io.atleon.schema.SchematicSerializer;
import io.atleon.util.Throwing;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.EncoderFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.EnumSet;
import java.util.function.Function;
import java.util.function.Supplier;

/**
 * This Serializer implements serialization-time Avro Schema loading such that Schemas for written
 * data types do not need to be generated/provided prior to runtime. In addition to supporting
 * plain data types and Avro-native data types, this Serializer also supports usage with generic
 * data types via Schema generation based on values populated at serialization time.
 *
 * 

It should be noted that usage of Schema generation should be combined with some form of * caching, as generating schemas is a heavy-duty process, and certain usages (like with Schema * Registry) enforce a cap on the number of used schemas (by Object identity). Usage with generic * data types can complicate this restriction if it is possible for serialized generic types to * change during the course of an application's lifetime. In all other cases when schemas are * otherwise stable on a per-type basis, this class provides a cache that can be enabled to * minimize the number of generated schemas. */ public final class AvroSerializer implements SchematicSerializer { private enum Transform {REMOVE_JAVA_PROPERTIES} private final GenericData genericData; private final Function typeSchemaLoader; private final boolean schemaCachingEnabled; private final boolean schemaGenerationEnabled; private final EnumSet transforms; private final AvroSchemaCache> schemaCache = new AvroSchemaCache<>(); private final AvroSchemaCache transformedSchemas = new AvroSchemaCache<>(); private AvroSerializer(GenericData genericData, Function typeSchemaLoader) { this(genericData, typeSchemaLoader, false, false, EnumSet.noneOf(Transform.class)); } private AvroSerializer( GenericData genericData, Function typeSchemaLoader, boolean schemaCachingEnabled, boolean schemaGenerationEnabled, EnumSet transforms ) { this.genericData = genericData; this.typeSchemaLoader = typeSchemaLoader; this.schemaCachingEnabled = schemaCachingEnabled; this.schemaGenerationEnabled = schemaGenerationEnabled; this.transforms = transforms; } public static AvroSerializer generic() { return create(GenericData.get()); } public static AvroSerializer reflect() { return create(AtleonReflectData.get()); } public static AvroSerializer create(GenericData genericData) { return new AvroSerializer<>(genericData, GenericDatas.createTypeSchemaLoader(genericData)); } public AvroSerializer withSchemaCachingEnabled(boolean 
schemaCachingEnabled) { return new AvroSerializer<>( genericData, typeSchemaLoader, schemaCachingEnabled, schemaGenerationEnabled, transforms ); } public AvroSerializer withSchemaGenerationEnabled(boolean schemaGenerationEnabled) { return new AvroSerializer<>( genericData, typeSchemaLoader, schemaCachingEnabled, schemaGenerationEnabled, transforms ); } public AvroSerializer withRemoveJavaProperties(boolean removeJavaProperties) { return new AvroSerializer<>( genericData, typeSchemaLoader, schemaCachingEnabled, schemaGenerationEnabled, modifyIfNecessary(transforms, Transform.REMOVE_JAVA_PROPERTIES, removeJavaProperties) ); } @Override public SchemaBytes serialize(T data, SchematicPreSerializer preSerializer) { try { return serializeUnsafe(data, preSerializer); } catch (Exception e) { throw Throwing.propagate(e); } } private SchemaBytes serializeUnsafe(T data, SchematicPreSerializer preSerializer) throws IOException { Schema schema = schemaCachingEnabled ? schemaCache.load(data.getClass(), __ -> loadSchema(data)) : loadSchema(data); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); Schema writerSchema = preSerializer.apply(schema, outputStream); createDatumWriter(writerSchema).write(data, EncoderFactory.get().directBinaryEncoder(outputStream, null)); return SchemaBytes.serialized(writerSchema, outputStream.toByteArray()); } private Schema loadSchema(T data) { Supplier supplier = schemaGenerationEnabled ? () -> AvroSerialization.generateWriterSchema(data, typeSchemaLoader) : () -> typeSchemaLoader.apply(data.getClass()); Schema schema = AvroSchemas.getOrSupply(data, supplier); return transforms.isEmpty() ? schema : transformSchema(schema); } private Schema transformSchema(Schema schema) { return schemaCachingEnabled // Avoid redundant caching ? 
transformUncachedSchema(schema) : transformedSchemas.load(schema, this::transformUncachedSchema); } private Schema transformUncachedSchema(Schema schema) { if (transforms.contains(Transform.REMOVE_JAVA_PROPERTIES)) { schema = AvroSchemas.removeJavaProperties(schema); } return schema; } private DatumWriter createDatumWriter(Schema schema) { return (DatumWriter) genericData.createDatumWriter(schema); } private static > EnumSet modifyIfNecessary(EnumSet source, E value, boolean shouldContain) { EnumSet result = EnumSet.copyOf(source); boolean modified = shouldContain ? result.add(value) : result.remove(value); return modified ? result : source; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy