// io.atleon.avro.AvroSerializer (Maven / Gradle / Ivy)
package io.atleon.avro;
import io.atleon.schema.SchemaBytes;
import io.atleon.schema.SchematicPreSerializer;
import io.atleon.schema.SchematicSerializer;
import io.atleon.util.Throwing;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.EncoderFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.EnumSet;
import java.util.function.Function;
import java.util.function.Supplier;
/**
* This Serializer implements serialization-time Avro Schema loading such that Schemas for written
* data types do not need to be generated/provided prior to runtime. In addition to supporting
* plain data types and Avro-native data types, this Serializer also supports usage with generic
* data types via Schema generation based on values populated at serialization time.
*
* It should be noted that usage of Schema generation should be combined with some form of
* caching, as generating schemas is a heavy-duty process, and certain usages (like with Schema
* Registry) enforce a cap on the number of used schemas (by Object identity). Usage with generic
* data types can complicate this restriction if it is possible for serialized generic types to
* change during the course of an application's lifetime. In all other cases when schemas are
* otherwise stable on a per-type basis, this class provides a cache that can be enabled to
* minimize the number of generated schemas.
*/
public final class AvroSerializer implements SchematicSerializer {
private enum Transform {REMOVE_JAVA_PROPERTIES}
private final GenericData genericData;
private final Function typeSchemaLoader;
private final boolean schemaCachingEnabled;
private final boolean schemaGenerationEnabled;
private final EnumSet transforms;
private final AvroSchemaCache> schemaCache = new AvroSchemaCache<>();
private final AvroSchemaCache transformedSchemas = new AvroSchemaCache<>();
private AvroSerializer(GenericData genericData, Function typeSchemaLoader) {
this(genericData, typeSchemaLoader, false, false, EnumSet.noneOf(Transform.class));
}
private AvroSerializer(
GenericData genericData,
Function typeSchemaLoader,
boolean schemaCachingEnabled,
boolean schemaGenerationEnabled,
EnumSet transforms
) {
this.genericData = genericData;
this.typeSchemaLoader = typeSchemaLoader;
this.schemaCachingEnabled = schemaCachingEnabled;
this.schemaGenerationEnabled = schemaGenerationEnabled;
this.transforms = transforms;
}
public static AvroSerializer generic() {
return create(GenericData.get());
}
public static AvroSerializer reflect() {
return create(AtleonReflectData.get());
}
public static AvroSerializer create(GenericData genericData) {
return new AvroSerializer<>(genericData, GenericDatas.createTypeSchemaLoader(genericData));
}
public AvroSerializer withSchemaCachingEnabled(boolean schemaCachingEnabled) {
return new AvroSerializer<>(
genericData,
typeSchemaLoader,
schemaCachingEnabled,
schemaGenerationEnabled,
transforms
);
}
public AvroSerializer withSchemaGenerationEnabled(boolean schemaGenerationEnabled) {
return new AvroSerializer<>(
genericData,
typeSchemaLoader,
schemaCachingEnabled,
schemaGenerationEnabled,
transforms
);
}
public AvroSerializer withRemoveJavaProperties(boolean removeJavaProperties) {
return new AvroSerializer<>(
genericData,
typeSchemaLoader,
schemaCachingEnabled,
schemaGenerationEnabled,
modifyIfNecessary(transforms, Transform.REMOVE_JAVA_PROPERTIES, removeJavaProperties)
);
}
@Override
public SchemaBytes serialize(T data, SchematicPreSerializer preSerializer) {
try {
return serializeUnsafe(data, preSerializer);
} catch (Exception e) {
throw Throwing.propagate(e);
}
}
private SchemaBytes serializeUnsafe(T data, SchematicPreSerializer preSerializer) throws IOException {
Schema schema = schemaCachingEnabled ? schemaCache.load(data.getClass(), __ -> loadSchema(data)) : loadSchema(data);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
Schema writerSchema = preSerializer.apply(schema, outputStream);
createDatumWriter(writerSchema).write(data, EncoderFactory.get().directBinaryEncoder(outputStream, null));
return SchemaBytes.serialized(writerSchema, outputStream.toByteArray());
}
private Schema loadSchema(T data) {
Supplier supplier = schemaGenerationEnabled
? () -> AvroSerialization.generateWriterSchema(data, typeSchemaLoader)
: () -> typeSchemaLoader.apply(data.getClass());
Schema schema = AvroSchemas.getOrSupply(data, supplier);
return transforms.isEmpty() ? schema : transformSchema(schema);
}
private Schema transformSchema(Schema schema) {
return schemaCachingEnabled // Avoid redundant caching
? transformUncachedSchema(schema)
: transformedSchemas.load(schema, this::transformUncachedSchema);
}
private Schema transformUncachedSchema(Schema schema) {
if (transforms.contains(Transform.REMOVE_JAVA_PROPERTIES)) {
schema = AvroSchemas.removeJavaProperties(schema);
}
return schema;
}
private DatumWriter createDatumWriter(Schema schema) {
return (DatumWriter) genericData.createDatumWriter(schema);
}
private static > EnumSet modifyIfNecessary(EnumSet source, E value, boolean shouldContain) {
EnumSet result = EnumSet.copyOf(source);
boolean modified = shouldContain ? result.add(value) : result.remove(value);
return modified ? result : source;
}
}