io.axual.connect.plugins.adls.gen2.avro.ContainerGenerator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of adls-gen2-sink Show documentation
Show all versions of adls-gen2-sink Show documentation
Collect the records from topics in an Azure Data Lake Storage Gen2
package io.axual.connect.plugins.adls.gen2.avro;
/*-
* ========================LICENSE_START=================================
* Azure Data Lake Storage Gen2 Sink Connector for Kafka Connect
* %%
* Copyright (C) 2021 Axual B.V.
* %%
* Licensed under the Apache License, Version 2.0 (the "License")
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =========================LICENSE_END==================================
*/
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.Collections;
import io.axual.connect.plugins.adls.gen2.exceptions.AdlsGen2Exception;
import io.axual.connect.plugins.adls.gen2.extract.PayloadType;
import io.axual.connect.plugins.adls.gen2.extract.ScanResult;
/**
 * A helper class for generating {@link Container} instances. The generator is instantiated with
 * a {@link MessageDigest} which is used for generating the schema fingerprints.
 *
 * <p>NOTE(review): the one {@link MessageDigest} instance is reused for every fingerprint call;
 * presumably a generator is not shared between threads without external synchronization, confirm
 * against the callers.
 */
public class ContainerGenerator {
    // The <Schema> type argument is required: with a raw TypeBuilder the chained builder calls
    // lose their Schema result type and the assignments below do not compile.
    private static final SchemaBuilder.TypeBuilder<Schema> GLOBAL_BUILDER =
            SchemaBuilder.builder(ContainerDefinitions.NAMESPACE);

    /** Record schema of a single Kafka header: a required name and a nullable primitive value. */
    private static final Schema SCHEMA_KAFKA_HEADER = GLOBAL_BUILDER
            .record(ContainerDefinitions.RECORD_NAME_KAFKA_HEADER)
            .fields()
            .requiredString(ContainerDefinitions.FIELD_NAME_KAFKA_HEADER_NAME)
            .name(ContainerDefinitions.FIELD_NAME_KAFKA_HEADER_VALUE).type()
            .unionOf().nullType()
            .and().booleanType()
            .and().intType()
            .and().longType()
            .and().floatType()
            .and().doubleType()
            .and().bytesType()
            .and().stringType()
            .endUnion().nullDefault()
            .endRecord();

    /** Enumeration schema listing the payload types that can be stored for a key or value. */
    private static final Schema SCHEMA_PAYLOAD_TYPE = GLOBAL_BUILDER
            .enumeration(ContainerDefinitions.ENUM_NAME_PAYLOAD_TYPE)
            .symbols(ContainerDefinitions.ENUM_VALUE_NULL,
                    ContainerDefinitions.ENUM_VALUE_BOOLEAN,
                    ContainerDefinitions.ENUM_VALUE_INTEGER,
                    ContainerDefinitions.ENUM_VALUE_LONG,
                    ContainerDefinitions.ENUM_VALUE_FLOAT,
                    ContainerDefinitions.ENUM_VALUE_DOUBLE,
                    ContainerDefinitions.ENUM_VALUE_BYTES,
                    ContainerDefinitions.ENUM_VALUE_STRING,
                    ContainerDefinitions.ENUM_VALUE_AVRO,
                    ContainerDefinitions.ENUM_VALUE_COMPLEX);

    private final MessageDigest messageDigest;

    /**
     * Construct a new generator and use the provided {@link MessageDigest} for fingerprinting the
     * schemas used.
     *
     * @param messageDigest the Message Digest to use for generating fingerprints
     */
    public ContainerGenerator(MessageDigest messageDigest) {
        this.messageDigest = messageDigest;
    }

    /**
     * Create a new {@link Container} based on the provided key and value {@link ScanResult}.
     *
     * <p>The container schema holds optional topic, partition, offset and timestamp fields, an
     * array of headers that defaults to an empty list, and one nested record each for the key and
     * the value.
     *
     * @param keyScan   the scanResult of the key data
     * @param valueScan the scanResult of the value data
     * @return a fully instantiated {@link Container} for processing records with the provided key and value types
     * @throws AdlsGen2Exception if a scan result or its type is {@code null}, if the type is not
     *                           supported, or if no schema can be determined for it
     */
    public Container createContainer(ScanResult keyScan, ScanResult valueScan) {
        // A fresh builder per container, so the key/value record names are registered per schema
        SchemaBuilder.TypeBuilder<Schema> schemaBuilder = SchemaBuilder.builder(ContainerDefinitions.NAMESPACE);
        Schema containerSchema = schemaBuilder.record(ContainerDefinitions.RECORD_NAME_CONTAINER)
                .fields()
                .optionalString(ContainerDefinitions.FIELD_NAME_CONTAINER_TOPIC)
                .optionalInt(ContainerDefinitions.FIELD_NAME_CONTAINER_PARTITION)
                .optionalLong(ContainerDefinitions.FIELD_NAME_CONTAINER_OFFSET)
                .optionalLong(ContainerDefinitions.FIELD_NAME_CONTAINER_TIMESTAMP)
                .name(ContainerDefinitions.FIELD_NAME_CONTAINER_HEADERS).type().array().items(SCHEMA_KAFKA_HEADER).arrayDefault(Collections.emptyList())
                .name(ContainerDefinitions.FIELD_NAME_CONTAINER_KEY).type(createKafkaKeyValueContainer(schemaBuilder, keyScan, true)).noDefault()
                .name(ContainerDefinitions.FIELD_NAME_CONTAINER_VALUE).type(createKafkaKeyValueContainer(schemaBuilder, valueScan, false)).noDefault()
                .endRecord();
        return new Container(containerSchema,
                keyScan.getType(), keyScan.getSchema(), fingerprint(keyScan.getSchema()),
                valueScan.getType(), valueScan.getSchema(), fingerprint(valueScan.getSchema()));
    }

    /**
     * Build the nested record schema that wraps either the key or the value of a Kafka record.
     * The record contains the payload type, an optional fingerprint and the payload itself.
     *
     * @param schemaBuilder the builder used to create the record schema
     * @param scanResult    the scan result describing the payload
     * @param isKey         {@code true} to use the key record name, {@code false} for the value record name
     * @return the record schema wrapping the payload
     * @throws AdlsGen2Exception if the scan result or its type is {@code null}, if the type is not
     *                           supported, or if no schema can be determined for the payload
     */
    Schema createKafkaKeyValueContainer(SchemaBuilder.TypeBuilder<Schema> schemaBuilder, ScanResult scanResult, boolean isKey) {
        // Also validates the scan result; it throws for null results, null types and unknown types
        Schema innerType = determineInnerSchema(scanResult);
        if (innerType == null) {
            // Fail fast with a descriptive error instead of handing a null schema to the union builder
            throw new AdlsGen2Exception("No schema available for scanresult of type " + scanResult.getType().name());
        }

        final Schema payloadSchema;
        if (scanResult.getType() == PayloadType.NULL) {
            // A NULL payload is a plain null type; a union of null with null is not needed
            payloadSchema = schemaBuilder.nullType();
        } else {
            // Every other payload is nullable: union of null and the actual type
            payloadSchema = schemaBuilder.unionOf().nullType().and().type(innerType).endUnion();
        }

        String recordName = isKey
                ? ContainerDefinitions.RECORD_NAME_KAFKA_KV_KEY
                : ContainerDefinitions.RECORD_NAME_KAFKA_KV_VALUE;
        return schemaBuilder.record(recordName)
                .fields()
                .name(ContainerDefinitions.FIELD_NAME_KAFKA_KV_TYPE).type(SCHEMA_PAYLOAD_TYPE).noDefault()
                .optionalString(ContainerDefinitions.FIELD_NAME_KAFKA_KV_FINGERPRINT)
                .name(ContainerDefinitions.FIELD_NAME_KAFKA_KV_PAYLOAD)
                .type(payloadSchema).withDefault(null)
                .endRecord();
    }

    /**
     * Determine the schema of the payload described by the scan result. Primitive payload types
     * map to the matching primitive schema; for AVRO the schema carried by the scan result is
     * returned as is.
     *
     * @param scanResult the scan result to determine the schema for
     * @return the schema matching the payload type of the scan result
     * @throws AdlsGen2Exception if the scan result or its type is {@code null}, or the type is not supported
     */
    Schema determineInnerSchema(ScanResult scanResult) {
        if (scanResult == null || scanResult.getType() == null) {
            throw new AdlsGen2Exception("Null scanresults or scanresult types are not supported");
        }
        switch (scanResult.getType()) {
            case NULL:
                return GLOBAL_BUILDER.nullType();
            case BOOLEAN:
                return GLOBAL_BUILDER.booleanType();
            case INTEGER:
                return GLOBAL_BUILDER.intType();
            case LONG:
                return GLOBAL_BUILDER.longType();
            case FLOAT:
                return GLOBAL_BUILDER.floatType();
            case DOUBLE:
                return GLOBAL_BUILDER.doubleType();
            case BYTES:
                return GLOBAL_BUILDER.bytesType();
            case STRING:
                return GLOBAL_BUILDER.stringType();
            case AVRO:
                return scanResult.getSchema();
            default:
                throw new AdlsGen2Exception("Unknown Type for scanresult " + scanResult.getType().name());
        }
    }

    /**
     * Calculate the fingerprint of a schema: the message digest of the UTF-8 bytes of the schema
     * rendered with {@code toString(false)}, encoded as a hexadecimal string.
     *
     * @param schema the schema to fingerprint, may be {@code null}
     * @return the hexadecimal fingerprint, or {@code null} if no schema was provided
     */
    public String fingerprint(Schema schema) {
        if (schema == null) {
            return null;
        }
        byte[] schemaBytes = schema.toString(false).getBytes(StandardCharsets.UTF_8);
        return encodeHexString(messageDigest.digest(schemaBytes));
    }

    /**
     * Encode a byte array as a lower case hexadecimal string, two characters per byte.
     *
     * @param byteArray the bytes to encode
     * @return the hexadecimal representation of the bytes
     */
    String encodeHexString(byte[] byteArray) {
        // Two hex digits per byte; presizing avoids intermediate buffer growth
        StringBuilder hexStringBuilder = new StringBuilder(byteArray.length * 2);
        for (byte value : byteArray) {
            hexStringBuilder.append(Character.forDigit((value >> 4) & 0xF, 16));
            hexStringBuilder.append(Character.forDigit(value & 0xF, 16));
        }
        return hexStringBuilder.toString();
    }

    /**
     * Convert a single byte to its two character lower case hexadecimal representation.
     *
     * @param num the byte to convert
     * @return a two character hexadecimal string, high nibble first
     */
    String byteToHex(byte num) {
        char[] hexDigits = new char[2];
        // Masking with 0xF drops the sign extension introduced by the shift of a negative byte
        hexDigits[0] = Character.forDigit((num >> 4) & 0xF, 16);
        hexDigits[1] = Character.forDigit((num & 0xF), 16);
        return new String(hexDigits);
    }
}