All downloads are free. The search and download functionality uses the official Maven repository.

io.axual.connect.plugins.adls.gen2.avro.ContainerGenerator Maven / Gradle / Ivy

There is a newer version: 1.2.2
Show newest version
package io.axual.connect.plugins.adls.gen2.avro;

/*-
 * ========================LICENSE_START=================================
 * Azure Data Lake Storage Gen2 Sink Connector for Kafka Connect
 * %%
 * Copyright (C) 2021 Axual B.V.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License")
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =========================LICENSE_END==================================
 */

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.Collections;

import io.axual.connect.plugins.adls.gen2.exceptions.AdlsGen2Exception;
import io.axual.connect.plugins.adls.gen2.extract.PayloadType;
import io.axual.connect.plugins.adls.gen2.extract.ScanResult;

/**
 * A helper class for generating {@link Container} instances. The generator is instantiated with
 * a {@link MessageDigest} that is used for generating the schema fingerprints.
 * <p>
 * NOTE(review): the supplied {@link MessageDigest} is reused by every {@link #fingerprint(Schema)}
 * call and digest instances are stateful; confirm that a generator is never shared between
 * threads without external synchronization.
 */
public class ContainerGenerator {
    /**
     * Builder bound to the container namespace. It is used for the shared static schemas and for
     * resolving primitive types. The explicit {@code <Schema>} type argument is required: with a raw
     * {@code TypeBuilder} the fluent calls below would return {@code Object} and not type-check.
     */
    private static final SchemaBuilder.TypeBuilder<Schema> GLOBAL_BUILDER = SchemaBuilder.builder(ContainerDefinitions.NAMESPACE);

    /**
     * Schema of a single Kafka header: a required string name plus a value that is a union of
     * null and the primitive types listed below, defaulting to null.
     */
    private static final Schema SCHEMA_KAFKA_HEADER = GLOBAL_BUILDER
            .record(ContainerDefinitions.RECORD_NAME_KAFKA_HEADER)
            .fields()
            .requiredString(ContainerDefinitions.FIELD_NAME_KAFKA_HEADER_NAME)
            .name(ContainerDefinitions.FIELD_NAME_KAFKA_HEADER_VALUE).type()
            .unionOf().nullType()
            .and().booleanType()
            .and().intType()
            .and().longType()
            .and().floatType()
            .and().doubleType()
            .and().bytesType()
            .and().stringType()
            .endUnion().nullDefault()
            .endRecord();

    /**
     * Enumeration schema listing the payload type symbols stored next to each key and value.
     */
    private static final Schema SCHEMA_PAYLOAD_TYPE = GLOBAL_BUILDER
            .enumeration(ContainerDefinitions.ENUM_NAME_PAYLOAD_TYPE)
            .symbols(ContainerDefinitions.ENUM_VALUE_NULL,
                    ContainerDefinitions.ENUM_VALUE_BOOLEAN,
                    ContainerDefinitions.ENUM_VALUE_INTEGER,
                    ContainerDefinitions.ENUM_VALUE_LONG,
                    ContainerDefinitions.ENUM_VALUE_FLOAT,
                    ContainerDefinitions.ENUM_VALUE_DOUBLE,
                    ContainerDefinitions.ENUM_VALUE_BYTES,
                    ContainerDefinitions.ENUM_VALUE_STRING,
                    ContainerDefinitions.ENUM_VALUE_AVRO,
                    ContainerDefinitions.ENUM_VALUE_COMPLEX);

    private final MessageDigest messageDigest;

    /**
     * Construct a new generator and use the provided {@link MessageDigest} for fingerprinting the schemas used
     *
     * @param messageDigest the Message Digest to use for generating fingerprints
     */
    public ContainerGenerator(MessageDigest messageDigest) {
        this.messageDigest = messageDigest;
    }

    /**
     * Create a new {@link Container} based on the provided key and value {@link ScanResult}
     *
     * @param keyScan   the scanResult of the key data
     * @param valueScan the scanResult of the value data
     * @return a fully instantiated {@link Container} for processing records with the provided key and value types
     * @throws AdlsGen2Exception if a scan result or its type is null, or its type has no schema mapping
     */
    public Container createContainer(ScanResult keyScan, ScanResult valueScan) {
        // A fresh builder per container: the key and value wrapper records are named types that are
        // defined again on every call.
        SchemaBuilder.TypeBuilder<Schema> schemaBuilder = SchemaBuilder.builder(ContainerDefinitions.NAMESPACE);
        Schema containerSchema = schemaBuilder.record(ContainerDefinitions.RECORD_NAME_CONTAINER)
                .fields()
                .optionalString(ContainerDefinitions.FIELD_NAME_CONTAINER_TOPIC)
                .optionalInt(ContainerDefinitions.FIELD_NAME_CONTAINER_PARTITION)
                .optionalLong(ContainerDefinitions.FIELD_NAME_CONTAINER_OFFSET)
                .optionalLong(ContainerDefinitions.FIELD_NAME_CONTAINER_TIMESTAMP)
                .name(ContainerDefinitions.FIELD_NAME_CONTAINER_HEADERS).type().array().items(SCHEMA_KAFKA_HEADER).arrayDefault(Collections.emptyList())
                .name(ContainerDefinitions.FIELD_NAME_CONTAINER_KEY).type(createKafkaKeyValueContainer(schemaBuilder, keyScan, true)).noDefault()
                .name(ContainerDefinitions.FIELD_NAME_CONTAINER_VALUE).type(createKafkaKeyValueContainer(schemaBuilder, valueScan, false)).noDefault()
                .endRecord();

        return new Container(
                containerSchema,
                keyScan.getType(), keyScan.getSchema(), fingerprint(keyScan.getSchema()),
                valueScan.getType(), valueScan.getSchema(), fingerprint(valueScan.getSchema()));
    }

    /**
     * Build the wrapper record that holds either the key or the value of a Kafka record. The wrapper
     * consists of the payload type enum, an optional fingerprint string and the payload itself.
     *
     * @param schemaBuilder the builder on which the wrapper record is defined
     * @param scanResult    the scan result describing the payload
     * @param isKey         {@code true} to use the key record name, {@code false} to use the value record name
     * @return the schema of the wrapper record
     * @throws AdlsGen2Exception if the scan result or its type is null, or its type has no schema mapping
     */
    Schema createKafkaKeyValueContainer(SchemaBuilder.TypeBuilder<Schema> schemaBuilder, ScanResult scanResult, boolean isKey) {
        // Resolved first so that null or unsupported scan results are rejected before anything is built.
        Schema innerType = determineInnerSchema(scanResult);

        // A NULL payload is a plain null type; every other payload is a [null, inner] union so the
        // field can default to null.
        Schema payloadSchema = scanResult.getType() == PayloadType.NULL
                ? schemaBuilder.nullType()
                : schemaBuilder.unionOf().nullType().and().type(innerType).endUnion();

        String recordName = isKey
                ? ContainerDefinitions.RECORD_NAME_KAFKA_KV_KEY
                : ContainerDefinitions.RECORD_NAME_KAFKA_KV_VALUE;

        return schemaBuilder.record(recordName)
                .fields()
                .name(ContainerDefinitions.FIELD_NAME_KAFKA_KV_TYPE).type(SCHEMA_PAYLOAD_TYPE).noDefault()
                .optionalString(ContainerDefinitions.FIELD_NAME_KAFKA_KV_FINGERPRINT)
                .name(ContainerDefinitions.FIELD_NAME_KAFKA_KV_PAYLOAD)
                .type(payloadSchema).withDefault(null)
                .endRecord();
    }

    /**
     * Map the payload type of a scan result to the Avro schema used for the payload field.
     *
     * @param scanResult the scan result to resolve
     * @return the matching primitive schema, or the schema carried by the scan result for AVRO payloads
     * @throws AdlsGen2Exception if the scan result or its type is null, or the type is not handled here
     */
    Schema determineInnerSchema(ScanResult scanResult) {
        if (scanResult == null || scanResult.getType() == null) {
            throw new AdlsGen2Exception("Null scanresults or scanresult types are not supported");
        }

        switch (scanResult.getType()) {
            case NULL:
                return GLOBAL_BUILDER.nullType();
            case BOOLEAN:
                return GLOBAL_BUILDER.booleanType();
            case INTEGER:
                return GLOBAL_BUILDER.intType();
            case LONG:
                return GLOBAL_BUILDER.longType();
            case FLOAT:
                return GLOBAL_BUILDER.floatType();
            case DOUBLE:
                return GLOBAL_BUILDER.doubleType();
            case BYTES:
                return GLOBAL_BUILDER.bytesType();
            case STRING:
                return GLOBAL_BUILDER.stringType();
            case AVRO:
                return scanResult.getSchema();
            default:
                // Any payload type without an explicit case above ends up here.
                throw new AdlsGen2Exception("Unknown Type for scanresult " + scanResult.getType().name());
        }
    }

    /**
     * Calculate the fingerprint of a schema: the digest of the UTF-8 bytes of
     * {@code schema.toString(false)}, rendered as a lower-case hexadecimal string.
     *
     * @param schema the schema to fingerprint, may be {@code null}
     * @return the hexadecimal fingerprint, or {@code null} when no schema was provided
     */
    public String fingerprint(Schema schema) {
        if (schema == null) {
            return null;
        }
        byte[] schemaBytes = schema.toString(false).getBytes(StandardCharsets.UTF_8);
        return encodeHexString(messageDigest.digest(schemaBytes));
    }

    /**
     * Render a byte array as a lower-case hexadecimal string, two characters per byte.
     *
     * @param byteArray the bytes to encode
     * @return the hexadecimal representation, empty for an empty array
     */
    String encodeHexString(byte[] byteArray) {
        // Each byte yields exactly two characters, so the final length is known up front.
        StringBuilder hexStringBuilder = new StringBuilder(byteArray.length * 2);
        for (byte current : byteArray) {
            hexStringBuilder.append(byteToHex(current));
        }
        return hexStringBuilder.toString();
    }

    /**
     * Render a single byte as two lower-case hexadecimal characters, high nibble first.
     *
     * @param num the byte to encode
     * @return a two character hexadecimal string
     */
    String byteToHex(byte num) {
        char[] hexDigits = new char[2];
        // Masking with 0xF after the shift discards the sign extension of negative bytes.
        hexDigits[0] = Character.forDigit((num >> 4) & 0xF, 16);
        hexDigits[1] = Character.forDigit((num & 0xF), 16);
        return new String(hexDigits);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy