All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.axual.connect.plugins.adls.gen2.avro.Container Maven / Gradle / Ivy

There is a newer version: 1.2.2
Show newest version
package io.axual.connect.plugins.adls.gen2.avro;

/*-
 * ========================LICENSE_START=================================
 * Azure Data Lake Storage Gen2 Sink Connector for Kafka Connect
 * %%
 * Copyright (C) 2021 Axual B.V.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License")
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =========================LICENSE_END==================================
 */

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericContainer;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.kafka.connect.sink.SinkRecord;

import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import io.axual.connect.plugins.adls.gen2.exceptions.AdlsGen2ConversionException;
import io.axual.connect.plugins.adls.gen2.extract.PayloadType;
import io.axual.connect.plugins.adls.gen2.extract.ScanResult;

/**
 * The Container class is a representation of the data used in an Avro Object Container file.
 * It is used to convert a Connect SinkRecord to an entry for the container file
 *
 * It also contains the Avro Schema for the file, as well as the key and value types, to help
 * determine if a new Container should be created for a record.
 */
public class Container {
    private final Schema containerSchema;
    private final PayloadType keyType;
    private final Schema keySchema;
    private final String keyFingerprint;
    private final PayloadType valueType;
    private final Schema valueSchema;
    private final String valueFingerprint;

    /**
     * Creates a container holding the file schema together with the key and value settings.
     *
     * @param containerSchema  schema of the entries written to the Avro Object Container file
     * @param keyType          payload type of the record keys handled by this container
     * @param keySchema        Avro schema of the key, compared against scan results in {@link #keyMatch(ScanResult)}
     * @param keyFingerprint   fingerprint stored with every key payload
     * @param valueType        payload type of the record values handled by this container
     * @param valueSchema      Avro schema of the value, compared against scan results in {@link #valueMatch(ScanResult)}
     * @param valueFingerprint fingerprint stored with every value payload
     */
    public Container(Schema containerSchema, PayloadType keyType, Schema keySchema, String keyFingerprint, PayloadType valueType, Schema valueSchema, String valueFingerprint) {
        this.containerSchema = containerSchema;
        this.keyType = keyType;
        this.keySchema = keySchema;
        this.keyFingerprint = keyFingerprint;
        this.valueType = valueType;
        this.valueSchema = valueSchema;
        this.valueFingerprint = valueFingerprint;
    }

    /**
     * Tells whether a scanned key has the same type and schema as this container's key.
     *
     * @param keyScan the scan result of the key of a new record
     * @return true when both the type and the schema are equal to the container's key settings
     */
    public boolean keyMatch(ScanResult keyScan) {
        if (!Objects.equals(keyScan.getType(), keyType)) {
            return false;
        }
        return Objects.equals(keyScan.getSchema(), keySchema);
    }

    /**
     * Tells whether a scanned value has the same type and schema as this container's value.
     *
     * @param valueScan the scan result of the value of a new record
     * @return true when both the type and the schema are equal to the container's value settings
     */
    public boolean valueMatch(ScanResult valueScan) {
        if (!Objects.equals(valueScan.getType(), valueType)) {
            return false;
        }
        return Objects.equals(valueScan.getSchema(), valueSchema);
    }

    /**
     * Builds the container file entry for a single SinkRecord.
     * The entry carries the topic, partition, offset, timestamp, headers, key and value
     * of the record, in that order.
     *
     * @param sinkRecord the record to turn into a container entry
     * @return a GenericRecord following the container schema
     */
    public GenericRecord createRecord(SinkRecord sinkRecord) {
        GenericRecordBuilder entry = new GenericRecordBuilder(containerSchema);

        // Record metadata
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_TOPIC, sinkRecord.topic());
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_PARTITION, sinkRecord.kafkaPartition());
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_OFFSET, sinkRecord.kafkaOffset());
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_TIMESTAMP, sinkRecord.timestamp());
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_HEADERS, createKafkaHeaders(sinkRecord));

        // Key and value are wrapped using the schema of their respective container field
        Schema keyFieldSchema = containerSchema.getField(ContainerDefinitions.FIELD_NAME_CONTAINER_KEY).schema();
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_KEY, createKeyPayload(keyFieldSchema, sinkRecord));

        Schema valueFieldSchema = containerSchema.getField(ContainerDefinitions.FIELD_NAME_CONTAINER_VALUE).schema();
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_VALUE, createValuePayload(valueFieldSchema, sinkRecord));

        return entry.build();
    }

    /**
     * Converts the headers of a SinkRecord into an Avro array of header records.
     * Each header record holds the header key and the header value passed through
     * {@link #convertToAvro(Object)}.
     *
     * @param sinkRecord the record whose headers are converted
     * @return an Avro array using the schema of the container's headers field
     */
    GenericData.Array createKafkaHeaders(SinkRecord sinkRecord) {
        final Schema headersSchema = containerSchema.getField(ContainerDefinitions.FIELD_NAME_CONTAINER_HEADERS).schema();
        final Schema entrySchema = headersSchema.getElementType();

        return new GenericData.Array<>(headersSchema,
                StreamSupport.stream(sinkRecord.headers().spliterator(), false)
                        .map(header -> {
                            GenericRecordBuilder headerEntry = new GenericRecordBuilder(entrySchema);
                            headerEntry.set(ContainerDefinitions.FIELD_NAME_KAFKA_HEADER_NAME, header.key());
                            headerEntry.set(ContainerDefinitions.FIELD_NAME_KAFKA_HEADER_VALUE, convertToAvro(header.value()));
                            return headerEntry.build();
                        })
                        .collect(Collectors.toList())
        );
    }

    /**
     * Wraps the key of a SinkRecord in a payload record holding the key type symbol,
     * the key fingerprint and the converted key.
     *
     * @param payloadSchema the schema of the payload record to build
     * @param sinkRecord    the record providing the key
     * @return the payload record for the key
     */
    GenericRecord createKeyPayload(Schema payloadSchema, SinkRecord sinkRecord) {
        GenericRecordBuilder payload = new GenericRecordBuilder(payloadSchema);
        Schema typeFieldSchema = payloadSchema.getField(ContainerDefinitions.FIELD_NAME_KAFKA_KV_TYPE).schema();
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_TYPE, new GenericData.EnumSymbol(typeFieldSchema, keyType.getAvroSymbol()));
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_FINGERPRINT, keyFingerprint);
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_PAYLOAD, convertToAvro(sinkRecord.key()));
        return payload.build();
    }

    /**
     * Wraps the value of a SinkRecord in a payload record holding the value type symbol,
     * the value fingerprint and the converted value.
     *
     * @param payloadSchema the schema of the payload record to build
     * @param sinkRecord    the record providing the value
     * @return the payload record for the value
     */
    GenericRecord createValuePayload(Schema payloadSchema, SinkRecord sinkRecord) {
        GenericRecordBuilder payload = new GenericRecordBuilder(payloadSchema);
        Schema typeFieldSchema = payloadSchema.getField(ContainerDefinitions.FIELD_NAME_KAFKA_KV_TYPE).schema();
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_TYPE, new GenericData.EnumSymbol(typeFieldSchema, valueType.getAvroSymbol()));
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_FINGERPRINT, valueFingerprint);
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_PAYLOAD, convertToAvro(sinkRecord.value()));
        return payload.build();
    }

    /**
     * Turns an object into the form that is stored in the container record.
     * Byte arrays are wrapped in a ByteBuffer. Null, Avro generic containers, booleans,
     * character sequences, longs, integers, doubles, floats and ByteBuffers are returned
     * unchanged. Any other type is rejected.
     *
     * @param toConvert the object to convert, may be null
     * @return the wrapped or unchanged object, or null if the input was null
     * @throws AdlsGen2ConversionException if the object is of a type that is not listed above
     */
    Object convertToAvro(Object toConvert) {
        // The only type that needs an actual conversion
        if (toConvert instanceof byte[]) {
            return ByteBuffer.wrap((byte[]) toConvert);
        }

        final boolean usableAsIs = toConvert == null
                || toConvert instanceof GenericContainer
                || toConvert instanceof Boolean
                || toConvert instanceof CharSequence
                || toConvert instanceof Long
                || toConvert instanceof Integer
                || toConvert instanceof Double
                || toConvert instanceof Float
                || toConvert instanceof ByteBuffer;
        if (usableAsIs) {
            return toConvert;
        }

        throw new AdlsGen2ConversionException(String.format("Cannot convert type %s to Avro compatible type", toConvert.getClass().getName()));
    }

    /**
     * @return the schema used for the entries of the Avro Object Container file
     */
    public Schema getContainerSchema() {
        return containerSchema;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy