io.axual.connect.plugins.adls.gen2.avro.Container Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of adls-gen2-sink Show documentation
Show all versions of adls-gen2-sink Show documentation
Collect the records from topics in an Azure Data Lake Storage Gen2
package io.axual.connect.plugins.adls.gen2.avro;
/*-
* ========================LICENSE_START=================================
* Azure Data Lake Storage Gen2 Sink Connector for Kafka Connect
* %%
* Copyright (C) 2021 Axual B.V.
* %%
* Licensed under the Apache License, Version 2.0 (the "License")
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =========================LICENSE_END==================================
*/
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericContainer;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.kafka.connect.sink.SinkRecord;
import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import io.axual.connect.plugins.adls.gen2.exceptions.AdlsGen2ConversionException;
import io.axual.connect.plugins.adls.gen2.extract.PayloadType;
import io.axual.connect.plugins.adls.gen2.extract.ScanResult;
/**
* The Container class is a representation of the data used in an Avro Object Container file.
* It is used to convert a Connect SinkRecord to an entry for the container file
*
* It also contains the Avro Schema for the file, as well as the key and value types, to help
* determine if a new Container should be created for a record.
*/
public class Container {
    /** Schema of a single entry in the Avro Object Container file. */
    private final Schema containerSchema;

    /** Key settings this container was created for. */
    private final PayloadType keyType;
    private final Schema keySchema;
    private final String keyFingerprint;

    /** Value settings this container was created for. */
    private final PayloadType valueType;
    private final Schema valueSchema;
    private final String valueFingerprint;

    /**
     * Creates a container bound to one container schema and one key/value configuration.
     *
     * @param containerSchema  the schema used for the entries of the Avro Object Container file
     * @param keyType          the payload type of the record keys stored in this container
     * @param keySchema        the Avro schema of the key, used when the key type is Avro
     * @param keyFingerprint   the fingerprint, or hash, of the Avro schema used as key
     * @param valueType        the payload type of the record values stored in this container
     * @param valueSchema      the Avro schema of the value, used when the value type is Avro
     * @param valueFingerprint the fingerprint, or hash, of the Avro schema used as value
     */
    public Container(Schema containerSchema, PayloadType keyType, Schema keySchema, String keyFingerprint, PayloadType valueType, Schema valueSchema, String valueFingerprint) {
        this.containerSchema = containerSchema;
        this.keyType = keyType;
        this.keySchema = keySchema;
        this.keyFingerprint = keyFingerprint;
        this.valueType = valueType;
        this.valueSchema = valueSchema;
        this.valueFingerprint = valueFingerprint;
    }

    /**
     * Tells whether the scanned key of a record fits this container.
     * Only the payload type and the schema are compared; the fingerprint is not part of the check.
     *
     * @param keyScan the ScanResult of the key of a new record
     * @return true if both the type and the schema equal the key settings of this container
     */
    public boolean keyMatch(ScanResult keyScan) {
        if (!Objects.equals(keyScan.getType(), keyType)) {
            return false;
        }
        return Objects.equals(keyScan.getSchema(), keySchema);
    }

    /**
     * Tells whether the scanned value of a record fits this container.
     * Only the payload type and the schema are compared; the fingerprint is not part of the check.
     *
     * @param valueScan the ScanResult of the value of a new record
     * @return true if both the type and the schema equal the value settings of this container
     */
    public boolean valueMatch(ScanResult valueScan) {
        if (!Objects.equals(valueScan.getType(), valueType)) {
            return false;
        }
        return Objects.equals(valueScan.getSchema(), valueSchema);
    }

    /**
     * Builds the Avro Object Container file entry for a SinkRecord.
     * The entry carries the topic, partition, offset, timestamp, headers, key and value of the record.
     *
     * @param sinkRecord the SinkRecord to convert
     * @return the GenericRecord to append to the Avro Object Container file
     */
    public GenericRecord createRecord(SinkRecord sinkRecord) {
        GenericRecordBuilder entry = new GenericRecordBuilder(containerSchema);

        // Kafka coordinates and metadata of the record
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_TOPIC, sinkRecord.topic());
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_PARTITION, sinkRecord.kafkaPartition());
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_OFFSET, sinkRecord.kafkaOffset());
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_TIMESTAMP, sinkRecord.timestamp());
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_HEADERS, createKafkaHeaders(sinkRecord));

        // Key and value are nested records whose schemas are taken from the container schema
        Schema keyPayloadSchema = containerSchema.getField(ContainerDefinitions.FIELD_NAME_CONTAINER_KEY).schema();
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_KEY, createKeyPayload(keyPayloadSchema, sinkRecord));

        Schema valuePayloadSchema = containerSchema.getField(ContainerDefinitions.FIELD_NAME_CONTAINER_VALUE).schema();
        entry.set(ContainerDefinitions.FIELD_NAME_CONTAINER_VALUE, createValuePayload(valuePayloadSchema, sinkRecord));

        return entry.build();
    }

    /**
     * Converts the headers of a SinkRecord to an Avro array with one record per header.
     *
     * @param sinkRecord the SinkRecord providing the headers
     * @return the Avro array, typed by the headers field of the container schema
     * @throws AdlsGen2ConversionException if a header value has a type that cannot be converted
     */
    GenericData.Array createKafkaHeaders(SinkRecord sinkRecord) {
        Schema headersSchema = containerSchema.getField(ContainerDefinitions.FIELD_NAME_CONTAINER_HEADERS).schema();
        Schema elementSchema = headersSchema.getElementType();

        return new GenericData.Array<>(headersSchema,
                StreamSupport.stream(sinkRecord.headers().spliterator(), false)
                        .map(connectHeader -> {
                            GenericRecordBuilder headerEntry = new GenericRecordBuilder(elementSchema);
                            headerEntry.set(ContainerDefinitions.FIELD_NAME_KAFKA_HEADER_NAME, connectHeader.key());
                            headerEntry.set(ContainerDefinitions.FIELD_NAME_KAFKA_HEADER_VALUE, convertToAvro(connectHeader.value()));
                            return headerEntry.build();
                        })
                        .collect(Collectors.toList())
        );
    }

    /**
     * Builds the nested key record (type, fingerprint and payload) for a SinkRecord.
     *
     * @param payloadSchema the schema of the key field of the container schema
     * @param sinkRecord    the SinkRecord providing the key
     * @return the key record
     * @throws AdlsGen2ConversionException if the key has a type that cannot be converted
     */
    GenericRecord createKeyPayload(Schema payloadSchema, SinkRecord sinkRecord) {
        GenericRecordBuilder payload = describedPayload(payloadSchema, keyType, keyFingerprint);
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_PAYLOAD, convertToAvro(sinkRecord.key()));
        return payload.build();
    }

    /**
     * Builds the nested value record (type, fingerprint and payload) for a SinkRecord.
     *
     * @param payloadSchema the schema of the value field of the container schema
     * @param sinkRecord    the SinkRecord providing the value
     * @return the value record
     * @throws AdlsGen2ConversionException if the value has a type that cannot be converted
     */
    GenericRecord createValuePayload(Schema payloadSchema, SinkRecord sinkRecord) {
        GenericRecordBuilder payload = describedPayload(payloadSchema, valueType, valueFingerprint);
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_PAYLOAD, convertToAvro(sinkRecord.value()));
        return payload.build();
    }

    /**
     * Starts a key/value record with the type symbol and fingerprint already set; the payload
     * field is left for the caller to fill in.
     */
    private GenericRecordBuilder describedPayload(Schema payloadSchema, PayloadType type, String fingerprint) {
        GenericRecordBuilder payload = new GenericRecordBuilder(payloadSchema);
        Schema typeSchema = payloadSchema.getField(ContainerDefinitions.FIELD_NAME_KAFKA_KV_TYPE).schema();
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_TYPE, new GenericData.EnumSymbol(typeSchema, type.getAvroSymbol()));
        payload.set(ContainerDefinitions.FIELD_NAME_KAFKA_KV_FINGERPRINT, fingerprint);
        return payload;
    }

    /**
     * Maps an object to a representation that can be stored in an Avro record.
     * Null, Avro containers, booleans, character sequences, longs, integers, doubles, floats and
     * ByteBuffers are returned unchanged; byte arrays are wrapped in a ByteBuffer.
     *
     * @param toConvert the object to convert, may be null
     * @return the object itself, a ByteBuffer wrapping the byte array, or null for null input
     * @throws AdlsGen2ConversionException if the object is of any other type
     */
    Object convertToAvro(Object toConvert) {
        if (toConvert == null) {
            return null;
        }

        // The only type that needs an actual conversion
        if (toConvert instanceof byte[]) {
            return ByteBuffer.wrap((byte[]) toConvert);
        }

        boolean usableAsIs = toConvert instanceof GenericContainer
                || toConvert instanceof Boolean
                || toConvert instanceof CharSequence
                || toConvert instanceof Long
                || toConvert instanceof Integer
                || toConvert instanceof Double
                || toConvert instanceof Float
                || toConvert instanceof ByteBuffer;
        if (usableAsIs) {
            return toConvert;
        }

        throw new AdlsGen2ConversionException(String.format("Cannot convert type %s to Avro compatible type", toConvert.getClass().getName()));
    }

    /**
     * @return the schema used for the entries of the Avro Object Container file
     */
    public Schema getContainerSchema() {
        return containerSchema;
    }
}