
// com.blueapron.connect.protobuf.ProtobufData (Maven / Gradle / Ivy)
package com.blueapron.connect.protobuf;
import com.google.protobuf.ByteString;
import com.google.protobuf.Descriptors;
import com.google.protobuf.Descriptors.FieldDescriptor;
import com.google.protobuf.Descriptors.OneofDescriptor;
import com.google.protobuf.GeneratedMessageV3.Builder;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.MapEntry;
import com.google.protobuf.Message;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.data.Timestamp;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.DataException;
import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.google.protobuf.util.Timestamps;
class ProtobufData {
/** Schema parameter key set on the decimal schema that represents UINT64 fields. */
public static final String CONNECT_DECIMAL_PRECISION_PROP = "connect.decimal.precision";

/** Reflective handle to the {@code newBuilder} method of the configured message class. */
private final Method newBuilder;

/** Connect schema derived once, at construction time, from the message type's default instance. */
private final Schema schema;

/**
 * Full name of the field option whose value, when present on a field, is used as the Connect
 * field name instead of the proto field name (matched case-insensitively).
 */
private final String legacyName;

/** When {@code true}, proto map fields are converted to Connect MAP schemas. */
private final boolean useConnectSchemaMap;

/**
 * Maps a key built from (containing type full name, Connect field name) to the original proto
 * field name. Populated as a side effect of resolving Connect field names.
 */
private final Map<String, String> connectProtoNameMap = new HashMap<>();
private Builder> getBuilder() {
try {
return (Builder>) newBuilder.invoke(Object.class);
} catch (Exception e) {
throw new ConnectException("Not a valid proto3 builder", e);
}
}
/**
 * Parses serialized protobuf bytes into a message of the configured type.
 *
 * @param value serialized message bytes; may be {@code null}
 * @return the parsed message, or {@code null} when {@code value} is {@code null}
 * @throws DataException if the bytes are not valid protobuf data for the configured type
 */
private Message getMessage(byte[] value) {
    // A null payload has nothing to parse. Returning null lets the caller's existing
    // "message == null" check take effect instead of failing inside mergeFrom.
    if (value == null) {
        return null;
    }
    try {
        return getBuilder().mergeFrom(value).build();
    } catch (InvalidProtocolBufferException e) {
        throw new DataException("Invalid protobuf data", e);
    }
}
/**
 * Builds the lookup key used for the Connect-name to proto-name map.
 *
 * @param descriptorContainingTypeName full name of the message type that declares the field
 * @param connectFieldName field name as exposed in the Connect schema
 * @return the type name and the field name joined by a {@code '#'} separator
 * @throws NullPointerException if either argument is {@code null}
 */
private String getProtoMapKey(String descriptorContainingTypeName, String connectFieldName) {
    // Plain concatenation is ambiguous ("pkg.Ab" + "c" equals "pkg.A" + "bc"). Protobuf full
    // names are made of letters, digits, underscores and dots, so the first '#' in the key
    // always marks the end of the type name.
    return descriptorContainingTypeName.concat("#").concat(connectFieldName);
}
/**
 * Resolves the name a proto field carries in the Connect schema and records the mapping back to
 * the proto field name.
 *
 * <p>The proto field name is used unless the field has an option whose full name matches
 * {@code legacyName} (case-insensitively); in that case the option's value is used. If several
 * options match, the last one iterated wins.
 *
 * @param descriptor the proto field being named
 * @return the Connect field name
 */
private String getConnectFieldName(Descriptors.FieldDescriptor descriptor) {
    String name = descriptor.getName();
    for (Map.Entry<FieldDescriptor, Object> option : descriptor.getOptions().getAllFields().entrySet()) {
        if (option.getKey().getFullName().equalsIgnoreCase(this.legacyName)) {
            name = option.getValue().toString();
        }
    }
    // Remember which proto field this Connect name belongs to so it can be looked up later.
    connectProtoNameMap.put(getProtoMapKey(descriptor.getContainingType().getFullName(), name), descriptor.getName());
    return name;
}
/**
 * Looks up the proto field name recorded for a Connect field name.
 *
 * @param descriptorForTypeName full name of the message type that declares the field
 * @param connectFieldName field name as exposed in the Connect schema
 * @return the recorded proto field name, or {@code null} if no mapping was recorded
 */
private String getProtoFieldName(String descriptorForTypeName, String connectFieldName) {
    final String lookupKey = getProtoMapKey(descriptorForTypeName, connectFieldName);
    return connectProtoNameMap.get(lookupKey);
}
/**
 * Creates a converter for the given generated message class with Connect map conversion
 * disabled.
 *
 * @param clazz generated protobuf message class; must declare a {@code newBuilder} method
 * @param legacyName full name of the field option that overrides Connect field names
 * @throws ConnectException if {@code clazz} does not declare {@code newBuilder}
 */
ProtobufData(Class<? extends com.google.protobuf.GeneratedMessageV3> clazz, String legacyName) {
    this(clazz, legacyName, false);
}
/**
 * Creates a converter for the given generated message class and derives its Connect schema.
 *
 * @param clazz generated protobuf message class; must declare a {@code newBuilder} method
 * @param legacyName full name of the field option that overrides Connect field names
 * @param useConnectSchemaMap whether proto map fields are converted to Connect MAP schemas
 * @throws ConnectException if {@code clazz} does not declare {@code newBuilder}, or if a builder
 *         cannot be obtained from it
 */
ProtobufData(Class<? extends com.google.protobuf.GeneratedMessageV3> clazz, String legacyName, boolean useConnectSchemaMap) {
    this.legacyName = legacyName;
    this.useConnectSchemaMap = useConnectSchemaMap;

    try {
        this.newBuilder = clazz.getDeclaredMethod("newBuilder");
    } catch (NoSuchMethodException e) {
        throw new ConnectException("Proto class " + clazz.getCanonicalName() + " is not a valid proto3 message class", e);
    }

    // The schema is derived last: it relies on newBuilder, legacyName and useConnectSchemaMap.
    this.schema = toConnectSchema(getBuilder().getDefaultInstanceForType());
}
/**
 * Converts serialized protobuf bytes into a Connect schema/value pair.
 *
 * @param value serialized message bytes
 * @return {@link SchemaAndValue#NULL} when no message was parsed, otherwise the converter's
 *         schema paired with the converted message
 */
SchemaAndValue toConnectData(byte[] value) {
    final Message parsed = getMessage(value);
    return parsed == null
            ? SchemaAndValue.NULL
            : new SchemaAndValue(this.schema, toConnectData(this.schema, parsed));
}
/**
 * Builds the Connect struct schema for a message type.
 *
 * <p>The struct is named after the message type's simple name and has one field per proto
 * field, each named via {@code getConnectFieldName}.
 *
 * @param message any instance of the message type (only its descriptor is read)
 * @return the struct schema for the message type
 */
private Schema toConnectSchema(Message message) {
    final Descriptors.Descriptor messageType = message.getDescriptorForType();
    final SchemaBuilder builder = SchemaBuilder.struct().name(messageType.getName());
    final List<Descriptors.FieldDescriptor> fieldDescriptorList = messageType.getFields();
    for (Descriptors.FieldDescriptor descriptor : fieldDescriptorList) {
        builder.field(getConnectFieldName(descriptor), toConnectSchema(descriptor));
    }

    return builder.build();
}
/**
 * Tells whether a message-typed field is a {@code google.protobuf.Timestamp}.
 *
 * @param descriptor a field whose type is MESSAGE
 * @return {@code true} if the field's message type has that full name
 */
private boolean isTimestampDescriptor(Descriptors.FieldDescriptor descriptor) {
    final String messageTypeName = descriptor.getMessageType().getFullName();
    return messageTypeName.equals("google.protobuf.Timestamp");
}
/**
 * Tells whether a message-typed field is a {@code google.type.Date}.
 *
 * @param descriptor a field whose type is MESSAGE
 * @return {@code true} if the field's message type has that full name
 */
private boolean isDateDescriptor(Descriptors.FieldDescriptor descriptor) {
    final String messageTypeName = descriptor.getMessageType().getFullName();
    return messageTypeName.equals("google.type.Date");
}
/**
 * Derives the Connect schema for a single proto field.
 *
 * <p>Mapping applied by this method:
 * <ul>
 *   <li>INT32, SINT32: int32</li>
 *   <li>INT64, SINT64, UINT32: int64</li>
 *   <li>UINT64: Decimal with scale 0 and the precision parameter set to "20"</li>
 *   <li>FLOAT: float32; DOUBLE: float64; BOOL: boolean</li>
 *   <li>STRING, ENUM: string; BYTES: bytes</li>
 *   <li>MESSAGE: Timestamp or Date logical type for the matching well-known types, a Connect map
 *       when map conversion applies, otherwise a struct built recursively from the nested
 *       message's fields</li>
 * </ul>
 * Every schema produced is optional. A repeated field that is not converted to a Connect map is
 * wrapped in an optional array of the element schema.
 *
 * @param descriptor the proto field to describe
 * @return the Connect schema for the field
 * @throws DataException if the field's proto type is not one of those listed above
 */
private Schema toConnectSchema(Descriptors.FieldDescriptor descriptor) {
    final SchemaBuilder builder;

    switch (descriptor.getType()) {
        case INT32:
        case SINT32:
            builder = SchemaBuilder.int32();
            break;

        case INT64:
        case SINT64:
        case UINT32:
            builder = SchemaBuilder.int64();
            break;

        case UINT64:
            builder = Decimal.builder(0).parameter(CONNECT_DECIMAL_PRECISION_PROP, "20");
            break;

        case FLOAT:
            builder = SchemaBuilder.float32();
            break;

        case DOUBLE:
            builder = SchemaBuilder.float64();
            break;

        case BOOL:
            builder = SchemaBuilder.bool();
            break;

        // TODO - Do we need to support byte or short?

        case STRING:
        case ENUM:
            builder = SchemaBuilder.string();
            break;

        case BYTES:
            builder = SchemaBuilder.bytes();
            break;

        case MESSAGE:
            if (isTimestampDescriptor(descriptor)) {
                builder = Timestamp.builder();
            } else if (isDateDescriptor(descriptor)) {
                builder = Date.builder();
            } else if (shouldConvertToConnectSchemaMap(descriptor)) {
                // The map's entry message exposes its two parts as fields "key" and "value".
                final Descriptors.Descriptor entryType = descriptor.getMessageType();
                final FieldDescriptor keyField = entryType.findFieldByName("key");
                final FieldDescriptor valueField = entryType.findFieldByName("value");
                final Schema keySchema = toConnectSchema(keyField);
                final Schema valueSchema = toConnectSchema(valueField);
                builder = SchemaBuilder.map(keySchema, valueSchema);
            } else {
                // Nested message: the struct is named after the field's JSON name with its
                // first character upper-cased.
                final String jsonName = descriptor.getJsonName();
                final String structName = jsonName.substring(0, 1).toUpperCase() + jsonName.substring(1);
                builder = SchemaBuilder.struct().name(structName);
                for (Descriptors.FieldDescriptor nestedField : descriptor.getMessageType().getFields()) {
                    builder.field(getConnectFieldName(nestedField), toConnectSchema(nestedField));
                }
            }
            break;

        default:
            throw new DataException("Unknown Connect schema type: " + descriptor.getType());
    }

    builder.optional();
    final Schema elementSchema = builder.build();

    // Only repeated fields that were not turned into a Connect map get the array wrapper.
    if (!descriptor.isRepeated() || shouldConvertToConnectSchemaMap(descriptor)) {
        return elementSchema;
    }
    return SchemaBuilder.array(elementSchema).optional().build();
}
/**
 * Decides whether a field is represented as a Connect map.
 *
 * @param descriptor the proto field to check
 * @return {@code true} only when map conversion is enabled and the field is a proto map field
 */
private boolean shouldConvertToConnectSchemaMap(FieldDescriptor descriptor) {
    if (!useConnectSchemaMap) {
        return false;
    }
    return descriptor.isMapField();
}
/**
 * Tells whether a schema carries the name of Connect's Timestamp logical type.
 *
 * @param schema the schema to check
 * @return {@code true} if the schema's name equals the name of {@code Timestamp.SCHEMA}
 */
private boolean isProtobufTimestamp(Schema schema) {
    final String timestampLogicalName = Timestamp.SCHEMA.name();
    return timestampLogicalName.equals(schema.name());
}
/**
 * Tells whether a schema carries the name of Connect's Date logical type.
 *
 * @param schema the schema to check
 * @return {@code true} if the schema's name equals the name of {@code Date.SCHEMA}
 */
private boolean isProtobufDate(Schema schema) {
    final String dateLogicalName = Date.SCHEMA.name();
    return dateLogicalName.equals(schema.name());
}
/**
 * Converts one proto field of a message and stores the result in the target struct.
 *
 * <p>A singular (non-repeated, non-map) message-typed field that is not set on the message is
 * stored as {@code null}; every other field is converted from the value the message reports.
 *
 * @param schema struct schema that contains the Connect field for {@code fieldDescriptor}
 * @param message message the field value is read from
 * @param result struct that receives the converted value
 * @param fieldDescriptor the proto field to convert
 */
private void setStructField(Schema schema, Message message, Struct result, Descriptors.FieldDescriptor fieldDescriptor) {
    final String connectName = getConnectFieldName(fieldDescriptor);
    final Field connectField = schema.field(connectName);

    // Checks run in this order so that hasField is only consulted for singular message fields.
    final boolean unsetSingularMessage =
            fieldDescriptor.getType() == FieldDescriptor.Type.MESSAGE
                    && !fieldDescriptor.isRepeated()
                    && !fieldDescriptor.isMapField()
                    && !message.hasField(fieldDescriptor);

    final Object converted;
    if (unsetSingularMessage) {
        converted = null;
    } else {
        converted = toConnectData(connectField.schema(), message.getField(fieldDescriptor));
    }

    result.put(connectName, converted);
}
Object toConnectData(Schema schema, Object value) {
try {
if (isProtobufTimestamp(schema)) {
com.google.protobuf.Timestamp timestamp = (com.google.protobuf.Timestamp) value;
return Timestamp.toLogical(schema, Timestamps.toMillis(timestamp));
}
if (isProtobufDate(schema)) {
com.google.type.Date date = (com.google.type.Date) value;
return ProtobufUtils.convertFromGoogleDate(date);
}
Object converted;
switch (schema.type()) {
// Pass through types
case INT32: {
Integer intValue = (Integer) value; // Validate type
converted = value;
break;
}
case INT64: {
try {
Long longValue = (Long) value; // Validate type
converted = value;
} catch (ClassCastException e) {
Integer intValue = (Integer) value; // Validate type
converted = Integer.toUnsignedLong(intValue);
}
break;
}
case FLOAT32: {
Float floatValue = (Float) value; // Validate type
converted = value;
break;
}
case FLOAT64: {
Double doubleValue = (Double) value; // Validate type
converted = value;
break;
}
case BOOLEAN: {
Boolean boolValue = (Boolean) value; // Validate type
converted = value;
break;
}
// TODO - Do we need to support byte or short?
/*case INT8:
// Encoded as an Integer
converted = value == null ? null : ((Integer) value).byteValue();
break;
case INT16:
// Encoded as an Integer
converted = value == null ? null : ((Integer) value).shortValue();
break;*/
case STRING:
if (value instanceof String) {
converted = value;
} else if (value instanceof CharSequence
|| value instanceof Enum
|| value instanceof Descriptors.EnumValueDescriptor) {
converted = value.toString();
} else {
throw new DataException("Invalid class for string type, expecting String or "
+ "CharSequence but found " + value.getClass());
}
break;
case BYTES:
if (value instanceof byte[]) {
converted = ByteBuffer.wrap((byte[]) value);
} else if (value instanceof ByteString) {
final byte[] valueBytes = ((ByteString) value).toByteArray();
converted = ByteBuffer.wrap(valueBytes);
} else if (value instanceof ByteBuffer) {
converted = value;
} else if (value instanceof Long){
BigInteger unsigned = toUnsigned(BigInteger.valueOf((Long) value));
converted = new BigDecimal(unsigned, 0);
} else {
throw new DataException("Invalid class for bytes type, expecting byte[], ByteString, "
+ "or ByteBuffer but found " + value.getClass());
}
break;
// Used for repeated types
case ARRAY: {
final Schema valueSchema = schema.valueSchema();
final Collection
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy