All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.elephantbird.util.ThriftToDynamicProto Maven / Gradle / Ivy

The newest version!
package com.twitter.elephantbird.util;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.protobuf.ByteString;
import com.google.protobuf.DescriptorProtos;
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto.Label;
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto.Type;
import com.google.protobuf.Descriptors;
import com.google.protobuf.Descriptors.DescriptorValidationException;
import com.google.protobuf.Descriptors.FieldDescriptor;
import com.google.protobuf.DynamicMessage;
import com.google.protobuf.Message;

import org.apache.thrift.TBase;
import org.apache.thrift.TFieldIdEnum;
import org.apache.thrift.protocol.TType;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.elephantbird.thrift.TStructDescriptor;
import com.twitter.elephantbird.thrift.TStructDescriptor.Field;
import com.twitter.elephantbird.util.Pair;

/**
 * Translates a Thrift object into a Protocol Buffer message.
 * 

* For most data types, there is a 1:1 mapping between Thrift * and Protobufs, except for:

    * *
  • enums: converted to their String representations. *
  • lists: added to the enclosing struct as a repeated field. *
  • sets: protobufs doesn't have native support for them so they're * treated as lists and we rely on thrift to enforce uniqueness. *
  • maps: protobufs also doesn't have native support for these so we * create an intermediate Message type with key and value fields of the * appropriate types and add these as repeated fields of the enclosing * struct.
      * * @param Source thrift class */ public class ThriftToDynamicProto> { private static final Logger LOG = LoggerFactory.getLogger(ThriftToDynamicProto.class); private static final List> EMPTY_FIELDS = new ArrayList>(); private static final String MAP_KEY_FIELD_NAME = "key"; private static final String MAP_VALUE_FIELD_NAME = "value"; private boolean supportNestedObjects = false; private boolean ignoreUnsupportedTypes = false; private final Descriptors.FileDescriptor fileDescriptor; // a map of descriptors keyed to their typeName private Map descriptorBuilderMap = Maps.newHashMap(); // a map of message builders keyed to their typeName private Map messageBuilderMap = Maps.newHashMap(); /** * Create a Dynamic Protocol Buffer message with fields equivalent to those in the provided * Thrift class. * @param thriftClass * @throws DescriptorValidationException */ public ThriftToDynamicProto(Class thriftClass) throws DescriptorValidationException { this(thriftClass, new ArrayList>()); } public ThriftToDynamicProto(Class thriftClass, boolean supportNestedObjects, boolean ignoreUnsupportedTypes) throws DescriptorValidationException { this(thriftClass, new ArrayList>(), supportNestedObjects, ignoreUnsupportedTypes); } /** * Create a converter that produces a Dynamic Protocol Buffer message with fields equivalent to * those in the provided Thrift class, plus additional fields as defined by extraFields. Nested * Thrift Structs will be ignored, but other unsupported types will throw an exception. * @param thriftClass class to use to generate the protobuf schema * @param extraFields a list of pairs of (fieldName, protobufType) that defines extra fields to * add to the generated message. * @throws DescriptorValidationException */ public ThriftToDynamicProto(Class thriftClass, List> extraFields) throws DescriptorValidationException { this(thriftClass, extraFields, false, false); } /** * Create a converter that produces a Dynamic Protocol Buffer message with fields equivalent to * those in the provided Thrift class, plus additional fields as defined by extraFields. * @param thriftClass class to use to generate the protobuf schema * @param extraFields a list of pairs of (fieldName, protobufType) that defines extra fields to * add to the generated message. * @param supportNestedObjects if true, builds a nested structure. Defaults is false, which will * silently ignore nested objects, which include structs, lists/sets of structs and maps of all * types. Lists/sets of base types are not considered to be nested objects. * @param ignoreUnsupportedTypes if true, ignores types that aren't supported. If false an * exception will be thrown when an unsupported type is encountered. Default is false. * @throws DescriptorValidationException */ public ThriftToDynamicProto(Class thriftClass, List> extraFields, boolean supportNestedObjects, boolean ignoreUnsupportedTypes) throws DescriptorValidationException { this.supportNestedObjects = supportNestedObjects; this.ignoreUnsupportedTypes = ignoreUnsupportedTypes; // setup the descriptor proto builder for the top-level class DescriptorProtos.DescriptorProto.Builder desBuilder = DescriptorProtos.DescriptorProto.newBuilder(); desBuilder.setName(protoMessageType(thriftClass)); descriptorBuilderMap.put(desBuilder.getName(), desBuilder); // convert the thrift schema to a proto schema thriftToProtoSchema(desBuilder, TStructDescriptor.getInstance(thriftClass), extraFields); // add all of the message types to the file descriptor proto builder DescriptorProtos.FileDescriptorProto.Builder fileDescProtoBuilder = DescriptorProtos.FileDescriptorProto.newBuilder(); for (DescriptorProtos.DescriptorProto.Builder builder : descriptorBuilderMap.values()) { fileDescProtoBuilder.addMessageType(builder); } // create dynamic message builders to be cloned for all types Descriptors.FileDescriptor dynamicDescriptor = Descriptors.FileDescriptor.buildFrom( fileDescProtoBuilder.build(), new Descriptors.FileDescriptor[0]); for (String type : descriptorBuilderMap.keySet()) { Descriptors.Descriptor msgDescriptor = dynamicDescriptor.findMessageTypeByName(type); messageBuilderMap.put(type, DynamicMessage.newBuilder(msgDescriptor)); } fileDescriptor = dynamicDescriptor; } /** * For the given thriftClass, return a Protobufs builder to build a similar protobuf class. * @param thriftClass The thrift class for which the builder is desired. * @return a protobuf message builder */ public Message.Builder getBuilder(Class> thriftClass) { return messageBuilderMap.get(protoMessageType(thriftClass)).clone(); } /** * Return Protobufs builder for a Map field */ private Message.Builder mapEntryProtoBuilder(TStructDescriptor descriptor, Field field) { return messageBuilderMap.get(mapProtoMessageType(descriptor, field)).clone(); } private void thriftToProtoSchema(DescriptorProtos.DescriptorProto.Builder desBuilder, TStructDescriptor fieldDesc, List> extraFields) throws DescriptorValidationException { int maxThriftId = doSchemaMapping(desBuilder, fieldDesc); // handle extra fields if they exist. Only supported on the top level message int extraFieldIdx = maxThriftId + 1; for (Pair extraField : extraFields) { addProtoField(desBuilder, extraField.getFirst(), ++extraFieldIdx, extraField.getSecond(), false); } } private int doSchemaMapping(DescriptorProtos.DescriptorProto.Builder desBuilder, TStructDescriptor fieldDesc) throws DescriptorValidationException { int maxThriftId = 0; for (Field tField : fieldDesc.getFields()) { maxThriftId = Math.max(tField.getFieldId(), maxThriftId); if (supportNestedObjects && tField.isMap()) { String typeName = mapProtoMessageType(fieldDesc, tField); if (descriptorBuilderMap.get(typeName) == null) { DescriptorProtos.DescriptorProto.Builder mapBuilder = mapDescriptorProtoBuilder(tField, typeName); descriptorBuilderMap.put(typeName, mapBuilder); addProtoField(desBuilder, tField.getName(), tField.getFieldId() + 1, typeName, true); } } else { Field field = resolveField(tField); Type protoType = thriftTypeToProtoType(field); boolean isContainer = isContainer(tField); if (supportNestedObjects && protoType == Type.TYPE_MESSAGE) { String typeName = resolveMessageTypeName(field.gettStructDescriptor()); // Protobuf field ids start at 1. Thrift starts at 0. addProtoField(desBuilder, tField.getName(), tField.getFieldId() + 1, typeName, isContainer); } else if (protoType != null) { if (supportNestedObjects || (!supportNestedObjects && !hasNestedObject(tField))) { addProtoField(desBuilder, tField.getName(), tField.getFieldId() + 1, protoType, isContainer); } } } } return maxThriftId; } // When dealing with a Set/List, we want to retrieve the relevant Field type private Field resolveField(Field inputField) { if (inputField.isList()) { return inputField.getListElemField(); } else if (inputField.isSet()) { return inputField.getSetElemField(); } else { return inputField; } } /** * Determines whether a field is considered to be a nested object based on: * - whether the field itself is a struct * - whether the field is a list/set of structs * - whether field is a Map */ private boolean hasNestedObject(Field field) { return field.isStruct() || (field.isList() && field.getListElemField().isStruct()) || (field.isSet() && field.getSetElemField().isStruct()) || field.isMap(); } /** * Generate a DescriptorProto.Builder for the Message type that will be used * to represent the entries of the input Map field. * * @param field a Map Field (field.isMap() == true) * @param typeName name of new message type */ private DescriptorProtos.DescriptorProto.Builder mapDescriptorProtoBuilder( Field field, String typeName) throws DescriptorValidationException { DescriptorProtos.DescriptorProto.Builder mapBuilder = DescriptorProtos.DescriptorProto.newBuilder().setName(typeName); Field keyField = field.getMapKeyField(); Field valueField = field.getMapValueField(); DescriptorProtos.FieldDescriptorProto.Builder keyBuilder = mapKeyProtoBuilder(); DescriptorProtos.FieldDescriptorProto.Builder valueBuilder = mapValueProtoBuilder(); setBuilderTypeFromField(keyField, keyBuilder); setBuilderTypeFromField(valueField, valueBuilder); mapBuilder.addField(keyBuilder.build()); mapBuilder.addField(valueBuilder.build()); return mapBuilder; } // field descriptor for key field to be used in Message used to hold Map entries private DescriptorProtos.FieldDescriptorProto.Builder mapKeyProtoBuilder() { return fieldDescriptorProtoBuilder(MAP_KEY_FIELD_NAME, 1).setLabel(Label.LABEL_REQUIRED); } // field descriptor for value field to be used in Message used to hold Map entries private DescriptorProtos.FieldDescriptorProto.Builder mapValueProtoBuilder() { return fieldDescriptorProtoBuilder(MAP_VALUE_FIELD_NAME, 2).setLabel(Label.LABEL_REQUIRED); } private void setBuilderTypeFromField( Field field, DescriptorProtos.FieldDescriptorProto.Builder builder ) throws DescriptorValidationException { Type valueProtoType = thriftTypeToProtoType(field); if (valueProtoType == Type.TYPE_MESSAGE) { builder.setTypeName(resolveMessageTypeName(field.gettStructDescriptor())); } else if (valueProtoType != null) { builder.setType(valueProtoType); } } /** * For a TStructDescriptor, resolves the typeName and optionally converts and memoizes it's * schema. */ private String resolveMessageTypeName(TStructDescriptor descriptor) throws DescriptorValidationException { String typeName = protoMessageType(descriptor.getThriftClass()); // Anytime we have a new message typeName, we make sure that we have a builder for it. // If not, we create one. DescriptorProtos.DescriptorProto.Builder builder = descriptorBuilderMap.get(typeName); if (builder == null) { builder = DescriptorProtos.DescriptorProto.newBuilder(); builder.setName(typeName); descriptorBuilderMap.put(typeName, builder); doSchemaMapping(builder, descriptor); } return typeName; } private void addProtoField(DescriptorProtos.DescriptorProto.Builder builder, String name, int fieldIdx, Type type, boolean isRepeated) { DescriptorProtos.FieldDescriptorProto.Builder fdBuilder = fieldDescriptorProtoBuilder(name, fieldIdx).setType(type); if (isRepeated) { fdBuilder.setLabel(Label.LABEL_REPEATED); } builder.addField(fdBuilder.build()); } private void addProtoField(DescriptorProtos.DescriptorProto.Builder builder, String name, int fieldIdx, String type, boolean isRepeated) { DescriptorProtos.FieldDescriptorProto.Builder fdBuilder = fieldDescriptorProtoBuilder(name, fieldIdx).setTypeName(type); if (isRepeated) { fdBuilder.setLabel(Label.LABEL_REPEATED); } builder.addField(fdBuilder.build()); } private DescriptorProtos.FieldDescriptorProto.Builder fieldDescriptorProtoBuilder( String name, int fieldIdx) { DescriptorProtos.FieldDescriptorProto.Builder fdBuilder = DescriptorProtos.FieldDescriptorProto.newBuilder() .setName(name) .setNumber(fieldIdx); return fdBuilder; } /** * Convert a thrift object to a protobuf message. * @param thriftObj thrift object * @return protobuf protobuf message */ @SuppressWarnings("unchecked") public Message convert(T thriftObj) { return doConvert((TBase) Preconditions.checkNotNull(thriftObj, "Can not convert a null object")); } /** * conver TBase object to Message object * @param thriftObj */ @SuppressWarnings("unchecked") public Message doConvert(TBase thriftObj) { if (thriftObj == null) { return null; } Class> clazz = (Class>) thriftObj.getClass(); checkState(clazz); Message.Builder builder = getBuilder(clazz); TStructDescriptor fieldDesc = TStructDescriptor.getInstance(clazz); int fieldId = 0; for (Field tField : fieldDesc.getFields()) { // don't want to carry over default values from unset fields if (!thriftObj.isSet((F) tField.getFieldIdEnum()) || (!supportNestedObjects && hasNestedObject(tField))) { fieldId++; continue; } // recurse into the object if it's a struct, otherwise just add the field if (supportNestedObjects && tField.getType() == TType.STRUCT) { TBase fieldValue = (TBase) fieldDesc.getFieldValue(fieldId++, thriftObj); Message message = doConvert(fieldValue); if (message != null) { FieldDescriptor protoFieldDesc = builder.getDescriptorForType().findFieldByName( tField.getName()); builder.setField(protoFieldDesc, message); } } else { fieldId = convertField(thriftObj, builder, fieldDesc, fieldId, tField); } } return builder.build(); } private void checkState(Class> thriftClass) { Preconditions.checkState(hasBuilder(thriftClass), "No message builder found for thrift class: " + thriftClass.getCanonicalName()); } private boolean hasBuilder(Class> thriftClass) { return messageBuilderMap.get(protoMessageType(thriftClass)) != null; } private Object sanitizeRawValue(Object value, Field tField) { Object returnValue = value; if (tField.isEnum()) { // TODO: proper enum handling returnValue = returnValue.toString(); } else if (tField.isBuffer()) { returnValue = ByteString.copyFrom((byte[]) returnValue); } if (returnValue instanceof Byte) { returnValue = new Integer((Byte) returnValue); } else if (returnValue instanceof Short) { returnValue = new Integer((Short) returnValue); } return returnValue; } /* * Determines if the field in question is a Set or List of a Struct type */ private boolean isStructContainer(Field tField) { return (tField.isList() && tField.getListElemField().isStruct()) || (tField.isSet() && tField.getSetElemField().isStruct()); } @SuppressWarnings("unchecked") private int convertField(TBase thriftObj, Message.Builder builder, TStructDescriptor fieldDesc, int fieldId, Field tField) { int tmpFieldId = fieldId; FieldDescriptor protoFieldDesc = builder.getDescriptorForType().findFieldByName( tField.getName()); if (protoFieldDesc == null) { // not finding a field might be ok if we're ignoring an unsupported types Type protoType = thriftTypeToProtoType(tField); if (protoType == null && (ignoreUnsupportedTypes || (!supportNestedObjects && hasNestedObject(tField)))) { return tmpFieldId; // no-op } throw new RuntimeException("Field " + tField.getName() + " not found in dynamic protobuf."); } Object fieldValue = fieldDesc.getFieldValue(tmpFieldId++, thriftObj); if (fieldValue == null) { return tmpFieldId; } try { // For non-Map containers that contain struct types, // we have to convert each struct into a Message. if (isStructContainer(tField)) { List convertedStructs = Lists.newLinkedList(); Iterable> structIterator = (Iterable>) fieldValue; for (TBase struct : structIterator) { convertedStructs.add(doConvert(struct)); } fieldValue = convertedStructs; } else if (tField.isMap()) { List convertedMapEntries = Lists.newLinkedList(); Map rawMap = (Map) fieldValue; for (Map.Entry entry : rawMap.entrySet()) { Message.Builder mapBuilder = mapEntryProtoBuilder(fieldDesc, tField); Message msg = buildMapEntryMessage(mapBuilder, tField, entry.getKey(), entry.getValue()); convertedMapEntries.add(msg); } fieldValue = convertedMapEntries; } else { // protobufs throws an exception if you try to set byte on an int32 field so we need to // convert it to an Integer before it gets set fieldValue = sanitizeRawValue(fieldValue, tField); } // Container types have to be added as repeated fields if (isContainer(tField)) { Iterable container = (Iterable) fieldValue; for (Object obj : container) { builder.addRepeatedField(protoFieldDesc, obj); } } else { builder.setField(protoFieldDesc, fieldValue); } } catch (IllegalArgumentException e) { LOG.error(String.format("Could not set protoField(index=%d, name=%s, type=%s) with " + "thriftField(index=%d, name=%s, type=%d, value=%s)", protoFieldDesc.getIndex(), protoFieldDesc.getName(), protoFieldDesc.getType(), tmpFieldId - 1, tField.getName(), tField.getType(), fieldValue), e); throw e; } return tmpFieldId; } /** * Builds a Message that contains the key value pair of a Map entry */ private Message buildMapEntryMessage(Message.Builder mapBuilder, Field field, Object mapKey, Object mapValue) { FieldDescriptor keyFieldDescriptor = mapBuilder.getDescriptorForType().findFieldByName(MAP_KEY_FIELD_NAME); FieldDescriptor valueFieldDescriptor = mapBuilder.getDescriptorForType().findFieldByName(MAP_VALUE_FIELD_NAME); boolean isKeyStruct = field.getMapKeyField().isStruct(); boolean isValueStruct = field.getMapValueField().isStruct(); Object convertedKey; if (isKeyStruct) { convertedKey = doConvert((TBase) mapKey); } else { convertedKey = sanitizeRawValue(mapKey, field.getMapKeyField()); } Object convertedValue; if (isValueStruct) { convertedValue = doConvert((TBase) mapValue); } else { convertedValue = sanitizeRawValue(mapValue, field.getMapValueField()); } mapBuilder.setField(keyFieldDescriptor, convertedKey); mapBuilder.setField(valueFieldDescriptor, convertedValue); return mapBuilder.build(); } // Checks a field to determine whether it's a List/Set/Map private boolean isContainer(Field field) { return field.isSet() || field.isList() || field.isMap(); } private Type thriftTypeToProtoType(Field tField) { byte thriftType = tField.getType(); switch (thriftType) { case TType.BOOL: return Type.TYPE_BOOL; case TType.BYTE: return Type.TYPE_INT32; case TType.DOUBLE: return Type.TYPE_DOUBLE; case TType.I16: return Type.TYPE_INT32; case TType.I32: return Type.TYPE_INT32; case TType.I64: return Type.TYPE_INT64; case TType.STRING: // Thrift thinks bytes and strings are interchangeable. Protocol buffers are not insane. return tField.isBuffer() ? Type.TYPE_BYTES : Type.TYPE_STRING; case TType.ENUM: // TODO: proper enum handling. For now, convert to strings. return Type.TYPE_STRING; case TType.STRUCT: if (supportNestedObjects) { return Type.TYPE_MESSAGE; } return null; case TType.MAP: return null; case TType.SET: return null; case TType.LIST: return null; default: if (ignoreUnsupportedTypes) { LOG.warn("Thrift type " + thriftType + " not supported for field " + tField.getName() + ". Ignoring"); return null; } throw new IllegalArgumentException("Can't map Thrift type " + thriftType + " to a Protobuf type for field: " + tField.getName()); } } // name the proto message type after the thrift class name. Dots are not permitted in protobuf // names private String protoMessageType(Class> thriftClass) { return thriftClass.getCanonicalName().replace(".", "_"); } /** * name the proto message used for Map types after the thrift class name of the enclosing * struct and the field name */ private String mapProtoMessageType(TStructDescriptor descriptor, Field field) { return String.format("%s_%s", protoMessageType(descriptor.getThriftClass()), field.getName()); } // Given the class name, finds the corresponding Descriptor and return the appropriate // FieldDescriptor public FieldDescriptor getFieldDescriptor(Class> thriftClass, String fieldName) { checkState(thriftClass); Descriptors.Descriptor descriptor = getBuilder(thriftClass).getDescriptorForType(); return descriptor.findFieldByName(fieldName); } // Picks off the FileDescriptor for this instance public Descriptors.FileDescriptor getFileDescriptor() { return fileDescriptor; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy