Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
*
* Input Tuples are expected to contain field values in order {@code (f1, f2, f3)}. Tuples may
* contain fewer values than Protobuf message fields (e.g. only {@code (f1, f2)} in the prior
* example); any remaining fields will be left unset.
*
* @author Vikram Oberoi
*/
public class PigToProtobuf {
// Class-wide logger; used below for unreadable tuple fields and for schema-to-descriptor mapping.
private static final Logger LOG = LoggerFactory.getLogger(PigToProtobuf.class);
/** Public no-arg constructor; the conversion methods declared below are all static. */
public PigToProtobuf() {}
/**
 * Turn a Tuple into a Message of the given Protobuf class.
 *
 * @param <M> the concrete Protobuf message type to produce
 * @param protoClass class of the message to build; a builder for it is obtained through
 *     {@code Protobufs.getMessageBuilder}
 * @param tuple the tuple whose values populate the message fields, by position
 * @return a message of type {@code M} representing the given tuple
 */
@SuppressWarnings("unchecked")
public static <M extends Message> M tupleToMessage(Class<M> protoClass, Tuple tuple) {
  Builder builder = Protobufs.getMessageBuilder(protoClass);
  // The builder was looked up from protoClass, so the built message is expected to be an M;
  // the compiler cannot prove that, hence the unchecked cast.
  return (M) tupleToMessage(builder, tuple);
}
/**
 * Convert a Tuple into a Message using the supplied builder. The builder's own field
 * descriptors determine which message field each tuple position is written to.
 *
 * @param builder a builder for the Message type the tuple will be converted to
 * @param tuple the tuple
 * @return a message representing the given tuple
 */
public static Message tupleToMessage(Builder builder, Tuple tuple) {
  List<FieldDescriptor> messageFields = builder.getDescriptorForType().getFields();
  return tupleToMessage(builder, messageFields, tuple);
}
/**
 * Turn a Tuple into a Message, assigning tuple values to message fields by position.
 *
 * <p>The value at tuple index {@code i} is written to {@code fieldDescriptors.get(i)}.
 * Conversion stops at whichever of the two runs out first. A {@code null} tuple value, or a
 * value that cannot be read from the tuple, leaves the corresponding field unset.
 *
 * @param builder a builder for the Message type the tuple will be converted to
 * @param fieldDescriptors should be same as builder.getDescriptorForType().getFields().
 *     Avoids overhead of getFields() which creates an array each time.
 * @param tuple the tuple; when {@code null} the builder is built without setting any field
 * @return a message representing the given tuple
 * @throws RuntimeException if a tuple value cannot be converted or set on its field; the
 *     original failure is attached as the cause
 */
public static Message tupleToMessage(Builder builder, List<FieldDescriptor> fieldDescriptors,
    Tuple tuple) {
  if (tuple == null) {
    return builder.build();
  }

  for (int i = 0; i < fieldDescriptors.size() && i < tuple.size(); i++) {
    FieldDescriptor fieldDescriptor = fieldDescriptors.get(i);

    Object tupleField;
    try {
      tupleField = tuple.get(i);
    } catch (ExecException e) {
      // The value was never read, so report the index (not the still-unset value) and keep
      // the cause in the log instead of dropping it.
      LOG.warn("Could not read tuple field at index " + i
          + " for field with descriptor " + fieldDescriptor, e);
      continue;
    }

    if (tupleField == null) {
      // Null values leave the protobuf field unset.
      continue;
    }

    try {
      if (fieldDescriptor.isRepeated()) {
        // Repeated fields are set with Lists containing objects of the fields' Java type.
        builder.setField(fieldDescriptor,
            dataBagToRepeatedField(builder, fieldDescriptor, (DataBag) tupleField));
      } else if (fieldDescriptor.getType() == FieldDescriptor.Type.MESSAGE) {
        // Nested messages are represented as nested tuples and converted recursively.
        Builder nestedMessageBuilder = builder.newBuilderForField(fieldDescriptor);
        builder.setField(fieldDescriptor,
            tupleToMessage(nestedMessageBuilder, (Tuple) tupleField));
      } else {
        builder.setField(fieldDescriptor,
            tupleFieldToSingleField(fieldDescriptor, tupleField));
      }
    } catch (Exception e) {
      // Truncate long values so the error message stays readable.
      final int maxValueLength = 100;
      String value = String.valueOf(tupleField);
      if (maxValueLength < value.length()) {
        value = value.substring(0, maxValueLength - 3) + "...";
      }
      throw new RuntimeException(String.format(
          "Failed to set field '%s' using tuple value '%s' of type '%s' at index %d",
          fieldDescriptor.getName(), value, tupleField.getClass().getName(), i), e);
    }
  }

  return builder.build();
}
/**
 * For a given ResourceSchema, generate a protobufs Descriptor with analogous field names
 * and types. Delegates to the two-argument overload, passing {@code null} for the extra
 * fields so that only the schema's own fields are included.
 *
 * @param schema Pig schema.
 * @return Protobufs Descriptor
 * @throws Descriptors.DescriptorValidationException propagated from the two-argument overload
 */
public static Descriptor schemaToProtoDescriptor(ResourceSchema schema)
throws DescriptorValidationException {
return schemaToProtoDescriptor(schema, null);
}
/**
* For a given ResourceSchema, generate a protobufs Descriptor with analogous field names
* and types.
*
* @param schema Pig schema.
* @param extraFields optionally pass a List of extra fields (Pairs of name:type) to be included.
* @return Protobufs Descriptor
* @throws Descriptors.DescriptorValidationException
*/
public static Descriptor schemaToProtoDescriptor(ResourceSchema schema, List> extraFields)
throws DescriptorValidationException {
// init protobufs
DescriptorProto.Builder desBuilder = DescriptorProto.newBuilder();
int count = 0;
for (ResourceFieldSchema fieldSchema : schema.getFields()) {
// Pig types
int position = ++count;
String fieldName = fieldSchema.getName();
byte dataTypeId = fieldSchema.getType();
// determine and add protobuf types
Type protoType = pigTypeToProtoType(dataTypeId);
LOG.info("Mapping Pig field " + fieldName + " of type " + dataTypeId + " to protobuf type: " + protoType);
addField(desBuilder, fieldName, position, protoType);
}
if (count == 0) {
throw new IllegalArgumentException("ResourceSchema does not have any fields");
}
// If extra fields are needed, let's add them
if (extraFields != null) {
for (Pair extraField : extraFields) {
addField(desBuilder, extraField.first, ++count, extraField.second);
}
}
desBuilder.setName("PigToProtobufDynamicBuilder");
return Protobufs.makeMessageDescriptor(desBuilder.build());
}
/**
* Converts a DataBag into a List of objects with the type in the given FieldDescriptor. DataBags
* don't map cleanly to repeated protobuf types, so each Tuple has to be unwrapped (by taking the
* first element if the type is primitive or by converting the Tuple to a Message if the type is
* MESSAGE), and the contents have to be appended to a List.
* @param containingMessageBuilder a Message builder for the Message that contains this repeated field
* @param fieldDescriptor a FieldDescriptor for this repeated field
* @param bag the DataBag being serialized
* @return a protobuf-friendly List of fieldDescriptor-type objects
*/
private static List