/*
 * Web page header captured with this listing (not part of the Java source):
 *
 * All downloads are free. Search and download functionality uses the official Maven repository.
 *
 * com.twitter.elephantbird.pig.util.PigToThrift Maven / Gradle / Ivy
 *
 * There is a newer version: 4.17
 * Show newest version
 */
package com.twitter.elephantbird.pig.util;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.google.common.base.Charsets;

import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.thrift.TBase;
import org.apache.thrift.TEnum;
import org.apache.thrift.protocol.TType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.elephantbird.thrift.TStructDescriptor;
import com.twitter.elephantbird.thrift.TStructDescriptor.Field;
import com.twitter.elephantbird.util.ThriftUtils;
import com.twitter.elephantbird.util.TypeRef;

/**
 * Converts a Pig Tuple into a Thrift struct. Tuple values should be ordered to match the natural
 * order of Thrift field ordinal values. For example, say we define the following Thrift struct:
 *
 * 
 * struct MyThriftType {
 *   1: i32 f1
 *   3: i32 f2
 *   7: i32 f3
 * }
 * 
* * Input Tuples are expected to contain field values in order {@code (f1, f2, f3)}. Tuples may * contain fewer values than Thrift struct fields (e.g. only {@code (f1, f2)} in the prior example); * Any remaining fields will be left unset. */ public class PigToThrift> { public static final Logger LOG = LoggerFactory.getLogger(PigToThrift.class); private TStructDescriptor structDesc; public static > PigToThrift newInstance(Class tClass) { return new PigToThrift(tClass); } public static > PigToThrift newInstance(TypeRef typeRef) { return new PigToThrift(typeRef.getRawClass()); } public PigToThrift(Class tClass) { structDesc = TStructDescriptor.getInstance(tClass); // may be TODO : compare the schemas to catch errors early. } @SuppressWarnings("unchecked") public T getThriftObject(Tuple tuple) { return (T)toThrift(structDesc, tuple); } /** * Construct a Thrift object from the tuple. */ @SuppressWarnings("unchecked") private static TBase toThrift(TStructDescriptor tDesc, Tuple tuple) { int size = tDesc.getFields().size(); int tupleSize = tuple.size(); @SuppressWarnings("rawtypes") TBase tObj = newTInstance(tDesc.getThriftClass()); for(int i = 0; i)pigValue); case TType.SET: return toThriftSet(thriftField.getSetElemField(), (DataBag) pigValue); case TType.LIST: return toThriftList(thriftField.getListElemField(), (DataBag)pigValue); case TType.ENUM: return toThriftEnum(thriftField, (String) pigValue); default: // standard types : I32, I64, DOUBLE, etc. return pigValue; } } catch (Exception e) { // mostly a schema mismatch. 
LOG.warn(String.format( "Failed to set field '%s' of type '%s' with value '%s' of type '%s'", thriftField.getName(), ThriftUtils.getFieldValueType(thriftField).getName(), pigValue, pigValue.getClass().getName()), e); } return null; } /* TType.STRING could be either a DataByteArray or a String */ private static Object toStringType(Object value) { if (value instanceof String) { return value; } else if (value instanceof DataByteArray) { byte[] buf = ((DataByteArray)value).get(); // mostly there is no need to copy. return ByteBuffer.wrap(Arrays.copyOf(buf, buf.length)); } return null; } private static Map toThriftMap(Field field, Map map) { Field keyField = field.getMapKeyField(); Field valueField = field.getMapValueField(); HashMap out = new HashMap(map.size()); for(Entry e : map.entrySet()) { String s = e.getKey(); Object key; switch (keyField.getType()) { case TType.STRING: key = s; break; case TType.BOOL: key = Boolean.parseBoolean(s); break; case TType.BYTE: key = Byte.parseByte(s); break; case TType.I16: key = Short.parseShort(s); break; case TType.I32: key = Integer.parseInt(s); break; case TType.I64: key = Long.parseLong(s); break; case TType.DOUBLE: key = Double.parseDouble(s); break; case TType.ENUM: key = toThriftEnum(keyField, s); break; default: // LIST, MAP, SET, STOP, STRUCT, VOID types are unsupported throw new RuntimeException(String.format( "Conversion from string map key to type '%s' is unsupported", ThriftUtils.getFieldValueType(keyField).getName())); } if (keyField.isBuffer()) { key = ByteBuffer.wrap(s.getBytes(Charsets.UTF_8)); } out.put(key, toThriftValue(valueField, e.getValue())); } return out; } private static Set toThriftSet(Field elemField, DataBag bag) { Set set = new HashSet((int)bag.size()); fillThriftCollection(set, elemField, bag); return set; } private static List toThriftList(Field elemField, DataBag bag) { List list = new ArrayList((int)bag.size()); fillThriftCollection(list, elemField, bag); return list; } private static TEnum 
toThriftEnum(Field elemField, String name) { TEnum out = elemField.getEnumValueOf(name); if (out == null) { throw new IllegalArgumentException( String.format("Failed to convert string '%s'" + " to enum value of type '%s'", name, ThriftUtils.getFieldValueType(elemField).getName())); } return out; } private static void fillThriftCollection(Collection tColl, Field elemField, DataBag bag) { for (Tuple tuple : bag) { if (!elemField.isStruct()) { // this tuple is a just wrapper for another object. try { tColl.add(toThriftValue(elemField, tuple.get(0))); } catch (ExecException e) { throw new RuntimeException(e); } } else { // tuple for a struct. tColl.add(toThriftValue(elemField, tuple)); } } } /** return an instance assuming tClass is a Thrift class */ private static TBase newTInstance(Class tClass) { try { return (TBase) tClass.newInstance(); } catch (Exception e) { // not expected. throw new RuntimeException(e); } } }