
com.twitter.elephantbird.pig.util.PigToThrift Maven / Gradle / Ivy
package com.twitter.elephantbird.pig.util;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import com.google.common.base.Charsets;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.thrift.TBase;
import org.apache.thrift.TEnum;
import org.apache.thrift.protocol.TType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.elephantbird.thrift.TStructDescriptor;
import com.twitter.elephantbird.thrift.TStructDescriptor.Field;
import com.twitter.elephantbird.util.ThriftUtils;
import com.twitter.elephantbird.util.TypeRef;
/**
* Converts a Pig Tuple into a Thrift struct. Tuple values should be ordered to match the natural
* order of Thrift field ordinal values. For example, say we define the following Thrift struct:
*
* <pre>{@code
* struct MyThriftType {
*   1: i32 f1
*   3: i32 f2
*   7: i32 f3
* }
* }</pre>
*
* Input Tuples are expected to contain field values in order {@code (f1, f2, f3)}. Tuples may
* contain fewer values than Thrift struct fields (e.g. only {@code (f1, f2)} in the prior example);
* Any remaining fields will be left unset.
*/
public class PigToThrift> {
// Logger for this class.
public static final Logger LOG = LoggerFactory.getLogger(PigToThrift.class);
// Descriptor of the target Thrift struct; assigned in the constructor from the Thrift class.
private TStructDescriptor structDesc;
public static > PigToThrift newInstance(Class tClass) {
return new PigToThrift(tClass);
}
public static > PigToThrift newInstance(TypeRef typeRef) {
return new PigToThrift(typeRef.getRawClass());
}
/**
 * Creates a converter for the given Thrift class by looking up its
 * {@link TStructDescriptor}.
 *
 * @param tClass the Thrift struct class that tuples will be converted to
 */
public PigToThrift(Class<T> tClass) {
  structDesc = TStructDescriptor.getInstance(tClass);
  // may be TODO : compare the schemas to catch errors early.
}
/**
 * Converts a Pig tuple into a Thrift object of type {@code T}, using the
 * struct descriptor this converter was constructed with.
 *
 * @param tuple the Pig tuple holding the field values
 * @return the Thrift object built from {@code tuple}
 */
@SuppressWarnings("unchecked")
public T getThriftObject(Tuple tuple) {
  TBase<?, ?> converted = toThrift(structDesc, tuple);
  // The descriptor was obtained from the Thrift class passed to the constructor.
  return (T) converted;
}
/**
* Construct a Thrift object from the tuple.
*/
@SuppressWarnings("unchecked")
private static TBase, ?> toThrift(TStructDescriptor tDesc, Tuple tuple) {
int size = tDesc.getFields().size();
int tupleSize = tuple.size();
@SuppressWarnings("rawtypes")
TBase tObj = newTInstance(tDesc.getThriftClass());
for(int i = 0; i)pigValue);
case TType.SET:
return toThriftSet(thriftField.getSetElemField(), (DataBag) pigValue);
case TType.LIST:
return toThriftList(thriftField.getListElemField(), (DataBag)pigValue);
case TType.ENUM:
return toThriftEnum(thriftField, (String) pigValue);
default:
// standard types : I32, I64, DOUBLE, etc.
return pigValue;
}
} catch (Exception e) {
// mostly a schema mismatch.
LOG.warn(String.format(
"Failed to set field '%s' of type '%s' with value '%s' of type '%s'",
thriftField.getName(), ThriftUtils.getFieldValueType(thriftField).getName(),
pigValue, pigValue.getClass().getName()), e);
}
return null;
}
/*
 * A TType.STRING field may be backed by either a String or a DataByteArray.
 * A String is returned as-is; a DataByteArray is returned as a ByteBuffer
 * wrapping a copy of its bytes; any other value (including null) yields null.
 */
private static Object toStringType(Object value) {
  if (value instanceof DataByteArray) {
    byte[] raw = ((DataByteArray)value).get();
    // mostly there is no need to copy.
    byte[] copy = Arrays.copyOf(raw, raw.length);
    return ByteBuffer.wrap(copy);
  }
  return (value instanceof String) ? value : null;
}
private static Map
© 2015 - 2025 Weber Informatics LLC | Privacy Policy