All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.elephantbird.pig.util.ProjectedThriftTupleFactory Maven / Gradle / Ivy

There is a newer version: 4.17
Show newest version
package com.twitter.elephantbird.pig.util;

import java.util.List;

import org.apache.pig.LoadPushDown.RequiredField;
import org.apache.pig.LoadPushDown.RequiredFieldList;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.thrift.TBase;

import com.google.common.base.Preconditions;
import com.twitter.elephantbird.thrift.TStructDescriptor;
import com.twitter.elephantbird.thrift.TStructDescriptor.Field;
import com.twitter.elephantbird.util.TypeRef;

/**
 * A tuple factory to create thrift tuples where
 * only a subset of fields are required.
 */
public class ProjectedThriftTupleFactory> {

  private static TupleFactory tf  = TupleFactory.getInstance();

  private int[] requiredFields;
  private final TStructDescriptor tStructDesc;

  public ProjectedThriftTupleFactory(TypeRef typeRef, RequiredFieldList requiredFieldList) {
    tStructDesc = TStructDescriptor.getInstance(typeRef.getRawClass());
    int numFields = tStructDesc.getFields().size();

    if (requiredFieldList != null) {
      List tupleFields = requiredFieldList.getFields();
      requiredFields = new int[tupleFields.size()];

      // should we handle nested projections? not yet.

      int i = 0;
      for(RequiredField f : tupleFields) {
        Preconditions.checkState(f.getIndex() < numFields,
                                 "Projected index is out of range");
        requiredFields[i++] = f.getIndex();
      }
    } else { // all the fields are required
      requiredFields = new int[numFields];
      for (int i=0; i < numFields; i++) {
        requiredFields[i] = i;
      }
    }
  }

  public Tuple newTuple(T tObject) throws ExecException {
    int size = requiredFields.length;
    List tFields = tStructDesc.getFields();

    Tuple tuple = tf.newTuple(size);

    for(int i=0; i < size; i++) {
      int idx = requiredFields[i];
      Object value = tStructDesc.getFieldValue(idx, tObject);
      tuple.set(i, ThriftToPig.toPigObject(tFields.get(idx), value, true));
    }

    return tuple;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy