com.twitter.elephantbird.pig.load.ThriftPigLoader Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of elephant-bird-pig Show documentation
Pig utilities.
There is a newer version: 4.17
package com.twitter.elephantbird.pig.load;

import java.io.IOException;

import com.twitter.elephantbird.util.HadoopCompat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;

import org.apache.pig.ResourceSchema;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.thrift.TBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.elephantbird.mapreduce.input.LzoRecordReader;
import com.twitter.elephantbird.mapreduce.input.MultiInputFormat;
import com.twitter.elephantbird.mapreduce.io.BinaryWritable;
import com.twitter.elephantbird.pig.util.PigUtil;
import com.twitter.elephantbird.pig.util.ProjectedThriftTupleFactory;
import com.twitter.elephantbird.pig.util.ThriftToPig;

import com.twitter.elephantbird.util.TypeRef;

public class ThriftPigLoader> extends LzoBaseLoadFunc {
  static final Logger LOG = LoggerFactory.getLogger(ThriftPigLoader.class);

  protected final TypeRef typeRef;
  private ProjectedThriftTupleFactory tupleTemplate;

  public ThriftPigLoader(String thriftClassName) {
    typeRef = PigUtil.getThriftTypeRef(thriftClassName);
  }

  /**
   * Return every non-null line as a single-element tuple to Pig.
   *
   * A small fraction of bad records are tolerated. See {@link LzoRecordReader}
   * for more information on error handling.
   */
  @Override
  public Tuple getNext() throws IOException {
    if (tupleTemplate == null) {
      tupleTemplate = new ProjectedThriftTupleFactory(typeRef, requiredFieldList);
    }

    M value = getNextBinaryValue(typeRef);

    return value != null ?
      tupleTemplate.newTuple(value) : null;
  }

  @Override
  public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
                                              throws FrontendException {
    return pushProjectionHelper(requiredFieldList);
  }

  @Override
  public void setLocation(String location, Job job) throws IOException {
    super.setLocation(location, job);
    if (job != null) {
      ThriftToPig.setConversionProperties(HadoopCompat.getConfiguration(job));
    }
  }

  @Override
  public ResourceSchema getSchema(String filename, Job job) throws IOException {
    // getSchema usually should only be called after setLocation, but it is not always enforced.
    if (job != null) {
      ThriftToPig.setConversionProperties(HadoopCompat.getConfiguration(job));
    }
    return new ResourceSchema(ThriftToPig.toSchema(typeRef.getRawClass()));
  }

  @Override
  public InputFormat> getInputFormat() throws IOException {
    return new MultiInputFormat(typeRef);
  }
}