All downloads are free. The search and download functionality uses the official Maven repository.

com.twitter.elephantbird.pig.load.ThriftPigLoader Maven / Gradle / Ivy

There is a newer version: 4.17
Show newest version
package com.twitter.elephantbird.pig.load;

import java.io.IOException;

import com.twitter.elephantbird.util.HadoopCompat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;

import org.apache.pig.ResourceSchema;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.thrift.TBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.elephantbird.mapreduce.input.LzoRecordReader;
import com.twitter.elephantbird.mapreduce.input.MultiInputFormat;
import com.twitter.elephantbird.mapreduce.io.BinaryWritable;
import com.twitter.elephantbird.pig.util.PigUtil;
import com.twitter.elephantbird.pig.util.ProjectedThriftTupleFactory;
import com.twitter.elephantbird.pig.util.ThriftToPig;

import com.twitter.elephantbird.util.TypeRef;

public class ThriftPigLoader> extends LzoBaseLoadFunc {
  static final Logger LOG = LoggerFactory.getLogger(ThriftPigLoader.class);

  protected final TypeRef typeRef;
  private ProjectedThriftTupleFactory tupleTemplate;

  public ThriftPigLoader(String thriftClassName) {
    typeRef = PigUtil.getThriftTypeRef(thriftClassName);
  }

  /**
   * Return every non-null line as a single-element tuple to Pig.
   *

* A small fraction of bad records are tolerated. See {@link LzoRecordReader} * for more information on error handling. */ @Override public Tuple getNext() throws IOException { if (tupleTemplate == null) { tupleTemplate = new ProjectedThriftTupleFactory(typeRef, requiredFieldList); } M value = getNextBinaryValue(typeRef); return value != null ? tupleTemplate.newTuple(value) : null; } @Override public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) throws FrontendException { return pushProjectionHelper(requiredFieldList); } @Override public void setLocation(String location, Job job) throws IOException { super.setLocation(location, job); if (job != null) { ThriftToPig.setConversionProperties(HadoopCompat.getConfiguration(job)); } } @Override public ResourceSchema getSchema(String filename, Job job) throws IOException { // getSchema usually should only be called after setLocation, but it is not always enforced. if (job != null) { ThriftToPig.setConversionProperties(HadoopCompat.getConfiguration(job)); } return new ResourceSchema(ThriftToPig.toSchema(typeRef.getRawClass())); } @Override public InputFormat> getInputFormat() throws IOException { return new MultiInputFormat(typeRef); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy