com.twitter.elephantbird.pig.load.LzoRawBytesLoader Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of elephant-bird-pig Show documentation

Pig utilities.

There is a newer version: 4.17

package com.twitter.elephantbird.pig.load;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.ResourceSchema;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.util.Utils;

import com.twitter.elephantbird.mapreduce.input.MultiInputFormat;
import com.twitter.elephantbird.mapreduce.io.BinaryWritable;
import com.twitter.elephantbird.util.TypeRef;

/**
 * Loads raw bytes.
 */
public class LzoRawBytesLoader extends LzoBaseLoadFunc {

  private static final TupleFactory tupleFactory = TupleFactory.getInstance();

  private TypeRef typeRef = new TypeRef(byte[].class){};

  @Override
  public InputFormat> getInputFormat() throws IOException {
    return new MultiInputFormat(typeRef);
  }

  @Override
  public Tuple getNext() throws IOException {
    byte[] bytes = getNextBinaryValue(typeRef);
    return bytes != null ?
        tupleFactory.newTuple(new DataByteArray(bytes)) : null;
  }

  @Override
  public ResourceSchema getSchema(String filename, Job job) throws IOException {
    return new ResourceSchema(Utils.getSchemaFromString("bytes : bytearray"));
  }
}