All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.elephantbird.cascading2.scheme.LzoBinaryScheme Maven / Gradle / Ivy

There is a newer version: 4.17
Show newest version
package com.twitter.elephantbird.cascading2.scheme;

import java.io.IOException;

import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.elephantbird.mapreduce.io.BinaryWritable;

import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.scheme.SinkCall;
import cascading.scheme.SourceCall;
import cascading.tap.Tap;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;

/**
 * Scheme for lzo binary encoded files. Handles both block and base 64. Can be used for Protobuf and Thrift.
 *
 * @author Argyris Zymnis
 */
abstract public class LzoBinaryScheme> extends
  Scheme {

  private static final Logger LOG = LoggerFactory.getLogger(LzoBinaryScheme.class);
  private static final long serialVersionUID = -5011096855302946106L;

  @Override
  public void sink(FlowProcess flowProcess, SinkCall sinkCall)
    throws IOException {
    OutputCollector collector = sinkCall.getOutput();
    TupleEntry entry = sinkCall.getOutgoingEntry();
    T writable = sinkCall.getContext();
    writable.set((M) entry.getTuple().getObject(0));
    collector.collect(null, writable);
  }

  @Override
  public void sinkPrepare( FlowProcess fp, SinkCall sinkCall ) {
    sinkCall.setContext(prepareBinaryWritable());
  }

  protected abstract T prepareBinaryWritable();

  @Override
  public boolean source(FlowProcess flowProcess,
    SourceCall sourceCall) throws IOException {

    Object[] context = sourceCall.getContext();
    while(sourceCall.getInput().next(context[0], context[1])) {
      Object out = ((T) context[1]).get();
      if(out != null) {
        sourceCall.getIncomingEntry().setTuple(new Tuple(out));
        return true;
      }
      LOG.warn("failed to decode record");
    }
    return false;
  }

  @Override
  public void sourceCleanup(FlowProcess flowProcess,
    SourceCall sourceCall) {
    sourceCall.setContext(null);
  }

  /**
  * This sets up the state between succesive calls to source
  */
  @Override
  public void sourcePrepare(FlowProcess flowProcess,
    SourceCall sourceCall) {
    //Hadoop sets a key value pair:
    sourceCall.setContext(new Object[2]);
    sourceCall.getContext()[0] = sourceCall.getInput().createKey();
    sourceCall.getContext()[1] = sourceCall.getInput().createValue();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy