
// com.twitter.elephantbird.cascading3.scheme.LzoProtobufScheme (listing header: Maven / Gradle / Ivy)
package com.twitter.elephantbird.cascading3.scheme;
import com.twitter.elephantbird.mapreduce.input.combine.DelegateCombineFileInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapreduce.OutputFormat;
import com.google.protobuf.Message;
import com.twitter.elephantbird.mapred.output.DeprecatedOutputFormatWrapper;
import com.twitter.elephantbird.mapreduce.input.MultiInputFormat;
import com.twitter.elephantbird.mapreduce.io.ProtobufWritable;
import com.twitter.elephantbird.mapreduce.output.LzoProtobufBlockOutputFormat;
import com.twitter.elephantbird.util.Protobufs;
import com.twitter.elephantbird.util.TypeRef;
import cascading.flow.FlowProcess;
import cascading.tap.Tap;
/**
* Scheme for Protobuf lzo compressed files.
*
* @author Avi Bryant, Ning Liang
*/
public class LzoProtobufScheme extends
LzoBinaryScheme> {
private static final long serialVersionUID = -5011096855302946105L;
private Class protoClass;
public LzoProtobufScheme(Class protoClass) {
this.protoClass = protoClass;
}
protected ProtobufWritable prepareBinaryWritable() {
TypeRef typeRef = (TypeRef) Protobufs.getTypeRef(protoClass.getName());
return new ProtobufWritable(typeRef);
}
@Override
public void sinkConfInit(FlowProcess extends Configuration> hfp, Tap tap, Configuration conf) {
LzoProtobufBlockOutputFormat.setClassConf(protoClass, conf);
DeprecatedOutputFormatWrapper.setOutputFormat(LzoProtobufBlockOutputFormat.class, conf);
}
@Override
public void sourceConfInit(FlowProcess extends Configuration> hfp, Tap tap, Configuration conf) {
MultiInputFormat.setClassConf(protoClass, conf);
DelegateCombineFileInputFormat.setDelegateInputFormat(conf, MultiInputFormat.class);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy