
com.twitter.elephantbird.examples.DeprecatedWrapperWordCount Maven / Gradle / Ivy
package com.twitter.elephantbird.examples;
import com.twitter.elephantbird.mapred.input.DeprecatedInputFormatWrapper;
import com.twitter.elephantbird.mapred.output.DeprecatedOutputFormatWrapper;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
/**
* Implements Word Count using {@link DeprecatedInputFormatWrapper}
* and {@link DeprecatedOutputFormatWrapper}.
*/
public class DeprecatedWrapperWordCount {
/** Private no-op constructor: prevents instantiation from outside this class. */
private DeprecatedWrapperWordCount() {}
/**
 * Old-API ({@code org.apache.hadoop.mapred}) mapper for word count.
 *
 * <p>Splits every input value into tokens using {@link StringTokenizer}'s default
 * delimiters and emits one {@code (token, 1)} pair per token.
 */
public static class WordCountMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, LongWritable> {
  // Output objects are allocated once and reused for every emitted pair.
  private final LongWritable one = new LongWritable(1L);
  private final Text word = new Text();

  /**
   * Tokenizes {@code value} and emits {@code (token, 1)} for each token found.
   *
   * @param key input key; not used by this mapper
   * @param value the text to tokenize
   * @param collector receives one {@code (word, 1)} pair per token
   * @param reporter not used by this mapper
   * @throws IOException if the collector fails to accept a pair
   */
  @Override
  public void map(LongWritable key, Text value,
      OutputCollector<Text, LongWritable> collector,
      Reporter reporter) throws IOException {
    String line = value.toString();
    StringTokenizer tokenizer = new StringTokenizer(line);
    while (tokenizer.hasMoreTokens()) {
      // The same Text instance is overwritten for each token before it is collected.
      word.set(tokenizer.nextToken());
      collector.collect(word, one);
    }
  }
}
/**
 * Old-API ({@code org.apache.hadoop.mapred}) reducer for word count.
 *
 * <p>Sums all {@link LongWritable} values received for a key and emits a single
 * {@code (key, total)} pair.
 */
public static class WordCountReducer extends MapReduceBase
    implements Reducer<Text, LongWritable, Text, LongWritable> {
  // Output holder that is overwritten with the total on every reduce() call.
  LongWritable longWritable = new LongWritable(0);

  /**
   * Adds up every value for {@code key} and emits the sum.
   *
   * @param key the key whose values are being summed
   * @param values the values associated with {@code key}
   * @param collector receives the single {@code (key, total)} pair
   * @param reporter not used by this reducer
   * @throws IOException if the collector fails to accept the pair
   */
  @Override
  public void reduce(Text key, Iterator<LongWritable> values,
      OutputCollector<Text, LongWritable> collector,
      Reporter reporter) throws IOException {
    long total = 0;
    while (values.hasNext()) {
      total += values.next().get();
    }
    longWritable.set(total);
    collector.collect(key, longWritable);
  }
}
public static void main(String[] args) throws Exception {
System.out.println("CLASSPATH: " + System.getProperty("CLASSPATH"));
GenericOptionsParser options = new GenericOptionsParser(args);
args = options.getRemainingArgs();
if (args.length != 2) {
System.err.println("Usage: hadoop jar path/to/this.jar " +
DeprecatedWrapperWordCount.class + "
© 2015 - 2025 Weber Informatics LLC | Privacy Policy