
// com.twitter.elephantbird.util.ExecuteOnClusterTool (Maven / Gradle / Ivy)
package com.twitter.elephantbird.util;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility for running a method as a single mapper on the hadoop cluster
* Useful for running a job that isn't really a map reduce job on the cluster
*
* @author Alex Levenson
*/
public abstract class ExecuteOnClusterTool extends Configured implements Tool {
  private static final Logger LOG = LoggerFactory.getLogger(ExecuteOnClusterTool.class);

  // Conf key under which the concrete subclass name is stored, so the mapper
  // running on the cluster can reflectively re-instantiate the tool.
  private static final String IMPL_KEY = ExecuteOnClusterTool.class.getName() + ".implclass";

  /**
   * Override if you need to store any of the command-line args into the job conf.
   *
   * @param args passed to this tool
   * @param conf job conf
   */
  protected void setup(String[] args, Configuration conf) throws IOException { }

  /**
   * This will be called once from a single mapper.
   * A heartbeat thread will be started (and stopped) for you, so this method
   * can be slow; the task will not fail due to timing out.
   *
   * @param context the mapper's context (raw {@code Mapper.Context} kept for
   *                backward compatibility with existing subclasses)
   * @throws IOException
   */
  public abstract void execute(Mapper.Context context) throws IOException;

  /**
   * Submits a map-only job with a single dummy split, which causes
   * {@link #execute(Mapper.Context)} to run exactly once on the cluster.
   *
   * @param args command-line args, forwarded to {@link #setup(String[], Configuration)}
   * @return 0 on job success, -1 on failure
   */
  @Override
  public int run(String[] args) throws Exception {
    setup(args, getConf());
    // Record the concrete subclass so ExecuteOnClusterMapper can instantiate it.
    getConf().set(IMPL_KEY, getClass().getName());
    // new Job(conf) is deprecated in Hadoop 2 but is retained here for
    // Hadoop 1.x compatibility (this codebase bridges versions via HadoopCompat).
    Job job = new Job(getConf());
    job.setInputFormatClass(DummyInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(ExecuteOnClusterMapper.class);
    job.setNumReduceTasks(0); // map-only job
    job.setJarByClass(getClass());
    job.submit();
    return job.waitForCompletion(true) ? 0 : -1;
  }

  /**
   * Mapper that reflectively instantiates the configured tool and invokes
   * {@link ExecuteOnClusterTool#execute(Mapper.Context)} once, keeping the
   * task alive with a heartbeat thread for the duration of the call.
   */
  private static final class ExecuteOnClusterMapper
      extends Mapper<NullWritable, NullWritable, NullWritable, NullWritable> {
    @Override
    protected void map(NullWritable key, NullWritable value, Context context)
        throws IOException, InterruptedException {
      ExecuteOnClusterTool tool;
      try {
        tool = (ExecuteOnClusterTool) Class.forName(
            HadoopCompat.getConfiguration(context).get(IMPL_KEY)).newInstance();
      } catch (InstantiationException e) {
        throw new IOException(e);
      } catch (IllegalAccessException e) {
        throw new IOException(e);
      } catch (ClassNotFoundException e) {
        throw new IOException(e);
      }

      // Report progress periodically so the framework does not kill the task
      // while execute() runs for an arbitrarily long time.
      TaskHeartbeatThread beat = new TaskHeartbeatThread(context) {
        @Override
        protected void progress() {
          LOG.info("Sending heartbeat");
        }
      };

      try {
        beat.start();
        tool.execute(context);
      } finally {
        beat.stop();
      }
    }
  }

  /** A zero-length, location-less split; exists only to trigger a single map task. */
  private static final class DummyInputSplit extends InputSplit implements Writable {
    @Override
    public long getLength() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public String[] getLocations() throws IOException, InterruptedException {
      return new String[0];
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
      // no state to serialize
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
      // no state to deserialize
    }
  }

  /** Input format that yields exactly one {@link DummyInputSplit}, so exactly one mapper runs. */
  private static final class DummyInputFormat extends InputFormat<NullWritable, NullWritable> {
    @Override
    public List<InputSplit> getSplits(JobContext jobContext)
        throws IOException, InterruptedException {
      return Lists.<InputSplit>newArrayList(new DummyInputSplit());
    }

    @Override
    public RecordReader<NullWritable, NullWritable> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
      return new DummyRecordReader();
    }
  }

  /** Record reader that yields exactly one (null, null) record, then ends. */
  private static final class DummyRecordReader extends RecordReader<NullWritable, NullWritable> {
    // true until the single record has been consumed
    private boolean first = true;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
        InterruptedException {
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (first) {
        first = false;
        return true;
      }
      return false;
    }

    @Override
    public NullWritable getCurrentKey() throws IOException, InterruptedException {
      return NullWritable.get();
    }

    @Override
    public NullWritable getCurrentValue() throws IOException, InterruptedException {
      return NullWritable.get();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public void close() throws IOException {
    }
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy