All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.elephantbird.util.ExecuteOnClusterTool Maven / Gradle / Ivy

There is a newer version: 4.17
Show newest version
package com.twitter.elephantbird.util;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;

import com.google.common.collect.Lists;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility for running a method as a single mapper on the hadoop cluster.
 * Useful for running a job that isn't really a map reduce job on the cluster.
 *
 * <p>{@link #run(String[])} stores the name of the concrete subclass in the job conf and
 * launches a map-only job whose input is one dummy split holding one dummy record. The
 * single mapper then creates a fresh instance of that subclass through its no-arg
 * constructor and invokes {@link #execute(Mapper.Context)} on it, so subclasses must be
 * instantiable that way and must not rely on state held by the submitting instance.
 *
 * @author Alex Levenson
 */
public abstract class ExecuteOnClusterTool extends Configured implements Tool {
  private static final Logger LOG = LoggerFactory.getLogger(ExecuteOnClusterTool.class);

  /** Job conf key under which the concrete tool class name is handed to the mapper. */
  private static final String IMPL_KEY = ExecuteOnClusterTool.class.getName() + ".implclass";

  /**
   * Override if you need to store any of the commandline args into the job conf.
   * The default implementation does nothing.
   *
   * @param args passed to this tool
   * @param conf job conf
   * @throws IOException if the arguments cannot be processed
   */
  protected void setup(String[] args, Configuration conf) throws IOException { }

  /**
   * This will be called once from a single mapper.
   * A heartbeat thread will be started (and stopped) for you, so this method
   * can be slow, the task will not fail due to timing out.
   *
   * <p>The instance this is called on is created inside the mapper; its
   * {@link #getConf()} returns the task's configuration.
   *
   * @param context the mapper's context
   * @throws IOException if the work fails
   */
  public abstract void execute(Mapper.Context context) throws IOException;

  /**
   * Configures and runs the single-mapper job, blocking until it finishes.
   *
   * @param args command line arguments, forwarded to {@link #setup(String[], Configuration)}
   * @return 0 if the job succeeded, -1 otherwise
   * @throws Exception if setup or job submission fails
   */
  @Override
  public int run(String[] args) throws Exception {
    setup(args, getConf());

    // Must be set before the Job is constructed from this configuration.
    getConf().set(IMPL_KEY, getClass().getName());
    Job job = new Job(getConf());
    job.setInputFormatClass(DummyInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(ExecuteOnClusterMapper.class);
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());
    // waitForCompletion submits the job itself when it has not been submitted yet,
    // so no separate submit() call is needed.
    return job.waitForCompletion(true) ? 0 : -1;
  }

  /**
   * Mapper that instantiates the tool class named under {@link #IMPL_KEY} and runs its
   * {@link ExecuteOnClusterTool#execute(Mapper.Context)} with a heartbeat thread active.
   */
  private static final class ExecuteOnClusterMapper
      extends Mapper<NullWritable, NullWritable, NullWritable, NullWritable> {

    @Override
    protected void map(NullWritable key, NullWritable value, Context context)
      throws IOException, InterruptedException {
      Configuration conf = HadoopCompat.getConfiguration(context);
      String implClassName = conf.get(IMPL_KEY);
      if (implClassName == null) {
        // Fail with a descriptive error instead of the NullPointerException
        // that Class.forName(null) would raise.
        throw new IOException("Job conf is missing required key " + IMPL_KEY);
      }

      ExecuteOnClusterTool tool;
      try {
        tool = (ExecuteOnClusterTool) Class.forName(implClassName).newInstance();
      } catch (InstantiationException e) {
        throw new IOException(e);
      } catch (IllegalAccessException e) {
        throw new IOException(e);
      } catch (ClassNotFoundException e) {
        throw new IOException(e);
      }
      // The tool is a Configured; without this its getConf() would return null
      // inside execute().
      tool.setConf(conf);

      TaskHeartbeatThread beat = new TaskHeartbeatThread(context) {
        @Override
        protected void progress() {
          LOG.info("Sending heartbeat");
        }
      };
      try {
        beat.start();
        tool.execute(context);
      } finally {
        // Always stop the heartbeat, even when execute() throws.
        beat.stop();
      }
    }
  }

  /** Zero-length split with no locations and no serialized state. */
  private static final class DummyInputSplit extends InputSplit implements Writable {

    @Override
    public long getLength() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public String[] getLocations() throws IOException, InterruptedException {
      return new String[0];
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
      // Nothing to serialize.
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
      // Nothing to deserialize.
    }
  }

  /** Input format that yields exactly one {@link DummyInputSplit}, hence one mapper. */
  private static final class DummyInputFormat extends InputFormat<NullWritable, NullWritable> {

    @Override
    public List<InputSplit> getSplits(JobContext jobContext)
      throws IOException, InterruptedException {
      // Explicit type argument: the element type must be InputSplit, not DummyInputSplit.
      return Lists.<InputSplit>newArrayList(new DummyInputSplit());
    }

    @Override
    public RecordReader<NullWritable, NullWritable> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
      return new DummyRecordReader();
    }
  }

  /** Record reader that produces a single (NullWritable, NullWritable) record. */
  private static final class DummyRecordReader extends RecordReader<NullWritable, NullWritable> {
    /** True until the single record has been handed out. */
    private boolean first = true;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
      InterruptedException {
      // No state to initialize.
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (first) {
        first = false;
        return true;
      }
      return false;
    }

    @Override
    public NullWritable getCurrentKey() throws IOException, InterruptedException {
      return NullWritable.get();
    }

    @Override
    public NullWritable getCurrentValue() throws IOException, InterruptedException {
      return NullWritable.get();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public void close() throws IOException {
      // Nothing to release.
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy