org.apache.hadoop.examples.SleepJob Maven / Gradle / Ivy
Hadoop is the distributed computing framework of Apache; hadoop-core contains
the filesystem, job tracker and map/reduce modules
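A typical Maven dependency declaration for this artifact (the version shown is
illustrative; substitute the release you need):

<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-core</artifactId>
  <version>1.2.1</version>
</dependency>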
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.examples;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Dummy class for testing the MR framework. Sleeps for a defined period
* of time in mapper and reducer. Generates fake input for map / reduce
* jobs. Note that the generated number of input pairs is on the order
* of numMappers * mapSleepTime / 100, so the job uses
* some disk space.
*/
public class SleepJob extends Configured implements Tool,
Mapper<IntWritable, IntWritable, IntWritable, NullWritable>,
Reducer<IntWritable, NullWritable, NullWritable, NullWritable>,
Partitioner<IntWritable, NullWritable> {
private static final String SLOW_RATIO = "sleep.job.slow.ratio";
private static final String SLOW_MAPS = "sleep.job.slow.maps";
private static final String SLOW_REDUCES = "sleep.job.slow.reduces";
private static final String HOSTS_FOR_LOCALITY = "sleep.job.hosts";
private static final String HOSTS_PER_SPLIT = "sleep.job.hosts.per.split";
private long mapSleepDuration = 100;
private long reduceSleepDuration = 100;
private int mapSleepCount = 1;
private int reduceSleepCount = 1;
private int count = 0;
private int countersPerTask = 0;
private RunningJob rJob = null;
private static Random generator = new Random();
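// Partition records by key so the generated pairs are spread evenly
// across the configured number of reducers.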
public int getPartition(IntWritable k, NullWritable v, int numPartitions) {
return k.get() % numPartitions;
}
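/**
 * A split that carries no data, only optional host names, so the framework
 * can schedule "local" map tasks without any real input existing.
 */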
public static class EmptySplit implements InputSplit {
private List<String> hosts = new ArrayList<String>();
public EmptySplit() { }
public EmptySplit(String host) {
this.hosts.add(host);
}
public EmptySplit(String [] hosts) {
for (String h : hosts) {
this.hosts.add(h);
}
}
// No need to write out the hosts as RawSplit handles that for us
public void write(DataOutput out) throws IOException { }
public void readFields(DataInput in) throws IOException { }
public long getLength() { return 0L; }
public String[] getLocations() {
if (hosts.size() == 0) {
return new String [0];
}
// Hadoop expects a period at the end of the hostnames
List<String> modifiedHosts = new ArrayList<String>();
for (String host : this.hosts) {
modifiedHosts.add(host + ".");
}
return modifiedHosts.toArray(new String[0]);
}
}
public static class SleepInputFormat extends Configured
implements InputFormat<IntWritable, IntWritable> {
public InputSplit[] getSplits(JobConf conf, int numSplits) {
InputSplit[] ret = new InputSplit[numSplits];
String hostsStr = conf.get(HOSTS_FOR_LOCALITY, "");
int hostsPerSplit = conf.getInt(HOSTS_PER_SPLIT, 0);
// If hostsStr is empty, hosts will be [""]
String [] hosts = hostsStr.split(",");
// Distribute the hosts randomly to the splits
for (int i = 0; i < numSplits; ++i) {
Set<String> hostsForSplit = new HashSet<String>();
for (int j = 0; j < hostsPerSplit; j++) {
int index = generator.nextInt(hosts.length);
hostsForSplit.add(hosts[index]);
}
ret[i] = new EmptySplit(hostsForSplit.toArray(new String[0]));
}
return ret;
}
public RecordReader<IntWritable, IntWritable> getRecordReader(
InputSplit ignored, JobConf conf, Reporter reporter)
throws IOException {
final int count = conf.getInt("sleep.job.map.sleep.count", 1);
if (count < 0) throw new IOException("Invalid map count: " + count);
final int redcount = conf.getInt("sleep.job.reduce.sleep.count", 1);
if (redcount < 0)
throw new IOException("Invalid reduce count: " + redcount);
final int emitPerMapTask = (redcount * conf.getNumReduceTasks());
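// Each map emits redcount * numReduceTasks pairs in total so that, after
// partitioning, every reducer receives exactly redcount keys.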
return new RecordReader<IntWritable, IntWritable>() {
private int records = 0;
private int emitCount = 0;
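// Spread emitPerMapTask emissions evenly over 'count' records: every call
// emits emitPerMapTask / count pairs, and the first (emitPerMapTask % count)
// calls emit one extra, so the total is exactly emitPerMapTask.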
public boolean next(IntWritable key, IntWritable value)
throws IOException {
key.set(emitCount);
int emit = emitPerMapTask / count;
if ((emitPerMapTask) % count > records) {
++emit;
}
emitCount += emit;
value.set(emit);
return records++ < count;
}
public IntWritable createKey() { return new IntWritable(); }
public IntWritable createValue() { return new IntWritable(); }
public long getPos() throws IOException { return records; }
public void close() throws IOException { }
public float getProgress() throws IOException {
return records / ((float)count);
}
};
}
}
private List<String> counterNames = null;
private List<String> getCounterNames() {
if (counterNames != null) {
return counterNames;
}
counterNames = new ArrayList<String>();
// The counter-name literal was lost in extraction; "Counter_" + i is a
// plausible reconstruction (any name that is distinct per index works here).
for (int i = 0; i < countersPerTask; i++) {
counterNames.add("Counter_" + i);
}
return counterNames;
}
public void map(IntWritable key, IntWritable value,
OutputCollector<IntWritable, NullWritable> output, Reporter reporter)
throws IOException {
List<String> counterNames = getCounterNames();
for (String counterName : counterNames) {
reporter.incrCounter("Counters from Mappers", counterName, 1);
}
//it is expected that every map processes mapSleepCount number of records.
try {
reporter.setStatus("Sleeping... (" +
(mapSleepDuration * (mapSleepCount - count)) + ") ms left");
Thread.sleep(mapSleepDuration);
}
catch (InterruptedException ex) {
throw (IOException)new IOException(
"Interrupted while sleeping").initCause(ex);
}
++count;
// output reduceSleepCount * numReduce number of random values, so that
// each reducer will get reduceSleepCount number of keys.
int k = key.get();
for (int i = 0; i < value.get(); ++i) {
output.collect(new IntWritable(k + i), NullWritable.get());
}
}
public void reduce(IntWritable key, Iterator<NullWritable> values,
OutputCollector<NullWritable, NullWritable> output, Reporter reporter)
throws IOException {
List counterNames = getCounterNames();
for (String counterName : counterNames) {
reporter.incrCounter("Counters from Reducers", counterName, 1);
}
try {
reporter.setStatus("Sleeping... (" +
(reduceSleepDuration * (reduceSleepCount - count)) + ") ms left");
Thread.sleep(reduceSleepDuration);
}
catch (InterruptedException ex) {
throw (IOException)new IOException(
"Interrupted while sleeping").initCause(ex);
}
count++;
}
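// Per-record sleep duration is the requested total sleep time divided by
// the record count, so a task's sleeps add up to *SleepTime ms.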
public void configure(JobConf job) {
this.mapSleepCount =
job.getInt("sleep.job.map.sleep.count", mapSleepCount);
this.reduceSleepCount =
job.getInt("sleep.job.reduce.sleep.count", reduceSleepCount);
this.mapSleepDuration =
job.getLong("sleep.job.map.sleep.time" , 100) / mapSleepCount;
this.reduceSleepDuration =
job.getLong("sleep.job.reduce.sleep.time" , 100) / reduceSleepCount;
this.countersPerTask =
job.getInt("sleep.job.counters.per.task", 0);
makeSomeTasksSlower(job);
}
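// If this task's id is listed in SLOW_MAPS / SLOW_REDUCES, multiply its
// per-record sleep by SLOW_RATIO to simulate a straggler (useful when
// testing speculative execution).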
private void makeSomeTasksSlower(JobConf job) {
int id = getTaskId(job);
int slowRatio = job.getInt(SLOW_RATIO, 1);
if (isMap(job)) {
String[] slowMaps = job.getStrings(SLOW_MAPS);
if (slowMaps != null) {
for (String s : slowMaps) {
if (id == Integer.parseInt(s)) {
System.out.println("Map task:" + id + " is slowed." +
" slowRatio:" + slowRatio);
this.mapSleepDuration *= slowRatio;
return;
}
}
}
} else {
String[] slowReduces = job.getStrings(SLOW_REDUCES);
if (slowReduces != null) {
for (String s : slowReduces) {
if (id == Integer.parseInt(s)) {
System.out.println("Reduce task:" + id + " is slowed" +
" slowRatio:" + slowRatio);
this.reduceSleepDuration *= slowRatio;
return;
}
}
}
}
}
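// Task attempt ids look like "attempt_<jobtracker>_<job#>_m_<task#>_<try#>":
// field 3 is the task type ("m" or "r") and field 4 the task number.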
private boolean isMap(JobConf job) {
String taskid = job.get("mapred.task.id");
return taskid.split("_")[3].equals("m");
}
private int getTaskId(JobConf job) {
String taskid = job.get("mapred.task.id");
return Integer.parseInt(taskid.split("_")[4]);
}
public void close() throws IOException {
}
public static void main(String[] args) throws Exception{
int res = ToolRunner.run(new Configuration(), new SleepJob(), args);
System.exit(res);
}
public int run(int numMapper, int numReducer, long mapSleepTime,
int mapSleepCount, long reduceSleepTime,
int reduceSleepCount, boolean doSpeculation,
List<String> slowMaps, List<String> slowReduces,
int slowRatio, int countersPerTask, List<String> hosts, int hostsPerSplit,
boolean setup)
throws IOException {
JobConf job = setupJobConf(numMapper, numReducer, mapSleepTime,
mapSleepCount, reduceSleepTime, reduceSleepCount,
doSpeculation, slowMaps, slowReduces, slowRatio,
countersPerTask, hosts, hostsPerSplit, setup);
rJob = JobClient.runJob(job);
return 0;
}
public JobConf setupJobConf(int numMapper, int numReducer,
long mapSleepTime, int mapSleepCount,
long reduceSleepTime, int reduceSleepCount) {
final List<String> EMPTY = Collections.emptyList();
return setupJobConf(numMapper, numReducer, mapSleepTime, mapSleepCount,
reduceSleepTime, reduceSleepCount, false, EMPTY, EMPTY, 1, 0,
new ArrayList<String>(), 1, false);
}
public JobConf setupJobConf(int numMapper, int numReducer,
long mapSleepTime, int mapSleepCount,
long reduceSleepTime, int reduceSleepCount,
boolean doSpeculation, List<String> slowMaps,
List<String> slowReduces, int slowRatio,
int countersPerTask, List<String> hosts,
int hostsPerSplit, boolean setup) {
JobConf job = new JobConf(getConf(), SleepJob.class);
job.setNumMapTasks(numMapper);
job.setNumReduceTasks(numReducer);
job.setMapperClass(SleepJob.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setReducerClass(SleepJob.class);
job.setOutputFormat(NullOutputFormat.class);
job.setJobSetupCleanupNeeded(setup);
job.setInputFormat(SleepInputFormat.class);
job.setPartitionerClass(SleepJob.class);
job.setJobName("Sleep job");
FileInputFormat.addInputPath(job, new Path("ignored"));
job.setLong("sleep.job.map.sleep.time", mapSleepTime);
job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
job.setInt("sleep.job.map.sleep.count", mapSleepCount);
job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
job.setSpeculativeExecution(doSpeculation);
job.setInt(SLOW_RATIO, slowRatio);
job.setStrings(SLOW_MAPS, slowMaps.toArray(new String[slowMaps.size()]));
job.setStrings(SLOW_REDUCES, slowReduces.toArray(new String[slowReduces.size()]));
job.setInt("sleep.job.counters.per.task", countersPerTask);
job.setStrings(HOSTS_FOR_LOCALITY, hosts.toArray(new String[hosts.size()]));
job.setInt(HOSTS_PER_SPLIT, hostsPerSplit);
return job;
}
public int run(String[] args) throws Exception {
if(args.length < 1) {
System.err.println("SleepJob [-m numMapper] [-r numReducer]" +
" [-mt mapSleepTime (msec)] [-rt reduceSleepTime (msec)]" +
" [-recordt recordSleepTime (msec)]" +
" [-slowmaps slowMaps (int separated by ,)]" +
" [-slowreduces slowReduces (int separated by ,)]" +
" [-slowratio slowRatio]" +
" [-counters numCountersToIncPerRecordPerTask]" +
" [-nosetup]" +
" [-hosts hostsToRunMaps (for testing locality. host names" +
" separated by ,)]" +
" [-hostspersplit numHostsPerSplit (for testing locality. number" +
" of random hosts per split " +
" ");
ToolRunner.printGenericCommandUsage(System.err);
return -1;
}
int numMapper = 1, numReducer = 1;
long mapSleepTime = 100, reduceSleepTime = 100, recSleepTime = 100;
int mapSleepCount = 1, reduceSleepCount = 1;
int hostsPerSplit = 0;
List<String> slowMaps = Collections.emptyList();
List<String> slowReduces = Collections.emptyList();
int slowRatio = 10;
boolean setup = true;
boolean doSpeculation = false;
List<String> hosts = new ArrayList<String>();
int countersPerTask = 0;
for(int i=0; i < args.length; i++ ) {
if(args[i].equals("-m")) {
numMapper = Integer.parseInt(args[++i]);
}
else if(args[i].equals("-r")) {
numReducer = Integer.parseInt(args[++i]);
}
else if(args[i].equals("-mt")) {
mapSleepTime = Long.parseLong(args[++i]);
}
else if(args[i].equals("-rt")) {
reduceSleepTime = Long.parseLong(args[++i]);
}
else if (args[i].equals("-recordt")) {
recSleepTime = Long.parseLong(args[++i]);
}
else if (args[i].equals("-slowmaps")) {
doSpeculation = true;
slowMaps = parseSlowTaskList(args[++i]);
}
else if (args[i].equals("-slowreduces")) {
doSpeculation = true;
slowReduces = parseSlowTaskList(args[++i]);
}
else if (args[i].equals("-slowratio")) {
doSpeculation = true;
slowRatio = Integer.parseInt(args[++i]);
}
else if (args[i].equals("-hosts")) {
for (String host : args[++i].split(",")) {
hosts.add(host);
}
}
else if (args[i].equals("-speculation")) {
doSpeculation = true;
}
else if (args[i].equals("-counters")) {
// Number of counters to increment per record per task
countersPerTask = Integer.parseInt(args[++i]);
}
else if (args[i].equals("-hostspersplit")) {
hostsPerSplit = Integer.parseInt(args[++i]);
}
else if (args[i].equals("-nosetup")) {
setup = false;
}
else {
System.err.println("Invalid option " + args[i]);
System.exit(-1);
}
}
// Each task sleeps for a total of *SleepTime ms, issued as one
// recSleepTime-ms sleep per record.
mapSleepCount = (int)Math.ceil(mapSleepTime / ((double)recSleepTime));
reduceSleepCount = (int)Math.ceil(reduceSleepTime / ((double)recSleepTime));
return run(numMapper, numReducer, mapSleepTime, mapSleepCount,
reduceSleepTime, reduceSleepCount,
doSpeculation, slowMaps, slowReduces, slowRatio, countersPerTask,
hosts, hostsPerSplit, setup);
}
private List<String> parseSlowTaskList(String input) {
String[] tasks = input.split(",");
List<String> slowTasks = new ArrayList<String>();
for (String task : tasks) {
int id = Integer.parseInt(task);
slowTasks.add(id + "");
}
return slowTasks;
}
public RunningJob getRunningJob() {
return rJob;
}
}
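For reference, a minimal driver sketch (the class name SleepJobDriver is
illustrative) that launches the job through ToolRunner exactly as main(...)
above does; on a cluster the equivalent is typically
"hadoop jar hadoop-*-examples.jar sleep -m 4 -r 2 -mt 2000 -rt 1000".

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.examples.SleepJob;
import org.apache.hadoop.util.ToolRunner;

public class SleepJobDriver {
  public static void main(String[] args) throws Exception {
    String[] jobArgs = {
        "-m", "4",      // four map tasks
        "-r", "2",      // two reduce tasks
        "-mt", "2000",  // each map sleeps 2000 ms in total
        "-rt", "1000"   // each reduce sleeps 1000 ms in total
    };
    // Runs SleepJob.run(String[]) with a fresh Configuration and exits
    // with the job's status code (0 on success).
    System.exit(ToolRunner.run(new Configuration(), new SleepJob(), jobArgs));
  }
}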