org.apache.hadoop.mapred.MapperWaitThread Maven / Gradle / Ivy
/**
*
*/
package org.apache.hadoop.mapred;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.TaskTracker.TaskInProgress;
/**
* A thread that waits for all the mappers for the given reduce task to finish
* before adding itself to the supplied SimulatedTaskRunner. While
* waiting, this thread also updates the task status so that they don't get
* killed due to inactivity.
*/
public class MapperWaitThread extends Thread {
public static final Log LOG =
LogFactory.getLog(MapperWaitThread.class);
private TaskUmbilicalProtocol umbilicalProtocol;
private SimulatedTaskRunner taskRunner;
private TaskInProgress tip;
// Max map completion events to fetch in one go from the tasktracker
private static final int MAX_EVENTS_TO_FETCH = 10000;
// Time to wait between fetches.
private static final int SLEEP_TIME = 2000;
/**
* @param tip a reduce task in progress that we should wait for the mappers
* to finish
* @param taskRunner the task runner thread that the TIP should be sent
* to after all the mappers are done.
* @param umbilicalProtocol The umbilical
* events
*/
public MapperWaitThread(TaskInProgress tip,
SimulatedTaskRunner taskRunner, TaskUmbilicalProtocol umbilicalProtocol) {
this.taskRunner = taskRunner;
this.umbilicalProtocol = umbilicalProtocol;
this.tip = tip;
this.setName("Map-waiting thread for job: " + tip.getTask().getJobID() +
" reduce task: " + tip.getTask().getTaskID());
// Don't want to prevent the TT from shutting down just because of this
// thread
this.setDaemon(true);
}
/**
* Updates status / fetches map completion events until it gets them for
* all the mappers. Adds task to finish afterward.
*/
@Override
public void run() {
try {
if (tip.getTask().isMapTask()) {
throw new RuntimeException("Only works for reducers!");
}
ReduceTask reduceTask = (ReduceTask) tip.getTask();
LOG.info("MapperWaitThread started for reduce task " +
reduceTask.getTaskID());
int successfulMapCompletions = 0;
int getFromEventId = 0;
// Wait for the mappers in a loop
while (successfulMapCompletions < reduceTask.getNumMaps()) {
LOG.debug("Job: " + reduceTask.getJobID() + " ReduceTask: " +
reduceTask.getTaskID() + " Got Successful Maps: " +
successfulMapCompletions + "/" + reduceTask.getNumMaps());
try {
// This gets whether the mappers finished and also the location of the
// output
MapTaskCompletionEventsUpdate updates =
umbilicalProtocol.getMapCompletionEvents(reduceTask.getJobID(), getFromEventId,
MAX_EVENTS_TO_FETCH, reduceTask.getTaskID());
TaskCompletionEvent [] completionEvents =
updates.getMapTaskCompletionEvents();
if (updates.shouldReset()) {
getFromEventId = 0;
successfulMapCompletions = 0;
}
// Increment to get the next set of updates
LOG.debug("Job: " + reduceTask.getJobID() + " ReduceTask: " +
reduceTask.getTaskID() + " Got " +
completionEvents.length + " map task " +
" completion events");
getFromEventId += completionEvents.length;
// Tally up all the successful maps
for(TaskCompletionEvent t : completionEvents) {
if (t.getTaskStatus() == TaskCompletionEvent.Status.SUCCEEDED) {
successfulMapCompletions++;
}
}
// Update the progress of the threads so that they don't get killed
// for inactivity
umbilicalProtocol.statusUpdate(tip.getTask().getTaskID(), tip.getStatus());
// If the thread were interrupted, then it means that we need to stop
// as the task was killed
if (Thread.interrupted()) {
throw new InterruptedException("Generated in loop");
}
if (successfulMapCompletions % MAX_EVENTS_TO_FETCH == 0) {
Thread.sleep(SLEEP_TIME);
}
} catch (IOException e) {
LOG.error("Got an exception while getting map completion events", e);
} catch (InterruptedException e) {
LOG.debug("Got an interrupted exception while waiting for mappers " +
"for " + tip.getTask().getTaskID() + " job " +
tip.getTask().getJobID());
return;
}
}
// All the mappers are done, so we can finish the reduce task
LOG.info("Job: " + reduceTask.getJobID() + " ReduceTask: " +
reduceTask.getTaskID() + " All maps finished, adding to task to " +
"finish");
taskRunner.addTipToFinish(tip, umbilicalProtocol);
} finally {
LOG.info("Exiting mapper wait thread " +
"for " + tip.getTask().getTaskID() + " job " +
tip.getTask().getJobID());
}
}
}