org.apache.hadoop.mapred.LocalContainerLauncher Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSError;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobCounter;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* Runs the container task locally in a thread.
* Since all (sub)tasks share the same local directory, they must be executed
* sequentially in order to avoid creating/deleting the same files/dirs.
*/
public class LocalContainerLauncher extends AbstractService implements
ContainerLauncher {
private static final File curDir = new File(".");
private static final Log LOG = LogFactory.getLog(LocalContainerLauncher.class);
private FileContext curFC = null;
private final HashSet localizedFiles;
private final AppContext context;
private final TaskUmbilicalProtocol umbilical;
private ExecutorService taskRunner;
private Thread eventHandler;
private byte[] encryptedSpillKey = new byte[] {0};
private BlockingQueue eventQueue =
new LinkedBlockingQueue();
public LocalContainerLauncher(AppContext context,
TaskUmbilicalProtocol umbilical) {
super(LocalContainerLauncher.class.getName());
this.context = context;
this.umbilical = umbilical;
// umbilical: MRAppMaster creates (taskAttemptListener), passes to us
// (TODO/FIXME: pointless to use RPC to talk to self; should create
// LocalTaskAttemptListener or similar: implement umbilical protocol
// but skip RPC stuff)
try {
curFC = FileContext.getFileContext(curDir.toURI());
} catch (UnsupportedFileSystemException ufse) {
LOG.error("Local filesystem " + curDir.toURI().toString()
+ " is unsupported?? (should never happen)");
}
// Save list of files/dirs that are supposed to be present so can delete
// any extras created by one task before starting subsequent task. Note
// that there's no protection against deleted or renamed localization;
// users who do that get what they deserve (and will have to disable
// uberization in order to run correctly).
File[] curLocalFiles = curDir.listFiles();
localizedFiles = new HashSet(curLocalFiles.length);
for (int j = 0; j < curLocalFiles.length; ++j) {
localizedFiles.add(curLocalFiles[j]);
}
// Relocalization note/future FIXME (per chrisdo, 20110315): At moment,
// full localization info is in AppSubmissionContext passed from client to
// RM and then to NM for AM-container launch: no difference between AM-
// localization and MapTask- or ReduceTask-localization, so can assume all
// OK. Longer-term, will need to override uber-AM container-localization
// request ("needed resources") with union of regular-AM-resources + task-
// resources (and, if maps and reduces ever differ, then union of all three
// types), OR will need localizer service/API that uber-AM can request
// after running (e.g., "localizeForTask()" or "localizeForMapTask()").
}
public void serviceStart() throws Exception {
// create a single thread for serial execution of tasks
// make it a daemon thread so that the process can exit even if the task is
// not interruptible
taskRunner =
Executors.newSingleThreadExecutor(new ThreadFactoryBuilder().
setDaemon(true).setNameFormat("uber-SubtaskRunner").build());
// create and start an event handling thread
eventHandler = new Thread(new EventHandler(), "uber-EventHandler");
eventHandler.start();
super.serviceStart();
}
public void serviceStop() throws Exception {
if (eventHandler != null) {
eventHandler.interrupt();
}
if (taskRunner != null) {
taskRunner.shutdownNow();
}
super.serviceStop();
}
@Override
public void handle(ContainerLauncherEvent event) {
try {
eventQueue.put(event);
} catch (InterruptedException e) {
throw new YarnRuntimeException(e); // FIXME? YarnRuntimeException is "for runtime exceptions only"
}
}
public void setEncryptedSpillKey(byte[] encryptedSpillKey) {
if (encryptedSpillKey != null) {
this.encryptedSpillKey = encryptedSpillKey;
}
}
/*
* Uber-AM lifecycle/ordering ("normal" case):
*
* - [somebody] sends TA_ASSIGNED
* - handled by ContainerAssignedTransition (TaskAttemptImpl.java)
* - creates "remoteTask" for us == real Task
* - sends CONTAINER_REMOTE_LAUNCH
* - TA: UNASSIGNED -> ASSIGNED
* - CONTAINER_REMOTE_LAUNCH handled by LocalContainerLauncher (us)
* - sucks "remoteTask" out of TaskAttemptImpl via getRemoteTask()
* - sends TA_CONTAINER_LAUNCHED
* [[ elsewhere...
* - TA_CONTAINER_LAUNCHED handled by LaunchedContainerTransition
* - registers "remoteTask" with TaskAttemptListener (== umbilical)
* - NUKES "remoteTask"
* - sends T_ATTEMPT_LAUNCHED (Task: SCHEDULED -> RUNNING)
* - TA: ASSIGNED -> RUNNING
* ]]
* - runs Task (runSubMap() or runSubReduce())
* - TA can safely send TA_UPDATE since in RUNNING state
*/
private class EventHandler implements Runnable {
// doneWithMaps and finishedSubMaps are accessed from only
// one thread. Therefore, no need to make them volatile.
private boolean doneWithMaps = false;
private int finishedSubMaps = 0;
private final Map> futures =
new ConcurrentHashMap>();
EventHandler() {
}
@SuppressWarnings("unchecked")
@Override
public void run() {
ContainerLauncherEvent event = null;
// Collect locations of map outputs to give to reduces
final Map localMapFiles =
new HashMap();
// _must_ either run subtasks sequentially or accept expense of new JVMs
// (i.e., fork()), else will get weird failures when maps try to create/
// write same dirname or filename: no chdir() in Java
while (!Thread.currentThread().isInterrupted()) {
try {
event = eventQueue.take();
} catch (InterruptedException e) { // mostly via T_KILL? JOB_KILL?
LOG.error("Returning, interrupted : " + e);
break;
}
LOG.info("Processing the event " + event.toString());
if (event.getType() == EventType.CONTAINER_REMOTE_LAUNCH) {
final ContainerRemoteLaunchEvent launchEv =
(ContainerRemoteLaunchEvent)event;
// execute the task on a separate thread
Future> future = taskRunner.submit(new Runnable() {
public void run() {
runTask(launchEv, localMapFiles);
}
});
// remember the current attempt
futures.put(event.getTaskAttemptID(), future);
} else if (event.getType() == EventType.CONTAINER_REMOTE_CLEANUP) {
// cancel (and interrupt) the current running task associated with the
// event
TaskAttemptId taId = event.getTaskAttemptID();
Future> future = futures.remove(taId);
if (future != null) {
LOG.info("canceling the task attempt " + taId);
future.cancel(true);
}
// send "cleaned" event to task attempt to move us from
// SUCCESS_CONTAINER_CLEANUP to SUCCEEDED state (or
// {FAIL|KILL}_CONTAINER_CLEANUP to {FAIL|KILL}_TASK_CLEANUP)
context.getEventHandler().handle(
new TaskAttemptEvent(taId,
TaskAttemptEventType.TA_CONTAINER_CLEANED));
} else {
LOG.warn("Ignoring unexpected event " + event.toString());
}
}
}
@SuppressWarnings("unchecked")
private void runTask(ContainerRemoteLaunchEvent launchEv,
Map localMapFiles) {
TaskAttemptId attemptID = launchEv.getTaskAttemptID();
Job job = context.getAllJobs().get(attemptID.getTaskId().getJobId());
int numMapTasks = job.getTotalMaps();
int numReduceTasks = job.getTotalReduces();
// YARN (tracking) Task:
org.apache.hadoop.mapreduce.v2.app.job.Task ytask =
job.getTask(attemptID.getTaskId());
// classic mapred Task:
org.apache.hadoop.mapred.Task remoteTask = launchEv.getRemoteTask();
// after "launching," send launched event to task attempt to move
// state from ASSIGNED to RUNNING (also nukes "remoteTask", so must
// do getRemoteTask() call first)
//There is no port number because we are not really talking to a task
// tracker. The shuffle is just done through local files. So the
// port number is set to -1 in this case.
context.getEventHandler().handle(
new TaskAttemptContainerLaunchedEvent(attemptID, -1));
if (numMapTasks == 0) {
doneWithMaps = true;
}
try {
if (remoteTask.isMapOrReduce()) {
JobCounterUpdateEvent jce = new JobCounterUpdateEvent(attemptID.getTaskId().getJobId());
jce.addCounterUpdate(JobCounter.TOTAL_LAUNCHED_UBERTASKS, 1);
if (remoteTask.isMapTask()) {
jce.addCounterUpdate(JobCounter.NUM_UBER_SUBMAPS, 1);
} else {
jce.addCounterUpdate(JobCounter.NUM_UBER_SUBREDUCES, 1);
}
context.getEventHandler().handle(jce);
}
runSubtask(remoteTask, ytask.getType(), attemptID, numMapTasks,
(numReduceTasks > 0), localMapFiles);
} catch (RuntimeException re) {
JobCounterUpdateEvent jce = new JobCounterUpdateEvent(attemptID.getTaskId().getJobId());
jce.addCounterUpdate(JobCounter.NUM_FAILED_UBERTASKS, 1);
context.getEventHandler().handle(jce);
// this is our signal that the subtask failed in some way, so
// simulate a failed JVM/container and send a container-completed
// event to task attempt (i.e., move state machine from RUNNING
// to FAIL_CONTAINER_CLEANUP [and ultimately to FAILED])
context.getEventHandler().handle(new TaskAttemptEvent(attemptID,
TaskAttemptEventType.TA_CONTAINER_COMPLETED));
} catch (IOException ioe) {
// if umbilical itself barfs (in error-handler of runSubMap()),
// we're pretty much hosed, so do what YarnChild main() does
// (i.e., exit clumsily--but can never happen, so no worries!)
LOG.fatal("oopsie... this can never happen: "
+ StringUtils.stringifyException(ioe));
ExitUtil.terminate(-1);
} finally {
// remove my future
if (futures.remove(attemptID) != null) {
LOG.info("removed attempt " + attemptID +
" from the futures to keep track of");
}
}
}
private void runSubtask(org.apache.hadoop.mapred.Task task,
final TaskType taskType,
TaskAttemptId attemptID,
final int numMapTasks,
boolean renameOutputs,
Map localMapFiles)
throws RuntimeException, IOException {
org.apache.hadoop.mapred.TaskAttemptID classicAttemptID =
TypeConverter.fromYarn(attemptID);
try {
JobConf conf = new JobConf(getConfig());
conf.set(JobContext.TASK_ID, task.getTaskID().toString());
conf.set(JobContext.TASK_ATTEMPT_ID, classicAttemptID.toString());
conf.setBoolean(JobContext.TASK_ISMAP, (taskType == TaskType.MAP));
conf.setInt(JobContext.TASK_PARTITION, task.getPartition());
conf.set(JobContext.ID, task.getJobID().toString());
// Use the AM's local dir env to generate the intermediate step
// output files
String[] localSysDirs = StringUtils.getTrimmedStrings(
System.getenv(Environment.LOCAL_DIRS.name()));
conf.setStrings(MRConfig.LOCAL_DIR, localSysDirs);
LOG.info(MRConfig.LOCAL_DIR + " for uber task: "
+ conf.get(MRConfig.LOCAL_DIR));
// mark this as an uberized subtask so it can set task counter
// (longer-term/FIXME: could redefine as job counter and send
// "JobCounterEvent" to JobImpl on [successful] completion of subtask;
// will need new Job state-machine transition and JobImpl jobCounters
// map to handle)
conf.setBoolean("mapreduce.task.uberized", true);
// Check and handle Encrypted spill key
task.setEncryptedSpillKey(encryptedSpillKey);
YarnChild.setEncryptedSpillKeyIfRequired(task);
// META-FIXME: do we want the extra sanity-checking (doneWithMaps,
// etc.), or just assume/hope the state machine(s) and uber-AM work
// as expected?
if (taskType == TaskType.MAP) {
if (doneWithMaps) {
LOG.error("CONTAINER_REMOTE_LAUNCH contains a map task ("
+ attemptID + "), but should be finished with maps");
throw new RuntimeException();
}
MapTask map = (MapTask)task;
map.setConf(conf);
map.run(conf, umbilical);
if (renameOutputs) {
MapOutputFile renamed = renameMapOutputForReduce(conf, attemptID,
map.getMapOutputFile());
localMapFiles.put(classicAttemptID, renamed);
}
relocalize();
if (++finishedSubMaps == numMapTasks) {
doneWithMaps = true;
}
} else /* TaskType.REDUCE */ {
if (!doneWithMaps) {
// check if event-queue empty? whole idea of counting maps vs.
// checking event queue is a tad wacky...but could enforce ordering
// (assuming no "lost events") at LocalMRAppMaster [CURRENT BUG(?):
// doesn't send reduce event until maps all done]
LOG.error("CONTAINER_REMOTE_LAUNCH contains a reduce task ("
+ attemptID + "), but not yet finished with maps");
throw new RuntimeException();
}
// a.k.a. "mapreduce.jobtracker.address" in LocalJobRunner:
// set framework name to local to make task local
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
conf.set(MRConfig.MASTER_ADDRESS, "local"); // bypass shuffle
ReduceTask reduce = (ReduceTask)task;
reduce.setLocalMapFiles(localMapFiles);
reduce.setConf(conf);
reduce.run(conf, umbilical);
relocalize();
}
} catch (FSError e) {
LOG.fatal("FSError from child", e);
// umbilical: MRAppMaster creates (taskAttemptListener), passes to us
if (!ShutdownHookManager.get().isShutdownInProgress()) {
umbilical.fsError(classicAttemptID, e.getMessage());
}
throw new RuntimeException();
} catch (Exception exception) {
LOG.warn("Exception running local (uberized) 'child' : "
+ StringUtils.stringifyException(exception));
try {
if (task != null) {
// do cleanup for the task
task.taskCleanup(umbilical);
}
} catch (Exception e) {
LOG.info("Exception cleaning up: "
+ StringUtils.stringifyException(e));
}
// Report back any failures, for diagnostic purposes
umbilical.reportDiagnosticInfo(classicAttemptID,
StringUtils.stringifyException(exception));
throw new RuntimeException();
} catch (Throwable throwable) {
LOG.fatal("Error running local (uberized) 'child' : "
+ StringUtils.stringifyException(throwable));
if (!ShutdownHookManager.get().isShutdownInProgress()) {
Throwable tCause = throwable.getCause();
String cause =
(tCause == null) ? throwable.getMessage() : StringUtils
.stringifyException(tCause);
umbilical.fatalError(classicAttemptID, cause);
}
throw new RuntimeException();
}
}
/**
* Also within the local filesystem, we need to restore the initial state
* of the directory as much as possible. Compare current contents against
* the saved original state and nuke everything that doesn't belong, with
* the exception of the renamed map outputs.
*
* Any jobs that go out of their way to rename or delete things from the
* local directory are considered broken and deserve what they get...
*/
private void relocalize() {
File[] curLocalFiles = curDir.listFiles();
for (int j = 0; j < curLocalFiles.length; ++j) {
if (!localizedFiles.contains(curLocalFiles[j])) {
// found one that wasn't there before: delete it
boolean deleted = false;
try {
if (curFC != null) {
// this is recursive, unlike File delete():
deleted = curFC.delete(new Path(curLocalFiles[j].getName()),true);
}
} catch (IOException e) {
deleted = false;
}
if (!deleted) {
LOG.warn("Unable to delete unexpected local file/dir "
+ curLocalFiles[j].getName() + ": insufficient permissions?");
}
}
}
}
} // end EventHandler
/**
* Within the _local_ filesystem (not HDFS), all activity takes place within
* a subdir inside one of the LOCAL_DIRS
* (${local.dir}/usercache/$user/appcache/$appId/$contId/),
* and all sub-MapTasks create the same filename ("file.out"). Rename that
* to something unique (e.g., "map_0.out") to avoid possible collisions.
*
* Longer-term, we'll modify [something] to use TaskAttemptID-based
* filenames instead of "file.out". (All of this is entirely internal,
* so there are no particular compatibility issues.)
*/
@VisibleForTesting
protected static MapOutputFile renameMapOutputForReduce(JobConf conf,
TaskAttemptId mapId, MapOutputFile subMapOutputFile) throws IOException {
FileSystem localFs = FileSystem.getLocal(conf);
// move map output to reduce input
Path mapOut = subMapOutputFile.getOutputFile();
FileStatus mStatus = localFs.getFileStatus(mapOut);
Path reduceIn = subMapOutputFile.getInputFileForWrite(
TypeConverter.fromYarn(mapId).getTaskID(), mStatus.getLen());
Path mapOutIndex = subMapOutputFile.getOutputIndexFile();
Path reduceInIndex = new Path(reduceIn.toString() + ".index");
if (LOG.isDebugEnabled()) {
LOG.debug("Renaming map output file for task attempt "
+ mapId.toString() + " from original location " + mapOut.toString()
+ " to destination " + reduceIn.toString());
}
if (!localFs.mkdirs(reduceIn.getParent())) {
throw new IOException("Mkdirs failed to create "
+ reduceIn.getParent().toString());
}
if (!localFs.rename(mapOut, reduceIn))
throw new IOException("Couldn't rename " + mapOut);
if (!localFs.rename(mapOutIndex, reduceInIndex))
throw new IOException("Couldn't rename " + mapOutIndex);
return new RenamedMapOutputFile(reduceIn);
}
private static class RenamedMapOutputFile extends MapOutputFile {
private Path path;
public RenamedMapOutputFile(Path path) {
this.path = path;
}
@Override
public Path getOutputFile() throws IOException {
return path;
}
@Override
public Path getOutputFileForWrite(long size) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Path getOutputFileForWriteInVolume(Path existing) {
throw new UnsupportedOperationException();
}
@Override
public Path getOutputIndexFile() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Path getOutputIndexFileForWrite(long size) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Path getOutputIndexFileForWriteInVolume(Path existing) {
throw new UnsupportedOperationException();
}
@Override
public Path getSpillFile(int spillNumber) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Path getSpillFileForWrite(int spillNumber, long size)
throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Path getSpillIndexFile(int spillNumber) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Path getSpillIndexFileForWrite(int spillNumber, long size)
throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Path getInputFile(int mapId) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Path getInputFileForWrite(TaskID mapId, long size)
throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void removeAll() throws IOException {
throw new UnsupportedOperationException();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy