/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import java.util.UUID;
import java.util.Vector;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.mapred.JobClient.RawSplit;
import org.apache.hadoop.util.StringUtils;
/** Implements MapReduce locally, in-process, for debugging. */
public class LocalJobRunner implements JobSubmissionProtocol {
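// Typical use (assumed, matching stock Hadoop behaviour): JobClient constructs a
// LocalJobRunner instead of contacting a JobTracker when the job configuration
// sets "mapred.job.tracker" to "local", e.g.
//   JobConf conf = new JobConf();
//   conf.set("mapred.job.tracker", "local");
//   JobClient.runJob(conf);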
public static final Log LOG =
LogFactory.getLog(LocalJobRunner.class);
private FileSystem fs;
private HashMap<JobID, Job> jobs = new HashMap<JobID, Job>();
private JobConf conf;
private volatile int map_tasks = 0;
private volatile int reduce_tasks = 0;
private JobTrackerInstrumentation myMetrics = null;
private String runnerLogDir;
private static final String jobDir = "localRunner/";
public static final String LOCALHOST = "127.0.0.1";
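// Number of task slots: sizes both the task thread pool and the umbilical
// server's handler count when tasks run in parallel.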
public static final String LOCAL_RUNNER_SLOTS = "local.job.tracker.slots";
public static final int DEFAULT_LOCAL_RUNNER_SLOTS = 4;
public long getProtocolVersion(String protocol, long clientVersion) {
return JobSubmissionProtocol.versionID;
}
public ProtocolSignature getProtocolSignature(String protocol,
long clientVersion, int clientMethodsHash) throws IOException {
return ProtocolSignature.getProtocolSignature(
this, protocol, clientVersion, clientMethodsHash);
}
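/**
 * Builds a per-runner log directory name from the current date/time plus a
 * random UUID, so that concurrent local runners do not collide.
 */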
private String computeLogDir() {
GregorianCalendar gc = new GregorianCalendar();
return String.format("local_%1$4d%2$02d%3$02d%4$02d%5$02d%6$02d",
    gc.get(Calendar.YEAR), gc.get(Calendar.MONTH) + 1,
    gc.get(Calendar.DAY_OF_MONTH), gc.get(Calendar.HOUR_OF_DAY),
    gc.get(Calendar.MINUTE), gc.get(Calendar.SECOND))
    + "_" + UUID.randomUUID().toString();
}
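/**
 * An in-process job: a thread that runs the job's map tasks (inline or via
 * child JVMs managed by a thread pool) followed by at most one reduce task,
 * and that also serves as the tasks' umbilical endpoint.
 */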
private class Job extends Thread
implements TaskUmbilicalProtocol {
private JobID id;
private JobConf job;
private JobStatus status;
private volatile int numSucceededMaps = 0;
private ArrayList<TaskAttemptID> mapIds = new ArrayList<TaskAttemptID>();
private MapOutputFile mapoutputFile;
private JobProfile profile;
private Path localFile;
private FileSystem localFs;
boolean killed = false;
volatile boolean shutdown = false;
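// When true (the default), map tasks are run one at a time in this thread
// rather than in separate child JVMs.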
boolean doSequential = true;
// Current counters, including incomplete task(s)
private Map<TaskAttemptID, Counters> currentCounters =
  new HashMap<TaskAttemptID, Counters>();
public long getProtocolVersion(String protocol, long clientVersion) {
return TaskUmbilicalProtocol.versionID;
}
public ProtocolSignature getProtocolSignature(String protocol,
long clientVersion, int clientMethodsHash) throws IOException {
return ProtocolSignature.getProtocolSignature(
this, protocol, clientVersion, clientMethodsHash);
}
int numSlots;
// Identifier for task.
int taskCounter = 0;
// A thread pool with as many threads as the number of slots.
ExecutorService executor;
private Map<Integer, JVMId> taskJvms = new HashMap<Integer, JVMId>();
private Map<Integer, Task> runningTasks = new HashMap<Integer, Task>();
Server umbilicalServer;
int umbilicalPort;
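/**
 * Launches a single task in a separate child JVM (LocalChild), reusing the
 * parent JVM's binary, classpath and java options.
 */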
class TaskRunnable implements Runnable {
private Task task;
int id;
TaskRunnable(Task task, int id) {
this.task = task;
this.id = id;
}
@Override
public void run() {
try {
Vector<String> args = new Vector<String>();
// Use same jvm as parent.
File jvm =
new File(new File(System.getProperty("java.home"), "bin"), "java");
args.add(jvm.toString());
// Add classpath.
String classPath = System.getProperty("java.class.path", "");
classPath += System.getProperty("path.separator") + currentClassPath();
args.add("-classpath");
args.add(classPath);
long logSize = TaskLog.getTaskLogLength(conf);
// Create a log4j directory for the job.
String logDir = new File(
System.getProperty("hadoop.log.dir")).getAbsolutePath() +
Path.SEPARATOR + runnerLogDir +
Path.SEPARATOR + Job.this.id;
LOG.info("Logs for " + task.getTaskID() + " are at " + logDir);
args.add("-Dhadoop.log.dir=" + logDir);
args.add("-Dhadoop.root.logger=INFO,TLA");
args.add("-Dhadoop.tasklog.taskid=" + task.getTaskID().toString());
args.add("-Dhadoop.tasklog.totalLogFileSize=" + logSize);
// For test code.
if (System.getProperty("test.build.data") != null) {
args.add("-Dtest.build.data=" +
System.getProperty("test.build.data"));
}
// Set java options.
String javaOpts = conf.get(JobConf.MAPRED_TASK_JAVA_OPTS,
JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS);
javaOpts = javaOpts.replace("@taskid@", task.getTaskID().toString());
String [] javaOptsSplit = javaOpts.split(" ");
// Handle java.library.path.
// Do we need current working directory also here?
String libraryPath = System.getProperty("java.library.path");
boolean hasUserLDPath = false;
for (int i = 0; i < javaOptsSplit.length; i++) {
  if (javaOptsSplit[i].startsWith("-Djava.library.path=")) {
    javaOptsSplit[i] += System.getProperty("path.separator") + libraryPath;
    hasUserLDPath = true;
  }
  args.add(javaOptsSplit[i]);
}
if (!hasUserLDPath && libraryPath != null) {
  args.add("-Djava.library.path=" + libraryPath);
}
// Main class and the arguments LocalChild.main expects: umbilical host and
// port, then the task attempt id and the JVM id.
args.add(LocalChild.class.getName());
args.add(LOCALHOST);
args.add(Integer.toString(umbilicalPort));
args.add(task.getTaskID().toString());
args.add(Integer.toString(id));
// Launch the child JVM and wait for it; a zero exit code counts as success.
// (Reconstructed sketch: the original launch and bookkeeping code was garbled
// in this copy and may differ.)
Process process = new ProcessBuilder(args).start();
int exitCode = process.waitFor();
synchronized (Job.this) {
  if (task.isMapTask()) {
    if (exitCode == 0) {
      numSucceededMaps++;
      myMetrics.completeMap(task.getTaskID());
    }
    map_tasks -= 1;
  }
}
} catch (Exception e) {
  LOG.error("Error running task " + task.getTaskID(), e);
}
}
// Returns the classpath visible through the current thread's context class loaders.
private String currentClassPath() {
Stack<String> paths = new Stack<String>();
ClassLoader ccl = Thread.currentThread().getContextClassLoader();
while (ccl != null) {
for (URL u: ((URLClassLoader)ccl).getURLs()) {
paths.push(u.getPath());
}
ccl = (URLClassLoader)ccl.getParent();
}
if (!paths.empty()) {
String sep = System.getProperty("path.separator");
StringBuffer appClassPath = new StringBuffer();
while (!paths.empty()) {
if (appClassPath.length() != 0) {
appClassPath.append(sep);
}
appClassPath.append(paths.pop());
}
return appClassPath.toString();
} else {
return "";
}
}
}
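/**
 * Localizes the job configuration, starts the umbilical RPC server and the
 * task thread pool, and kicks off the job thread.
 */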
public Job(JobID jobid, JobConf conf) throws IOException {
this.doSequential =
conf.getBoolean("mapred.localrunner.sequential", true);
this.id = jobid;
this.mapoutputFile = new MapOutputFile(jobid);
this.mapoutputFile.setConf(conf);
this.localFile = new JobConf(conf).getLocalPath(jobDir+id+".xml");
this.localFs = FileSystem.getLocal(conf);
persistConf(this.localFs, this.localFile, conf);
this.job = new JobConf(localFile);
profile = new JobProfile(job.getUser(), id, localFile.toString(),
"http://localhost:8080/", job.getJobName());
status = new JobStatus(id, 0.0f, 0.0f, JobStatus.RUNNING);
jobs.put(id, this);
numSlots = conf.getInt(LOCAL_RUNNER_SLOTS, DEFAULT_LOCAL_RUNNER_SLOTS);
executor = Executors.newFixedThreadPool(numSlots);
int handlerCount = numSlots;
umbilicalServer =
RPC.getServer(this, LOCALHOST, 0, handlerCount, false, conf);
umbilicalServer.start();
umbilicalPort = umbilicalServer.getListenerAddress().getPort();
this.start();
}
JobProfile getProfile() {
return profile;
}
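/**
 * Writes the given configuration as XML to the given path, replacing any
 * existing file.
 */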
private void persistConf(FileSystem fs, Path file, JobConf conf)
throws IOException {
new File(file.toUri().getPath()).delete();
FSDataOutputStream out = FileSystem.create(
fs, file, FsPermission.getDefault());
conf.writeXml(out);
out.close();
}
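/**
 * Runs every map task (inline or through the executor), moves map output
 * into reduce input, runs the single reduce task if one is configured, and
 * finally commits or aborts the job.
 */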
@SuppressWarnings("unchecked")
@Override
public void run() {
JobID jobId = profile.getJobID();
JobContext jContext = new JobContext(conf, jobId);
OutputCommitter outputCommitter = job.getOutputCommitter();
try {
// split input into minimum number of splits
RawSplit[] rawSplits = JobClient.getAndRemoveCachedSplits(jobId);
LOG.info("Found " + rawSplits.length + " raw splits for job " + jobId);
int numReduceTasks = job.getNumReduceTasks();
if (numReduceTasks > 1 || numReduceTasks < 0) {
// we only allow 0 or 1 reducer in local mode
numReduceTasks = 1;
job.setNumReduceTasks(1);
}
outputCommitter.setupJob(jContext);
status.setSetupProgress(1.0f);
for (int i = 0; i < rawSplits.length; i++) {
if (!this.isInterrupted()) {
TaskAttemptID mapId = new TaskAttemptID(new TaskID(jobId, true, i),0);
mapIds.add(mapId);
Path taskJobFile = job.getLocalPath(jobDir + id + "_" + mapId + ".xml");
MapTask map = new MapTask(taskJobFile.toString(),
mapId, i,
rawSplits[i].getClassName(),
rawSplits[i].getBytes(), 1,
job.getUser());
JobConf localConf = new JobConf(job);
map.localizeConfiguration(localConf);
map.setConf(localConf);
persistConf(this.localFs, taskJobFile, localConf);
map.setJobFile(taskJobFile.toUri().getPath());
map_tasks += 1;
myMetrics.launchMap(mapId);
// Special handling for the single mapper case.
if (this.doSequential || rawSplits.length == 1) {
map.run(localConf, this);
numSucceededMaps++;
myMetrics.completeMap(mapId);
map_tasks -= 1;
} else {
runTask(map);
}
} else {
throw new InterruptedException();
}
}
// Wait for all maps to be done.
executor.shutdown();
try {
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
} catch (InterruptedException ie) {
LOG.error("Interrupted while waiting mappers to finish");
throw ie;
}
if (numSucceededMaps < rawSplits.length) {
throw new IOException((rawSplits.length - numSucceededMaps) +
" maps failed");
}
TaskAttemptID reduceId =
new TaskAttemptID(new TaskID(jobId, false, 0), 0);
try {
if (numReduceTasks > 0) {
// move map output to reduce input
for (int i = 0; i < mapIds.size(); i++) {
if (!this.isInterrupted()) {
TaskAttemptID mapId = mapIds.get(i);
Path mapOut = this.mapoutputFile.getOutputFile(mapId);
Path reduceIn = this.mapoutputFile.getInputFileForWrite(
mapId.getTaskID(),reduceId,
localFs.getLength(mapOut));
if (!localFs.mkdirs(reduceIn.getParent())) {
throw new IOException("Mkdirs failed to create "
+ reduceIn.getParent().toString());
}
if (!localFs.rename(mapOut, reduceIn))
throw new IOException("Couldn't rename " + mapOut);
} else {
throw new InterruptedException();
}
}
if (!this.isInterrupted()) {
ReduceTask reduce = new ReduceTask(localFile.toString(),
reduceId, 0, mapIds.size(),
1, job.getUser());
JobConf localConf = new JobConf(job);
reduce.localizeConfiguration(localConf);
reduce.setConf(localConf);
persistConf(this.localFs, this.localFile, localConf);
reduce.setJobFile(localFile.toUri().getPath());
reduce_tasks += 1;
myMetrics.launchReduce(reduce.getTaskID());
reduce.run(localConf, this);
myMetrics.completeReduce(reduce.getTaskID());
reduce_tasks -= 1;
updateCounters(reduce.getTaskID(), reduce.getCounters());
} else {
throw new InterruptedException();
}
}
} finally {
for (TaskAttemptID mapId: mapIds) {
this.mapoutputFile.removeAll(mapId);
}
if (numReduceTasks == 1) {
this.mapoutputFile.removeAll(reduceId);
}
}
// delete the temporary directory in output directory
outputCommitter.commitJob(jContext);
status.setCleanupProgress(1.0f);
if (killed) {
this.status.setRunState(JobStatus.KILLED);
} else {
this.status.setRunState(JobStatus.SUCCEEDED);
}
JobEndNotifier.localRunnerNotification(job, status);
} catch (Throwable t) {
try {
outputCommitter.abortJob(jContext, JobStatus.FAILED);
} catch (IOException ioe) {
LOG.info("Error cleaning up job:" + id);
}
status.setCleanupProgress(1.0f);
if (killed) {
this.status.setRunState(JobStatus.KILLED);
} else {
this.status.setRunState(JobStatus.FAILED);
}
LOG.warn(id, t);
JobEndNotifier.localRunnerNotification(job, status);
} finally {
this.shutdown = true;
executor.shutdownNow();
umbilicalServer.stop();
try {
localFs.delete(localFile, true); // delete local copy
} catch (IOException e) {
LOG.warn("Error cleaning up "+id+": "+e);
}
}
}
/**
* Run the given task asynchronously.
*/
void runTask(Task task) {
JobID jobId = task.getJobID();
boolean isMap = task.isMapTask();
JVMId jvmId = new JVMId(jobId, isMap, taskCounter++);
synchronized(this) {
taskJvms.put(jvmId.getId(), jvmId);
runningTasks.put(jvmId.getId(), task);
}
TaskRunnable taskRunnable = new TaskRunnable(task, jvmId.getId());
executor.execute(taskRunnable);
}
// TaskUmbilicalProtocol methods
public JvmTask getTask(JvmContext context) {
int id = context.jvmId.getId();
synchronized(this) {
Task task = runningTasks.get(id);
if (task != null) {
return new JvmTask(task, false);
} else {
return new JvmTask(null, true);
}
}
}
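/**
 * Records a task's progress and counters; map progress is averaged across
 * all map tasks, reduce progress is taken as-is.
 */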
public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
LOG.info(taskStatus.getStateString());
float taskIndex = mapIds.indexOf(taskId);
if (taskIndex >= 0) { // mapping
float numTasks = mapIds.size();
status.setMapProgress(taskIndex/numTasks + taskStatus.getProgress()/numTasks);
} else {
status.setReduceProgress(taskStatus.getProgress());
}
Counters taskCounters = taskStatus.getCounters();
if (taskCounters != null) {
updateCounters(taskId, taskCounters);
}
// ignore phase
return true;
}
/**
* Task is reporting that it is in commit_pending
* and it is waiting for the commit Response
*/
public void commitPending(TaskAttemptID taskid,
TaskStatus taskStatus)
throws IOException, InterruptedException {
statusUpdate(taskid, taskStatus);
}
/**
* Updates counters corresponding to tasks.
*/
private void updateCounters(TaskAttemptID taskId, Counters ctrs) {
synchronized(currentCounters) {
currentCounters.put(taskId, ctrs);
}
}
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) {
LOG.error("Task diagnostic info for " + taskid + " : " + trace);
}
public void reportNextRecordRange(TaskAttemptID taskid,
SortedRanges.Range range) throws IOException {
LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
}
public boolean ping(TaskAttemptID taskid) throws IOException {
return true;
}
public boolean canCommit(TaskAttemptID taskid)
throws IOException {
return true;
}
public void done(TaskAttemptID taskId) throws IOException {
int taskIndex = mapIds.indexOf(taskId);
if (taskIndex >= 0) { // mapping
status.setMapProgress(1.0f);
} else {
status.setReduceProgress(1.0f);
}
}
public synchronized void fsError(TaskAttemptID taskId, String message)
throws IOException {
LOG.fatal("FSError: "+ message + "from task: " + taskId);
}
public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
LOG.fatal("shuffleError: "+ message + "from task: " + taskId);
}
public synchronized void fatalError(TaskAttemptID taskId, String msg)
throws IOException {
LOG.fatal("Fatal: "+ msg + "from task: " + taskId);
}
public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId,
int fromEventId, int maxLocs, TaskAttemptID id) throws IOException {
return new MapTaskCompletionEventsUpdate(TaskCompletionEvent.EMPTY_ARRAY,
false);
}
}
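/**
 * Creates a runner bound to the local filesystem, with its own log directory
 * and job-tracker metrics instance.
 */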
public LocalJobRunner(JobConf conf) throws IOException {
this.fs = FileSystem.getLocal(conf);
this.conf = conf;
runnerLogDir = computeLogDir();
myMetrics = new JobTrackerMetricsInst(null, new JobConf(conf));
}
// JobSubmissionProtocol methods
private static int jobid = 0;
public synchronized JobID getNewJobId() {
return new JobID("local", ++jobid);
}
public JobStatus submitJob(JobID jobid) throws IOException {
return new Job(jobid, this.conf).status;
}
public void killJob(JobID id) {
jobs.get(id).killed = true;
jobs.get(id).interrupt();
}
public void setJobPriority(JobID id, String jp) throws IOException {
throw new UnsupportedOperationException("Changing job priority " +
"in LocalJobRunner is not supported.");
}
/** Throws {@link UnsupportedOperationException} */
public boolean killTask(TaskAttemptID taskId, boolean shouldFail) throws IOException {
throw new UnsupportedOperationException("Killing tasks in " +
"LocalJobRunner is not supported");
}
public JobProfile getJobProfile(JobID id) {
Job job = jobs.get(id);
if(job != null)
return job.getProfile();
else
return null;
}
public TaskReport[] getMapTaskReports(JobID id) {
return new TaskReport[0];
}
public TaskReport[] getReduceTaskReports(JobID id) {
return new TaskReport[0];
}
public TaskReport[] getCleanupTaskReports(JobID id) {
return new TaskReport[0];
}
public TaskReport[] getSetupTaskReports(JobID id) {
return new TaskReport[0];
}
public JobStatus getJobStatus(JobID id) {
Job job = jobs.get(id);
if(job != null)
return job.status;
else
return null;
}
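/**
 * Aggregates the most recent counters reported by every task of the job.
 */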
public Counters getJobCounters(JobID id) {
Job job = jobs.get(id);
Counters total = new Counters();
synchronized(job.currentCounters) {
for (Counters ctrs: job.currentCounters.values()) {
synchronized(ctrs) {
total.incrAllCounters(ctrs);
}
}
}
return total;
}
public String getFilesystemName() throws IOException {
return fs.getUri().toString();
}
public ClusterStatus getClusterStatus(boolean detailed) {
return new ClusterStatus(1, 0, 0, map_tasks, reduce_tasks, 1, 1,
JobTracker.State.RUNNING);
}
public JobStatus[] jobsToComplete() {return null;}
public TaskCompletionEvent[] getTaskCompletionEvents(JobID jobid
, int fromEventId, int maxEvents) throws IOException {
return TaskCompletionEvent.EMPTY_ARRAY;
}
public JobStatus[] getAllJobs() {return null;}
/**
* Returns the diagnostic information for a particular task in the given job.
* To be implemented
*/
public String[] getTaskDiagnostics(TaskAttemptID taskid)
throws IOException{
return new String [0];
}
/**
* @see org.apache.hadoop.mapred.JobSubmissionProtocol#getSystemDir()
*/
public String getSystemDir() {
Path sysDir = new Path(conf.get("mapred.system.dir", "/tmp/hadoop/mapred/system"));
return fs.makeQualified(sysDir).toString();
}
@Override
public JobStatus[] getJobsFromQueue(String queue) throws IOException {
return null;
}
@Override
public JobQueueInfo[] getQueues() throws IOException {
return null;
}
@Override
public JobQueueInfo getQueueInfo(String queue) throws IOException {
return null;
}
@Override
public QueueAclsInfo[] getQueueAclsForCurrentUser() throws IOException{
return null;
}
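/**
 * Entry point for a child JVM: connects back to the umbilical server given by
 * the host, port, task attempt id and JVM id arguments, fetches its task and
 * runs it, redirecting stdout/stderr into the task's log directory.
 */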
public static class LocalChild {
public static void main(String[] args) throws Throwable {
JobConf defaultConf = new JobConf();
String host = args[0];
int port = Integer.parseInt(args[1]);
InetSocketAddress address = new InetSocketAddress(host, port);
final TaskAttemptID firstTaskid = TaskAttemptID.forName(args[2]);
final int SLEEP_LONGER_COUNT = 5;
int jvmIdInt = Integer.parseInt(args[3]);
JVMId jvmId = new JVMId(firstTaskid.getJobID(),firstTaskid.isMap(),jvmIdInt);
TaskUmbilicalProtocol umbilical =
(TaskUmbilicalProtocol)RPC.getProxy(TaskUmbilicalProtocol.class,
TaskUmbilicalProtocol.versionID,
address,
defaultConf);
String pid = "NONE";
JvmContext context = new JvmContext(jvmId, pid);
Task task = null;
try {
JvmTask myTask = umbilical.getTask(context);
task = myTask.getTask();
if (myTask.shouldDie() || task == null) {
LOG.error("Returning from local child");
System.exit(1);
}
JobConf job = new JobConf(task.getJobFile());
File userLogsDir = TaskLog.getBaseDir(task.getTaskID().toString());
userLogsDir.mkdirs();
System.setOut(new PrintStream(new FileOutputStream(
new File(userLogsDir, "stdout"))));
System.setErr(new PrintStream(new FileOutputStream(
new File(userLogsDir, "stderr"))));
task.setConf(job);
task.run(job, umbilical); // run the task
} catch (Exception exception) {
LOG.error("Got exception " + StringUtils.stringifyException(exception));
try {
if (task != null) {
umbilical.statusUpdate(task.getTaskID(), failedStatus(task));
// do cleanup for the task
task.taskCleanup(umbilical);
}
} catch (Exception e) {
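// Ignore failures while reporting the failed status; we exit with an error below.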
}
System.exit(2);
} catch (Throwable throwable) {
LOG.error("Got throwable " + throwable);
if (task != null) {
Throwable tCause = throwable.getCause();
String cause = tCause == null
? throwable.getMessage()
: StringUtils.stringifyException(tCause);
umbilical.fatalError(task.getTaskID(), cause);
}
System.exit(3);
} finally {
RPC.stopProxy(umbilical);
}
}
}
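/**
 * Returns a copy of the task's status with its run state set to FAILED.
 */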
static TaskStatus failedStatus(Task task) {
TaskStatus taskStatus = (TaskStatus) task.taskStatus.clone();
taskStatus.setRunState(TaskStatus.State.FAILED);
return taskStatus;
}
}