
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configuration.IntegerRanges;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.protocol.ClientProtocol;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.util.ConfigUtil;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ReservationId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The job submitter's view of the Job.
 *
 * <p>It allows the user to configure the job, submit it, control its
 * execution, and query the state. The set methods only work until the job
 * is submitted, afterwards they will throw an IllegalStateException.</p>
 *
 * <p>Normally the user creates the application, describes various facets of
 * the job via {@link Job} and then submits the job and monitors its
 * progress.</p>
 *
 * <p>Here is an example on how to submit a job:</p>
 * <p><blockquote><pre>
 *     // Create a new Job
 *     Job job = Job.getInstance();
 *     job.setJarByClass(MyJob.class);
 *     
 *     // Specify various job-specific parameters     
 *     job.setJobName("myjob");
 *     
 *     job.setInputPath(new Path("in"));
 *     job.setOutputPath(new Path("out"));
 *     
 *     job.setMapperClass(MyJob.MyMapper.class);
 *     job.setReducerClass(MyJob.MyReducer.class);
 *
 *     // Submit the job, then poll for progress until the job is complete
 *     job.waitForCompletion(true);
 * 
 * </pre></blockquote></p>
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class Job extends JobContextImpl implements JobContext, AutoCloseable {
  private static final Logger LOG = LoggerFactory.getLogger(Job.class);

  @InterfaceStability.Evolving
  public enum JobState {DEFINE, RUNNING};
  private static final long MAX_JOBSTATUS_AGE = 1000 * 2;
  public static final String OUTPUT_FILTER = "mapreduce.client.output.filter";
  /** Key in mapred-*.xml that sets completionPollInvervalMillis */
  public static final String COMPLETION_POLL_INTERVAL_KEY =
      "mapreduce.client.completion.pollinterval";
  /** Default completionPollIntervalMillis is 5000 ms. */
  static final int DEFAULT_COMPLETION_POLL_INTERVAL = 5000;
  /** Key in mapred-*.xml that sets progMonitorPollIntervalMillis */
  public static final String PROGRESS_MONITOR_POLL_INTERVAL_KEY =
      "mapreduce.client.progressmonitor.pollinterval";
  /** Default progMonitorPollIntervalMillis is 1000 ms. */
  static final int DEFAULT_MONITOR_POLL_INTERVAL = 1000;
  public static final String USED_GENERIC_PARSER =
      "mapreduce.client.genericoptionsparser.used";
  public static final String SUBMIT_REPLICATION =
      "mapreduce.client.submit.file.replication";
  public static final int DEFAULT_SUBMIT_REPLICATION = 10;
  public static final String USE_WILDCARD_FOR_LIBJARS =
      "mapreduce.client.libjars.wildcard";
  public static final boolean DEFAULT_USE_WILDCARD_FOR_LIBJARS = true;

  @InterfaceStability.Evolving
  public enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }

  static {
    ConfigUtil.loadResources();
  }

  private JobState state = JobState.DEFINE;
  private JobStatus status;
  private long statustime;
  private Cluster cluster;
  private ReservationId reservationId;

  /**
   * @deprecated Use {@link #getInstance()}
   */
  @Deprecated
  public Job() throws IOException {
    this(new JobConf(new Configuration()));
  }

  /**
   * @deprecated Use {@link #getInstance(Configuration)}
   */
  @Deprecated
  public Job(Configuration conf) throws IOException {
    this(new JobConf(conf));
  }

  /**
   * @deprecated Use {@link #getInstance(Configuration, String)}
   */
  @Deprecated
  public Job(Configuration conf, String jobName) throws IOException {
    this(new JobConf(conf));
    setJobName(jobName);
  }

  Job(JobConf conf) throws IOException {
    super(conf, null);
    // propagate existing user credentials to job
    this.credentials.mergeAll(this.ugi.getCredentials());
    this.cluster = null;
  }

  Job(JobStatus status, JobConf conf) throws IOException {
    this(conf);
    setJobID(status.getJobID());
    this.status = status;
    state = JobState.RUNNING;
  }

  /**
   * Creates a new {@link Job} with no particular {@link Cluster}.
   * A Cluster will be created with a generic {@link Configuration}.
   *
   * @return the {@link Job}, with no connection to a cluster yet.
   * @throws IOException
   */
  public static Job getInstance() throws IOException {
    // create with a null Cluster
    return getInstance(new Configuration());
  }

  /**
   * Creates a new {@link Job} with no particular {@link Cluster} and a
   * given {@link Configuration}.
   *
   * The Job makes a copy of the Configuration so
   * that any necessary internal modifications do not reflect on the incoming
   * parameter.
   *
   * A Cluster will be created from the conf parameter only when it's needed.
   *
   * @param conf the configuration
   * @return the {@link Job}, with no connection to a cluster yet.
   * @throws IOException
   */
  public static Job getInstance(Configuration conf) throws IOException {
    // create with a null Cluster
    JobConf jobConf = new JobConf(conf);
    return new Job(jobConf);
  }

  /**
   * Creates a new {@link Job} with no particular {@link Cluster} and a given
   * jobName.
* A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param conf the configuration * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException */ public static Job getInstance(Configuration conf, String jobName) throws IOException { // create with a null Cluster Job result = getInstance(conf); result.setJobName(jobName); return result; } /** * Creates a new {@link Job} with no particular {@link Cluster} and given * {@link Configuration} and {@link JobStatus}. * A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param status job status * @param conf job configuration * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException */ public static Job getInstance(JobStatus status, Configuration conf) throws IOException { return new Job(status, new JobConf(conf)); } /** * Creates a new {@link Job} with no particular {@link Cluster}. * A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param ignored * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException * @deprecated Use {@link #getInstance()} */ @Deprecated public static Job getInstance(Cluster ignored) throws IOException { return getInstance(); } /** * Creates a new {@link Job} with no particular {@link Cluster} and given * {@link Configuration}. * A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param ignored * @param conf job configuration * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException * @deprecated Use {@link #getInstance(Configuration)} */ @Deprecated public static Job getInstance(Cluster ignored, Configuration conf) throws IOException { return getInstance(conf); } /** * Creates a new {@link Job} with no particular {@link Cluster} and given * {@link Configuration} and {@link JobStatus}. * A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param cluster cluster * @param status job status * @param conf job configuration * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException */ @Private public static Job getInstance(Cluster cluster, JobStatus status, Configuration conf) throws IOException { Job job = getInstance(status, conf); job.setCluster(cluster); return job; } private void ensureState(JobState state) throws IllegalStateException { if (state != this.state) { throw new IllegalStateException("Job in state "+ this.state + " instead of " + state); } if (state == JobState.RUNNING && cluster == null) { throw new IllegalStateException ("Job in state " + this.state + ", but it isn't attached to any job tracker!"); } } /** * Some methods rely on having a recent job status object. 
Refresh * it, if necessary */ synchronized void ensureFreshStatus() throws IOException { if (System.currentTimeMillis() - statustime > MAX_JOBSTATUS_AGE) { updateStatus(); } } /** Some methods need to update status immediately. So, refresh * immediately * @throws IOException */ synchronized void updateStatus() throws IOException { try { this.status = ugi.doAs(new PrivilegedExceptionAction() { @Override public JobStatus run() throws IOException, InterruptedException { return cluster.getClient().getJobStatus(getJobID()); } }); } catch (InterruptedException ie) { throw new IOException(ie); } if (this.status == null) { throw new IOException("Job status not available "); } this.statustime = System.currentTimeMillis(); } public JobStatus getStatus() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status; } /** * Returns the current state of the Job. * * @return JobStatus#State * @throws IOException * @throws InterruptedException */ public JobStatus.State getJobState() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.getState(); } /** * Get the URL where some job progress information will be displayed. * * @return the URL where some job progress information will be displayed. */ public String getTrackingURL(){ ensureState(JobState.RUNNING); return status.getTrackingUrl().toString(); } /** * Get the path of the submitted job configuration. * * @return the path of the submitted job configuration. */ public String getJobFile() { ensureState(JobState.RUNNING); return status.getJobFile(); } /** * Get start time of the job. * * @return the start time of the job */ public long getStartTime() { ensureState(JobState.RUNNING); return status.getStartTime(); } /** * Get finish time of the job. * * @return the finish time of the job */ public long getFinishTime() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.getFinishTime(); } /** * Get scheduling info of the job. * * @return the scheduling info of the job */ public String getSchedulingInfo() { ensureState(JobState.RUNNING); return status.getSchedulingInfo(); } /** * Get scheduling info of the job. * * @return the priority info of the job */ public JobPriority getPriority() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.getPriority(); } /** * The user-specified job name. */ public String getJobName() { if (state == JobState.DEFINE || status == null) { return super.getJobName(); } ensureState(JobState.RUNNING); return status.getJobName(); } public String getHistoryUrl() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.getHistoryFile(); } public boolean isRetired() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.isRetired(); } @Private public Cluster getCluster() { return cluster; } /** Only for mocks in unit tests. */ @Private private void setCluster(Cluster cluster) { this.cluster = cluster; } /** * Dump stats to screen. 
*/ @Override public String toString() { ensureState(JobState.RUNNING); String reasonforFailure = " "; int numMaps = 0; int numReduces = 0; try { updateStatus(); if (status.getState().equals(JobStatus.State.FAILED)) reasonforFailure = getTaskFailureEventString(); numMaps = getTaskReports(TaskType.MAP).length; numReduces = getTaskReports(TaskType.REDUCE).length; } catch (IOException e) { } catch (InterruptedException ie) { } StringBuffer sb = new StringBuffer(); sb.append("Job: ").append(status.getJobID()).append("\n"); sb.append("Job File: ").append(status.getJobFile()).append("\n"); sb.append("Job Tracking URL : ").append(status.getTrackingUrl()); sb.append("\n"); sb.append("Uber job : ").append(status.isUber()).append("\n"); sb.append("Number of maps: ").append(numMaps).append("\n"); sb.append("Number of reduces: ").append(numReduces).append("\n"); sb.append("map() completion: "); sb.append(status.getMapProgress()).append("\n"); sb.append("reduce() completion: "); sb.append(status.getReduceProgress()).append("\n"); sb.append("Job state: "); sb.append(status.getState()).append("\n"); sb.append("retired: ").append(status.isRetired()).append("\n"); sb.append("reason for failure: ").append(reasonforFailure); return sb.toString(); } /** * @return taskid which caused job failure * @throws IOException * @throws InterruptedException */ String getTaskFailureEventString() throws IOException, InterruptedException { int failCount = 1; TaskCompletionEvent lastEvent = null; TaskCompletionEvent[] events = ugi.doAs(new PrivilegedExceptionAction() { @Override public TaskCompletionEvent[] run() throws IOException, InterruptedException { return cluster.getClient().getTaskCompletionEvents( status.getJobID(), 0, 10); } }); for (TaskCompletionEvent event : events) { if (event.getStatus().equals(TaskCompletionEvent.Status.FAILED)) { failCount++; lastEvent = event; } } if (lastEvent == null) { return "There are no failed tasks for the job. " + "Job is failed due to some other reason and reason " + "can be found in the logs."; } String[] taskAttemptID = lastEvent.getTaskAttemptId().toString().split("_", 2); String taskID = taskAttemptID[1].substring(0, taskAttemptID[1].length()-2); return (" task " + taskID + " failed " + failCount + " times " + "For details check tasktracker at: " + lastEvent.getTaskTrackerHttp()); } /** * Get the information of the current state of the tasks of a job. * * @param type Type of the task * @return the list of all of the map tips. * @throws IOException */ public TaskReport[] getTaskReports(TaskType type) throws IOException, InterruptedException { ensureState(JobState.RUNNING); final TaskType tmpType = type; return ugi.doAs(new PrivilegedExceptionAction() { public TaskReport[] run() throws IOException, InterruptedException { return cluster.getClient().getTaskReports(getJobID(), tmpType); } }); } /** * Get the progress of the job's map-tasks, as a float between 0.0 * and 1.0. When all map tasks have completed, the function returns 1.0. * * @return the progress of the job's map-tasks. * @throws IOException */ public float mapProgress() throws IOException { ensureState(JobState.RUNNING); ensureFreshStatus(); return status.getMapProgress(); } /** * Get the progress of the job's reduce-tasks, as a float between 0.0 * and 1.0. When all reduce tasks have completed, the function returns 1.0. * * @return the progress of the job's reduce-tasks. 
* @throws IOException */ public float reduceProgress() throws IOException { ensureState(JobState.RUNNING); ensureFreshStatus(); return status.getReduceProgress(); } /** * Get the progress of the job's cleanup-tasks, as a float between 0.0 * and 1.0. When all cleanup tasks have completed, the function returns 1.0. * * @return the progress of the job's cleanup-tasks. * @throws IOException */ public float cleanupProgress() throws IOException, InterruptedException { ensureState(JobState.RUNNING); ensureFreshStatus(); return status.getCleanupProgress(); } /** * Get the progress of the job's setup-tasks, as a float between 0.0 * and 1.0. When all setup tasks have completed, the function returns 1.0. * * @return the progress of the job's setup-tasks. * @throws IOException */ public float setupProgress() throws IOException { ensureState(JobState.RUNNING); ensureFreshStatus(); return status.getSetupProgress(); } /** * Check if the job is finished or not. * This is a non-blocking call. * * @return true if the job is complete, else false. * @throws IOException */ public boolean isComplete() throws IOException { ensureState(JobState.RUNNING); updateStatus(); return status.isJobComplete(); } /** * Check if the job completed successfully. * * @return true if the job succeeded, else false. * @throws IOException */ public boolean isSuccessful() throws IOException { ensureState(JobState.RUNNING); updateStatus(); return status.getState() == JobStatus.State.SUCCEEDED; } /** * Kill the running job. Blocks until all job tasks have been * killed as well. If the job is no longer running, it simply returns. * * @throws IOException */ public void killJob() throws IOException { ensureState(JobState.RUNNING); try { cluster.getClient().killJob(getJobID()); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Set the priority of a running job. * @param jobPriority the new priority for the job. * @throws IOException */ public void setPriority(JobPriority jobPriority) throws IOException, InterruptedException { if (state == JobState.DEFINE) { if (jobPriority == JobPriority.UNDEFINED_PRIORITY) { conf.setJobPriorityAsInteger(convertPriorityToInteger(jobPriority)); } else { conf.setJobPriority(org.apache.hadoop.mapred.JobPriority .valueOf(jobPriority.name())); } } else { ensureState(JobState.RUNNING); final int tmpPriority = convertPriorityToInteger(jobPriority); ugi.doAs(new PrivilegedExceptionAction() { @Override public Object run() throws IOException, InterruptedException { cluster.getClient() .setJobPriority(getJobID(), Integer.toString(tmpPriority)); return null; } }); } } /** * Set the priority of a running job. * * @param jobPriority * the new priority for the job. 
* @throws IOException */ public void setPriorityAsInteger(int jobPriority) throws IOException, InterruptedException { if (state == JobState.DEFINE) { conf.setJobPriorityAsInteger(jobPriority); } else { ensureState(JobState.RUNNING); final int tmpPriority = jobPriority; ugi.doAs(new PrivilegedExceptionAction() { @Override public Object run() throws IOException, InterruptedException { cluster.getClient() .setJobPriority(getJobID(), Integer.toString(tmpPriority)); return null; } }); } } private int convertPriorityToInteger(JobPriority jobPriority) { switch (jobPriority) { case VERY_HIGH : return 5; case HIGH : return 4; case NORMAL : return 3; case LOW : return 2; case VERY_LOW : return 1; case DEFAULT : return 0; default: break; } // For UNDEFINED_PRIORITY, we can set it to default for better handling return 0; } /** * Get events indicating completion (success/failure) of component tasks. * * @param startFrom index to start fetching events from * @param numEvents number of events to fetch * @return an array of {@link TaskCompletionEvent}s * @throws IOException */ public TaskCompletionEvent[] getTaskCompletionEvents(final int startFrom, final int numEvents) throws IOException, InterruptedException { ensureState(JobState.RUNNING); return ugi.doAs(new PrivilegedExceptionAction() { @Override public TaskCompletionEvent[] run() throws IOException, InterruptedException { return cluster.getClient().getTaskCompletionEvents(getJobID(), startFrom, numEvents); } }); } /** * Get events indicating completion (success/failure) of component tasks. * * @param startFrom index to start fetching events from * @return an array of {@link org.apache.hadoop.mapred.TaskCompletionEvent}s * @throws IOException */ public org.apache.hadoop.mapred.TaskCompletionEvent[] getTaskCompletionEvents(final int startFrom) throws IOException { try { TaskCompletionEvent[] events = getTaskCompletionEvents(startFrom, 10); org.apache.hadoop.mapred.TaskCompletionEvent[] retEvents = new org.apache.hadoop.mapred.TaskCompletionEvent[events.length]; for (int i = 0; i < events.length; i++) { retEvents[i] = org.apache.hadoop.mapred.TaskCompletionEvent.downgrade (events[i]); } return retEvents; } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Kill indicated task attempt. * @param taskId the id of the task to kill. * @param shouldFail if true the task is failed and added * to failed tasks list, otherwise it is just killed, * w/o affecting job failure status. */ @Private public boolean killTask(final TaskAttemptID taskId, final boolean shouldFail) throws IOException { ensureState(JobState.RUNNING); try { return ugi.doAs(new PrivilegedExceptionAction() { public Boolean run() throws IOException, InterruptedException { return cluster.getClient().killTask(taskId, shouldFail); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Kill indicated task attempt. * * @param taskId the id of the task to be terminated. * @throws IOException */ public void killTask(final TaskAttemptID taskId) throws IOException { killTask(taskId, false); } /** * Fail indicated task attempt. * * @param taskId the id of the task to be terminated. * @throws IOException */ public void failTask(final TaskAttemptID taskId) throws IOException { killTask(taskId, true); } /** * Gets the counters for this job. May return null if the job has been * retired and the job is no longer in the completed job store. * * @return the counters for this job. 
* @throws IOException */ public Counters getCounters() throws IOException { ensureState(JobState.RUNNING); try { return ugi.doAs(new PrivilegedExceptionAction() { @Override public Counters run() throws IOException, InterruptedException { return cluster.getClient().getJobCounters(getJobID()); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Gets the diagnostic messages for a given task attempt. * @param taskid * @return the list of diagnostic messages for the task * @throws IOException */ public String[] getTaskDiagnostics(final TaskAttemptID taskid) throws IOException, InterruptedException { ensureState(JobState.RUNNING); return ugi.doAs(new PrivilegedExceptionAction() { @Override public String[] run() throws IOException, InterruptedException { return cluster.getClient().getTaskDiagnostics(taskid); } }); } /** * Set the number of reduce tasks for the job. * @param tasks the number of reduce tasks * @throws IllegalStateException if the job is submitted */ public void setNumReduceTasks(int tasks) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setNumReduceTasks(tasks); } /** * Set the current working directory for the default file system. * * @param dir the new current working directory. * @throws IllegalStateException if the job is submitted */ public void setWorkingDirectory(Path dir) throws IOException { ensureState(JobState.DEFINE); conf.setWorkingDirectory(dir); } /** * Set the {@link InputFormat} for the job. * @param cls the InputFormat to use * @throws IllegalStateException if the job is submitted */ public void setInputFormatClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(INPUT_FORMAT_CLASS_ATTR, cls, InputFormat.class); } /** * Set the {@link OutputFormat} for the job. * @param cls the OutputFormat to use * @throws IllegalStateException if the job is submitted */ public void setOutputFormatClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(OUTPUT_FORMAT_CLASS_ATTR, cls, OutputFormat.class); } /** * Set the {@link Mapper} for the job. * @param cls the Mapper to use * @throws IllegalStateException if the job is submitted */ public void setMapperClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(MAP_CLASS_ATTR, cls, Mapper.class); } /** * Set the Jar by finding where a given class came from. * @param cls the example class */ public void setJarByClass(Class cls) { ensureState(JobState.DEFINE); conf.setJarByClass(cls); } /** * Set the job jar */ public void setJar(String jar) { ensureState(JobState.DEFINE); conf.setJar(jar); } /** * Set the reported username for this job. * * @param user the username for this job. */ public void setUser(String user) { ensureState(JobState.DEFINE); conf.setUser(user); } /** * Set the combiner class for the job. * @param cls the combiner to use * @throws IllegalStateException if the job is submitted */ public void setCombinerClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(COMBINE_CLASS_ATTR, cls, Reducer.class); } /** * Set the {@link Reducer} for the job. * @param cls the Reducer to use * @throws IllegalStateException if the job is submitted */ public void setReducerClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(REDUCE_CLASS_ATTR, cls, Reducer.class); } /** * Set the {@link Partitioner} for the job. 
* @param cls the Partitioner to use * @throws IllegalStateException if the job is submitted */ public void setPartitionerClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(PARTITIONER_CLASS_ATTR, cls, Partitioner.class); } /** * Set the key class for the map output data. This allows the user to * specify the map output key class to be different than the final output * value class. * * @param theClass the map output key class. * @throws IllegalStateException if the job is submitted */ public void setMapOutputKeyClass(Class theClass ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setMapOutputKeyClass(theClass); } /** * Set the value class for the map output data. This allows the user to * specify the map output value class to be different than the final output * value class. * * @param theClass the map output value class. * @throws IllegalStateException if the job is submitted */ public void setMapOutputValueClass(Class theClass ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setMapOutputValueClass(theClass); } /** * Set the key class for the job output data. * * @param theClass the key class for the job output data. * @throws IllegalStateException if the job is submitted */ public void setOutputKeyClass(Class theClass ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setOutputKeyClass(theClass); } /** * Set the value class for job outputs. * * @param theClass the value class for job outputs. * @throws IllegalStateException if the job is submitted */ public void setOutputValueClass(Class theClass ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setOutputValueClass(theClass); } /** * Define the comparator that controls which keys are grouped together * for a single call to combiner, * {@link Reducer#reduce(Object, Iterable, * org.apache.hadoop.mapreduce.Reducer.Context)} * * @param cls the raw comparator to use * @throws IllegalStateException if the job is submitted */ public void setCombinerKeyGroupingComparatorClass( Class cls) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setCombinerKeyGroupingComparator(cls); } /** * Define the comparator that controls how the keys are sorted before they * are passed to the {@link Reducer}. * @param cls the raw comparator * @throws IllegalStateException if the job is submitted * @see #setCombinerKeyGroupingComparatorClass(Class) */ public void setSortComparatorClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setOutputKeyComparatorClass(cls); } /** * Define the comparator that controls which keys are grouped together * for a single call to * {@link Reducer#reduce(Object, Iterable, * org.apache.hadoop.mapreduce.Reducer.Context)} * @param cls the raw comparator to use * @throws IllegalStateException if the job is submitted * @see #setCombinerKeyGroupingComparatorClass(Class) */ public void setGroupingComparatorClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setOutputValueGroupingComparator(cls); } /** * Set the user-specified job name. * * @param name the job's new name. * @throws IllegalStateException if the job is submitted */ public void setJobName(String name) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setJobName(name); } /** * Turn speculative execution on or off for this job. * * @param speculativeExecution true if speculative execution * should be turned on, else false. 
*/ public void setSpeculativeExecution(boolean speculativeExecution) { ensureState(JobState.DEFINE); conf.setSpeculativeExecution(speculativeExecution); } /** * Turn speculative execution on or off for this job for map tasks. * * @param speculativeExecution true if speculative execution * should be turned on for map tasks, * else false. */ public void setMapSpeculativeExecution(boolean speculativeExecution) { ensureState(JobState.DEFINE); conf.setMapSpeculativeExecution(speculativeExecution); } /** * Turn speculative execution on or off for this job for reduce tasks. * * @param speculativeExecution true if speculative execution * should be turned on for reduce tasks, * else false. */ public void setReduceSpeculativeExecution(boolean speculativeExecution) { ensureState(JobState.DEFINE); conf.setReduceSpeculativeExecution(speculativeExecution); } /** * Specify whether job-setup and job-cleanup is needed for the job * * @param needed If true, job-setup and job-cleanup will be * considered from {@link OutputCommitter} * else ignored. */ public void setJobSetupCleanupNeeded(boolean needed) { ensureState(JobState.DEFINE); conf.setBoolean(SETUP_CLEANUP_NEEDED, needed); } /** * Set the given set of archives * @param archives The list of archives that need to be localized */ public void setCacheArchives(URI[] archives) { ensureState(JobState.DEFINE); DistributedCache.setCacheArchives(archives, conf); } /** * Set the given set of files * @param files The list of files that need to be localized */ public void setCacheFiles(URI[] files) { ensureState(JobState.DEFINE); DistributedCache.setCacheFiles(files, conf); } /** * Add a archives to be localized * @param uri The uri of the cache to be localized */ public void addCacheArchive(URI uri) { ensureState(JobState.DEFINE); DistributedCache.addCacheArchive(uri, conf); } /** * Add a file to be localized * @param uri The uri of the cache to be localized */ public void addCacheFile(URI uri) { ensureState(JobState.DEFINE); DistributedCache.addCacheFile(uri, conf); } /** * Add an file path to the current set of classpath entries It adds the file * to cache as well. * * Files added with this method will not be unpacked while being added to the * classpath. * To add archives to classpath, use the {@link #addArchiveToClassPath(Path)} * method instead. * * @param file Path of the file to be added */ public void addFileToClassPath(Path file) throws IOException { ensureState(JobState.DEFINE); DistributedCache.addFileToClassPath(file, conf, file.getFileSystem(conf)); } /** * Add an archive path to the current set of classpath entries. It adds the * archive to cache as well. * * Archive files will be unpacked and added to the classpath * when being distributed. * * @param archive Path of the archive to be added */ public void addArchiveToClassPath(Path archive) throws IOException { ensureState(JobState.DEFINE); DistributedCache.addArchiveToClassPath(archive, conf, archive.getFileSystem(conf)); } /** * Originally intended to enable symlinks, but currently symlinks cannot be * disabled. */ @Deprecated public void createSymlink() { ensureState(JobState.DEFINE); DistributedCache.createSymlink(conf); } /** * Expert: Set the number of maximum attempts that will be made to run a * map task. * * @param n the number of attempts per map task. */ public void setMaxMapAttempts(int n) { ensureState(JobState.DEFINE); conf.setMaxMapAttempts(n); } /** * Expert: Set the number of maximum attempts that will be made to run a * reduce task. 
* * @param n the number of attempts per reduce task. */ public void setMaxReduceAttempts(int n) { ensureState(JobState.DEFINE); conf.setMaxReduceAttempts(n); } /** * Set whether the system should collect profiler information for some of * the tasks in this job? The information is stored in the user log * directory. * @param newValue true means it should be gathered */ public void setProfileEnabled(boolean newValue) { ensureState(JobState.DEFINE); conf.setProfileEnabled(newValue); } /** * Set the profiler configuration arguments. If the string contains a '%s' it * will be replaced with the name of the profiling output file when the task * runs. * * This value is passed to the task child JVM on the command line. * * @param value the configuration string */ public void setProfileParams(String value) { ensureState(JobState.DEFINE); conf.setProfileParams(value); } /** * Set the ranges of maps or reduces to profile. setProfileEnabled(true) * must also be called. * @param newValue a set of integer ranges of the map ids */ public void setProfileTaskRange(boolean isMap, String newValue) { ensureState(JobState.DEFINE); conf.setProfileTaskRange(isMap, newValue); } private void ensureNotSet(String attr, String msg) throws IOException { if (conf.get(attr) != null) { throw new IOException(attr + " is incompatible with " + msg + " mode."); } } /** * Sets the flag that will allow the JobTracker to cancel the HDFS delegation * tokens upon job completion. Defaults to true. */ public void setCancelDelegationTokenUponJobCompletion(boolean value) { ensureState(JobState.DEFINE); conf.setBoolean(JOB_CANCEL_DELEGATION_TOKEN, value); } /** * Default to the new APIs unless they are explicitly set or the old mapper or * reduce attributes are used. * @throws IOException if the configuration is inconsistent */ private void setUseNewAPI() throws IOException { int numReduces = conf.getNumReduceTasks(); String oldMapperClass = "mapred.mapper.class"; String oldReduceClass = "mapred.reducer.class"; conf.setBooleanIfUnset("mapred.mapper.new-api", conf.get(oldMapperClass) == null); if (conf.getUseNewMapper()) { String mode = "new map API"; ensureNotSet("mapred.input.format.class", mode); ensureNotSet(oldMapperClass, mode); if (numReduces != 0) { ensureNotSet("mapred.partitioner.class", mode); } else { ensureNotSet("mapred.output.format.class", mode); } } else { String mode = "map compatibility"; ensureNotSet(INPUT_FORMAT_CLASS_ATTR, mode); ensureNotSet(MAP_CLASS_ATTR, mode); if (numReduces != 0) { ensureNotSet(PARTITIONER_CLASS_ATTR, mode); } else { ensureNotSet(OUTPUT_FORMAT_CLASS_ATTR, mode); } } if (numReduces != 0) { conf.setBooleanIfUnset("mapred.reducer.new-api", conf.get(oldReduceClass) == null); if (conf.getUseNewReducer()) { String mode = "new reduce API"; ensureNotSet("mapred.output.format.class", mode); ensureNotSet(oldReduceClass, mode); } else { String mode = "reduce compatibility"; ensureNotSet(OUTPUT_FORMAT_CLASS_ATTR, mode); ensureNotSet(REDUCE_CLASS_ATTR, mode); } } } /** * Add a file to job config for shared cache processing. If shared cache is * enabled, it will return true, otherwise, return false. We don't check with * SCM here given application might not be able to provide the job id; * ClientSCMProtocol.use requires the application id. Job Submitter will read * the files from job config and take care of things. * * @param resource The resource that Job Submitter will process later using * shared cache. 
* @param conf Configuration to add the resource to * @return whether the resource has been added to the configuration */ @Unstable public static boolean addFileToSharedCache(URI resource, Configuration conf) { SharedCacheConfig scConfig = new SharedCacheConfig(); scConfig.init(conf); if (scConfig.isSharedCacheFilesEnabled()) { String files = conf.get(MRJobConfig.FILES_FOR_SHARED_CACHE); conf.set( MRJobConfig.FILES_FOR_SHARED_CACHE, files == null ? resource.toString() : files + "," + resource.toString()); return true; } else { return false; } } /** * Add a file to job config for shared cache processing. If shared cache is * enabled, it will return true, otherwise, return false. We don't check with * SCM here given application might not be able to provide the job id; * ClientSCMProtocol.use requires the application id. Job Submitter will read * the files from job config and take care of things. Job Submitter will also * add the file to classpath. Intended to be used by user code. * * @param resource The resource that Job Submitter will process later using * shared cache. * @param conf Configuration to add the resource to * @return whether the resource has been added to the configuration */ @Unstable public static boolean addFileToSharedCacheAndClasspath(URI resource, Configuration conf) { SharedCacheConfig scConfig = new SharedCacheConfig(); scConfig.init(conf); if (scConfig.isSharedCacheLibjarsEnabled()) { String files = conf.get(MRJobConfig.FILES_FOR_CLASSPATH_AND_SHARED_CACHE); conf.set( MRJobConfig.FILES_FOR_CLASSPATH_AND_SHARED_CACHE, files == null ? resource.toString() : files + "," + resource.toString()); return true; } else { return false; } } /** * Add an archive to job config for shared cache processing. If shared cache * is enabled, it will return true, otherwise, return false. We don't check * with SCM here given application might not be able to provide the job id; * ClientSCMProtocol.use requires the application id. Job Submitter will read * the files from job config and take care of things. Intended to be used by * user code. * * @param resource The resource that Job Submitter will process later using * shared cache. * @param conf Configuration to add the resource to * @return whether the resource has been added to the configuration */ @Unstable public static boolean addArchiveToSharedCache(URI resource, Configuration conf) { SharedCacheConfig scConfig = new SharedCacheConfig(); scConfig.init(conf); if (scConfig.isSharedCacheArchivesEnabled()) { String files = conf.get(MRJobConfig.ARCHIVES_FOR_SHARED_CACHE); conf.set( MRJobConfig.ARCHIVES_FOR_SHARED_CACHE, files == null ? resource.toString() : files + "," + resource.toString()); return true; } else { return false; } } /** * This is to set the shared cache upload policies for files. If the parameter * was previously set, this method will replace the old value with the new * provided map. * * @param conf Configuration which stores the shared cache upload policies * @param policies A map containing the shared cache upload policies for a set * of resources. The key is the url of the resource and the value is * the upload policy. True if it should be uploaded, false otherwise. */ @Unstable public static void setFileSharedCacheUploadPolicies(Configuration conf, Map policies) { setSharedCacheUploadPolicies(conf, policies, true); } /** * This is to set the shared cache upload policies for archives. If the * parameter was previously set, this method will replace the old value with * the new provided map. 
* * @param conf Configuration which stores the shared cache upload policies * @param policies A map containing the shared cache upload policies for a set * of resources. The key is the url of the resource and the value is * the upload policy. True if it should be uploaded, false otherwise. */ @Unstable public static void setArchiveSharedCacheUploadPolicies(Configuration conf, Map policies) { setSharedCacheUploadPolicies(conf, policies, false); } // We use a double colon because a colon is a reserved character in a URI and // there should not be two colons next to each other. private static final String DELIM = "::"; /** * Set the shared cache upload policies config parameter. This is done by * serializing the provided map of shared cache upload policies into a config * parameter. If the parameter was previously set, this method will replace * the old value with the new provided map. * * @param conf Configuration which stores the shared cache upload policies * @param policies A map containing the shared cache upload policies for a set * of resources. The key is the url of the resource and the value is * the upload policy. True if it should be uploaded, false otherwise. * @param areFiles True if these policies are for files, false if they are for * archives. */ private static void setSharedCacheUploadPolicies(Configuration conf, Map policies, boolean areFiles) { if (policies != null) { StringBuilder sb = new StringBuilder(); Iterator> it = policies.entrySet().iterator(); Map.Entry e; if (it.hasNext()) { e = it.next(); sb.append(e.getKey() + DELIM + e.getValue()); } else { // policies is an empty map, just skip setting the parameter return; } while (it.hasNext()) { e = it.next(); sb.append("," + e.getKey() + DELIM + e.getValue()); } String confParam = areFiles ? MRJobConfig.CACHE_FILES_SHARED_CACHE_UPLOAD_POLICIES : MRJobConfig.CACHE_ARCHIVES_SHARED_CACHE_UPLOAD_POLICIES; conf.set(confParam, sb.toString()); } } /** * Deserialize a map of shared cache upload policies from a config parameter. * * @param conf Configuration which stores the shared cache upload policies * @param areFiles True if these policies are for files, false if they are for * archives. * @return A map containing the shared cache upload policies for a set of * resources. The key is the url of the resource and the value is the * upload policy. True if it should be uploaded, false otherwise. */ private static Map getSharedCacheUploadPolicies( Configuration conf, boolean areFiles) { String confParam = areFiles ? MRJobConfig.CACHE_FILES_SHARED_CACHE_UPLOAD_POLICIES : MRJobConfig.CACHE_ARCHIVES_SHARED_CACHE_UPLOAD_POLICIES; Collection policies = conf.getStringCollection(confParam); String[] policy; Map policyMap = new LinkedHashMap(); for (String s : policies) { policy = s.split(DELIM); if (policy.length != 2) { LOG.error(confParam + " is mis-formatted, returning empty shared cache upload policies." + " Error on [" + s + "]"); return new LinkedHashMap(); } policyMap.put(policy[0], Boolean.parseBoolean(policy[1])); } return policyMap; } /** * This is to get the shared cache upload policies for files. * * @param conf Configuration which stores the shared cache upload policies * @return A map containing the shared cache upload policies for a set of * resources. The key is the url of the resource and the value is the * upload policy. True if it should be uploaded, false otherwise. 
   */
  @Unstable
  public static Map<String, Boolean> getFileSharedCacheUploadPolicies(
      Configuration conf) {
    return getSharedCacheUploadPolicies(conf, true);
  }

  /**
   * This is to get the shared cache upload policies for archives.
   *
   * @param conf Configuration which stores the shared cache upload policies
   * @return A map containing the shared cache upload policies for a set of
   *         resources. The key is the url of the resource and the value is the
   *         upload policy. True if it should be uploaded, false otherwise.
   */
  @Unstable
  public static Map<String, Boolean> getArchiveSharedCacheUploadPolicies(
      Configuration conf) {
    return getSharedCacheUploadPolicies(conf, false);
  }

  private synchronized void connect()
      throws IOException, InterruptedException, ClassNotFoundException {
    if (cluster == null) {
      cluster = ugi.doAs(new PrivilegedExceptionAction<Cluster>() {
        public Cluster run()
            throws IOException, InterruptedException, ClassNotFoundException {
          return new Cluster(getConfiguration());
        }
      });
    }
  }

  boolean isConnected() {
    return cluster != null;
  }

  /** Only for mocking via unit tests. */
  @Private
  public JobSubmitter getJobSubmitter(FileSystem fs,
      ClientProtocol submitClient) throws IOException {
    return new JobSubmitter(fs, submitClient);
  }

  /**
   * Submit the job to the cluster and return immediately.
   * @throws IOException
   */
  public void submit()
      throws IOException, InterruptedException, ClassNotFoundException {
    ensureState(JobState.DEFINE);
    setUseNewAPI();
    connect();
    final JobSubmitter submitter =
        getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
    status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
      public JobStatus run() throws IOException, InterruptedException,
          ClassNotFoundException {
        return submitter.submitJobInternal(Job.this, cluster);
      }
    });
    state = JobState.RUNNING;
    LOG.info("The url to track the job: " + getTrackingURL());
  }

  /**
   * Submit the job to the cluster and wait for it to finish.
   * @param verbose print the progress to the user
   * @return true if the job succeeded
   * @throws IOException thrown if the communication with the
   *         JobTracker is lost
   */
  public boolean waitForCompletion(boolean verbose)
      throws IOException, InterruptedException, ClassNotFoundException {
    if (state == JobState.DEFINE) {
      submit();
    }
    if (verbose) {
      monitorAndPrintJob();
    } else {
      // get the completion poll interval from the client.
      int completionPollIntervalMillis =
          Job.getCompletionPollInterval(cluster.getConf());
      while (!isComplete()) {
        try {
          Thread.sleep(completionPollIntervalMillis);
        } catch (InterruptedException ie) {
        }
      }
    }
    return isSuccessful();
  }

  /**
   * Monitor a job and print status in real-time as progress is made and tasks
   * fail.
   * @return true if the job succeeded
   * @throws IOException if communication to the JobTracker fails
   */
  public boolean monitorAndPrintJob()
      throws IOException, InterruptedException {
    String lastReport = null;
    Job.TaskStatusFilter filter;
    Configuration clientConf = getConfiguration();
    filter = Job.getTaskOutputFilter(clientConf);
    JobID jobId = getJobID();
    LOG.info("Running job: " + jobId);
    int eventCounter = 0;
    boolean profiling = getProfileEnabled();
    IntegerRanges mapRanges = getProfileTaskRange(true);
    IntegerRanges reduceRanges = getProfileTaskRange(false);
    int progMonitorPollIntervalMillis =
        Job.getProgressPollInterval(clientConf);
    /* make sure to report full progress after the job is done */
    boolean reportedAfterCompletion = false;
    boolean reportedUberMode = false;
    while (!isComplete() || !reportedAfterCompletion) {
      if (isComplete()) {
        reportedAfterCompletion = true;
      } else {
        Thread.sleep(progMonitorPollIntervalMillis);
      }
      if (status.getState() == JobStatus.State.PREP) {
        continue;
      }
      if (!reportedUberMode) {
        reportedUberMode = true;
        LOG.info("Job " + jobId + " running in uber mode : " + isUber());
      }
      String report =
          (" map " + StringUtils.formatPercent(mapProgress(), 0) +
           " reduce " + StringUtils.formatPercent(reduceProgress(), 0));
      if (!report.equals(lastReport)) {
        LOG.info(report);
        lastReport = report;
      }
      TaskCompletionEvent[] events =
          getTaskCompletionEvents(eventCounter, 10);
      eventCounter += events.length;
      printTaskEvents(events, filter, profiling, mapRanges, reduceRanges);
    }
    boolean success = isSuccessful();
    if (success) {
      LOG.info("Job " + jobId + " completed successfully");
    } else {
      LOG.info("Job " + jobId + " failed with state " + status.getState() +
          " due to: " + status.getFailureInfo());
    }
    Counters counters = getCounters();
    if (counters != null) {
      LOG.info(counters.toString());
    }
    return success;
  }

  private void printTaskEvents(TaskCompletionEvent[] events,
      Job.TaskStatusFilter filter, boolean profiling, IntegerRanges mapRanges,
      IntegerRanges reduceRanges) throws IOException, InterruptedException {
    for (TaskCompletionEvent event : events) {
      switch (filter) {
      case NONE:
        break;
      case SUCCEEDED:
        if (event.getStatus() == TaskCompletionEvent.Status.SUCCEEDED) {
          LOG.info(event.toString());
        }
        break;
      case FAILED:
        if (event.getStatus() == TaskCompletionEvent.Status.FAILED) {
          LOG.info(event.toString());
          // Displaying the task diagnostic information
          TaskAttemptID taskId = event.getTaskAttemptId();
          String[] taskDiagnostics = getTaskDiagnostics(taskId);
          if (taskDiagnostics != null) {
            for (String diagnostics : taskDiagnostics) {
              System.err.println(diagnostics);
            }
          }
        }
        break;
      case KILLED:
        if (event.getStatus() == TaskCompletionEvent.Status.KILLED){
          LOG.info(event.toString());
        }
        break;
      case ALL:
        LOG.info(event.toString());
        break;
      }
    }
  }

  /** The interval at which monitorAndPrintJob() prints status */
  public static int getProgressPollInterval(Configuration conf) {
    // Read progress monitor poll interval from config. Default is 1 second.
    int progMonitorPollIntervalMillis = conf.getInt(
        PROGRESS_MONITOR_POLL_INTERVAL_KEY, DEFAULT_MONITOR_POLL_INTERVAL);
    if (progMonitorPollIntervalMillis < 1) {
      LOG.warn(PROGRESS_MONITOR_POLL_INTERVAL_KEY +
          " has been set to an invalid value; " +
          " replacing with " + DEFAULT_MONITOR_POLL_INTERVAL);
      progMonitorPollIntervalMillis = DEFAULT_MONITOR_POLL_INTERVAL;
    }
    return progMonitorPollIntervalMillis;
  }

  /** The interval at which waitForCompletion() should check. */
  public static int getCompletionPollInterval(Configuration conf) {
    int completionPollIntervalMillis = conf.getInt(
        COMPLETION_POLL_INTERVAL_KEY, DEFAULT_COMPLETION_POLL_INTERVAL);
    if (completionPollIntervalMillis < 1) {
      LOG.warn(COMPLETION_POLL_INTERVAL_KEY +
          " has been set to an invalid value; " +
          "replacing with " + DEFAULT_COMPLETION_POLL_INTERVAL);
      completionPollIntervalMillis = DEFAULT_COMPLETION_POLL_INTERVAL;
    }
    return completionPollIntervalMillis;
  }

  /**
   * Get the task output filter.
   *
   * @param conf the configuration.
   * @return the filter level.
   */
  public static TaskStatusFilter getTaskOutputFilter(Configuration conf) {
    return TaskStatusFilter.valueOf(conf.get(Job.OUTPUT_FILTER, "FAILED"));
  }

  /**
   * Modify the Configuration to set the task output filter.
   *
   * @param conf the Configuration to modify.
   * @param newValue the value to set.
   */
  public static void setTaskOutputFilter(Configuration conf,
      TaskStatusFilter newValue) {
    conf.set(Job.OUTPUT_FILTER, newValue.toString());
  }

  public boolean isUber() throws IOException, InterruptedException {
    ensureState(JobState.RUNNING);
    updateStatus();
    return status.isUber();
  }

  /**
   * Get the reservation to which the job is submitted to, if any
   *
   * @return the reservationId the identifier of the job's reservation, null if
   *         the job does not have any reservation associated with it
   */
  public ReservationId getReservationId() {
    return reservationId;
  }

  /**
   * Set the reservation to which the job is submitted to
   *
   * @param reservationId the reservationId to set
   */
  public void setReservationId(ReservationId reservationId) {
    this.reservationId = reservationId;
  }

  /**
   * Close the Job.
   * @throws IOException if fail to close.
   */
  @Override
  public void close() throws IOException {
    if (cluster != null) {
      cluster.close();
      cluster = null;
    }
  }
}
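
/*
 * Illustrative usage sketch -- not part of the original Job.java source. It
 * shows the configure/submit/wait sequence described in the class javadoc,
 * using the identity Mapper and Reducer from this package so it compiles as
 * written; the class name, job name, and the "in"/"out" paths are assumptions
 * made for the example and should be replaced with real values.
 */
class JobSubmissionExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "example-job");
    job.setJarByClass(JobSubmissionExample.class);

    // Identity map/reduce: input records pass through unchanged.
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    // Input and output locations on the default FileSystem (assumed paths).
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat
        .addInputPath(job, new Path("in"));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
        .setOutputPath(job, new Path("out"));

    // Submit the job and poll for progress until it completes; alternatively,
    // call submit() and poll isComplete()/mapProgress() without blocking.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
  }
}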