
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configuration.IntegerRanges;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.protocol.ClientProtocol;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.util.ConfigUtil;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ReservationId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The job submitter's view of the Job.
 *
 * <p>It allows the user to configure the job, submit it, control its
 * execution, and query the state. The set methods only work until the job
 * is submitted, afterwards they will throw an IllegalStateException.</p>
 *
 * <p>Normally the user creates the application, describes various facets of
 * the job via {@link Job} and then submits the job and monitors its
 * progress.</p>
 *
 * <p>Here is an example on how to submit a job:</p>
 * <p><blockquote><pre>
 *     // Create a new Job
 *     Job job = Job.getInstance();
 *     job.setJarByClass(MyJob.class);
 *     
 *     // Specify various job-specific parameters     
 *     job.setJobName("myjob");
 *     
 *     job.setInputPath(new Path("in"));
 *     job.setOutputPath(new Path("out"));
 *     
 *     job.setMapperClass(MyJob.MyMapper.class);
 *     job.setReducerClass(MyJob.MyReducer.class);
 *
 *     // Submit the job, then poll for progress until the job is complete
 *     job.waitForCompletion(true);
 * 
 * </pre></blockquote></p>
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class Job extends JobContextImpl implements JobContext, AutoCloseable {
  private static final Logger LOG = LoggerFactory.getLogger(Job.class);

  @InterfaceStability.Evolving
  public enum JobState {DEFINE, RUNNING};
  private static final long MAX_JOBSTATUS_AGE = 1000 * 2;
  public static final String OUTPUT_FILTER = "mapreduce.client.output.filter";
  /** Key in mapred-*.xml that sets completionPollInvervalMillis */
  public static final String COMPLETION_POLL_INTERVAL_KEY =
      "mapreduce.client.completion.pollinterval";
  /** Default completionPollIntervalMillis is 5000 ms. */
  static final int DEFAULT_COMPLETION_POLL_INTERVAL = 5000;
  /** Key in mapred-*.xml that sets progMonitorPollIntervalMillis */
  public static final String PROGRESS_MONITOR_POLL_INTERVAL_KEY =
      "mapreduce.client.progressmonitor.pollinterval";
  /** Default progMonitorPollIntervalMillis is 1000 ms. */
  static final int DEFAULT_MONITOR_POLL_INTERVAL = 1000;
  public static final String USED_GENERIC_PARSER =
      "mapreduce.client.genericoptionsparser.used";
  public static final String SUBMIT_REPLICATION =
      "mapreduce.client.submit.file.replication";
  public static final int DEFAULT_SUBMIT_REPLICATION = 10;
  public static final String USE_WILDCARD_FOR_LIBJARS =
      "mapreduce.client.libjars.wildcard";
  public static final boolean DEFAULT_USE_WILDCARD_FOR_LIBJARS = true;

  @InterfaceStability.Evolving
  public enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }

  static {
    ConfigUtil.loadResources();
  }

  private JobState state = JobState.DEFINE;
  private JobStatus status;
  private long statustime;
  private Cluster cluster;
  private ReservationId reservationId;

  /**
   * @deprecated Use {@link #getInstance()}
   */
  @Deprecated
  public Job() throws IOException {
    this(new JobConf(new Configuration()));
  }

  /**
   * @deprecated Use {@link #getInstance(Configuration)}
   */
  @Deprecated
  public Job(Configuration conf) throws IOException {
    this(new JobConf(conf));
  }

  /**
   * @deprecated Use {@link #getInstance(Configuration, String)}
   */
  @Deprecated
  public Job(Configuration conf, String jobName) throws IOException {
    this(new JobConf(conf));
    setJobName(jobName);
  }

  Job(JobConf conf) throws IOException {
    super(conf, null);
    // propagate existing user credentials to job
    this.credentials.mergeAll(this.ugi.getCredentials());
    this.cluster = null;
  }

  Job(JobStatus status, JobConf conf) throws IOException {
    this(conf);
    setJobID(status.getJobID());
    this.status = status;
    state = JobState.RUNNING;
  }

  /**
   * Creates a new {@link Job} with no particular {@link Cluster}.
   * A Cluster will be created with a generic {@link Configuration}.
   *
   * @return the {@link Job}, with no connection to a cluster yet.
   * @throws IOException
   */
  public static Job getInstance() throws IOException {
    // create with a null Cluster
    return getInstance(new Configuration());
  }

  /**
   * Creates a new {@link Job} with no particular {@link Cluster} and a
   * given {@link Configuration}.
   *
   * The Job makes a copy of the Configuration so
   * that any necessary internal modifications do not reflect on the incoming
   * parameter.
   *
   * A Cluster will be created from the conf parameter only when it's needed.
   *
   * @param conf the configuration
   * @return the {@link Job}, with no connection to a cluster yet.
   * @throws IOException
   */
  public static Job getInstance(Configuration conf) throws IOException {
    // create with a null Cluster
    JobConf jobConf = new JobConf(conf);
    return new Job(jobConf);
  }

  /**
   * Creates a new {@link Job} with no particular {@link Cluster} and a given
   * jobName.
* A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param conf the configuration * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException */ public static Job getInstance(Configuration conf, String jobName) throws IOException { // create with a null Cluster Job result = getInstance(conf); result.setJobName(jobName); return result; } /** * Creates a new {@link Job} with no particular {@link Cluster} and given * {@link Configuration} and {@link JobStatus}. * A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param status job status * @param conf job configuration * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException */ public static Job getInstance(JobStatus status, Configuration conf) throws IOException { return new Job(status, new JobConf(conf)); } /** * Creates a new {@link Job} with no particular {@link Cluster}. * A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param ignored * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException * @deprecated Use {@link #getInstance()} */ @Deprecated public static Job getInstance(Cluster ignored) throws IOException { return getInstance(); } /** * Creates a new {@link Job} with no particular {@link Cluster} and given * {@link Configuration}. * A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param ignored * @param conf job configuration * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException * @deprecated Use {@link #getInstance(Configuration)} */ @Deprecated public static Job getInstance(Cluster ignored, Configuration conf) throws IOException { return getInstance(conf); } /** * Creates a new {@link Job} with no particular {@link Cluster} and given * {@link Configuration} and {@link JobStatus}. * A Cluster will be created from the conf parameter only when it's needed. * * The Job makes a copy of the Configuration so * that any necessary internal modifications do not reflect on the incoming * parameter. * * @param cluster cluster * @param status job status * @param conf job configuration * @return the {@link Job} , with no connection to a cluster yet. * @throws IOException */ @Private public static Job getInstance(Cluster cluster, JobStatus status, Configuration conf) throws IOException { Job job = getInstance(status, conf); job.setCluster(cluster); return job; } private void ensureState(JobState state) throws IllegalStateException { if (state != this.state) { throw new IllegalStateException("Job in state "+ this.state + " instead of " + state); } if (state == JobState.RUNNING && cluster == null) { throw new IllegalStateException ("Job in state " + this.state + ", but it isn't attached to any job tracker!"); } } /** * Some methods rely on having a recent job status object. 
Refresh * it, if necessary */ synchronized void ensureFreshStatus() throws IOException { if (System.currentTimeMillis() - statustime > MAX_JOBSTATUS_AGE) { updateStatus(); } } /** Some methods need to update status immediately. So, refresh * immediately * @throws IOException */ synchronized void updateStatus() throws IOException { try { this.status = ugi.doAs(new PrivilegedExceptionAction() { @Override public JobStatus run() throws IOException, InterruptedException { return cluster.getClient().getJobStatus(getJobID()); } }); } catch (InterruptedException ie) { throw new IOException(ie); } if (this.status == null) { throw new IOException("Job status not available "); } this.statustime = System.currentTimeMillis(); } public JobStatus getStatus() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status; } /** * Returns the current state of the Job. * * @return JobStatus#State * @throws IOException * @throws InterruptedException */ public JobStatus.State getJobState() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.getState(); } /** * Get the URL where some job progress information will be displayed. * * @return the URL where some job progress information will be displayed. */ public String getTrackingURL(){ ensureState(JobState.RUNNING); return status.getTrackingUrl().toString(); } /** * Get the path of the submitted job configuration. * * @return the path of the submitted job configuration. */ public String getJobFile() { ensureState(JobState.RUNNING); return status.getJobFile(); } /** * Get start time of the job. * * @return the start time of the job */ public long getStartTime() { ensureState(JobState.RUNNING); return status.getStartTime(); } /** * Get finish time of the job. * * @return the finish time of the job */ public long getFinishTime() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.getFinishTime(); } /** * Get scheduling info of the job. * * @return the scheduling info of the job */ public String getSchedulingInfo() { ensureState(JobState.RUNNING); return status.getSchedulingInfo(); } /** * Get scheduling info of the job. * * @return the priority info of the job */ public JobPriority getPriority() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.getPriority(); } /** * The user-specified job name. */ public String getJobName() { if (state == JobState.DEFINE || status == null) { return super.getJobName(); } ensureState(JobState.RUNNING); return status.getJobName(); } public String getHistoryUrl() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.getHistoryFile(); } public boolean isRetired() throws IOException, InterruptedException { ensureState(JobState.RUNNING); updateStatus(); return status.isRetired(); } @Private public Cluster getCluster() { return cluster; } /** Only for mocks in unit tests. */ @Private private void setCluster(Cluster cluster) { this.cluster = cluster; } /** * Dump stats to screen. 
*/ @Override public String toString() { ensureState(JobState.RUNNING); String reasonforFailure = " "; int numMaps = 0; int numReduces = 0; try { updateStatus(); if (status.getState().equals(JobStatus.State.FAILED)) reasonforFailure = getTaskFailureEventString(); numMaps = getTaskReports(TaskType.MAP).length; numReduces = getTaskReports(TaskType.REDUCE).length; } catch (IOException e) { } catch (InterruptedException ie) { } StringBuffer sb = new StringBuffer(); sb.append("Job: ").append(status.getJobID()).append("\n"); sb.append("Job File: ").append(status.getJobFile()).append("\n"); sb.append("Job Tracking URL : ").append(status.getTrackingUrl()); sb.append("\n"); sb.append("Uber job : ").append(status.isUber()).append("\n"); sb.append("Number of maps: ").append(numMaps).append("\n"); sb.append("Number of reduces: ").append(numReduces).append("\n"); sb.append("map() completion: "); sb.append(status.getMapProgress()).append("\n"); sb.append("reduce() completion: "); sb.append(status.getReduceProgress()).append("\n"); sb.append("Job state: "); sb.append(status.getState()).append("\n"); sb.append("retired: ").append(status.isRetired()).append("\n"); sb.append("reason for failure: ").append(reasonforFailure); return sb.toString(); } /** * @return taskid which caused job failure * @throws IOException * @throws InterruptedException */ String getTaskFailureEventString() throws IOException, InterruptedException { int failCount = 1; TaskCompletionEvent lastEvent = null; TaskCompletionEvent[] events = ugi.doAs(new PrivilegedExceptionAction() { @Override public TaskCompletionEvent[] run() throws IOException, InterruptedException { return cluster.getClient().getTaskCompletionEvents( status.getJobID(), 0, 10); } }); for (TaskCompletionEvent event : events) { if (event.getStatus().equals(TaskCompletionEvent.Status.FAILED)) { failCount++; lastEvent = event; } } if (lastEvent == null) { return "There are no failed tasks for the job. " + "Job is failed due to some other reason and reason " + "can be found in the logs."; } String[] taskAttemptID = lastEvent.getTaskAttemptId().toString().split("_", 2); String taskID = taskAttemptID[1].substring(0, taskAttemptID[1].length()-2); return (" task " + taskID + " failed " + failCount + " times " + "For details check tasktracker at: " + lastEvent.getTaskTrackerHttp()); } /** * Get the information of the current state of the tasks of a job. * * @param type Type of the task * @return the list of all of the map tips. * @throws IOException */ public TaskReport[] getTaskReports(TaskType type) throws IOException, InterruptedException { ensureState(JobState.RUNNING); final TaskType tmpType = type; return ugi.doAs(new PrivilegedExceptionAction() { public TaskReport[] run() throws IOException, InterruptedException { return cluster.getClient().getTaskReports(getJobID(), tmpType); } }); } /** * Get the progress of the job's map-tasks, as a float between 0.0 * and 1.0. When all map tasks have completed, the function returns 1.0. * * @return the progress of the job's map-tasks. * @throws IOException */ public float mapProgress() throws IOException { ensureState(JobState.RUNNING); ensureFreshStatus(); return status.getMapProgress(); } /** * Get the progress of the job's reduce-tasks, as a float between 0.0 * and 1.0. When all reduce tasks have completed, the function returns 1.0. * * @return the progress of the job's reduce-tasks. 
* @throws IOException */ public float reduceProgress() throws IOException { ensureState(JobState.RUNNING); ensureFreshStatus(); return status.getReduceProgress(); } /** * Get the progress of the job's cleanup-tasks, as a float between 0.0 * and 1.0. When all cleanup tasks have completed, the function returns 1.0. * * @return the progress of the job's cleanup-tasks. * @throws IOException */ public float cleanupProgress() throws IOException, InterruptedException { ensureState(JobState.RUNNING); ensureFreshStatus(); return status.getCleanupProgress(); } /** * Get the progress of the job's setup-tasks, as a float between 0.0 * and 1.0. When all setup tasks have completed, the function returns 1.0. * * @return the progress of the job's setup-tasks. * @throws IOException */ public float setupProgress() throws IOException { ensureState(JobState.RUNNING); ensureFreshStatus(); return status.getSetupProgress(); } /** * Check if the job is finished or not. * This is a non-blocking call. * * @return true if the job is complete, else false. * @throws IOException */ public boolean isComplete() throws IOException { ensureState(JobState.RUNNING); updateStatus(); return status.isJobComplete(); } /** * Check if the job completed successfully. * * @return true if the job succeeded, else false. * @throws IOException */ public boolean isSuccessful() throws IOException { ensureState(JobState.RUNNING); updateStatus(); return status.getState() == JobStatus.State.SUCCEEDED; } /** * Kill the running job. Blocks until all job tasks have been * killed as well. If the job is no longer running, it simply returns. * * @throws IOException */ public void killJob() throws IOException { ensureState(JobState.RUNNING); try { cluster.getClient().killJob(getJobID()); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Set the priority of a running job. * @param jobPriority the new priority for the job. * @throws IOException */ public void setPriority(JobPriority jobPriority) throws IOException, InterruptedException { if (state == JobState.DEFINE) { if (jobPriority == JobPriority.UNDEFINED_PRIORITY) { conf.setJobPriorityAsInteger(convertPriorityToInteger(jobPriority)); } else { conf.setJobPriority(org.apache.hadoop.mapred.JobPriority .valueOf(jobPriority.name())); } } else { ensureState(JobState.RUNNING); final int tmpPriority = convertPriorityToInteger(jobPriority); ugi.doAs(new PrivilegedExceptionAction() { @Override public Object run() throws IOException, InterruptedException { cluster.getClient() .setJobPriority(getJobID(), Integer.toString(tmpPriority)); return null; } }); } } /** * Set the priority of a running job. * * @param jobPriority * the new priority for the job. 
* @throws IOException */ public void setPriorityAsInteger(int jobPriority) throws IOException, InterruptedException { if (state == JobState.DEFINE) { conf.setJobPriorityAsInteger(jobPriority); } else { ensureState(JobState.RUNNING); final int tmpPriority = jobPriority; ugi.doAs(new PrivilegedExceptionAction() { @Override public Object run() throws IOException, InterruptedException { cluster.getClient() .setJobPriority(getJobID(), Integer.toString(tmpPriority)); return null; } }); } } private int convertPriorityToInteger(JobPriority jobPriority) { switch (jobPriority) { case VERY_HIGH : return 5; case HIGH : return 4; case NORMAL : return 3; case LOW : return 2; case VERY_LOW : return 1; case DEFAULT : return 0; default: break; } // For UNDEFINED_PRIORITY, we can set it to default for better handling return 0; } /** * Get events indicating completion (success/failure) of component tasks. * * @param startFrom index to start fetching events from * @param numEvents number of events to fetch * @return an array of {@link TaskCompletionEvent}s * @throws IOException */ public TaskCompletionEvent[] getTaskCompletionEvents(final int startFrom, final int numEvents) throws IOException, InterruptedException { ensureState(JobState.RUNNING); return ugi.doAs(new PrivilegedExceptionAction() { @Override public TaskCompletionEvent[] run() throws IOException, InterruptedException { return cluster.getClient().getTaskCompletionEvents(getJobID(), startFrom, numEvents); } }); } /** * Get events indicating completion (success/failure) of component tasks. * * @param startFrom index to start fetching events from * @return an array of {@link org.apache.hadoop.mapred.TaskCompletionEvent}s * @throws IOException */ public org.apache.hadoop.mapred.TaskCompletionEvent[] getTaskCompletionEvents(final int startFrom) throws IOException { try { TaskCompletionEvent[] events = getTaskCompletionEvents(startFrom, 10); org.apache.hadoop.mapred.TaskCompletionEvent[] retEvents = new org.apache.hadoop.mapred.TaskCompletionEvent[events.length]; for (int i = 0; i < events.length; i++) { retEvents[i] = org.apache.hadoop.mapred.TaskCompletionEvent.downgrade (events[i]); } return retEvents; } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Kill indicated task attempt. * @param taskId the id of the task to kill. * @param shouldFail if true the task is failed and added * to failed tasks list, otherwise it is just killed, * w/o affecting job failure status. */ @Private public boolean killTask(final TaskAttemptID taskId, final boolean shouldFail) throws IOException { ensureState(JobState.RUNNING); try { return ugi.doAs(new PrivilegedExceptionAction() { public Boolean run() throws IOException, InterruptedException { return cluster.getClient().killTask(taskId, shouldFail); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Kill indicated task attempt. * * @param taskId the id of the task to be terminated. * @throws IOException */ public void killTask(final TaskAttemptID taskId) throws IOException { killTask(taskId, false); } /** * Fail indicated task attempt. * * @param taskId the id of the task to be terminated. * @throws IOException */ public void failTask(final TaskAttemptID taskId) throws IOException { killTask(taskId, true); } /** * Gets the counters for this job. May return null if the job has been * retired and the job is no longer in the completed job store. * * @return the counters for this job. 
* @throws IOException */ public Counters getCounters() throws IOException { ensureState(JobState.RUNNING); try { return ugi.doAs(new PrivilegedExceptionAction() { @Override public Counters run() throws IOException, InterruptedException { return cluster.getClient().getJobCounters(getJobID()); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Gets the diagnostic messages for a given task attempt. * @param taskid * @return the list of diagnostic messages for the task * @throws IOException */ public String[] getTaskDiagnostics(final TaskAttemptID taskid) throws IOException, InterruptedException { ensureState(JobState.RUNNING); return ugi.doAs(new PrivilegedExceptionAction() { @Override public String[] run() throws IOException, InterruptedException { return cluster.getClient().getTaskDiagnostics(taskid); } }); } /** * Set the number of reduce tasks for the job. * @param tasks the number of reduce tasks * @throws IllegalStateException if the job is submitted */ public void setNumReduceTasks(int tasks) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setNumReduceTasks(tasks); } /** * Set the current working directory for the default file system. * * @param dir the new current working directory. * @throws IllegalStateException if the job is submitted */ public void setWorkingDirectory(Path dir) throws IOException { ensureState(JobState.DEFINE); conf.setWorkingDirectory(dir); } /** * Set the {@link InputFormat} for the job. * @param cls the InputFormat to use * @throws IllegalStateException if the job is submitted */ public void setInputFormatClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(INPUT_FORMAT_CLASS_ATTR, cls, InputFormat.class); } /** * Set the {@link OutputFormat} for the job. * @param cls the OutputFormat to use * @throws IllegalStateException if the job is submitted */ public void setOutputFormatClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(OUTPUT_FORMAT_CLASS_ATTR, cls, OutputFormat.class); } /** * Set the {@link Mapper} for the job. * @param cls the Mapper to use * @throws IllegalStateException if the job is submitted */ public void setMapperClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(MAP_CLASS_ATTR, cls, Mapper.class); } /** * Set the Jar by finding where a given class came from. * @param cls the example class */ public void setJarByClass(Class cls) { ensureState(JobState.DEFINE); conf.setJarByClass(cls); } /** * Set the job jar */ public void setJar(String jar) { ensureState(JobState.DEFINE); conf.setJar(jar); } /** * Set the reported username for this job. * * @param user the username for this job. */ public void setUser(String user) { ensureState(JobState.DEFINE); conf.setUser(user); } /** * Set the combiner class for the job. * @param cls the combiner to use * @throws IllegalStateException if the job is submitted */ public void setCombinerClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(COMBINE_CLASS_ATTR, cls, Reducer.class); } /** * Set the {@link Reducer} for the job. * @param cls the Reducer to use * @throws IllegalStateException if the job is submitted */ public void setReducerClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(REDUCE_CLASS_ATTR, cls, Reducer.class); } /** * Set the {@link Partitioner} for the job. 
* @param cls the Partitioner to use * @throws IllegalStateException if the job is submitted */ public void setPartitionerClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setClass(PARTITIONER_CLASS_ATTR, cls, Partitioner.class); } /** * Set the key class for the map output data. This allows the user to * specify the map output key class to be different than the final output * value class. * * @param theClass the map output key class. * @throws IllegalStateException if the job is submitted */ public void setMapOutputKeyClass(Class theClass ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setMapOutputKeyClass(theClass); } /** * Set the value class for the map output data. This allows the user to * specify the map output value class to be different than the final output * value class. * * @param theClass the map output value class. * @throws IllegalStateException if the job is submitted */ public void setMapOutputValueClass(Class theClass ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setMapOutputValueClass(theClass); } /** * Set the key class for the job output data. * * @param theClass the key class for the job output data. * @throws IllegalStateException if the job is submitted */ public void setOutputKeyClass(Class theClass ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setOutputKeyClass(theClass); } /** * Set the value class for job outputs. * * @param theClass the value class for job outputs. * @throws IllegalStateException if the job is submitted */ public void setOutputValueClass(Class theClass ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setOutputValueClass(theClass); } /** * Define the comparator that controls which keys are grouped together * for a single call to combiner, * {@link Reducer#reduce(Object, Iterable, * org.apache.hadoop.mapreduce.Reducer.Context)} * * @param cls the raw comparator to use * @throws IllegalStateException if the job is submitted */ public void setCombinerKeyGroupingComparatorClass( Class cls) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setCombinerKeyGroupingComparator(cls); } /** * Define the comparator that controls how the keys are sorted before they * are passed to the {@link Reducer}. * @param cls the raw comparator * @throws IllegalStateException if the job is submitted * @see #setCombinerKeyGroupingComparatorClass(Class) */ public void setSortComparatorClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setOutputKeyComparatorClass(cls); } /** * Define the comparator that controls which keys are grouped together * for a single call to * {@link Reducer#reduce(Object, Iterable, * org.apache.hadoop.mapreduce.Reducer.Context)} * @param cls the raw comparator to use * @throws IllegalStateException if the job is submitted * @see #setCombinerKeyGroupingComparatorClass(Class) */ public void setGroupingComparatorClass(Class cls ) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setOutputValueGroupingComparator(cls); } /** * Set the user-specified job name. * * @param name the job's new name. * @throws IllegalStateException if the job is submitted */ public void setJobName(String name) throws IllegalStateException { ensureState(JobState.DEFINE); conf.setJobName(name); } /** * Turn speculative execution on or off for this job. * * @param speculativeExecution true if speculative execution * should be turned on, else false. 
*/ public void setSpeculativeExecution(boolean speculativeExecution) { ensureState(JobState.DEFINE); conf.setSpeculativeExecution(speculativeExecution); } /** * Turn speculative execution on or off for this job for map tasks. * * @param speculativeExecution true if speculative execution * should be turned on for map tasks, * else false. */ public void setMapSpeculativeExecution(boolean speculativeExecution) { ensureState(JobState.DEFINE); conf.setMapSpeculativeExecution(speculativeExecution); } /** * Turn speculative execution on or off for this job for reduce tasks. * * @param speculativeExecution true if speculative execution * should be turned on for reduce tasks, * else false. */ public void setReduceSpeculativeExecution(boolean speculativeExecution) { ensureState(JobState.DEFINE); conf.setReduceSpeculativeExecution(speculativeExecution); } /** * Specify whether job-setup and job-cleanup is needed for the job * * @param needed If true, job-setup and job-cleanup will be * considered from {@link OutputCommitter} * else ignored. */ public void setJobSetupCleanupNeeded(boolean needed) { ensureState(JobState.DEFINE); conf.setBoolean(SETUP_CLEANUP_NEEDED, needed); } /** * Set the given set of archives * @param archives The list of archives that need to be localized */ public void setCacheArchives(URI[] archives) { ensureState(JobState.DEFINE); DistributedCache.setCacheArchives(archives, conf); } /** * Set the given set of files * @param files The list of files that need to be localized */ public void setCacheFiles(URI[] files) { ensureState(JobState.DEFINE); DistributedCache.setCacheFiles(files, conf); } /** * Add a archives to be localized * @param uri The uri of the cache to be localized */ public void addCacheArchive(URI uri) { ensureState(JobState.DEFINE); DistributedCache.addCacheArchive(uri, conf); } /** * Add a file to be localized * @param uri The uri of the cache to be localized */ public void addCacheFile(URI uri) { ensureState(JobState.DEFINE); DistributedCache.addCacheFile(uri, conf); } /** * Add an file path to the current set of classpath entries It adds the file * to cache as well. * * Files added with this method will not be unpacked while being added to the * classpath. * To add archives to classpath, use the {@link #addArchiveToClassPath(Path)} * method instead. * * @param file Path of the file to be added */ public void addFileToClassPath(Path file) throws IOException { ensureState(JobState.DEFINE); DistributedCache.addFileToClassPath(file, conf, file.getFileSystem(conf)); } /** * Add an archive path to the current set of classpath entries. It adds the * archive to cache as well. * * Archive files will be unpacked and added to the classpath * when being distributed. * * @param archive Path of the archive to be added */ public void addArchiveToClassPath(Path archive) throws IOException { ensureState(JobState.DEFINE); DistributedCache.addArchiveToClassPath(archive, conf, archive.getFileSystem(conf)); } /** * Originally intended to enable symlinks, but currently symlinks cannot be * disabled. */ @Deprecated public void createSymlink() { ensureState(JobState.DEFINE); DistributedCache.createSymlink(conf); } /** * Expert: Set the number of maximum attempts that will be made to run a * map task. * * @param n the number of attempts per map task. */ public void setMaxMapAttempts(int n) { ensureState(JobState.DEFINE); conf.setMaxMapAttempts(n); } /** * Expert: Set the number of maximum attempts that will be made to run a * reduce task. 
* * @param n the number of attempts per reduce task. */ public void setMaxReduceAttempts(int n) { ensureState(JobState.DEFINE); conf.setMaxReduceAttempts(n); } /** * Set whether the system should collect profiler information for some of * the tasks in this job? The information is stored in the user log * directory. * @param newValue true means it should be gathered */ public void setProfileEnabled(boolean newValue) { ensureState(JobState.DEFINE); conf.setProfileEnabled(newValue); } /** * Set the profiler configuration arguments. If the string contains a '%s' it * will be replaced with the name of the profiling output file when the task * runs. * * This value is passed to the task child JVM on the command line. * * @param value the configuration string */ public void setProfileParams(String value) { ensureState(JobState.DEFINE); conf.setProfileParams(value); } /** * Set the ranges of maps or reduces to profile. setProfileEnabled(true) * must also be called. * @param newValue a set of integer ranges of the map ids */ public void setProfileTaskRange(boolean isMap, String newValue) { ensureState(JobState.DEFINE); conf.setProfileTaskRange(isMap, newValue); } private void ensureNotSet(String attr, String msg) throws IOException { if (conf.get(attr) != null) { throw new IOException(attr + " is incompatible with " + msg + " mode."); } } /** * Sets the flag that will allow the JobTracker to cancel the HDFS delegation * tokens upon job completion. Defaults to true. */ public void setCancelDelegationTokenUponJobCompletion(boolean value) { ensureState(JobState.DEFINE); conf.setBoolean(JOB_CANCEL_DELEGATION_TOKEN, value); } /** * Default to the new APIs unless they are explicitly set or the old mapper or * reduce attributes are used. * @throws IOException if the configuration is inconsistent */ private void setUseNewAPI() throws IOException { int numReduces = conf.getNumReduceTasks(); String oldMapperClass = "mapred.mapper.class"; String oldReduceClass = "mapred.reducer.class"; conf.setBooleanIfUnset("mapred.mapper.new-api", conf.get(oldMapperClass) == null); if (conf.getUseNewMapper()) { String mode = "new map API"; ensureNotSet("mapred.input.format.class", mode); ensureNotSet(oldMapperClass, mode); if (numReduces != 0) { ensureNotSet("mapred.partitioner.class", mode); } else { ensureNotSet("mapred.output.format.class", mode); } } else { String mode = "map compatibility"; ensureNotSet(INPUT_FORMAT_CLASS_ATTR, mode); ensureNotSet(MAP_CLASS_ATTR, mode); if (numReduces != 0) { ensureNotSet(PARTITIONER_CLASS_ATTR, mode); } else { ensureNotSet(OUTPUT_FORMAT_CLASS_ATTR, mode); } } if (numReduces != 0) { conf.setBooleanIfUnset("mapred.reducer.new-api", conf.get(oldReduceClass) == null); if (conf.getUseNewReducer()) { String mode = "new reduce API"; ensureNotSet("mapred.output.format.class", mode); ensureNotSet(oldReduceClass, mode); } else { String mode = "reduce compatibility"; ensureNotSet(OUTPUT_FORMAT_CLASS_ATTR, mode); ensureNotSet(REDUCE_CLASS_ATTR, mode); } } } /** * Add a file to job config for shared cache processing. If shared cache is * enabled, it will return true, otherwise, return false. We don't check with * SCM here given application might not be able to provide the job id; * ClientSCMProtocol.use requires the application id. Job Submitter will read * the files from job config and take care of things. * * @param resource The resource that Job Submitter will process later using * shared cache. 
* @param conf Configuration to add the resource to * @return whether the resource has been added to the configuration */ @Unstable public static boolean addFileToSharedCache(URI resource, Configuration conf) { SharedCacheConfig scConfig = new SharedCacheConfig(); scConfig.init(conf); if (scConfig.isSharedCacheFilesEnabled()) { String files = conf.get(MRJobConfig.FILES_FOR_SHARED_CACHE); conf.set( MRJobConfig.FILES_FOR_SHARED_CACHE, files == null ? resource.toString() : files + "," + resource.toString()); return true; } else { return false; } } /** * Add a file to job config for shared cache processing. If shared cache is * enabled, it will return true, otherwise, return false. We don't check with * SCM here given application might not be able to provide the job id; * ClientSCMProtocol.use requires the application id. Job Submitter will read * the files from job config and take care of things. Job Submitter will also * add the file to classpath. Intended to be used by user code. * * @param resource The resource that Job Submitter will process later using * shared cache. * @param conf Configuration to add the resource to * @return whether the resource has been added to the configuration */ @Unstable public static boolean addFileToSharedCacheAndClasspath(URI resource, Configuration conf) { SharedCacheConfig scConfig = new SharedCacheConfig(); scConfig.init(conf); if (scConfig.isSharedCacheLibjarsEnabled()) { String files = conf.get(MRJobConfig.FILES_FOR_CLASSPATH_AND_SHARED_CACHE); conf.set( MRJobConfig.FILES_FOR_CLASSPATH_AND_SHARED_CACHE, files == null ? resource.toString() : files + "," + resource.toString()); return true; } else { return false; } } /** * Add an archive to job config for shared cache processing. If shared cache * is enabled, it will return true, otherwise, return false. We don't check * with SCM here given application might not be able to provide the job id; * ClientSCMProtocol.use requires the application id. Job Submitter will read * the files from job config and take care of things. Intended to be used by * user code. * * @param resource The resource that Job Submitter will process later using * shared cache. * @param conf Configuration to add the resource to * @return whether the resource has been added to the configuration */ @Unstable public static boolean addArchiveToSharedCache(URI resource, Configuration conf) { SharedCacheConfig scConfig = new SharedCacheConfig(); scConfig.init(conf); if (scConfig.isSharedCacheArchivesEnabled()) { String files = conf.get(MRJobConfig.ARCHIVES_FOR_SHARED_CACHE); conf.set( MRJobConfig.ARCHIVES_FOR_SHARED_CACHE, files == null ? resource.toString() : files + "," + resource.toString()); return true; } else { return false; } } /** * This is to set the shared cache upload policies for files. If the parameter * was previously set, this method will replace the old value with the new * provided map. * * @param conf Configuration which stores the shared cache upload policies * @param policies A map containing the shared cache upload policies for a set * of resources. The key is the url of the resource and the value is * the upload policy. True if it should be uploaded, false otherwise. */ @Unstable public static void setFileSharedCacheUploadPolicies(Configuration conf, Map policies) { setSharedCacheUploadPolicies(conf, policies, true); } /** * This is to set the shared cache upload policies for archives. If the * parameter was previously set, this method will replace the old value with * the new provided map. 
* * @param conf Configuration which stores the shared cache upload policies * @param policies A map containing the shared cache upload policies for a set * of resources. The key is the url of the resource and the value is * the upload policy. True if it should be uploaded, false otherwise. */ @Unstable public static void setArchiveSharedCacheUploadPolicies(Configuration conf, Map policies) { setSharedCacheUploadPolicies(conf, policies, false); } // We use a double colon because a colon is a reserved character in a URI and // there should not be two colons next to each other. private static final String DELIM = "::"; /** * Set the shared cache upload policies config parameter. This is done by * serializing the provided map of shared cache upload policies into a config * parameter. If the parameter was previously set, this method will replace * the old value with the new provided map. * * @param conf Configuration which stores the shared cache upload policies * @param policies A map containing the shared cache upload policies for a set * of resources. The key is the url of the resource and the value is * the upload policy. True if it should be uploaded, false otherwise. * @param areFiles True if these policies are for files, false if they are for * archives. */ private static void setSharedCacheUploadPolicies(Configuration conf, Map policies, boolean areFiles) { if (policies != null) { StringBuilder sb = new StringBuilder(); Iterator> it = policies.entrySet().iterator(); Map.Entry e; if (it.hasNext()) { e = it.next(); sb.append(e.getKey() + DELIM + e.getValue()); } else { // policies is an empty map, just skip setting the parameter return; } while (it.hasNext()) { e = it.next(); sb.append("," + e.getKey() + DELIM + e.getValue()); } String confParam = areFiles ? MRJobConfig.CACHE_FILES_SHARED_CACHE_UPLOAD_POLICIES : MRJobConfig.CACHE_ARCHIVES_SHARED_CACHE_UPLOAD_POLICIES; conf.set(confParam, sb.toString()); } } /** * Deserialize a map of shared cache upload policies from a config parameter. * * @param conf Configuration which stores the shared cache upload policies * @param areFiles True if these policies are for files, false if they are for * archives. * @return A map containing the shared cache upload policies for a set of * resources. The key is the url of the resource and the value is the * upload policy. True if it should be uploaded, false otherwise. */ private static Map getSharedCacheUploadPolicies( Configuration conf, boolean areFiles) { String confParam = areFiles ? MRJobConfig.CACHE_FILES_SHARED_CACHE_UPLOAD_POLICIES : MRJobConfig.CACHE_ARCHIVES_SHARED_CACHE_UPLOAD_POLICIES; Collection policies = conf.getStringCollection(confParam); String[] policy; Map policyMap = new LinkedHashMap(); for (String s : policies) { policy = s.split(DELIM); if (policy.length != 2) { LOG.error(confParam + " is mis-formatted, returning empty shared cache upload policies." + " Error on [" + s + "]"); return new LinkedHashMap(); } policyMap.put(policy[0], Boolean.parseBoolean(policy[1])); } return policyMap; } /** * This is to get the shared cache upload policies for files. * * @param conf Configuration which stores the shared cache upload policies * @return A map containing the shared cache upload policies for a set of * resources. The key is the url of the resource and the value is the * upload policy. True if it should be uploaded, false otherwise. 
   */
  @Unstable
  public static Map<String, Boolean> getFileSharedCacheUploadPolicies(
      Configuration conf) {
    return getSharedCacheUploadPolicies(conf, true);
  }

  /**
   * This is to get the shared cache upload policies for archives.
   *
   * @param conf Configuration which stores the shared cache upload policies
   * @return A map containing the shared cache upload policies for a set of
   *         resources. The key is the url of the resource and the value is the
   *         upload policy. True if it should be uploaded, false otherwise.
   */
  @Unstable
  public static Map<String, Boolean> getArchiveSharedCacheUploadPolicies(
      Configuration conf) {
    return getSharedCacheUploadPolicies(conf, false);
  }

  private synchronized void connect()
      throws IOException, InterruptedException, ClassNotFoundException {
    if (cluster == null) {
      cluster = ugi.doAs(new PrivilegedExceptionAction<Cluster>() {
        public Cluster run()
            throws IOException, InterruptedException, ClassNotFoundException {
          return new Cluster(getConfiguration());
        }
      });
    }
  }

  boolean isConnected() {
    return cluster != null;
  }

  /** Only for mocking via unit tests. */
  @Private
  public JobSubmitter getJobSubmitter(FileSystem fs,
      ClientProtocol submitClient) throws IOException {
    return new JobSubmitter(fs, submitClient);
  }

  /**
   * Submit the job to the cluster and return immediately.
   * @throws IOException
   */
  public void submit()
      throws IOException, InterruptedException, ClassNotFoundException {
    ensureState(JobState.DEFINE);
    setUseNewAPI();
    connect();
    final JobSubmitter submitter =
        getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
    status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
      public JobStatus run() throws IOException, InterruptedException,
          ClassNotFoundException {
        return submitter.submitJobInternal(Job.this, cluster);
      }
    });
    state = JobState.RUNNING;
    LOG.info("The url to track the job: " + getTrackingURL());
  }

  /**
   * Submit the job to the cluster and wait for it to finish.
   * @param verbose print the progress to the user
   * @return true if the job succeeded
   * @throws IOException thrown if the communication with the
   *         JobTracker is lost
   */
  public boolean waitForCompletion(boolean verbose)
      throws IOException, InterruptedException, ClassNotFoundException {
    if (state == JobState.DEFINE) {
      submit();
    }
    if (verbose) {
      monitorAndPrintJob();
    } else {
      // get the completion poll interval from the client.
      int completionPollIntervalMillis =
          Job.getCompletionPollInterval(cluster.getConf());
      while (!isComplete()) {
        try {
          Thread.sleep(completionPollIntervalMillis);
        } catch (InterruptedException ie) {
        }
      }
    }
    return isSuccessful();
  }

  /**
   * Monitor a job and print status in real-time as progress is made and tasks
   * fail.
   * @return true if the job succeeded
   * @throws IOException if communication to the JobTracker fails
   */
  public boolean monitorAndPrintJob()
      throws IOException, InterruptedException {
    String lastReport = null;
    Job.TaskStatusFilter filter;
    Configuration clientConf = getConfiguration();
    filter = Job.getTaskOutputFilter(clientConf);
    JobID jobId = getJobID();
    LOG.info("Running job: " + jobId);
    int eventCounter = 0;
    boolean profiling = getProfileEnabled();
    IntegerRanges mapRanges = getProfileTaskRange(true);
    IntegerRanges reduceRanges = getProfileTaskRange(false);
    int progMonitorPollIntervalMillis =
        Job.getProgressPollInterval(clientConf);
    /* make sure to report full progress after the job is done */
    boolean reportedAfterCompletion = false;
    boolean reportedUberMode = false;
    while (!isComplete() || !reportedAfterCompletion) {
      if (isComplete()) {
        reportedAfterCompletion = true;
      } else {
        Thread.sleep(progMonitorPollIntervalMillis);
      }
      if (status.getState() == JobStatus.State.PREP) {
        continue;
      }
      if (!reportedUberMode) {
        reportedUberMode = true;
        LOG.info("Job " + jobId + " running in uber mode : " + isUber());
      }
      String report =
          (" map " + StringUtils.formatPercent(mapProgress(), 0) +
           " reduce " + StringUtils.formatPercent(reduceProgress(), 0));
      if (!report.equals(lastReport)) {
        LOG.info(report);
        lastReport = report;
      }
      TaskCompletionEvent[] events =
          getTaskCompletionEvents(eventCounter, 10);
      eventCounter += events.length;
      printTaskEvents(events, filter, profiling, mapRanges, reduceRanges);
    }
    boolean success = isSuccessful();
    if (success) {
      LOG.info("Job " + jobId + " completed successfully");
    } else {
      LOG.info("Job " + jobId + " failed with state " + status.getState() +
          " due to: " + status.getFailureInfo());
    }
    Counters counters = getCounters();
    if (counters != null) {
      LOG.info(counters.toString());
    }
    return success;
  }

  private void printTaskEvents(TaskCompletionEvent[] events,
      Job.TaskStatusFilter filter, boolean profiling, IntegerRanges mapRanges,
      IntegerRanges reduceRanges) throws IOException, InterruptedException {
    for (TaskCompletionEvent event : events) {
      switch (filter) {
      case NONE:
        break;
      case SUCCEEDED:
        if (event.getStatus() == TaskCompletionEvent.Status.SUCCEEDED) {
          LOG.info(event.toString());
        }
        break;
      case FAILED:
        if (event.getStatus() == TaskCompletionEvent.Status.FAILED) {
          LOG.info(event.toString());
          // Displaying the task diagnostic information
          TaskAttemptID taskId = event.getTaskAttemptId();
          String[] taskDiagnostics = getTaskDiagnostics(taskId);
          if (taskDiagnostics != null) {
            for (String diagnostics : taskDiagnostics) {
              System.err.println(diagnostics);
            }
          }
        }
        break;
      case KILLED:
        if (event.getStatus() == TaskCompletionEvent.Status.KILLED){
          LOG.info(event.toString());
        }
        break;
      case ALL:
        LOG.info(event.toString());
        break;
      }
    }
  }

  /** The interval at which monitorAndPrintJob() prints status */
  public static int getProgressPollInterval(Configuration conf) {
    // Read progress monitor poll interval from config. Default is 1 second.
    int progMonitorPollIntervalMillis = conf.getInt(
        PROGRESS_MONITOR_POLL_INTERVAL_KEY, DEFAULT_MONITOR_POLL_INTERVAL);
    if (progMonitorPollIntervalMillis < 1) {
      LOG.warn(PROGRESS_MONITOR_POLL_INTERVAL_KEY +
          " has been set to an invalid value; " +
          " replacing with " + DEFAULT_MONITOR_POLL_INTERVAL);
      progMonitorPollIntervalMillis = DEFAULT_MONITOR_POLL_INTERVAL;
    }
    return progMonitorPollIntervalMillis;
  }

  /** The interval at which waitForCompletion() should check. */
  public static int getCompletionPollInterval(Configuration conf) {
    int completionPollIntervalMillis = conf.getInt(
        COMPLETION_POLL_INTERVAL_KEY, DEFAULT_COMPLETION_POLL_INTERVAL);
    if (completionPollIntervalMillis < 1) {
      LOG.warn(COMPLETION_POLL_INTERVAL_KEY +
          " has been set to an invalid value; " +
          "replacing with " + DEFAULT_COMPLETION_POLL_INTERVAL);
      completionPollIntervalMillis = DEFAULT_COMPLETION_POLL_INTERVAL;
    }
    return completionPollIntervalMillis;
  }

  /**
   * Get the task output filter.
   *
   * @param conf the configuration.
   * @return the filter level.
   */
  public static TaskStatusFilter getTaskOutputFilter(Configuration conf) {
    return TaskStatusFilter.valueOf(conf.get(Job.OUTPUT_FILTER, "FAILED"));
  }

  /**
   * Modify the Configuration to set the task output filter.
   *
   * @param conf the Configuration to modify.
   * @param newValue the value to set.
   */
  public static void setTaskOutputFilter(Configuration conf,
      TaskStatusFilter newValue) {
    conf.set(Job.OUTPUT_FILTER, newValue.toString());
  }

  public boolean isUber() throws IOException, InterruptedException {
    ensureState(JobState.RUNNING);
    updateStatus();
    return status.isUber();
  }

  /**
   * Get the reservation to which the job is submitted to, if any
   *
   * @return the reservationId the identifier of the job's reservation, null if
   *         the job does not have any reservation associated with it
   */
  public ReservationId getReservationId() {
    return reservationId;
  }

  /**
   * Set the reservation to which the job is submitted to
   *
   * @param reservationId the reservationId to set
   */
  public void setReservationId(ReservationId reservationId) {
    this.reservationId = reservationId;
  }

  /**
   * Close the Job.
   * @throws IOException if fail to close.
   */
  @Override
  public void close() throws IOException {
    if (cluster != null) {
      cluster.close();
      cluster = null;
    }
  }
}
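
/*
 * Illustrative usage sketch -- not part of the original Job.java source. It
 * shows the configure/submit/wait sequence described in the class javadoc,
 * using the identity Mapper and Reducer from this package so it compiles as
 * written; the class name, job name, and the "in"/"out" paths are assumptions
 * made for the example and should be replaced with real values.
 */
class JobSubmissionExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "example-job");
    job.setJarByClass(JobSubmissionExample.class);

    // Identity map/reduce: input records pass through unchanged.
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    // Input and output locations on the default FileSystem (assumed paths).
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat
        .addInputPath(job, new Path("in"));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
        .setOutputPath(job, new Path("out"));

    // Submit the job and poll for progress until it completes; alternatively,
    // call submit() and poll isComplete()/mapProgress() without blocking.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
  }
}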