All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.mapred.JobClient Maven / Gradle / Ivy

There is a newer version: 3.4.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.shaded.org.apache.hadoop.mapred;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.shaded.org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.shaded.org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.shaded.org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.Path;
import org.apache.hadoop.shaded.org.apache.hadoop.io.Text;
import org.apache.hadoop.shaded.org.apache.hadoop.mapred.ClusterStatus.BlackListInfo;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.ClusterMetrics;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.QueueInfo;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.TaskTrackerInfo;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.tools.CLI;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.util.ConfigUtil;
import org.apache.hadoop.shaded.org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.Token;
import org.apache.hadoop.shaded.org.apache.hadoop.security.token.TokenRenewer;
import org.apache.hadoop.shaded.org.apache.hadoop.util.Tool;
import org.apache.hadoop.shaded.org.apache.hadoop.util.ToolRunner;

/**
 * JobClient is the primary interface for the user-job to interact
 * with the cluster.
 * 
 * JobClient provides facilities to submit jobs, track their 
 * progress, access org.apache.hadoop.shaded.com.onent-tasks' reports/logs, get the Map-Reduce cluster
 * status information etc.
 * 
 * 

The job submission process involves: *

    *
  1. * Checking the input and output specifications of the job. *
  2. *
  3. * Computing the {@link InputSplit}s for the job. *
  4. *
  5. * Setup the requisite accounting information for the {@link DistributedCache} * of the job, if necessary. *
  6. *
  7. * Copying the job's jar and configuration to the map-reduce system directory * on the distributed file-system. *
  8. *
  9. * Submitting the job to the cluster and optionally monitoring * it's status. *
  10. *
* * Normally the user creates the application, describes various facets of the * job via {@link JobConf} and then uses the JobClient to submit * the job and monitor its progress. * *

Here is an example on how to use JobClient:

*

 *     // Create a new JobConf
 *     JobConf job = new JobConf(new Configuration(), MyJob.class);
 *     
 *     // Specify various job-specific parameters     
 *     job.setJobName("myjob");
 *     
 *     job.setInputPath(new Path("in"));
 *     job.setOutputPath(new Path("out"));
 *     
 *     job.setMapperClass(MyJob.MyMapper.class);
 *     job.setReducerClass(MyJob.MyReducer.class);
 *
 *     // Submit the job, then poll for progress until the job is org.apache.hadoop.shaded.com.lete
 *     JobClient.runJob(job);
 * 
* * Job Control * *

At times clients would chain map-reduce jobs to accomplish org.apache.hadoop.shaded.com.lex tasks * which cannot be done via a single map-reduce job. This is fairly easy since * the output of the job, typically, goes to distributed file-system and that * can be used as the input for the next job.

* *

However, this also means that the onus on ensuring jobs are org.apache.hadoop.shaded.com.lete * (success/failure) lies squarely on the clients. In such situations the * various job-control options are: *

    *
  1. * {@link #runJob(JobConf)} : submits the job and returns only after * the job has org.apache.hadoop.shaded.com.leted. *
  2. *
  3. * {@link #submitJob(JobConf)} : only submits the job, then poll the * returned handle to the {@link RunningJob} to query status and make * scheduling decisions. *
  4. *
  5. * {@link JobConf#setJobEndNotificationURI(String)} : setup a notification * on job-org.apache.hadoop.shaded.com.letion, thus avoiding polling. *
  6. *
* * @see JobConf * @see ClusterStatus * @see Tool * @see DistributedCache */ @InterfaceAudience.Public @InterfaceStability.Stable public class JobClient extends CLI implements AutoCloseable { @InterfaceAudience.Private public static final String MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_KEY = "mapreduce.jobclient.retry.policy.enabled"; @InterfaceAudience.Private public static final boolean MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_DEFAULT = false; @InterfaceAudience.Private public static final String MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_KEY = "mapreduce.jobclient.retry.policy.spec"; @InterfaceAudience.Private public static final String MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_DEFAULT = "10000,6,60000,10"; // t1,n1,t2,n2,... public enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL } private TaskStatusFilter taskOutputFilter = TaskStatusFilter.FAILED; private int maxRetry = MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES; private long retryInterval = MRJobConfig.DEFAULT_MR_CLIENT_JOB_RETRY_INTERVAL; static{ ConfigUtil.loadResources(); } /** * A NetworkedJob is an implementation of RunningJob. It holds * a JobProfile object to provide some info, and interacts with the * remote service to provide certain functionality. */ static class NetworkedJob implements RunningJob { Job job; /** * We store a JobProfile and a timestamp for when we last * acquired the job profile. If the job is null, then we cannot * perform any of the tasks. The job might be null if the cluster * has org.apache.hadoop.shaded.com.letely forgotten about the job. (eg, 24 hours after the * job org.apache.hadoop.shaded.com.letes.) 
*/ public NetworkedJob(JobStatus status, Cluster cluster) throws IOException { this(status, cluster, new JobConf(status.getJobFile())); } private NetworkedJob(JobStatus status, Cluster cluster, JobConf conf) throws IOException { this(Job.getInstance(cluster, status, conf)); } public NetworkedJob(Job job) throws IOException { this.job = job; } public Configuration getConfiguration() { return job.getConfiguration(); } /** * An identifier for the job */ public JobID getID() { return JobID.downgrade(job.getJobID()); } /** @deprecated This method is deprecated and will be removed. Applications should * rather use {@link #getID()}.*/ @Deprecated public String getJobID() { return getID().toString(); } /** * The user-specified job name */ public String getJobName() { return job.getJobName(); } /** * The name of the job file */ public String getJobFile() { return job.getJobFile(); } /** * A URL where the job's status can be seen */ public String getTrackingURL() { return job.getTrackingURL(); } /** * A float between 0.0 and 1.0, indicating the % of map work * org.apache.hadoop.shaded.com.leted. */ public float mapProgress() throws IOException { return job.mapProgress(); } /** * A float between 0.0 and 1.0, indicating the % of reduce work * org.apache.hadoop.shaded.com.leted. */ public float reduceProgress() throws IOException { return job.reduceProgress(); } /** * A float between 0.0 and 1.0, indicating the % of cleanup work * org.apache.hadoop.shaded.com.leted. */ public float cleanupProgress() throws IOException { try { return job.cleanupProgress(); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * A float between 0.0 and 1.0, indicating the % of setup work * org.apache.hadoop.shaded.com.leted. */ public float setupProgress() throws IOException { return job.setupProgress(); } /** * Returns immediately whether the whole job is done yet or not. 
*/ public synchronized boolean isComplete() throws IOException { return job.isComplete(); } /** * True iff job org.apache.hadoop.shaded.com.leted successfully. */ public synchronized boolean isSuccessful() throws IOException { return job.isSuccessful(); } /** * Blocks until the job is finished */ public void waitForCompletion() throws IOException { try { job.waitForCompletion(false); } catch (InterruptedException ie) { throw new IOException(ie); } catch (ClassNotFoundException ce) { throw new IOException(ce); } } /** * Tells the service to get the state of the current job. */ public synchronized int getJobState() throws IOException { try { return job.getJobState().getValue(); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Tells the service to terminate the current job. */ public synchronized void killJob() throws IOException { job.killJob(); } /** Set the priority of the job. * @param priority new priority of the job. */ public synchronized void setJobPriority(String priority) throws IOException { try { job.setPriority( org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.JobPriority.valueOf(priority)); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Kill indicated task attempt. * @param taskId the id of the task to kill. * @param shouldFail if true the task is failed and added to failed tasks list, otherwise * it is just killed, w/o affecting job failure status. */ public synchronized void killTask(TaskAttemptID taskId, boolean shouldFail) throws IOException { if (shouldFail) { job.failTask(taskId); } else { job.killTask(taskId); } } /** @deprecated Applications should rather use {@link #killTask(TaskAttemptID, boolean)}*/ @Deprecated public synchronized void killTask(String taskId, boolean shouldFail) throws IOException { killTask(TaskAttemptID.forName(taskId), shouldFail); } /** * Fetch task org.apache.hadoop.shaded.com.letion events from cluster for this job. 
*/ public synchronized TaskCompletionEvent[] getTaskCompletionEvents( int startFrom) throws IOException { try { org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.TaskCompletionEvent[] acls = job.getTaskCompletionEvents(startFrom, 10); TaskCompletionEvent[] ret = new TaskCompletionEvent[acls.length]; for (int i = 0 ; i < acls.length; i++ ) { ret[i] = TaskCompletionEvent.downgrade(acls[i]); } return ret; } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Dump stats to screen */ @Override public String toString() { return job.toString(); } /** * Returns the counters for this job */ public Counters getCounters() throws IOException { Counters result = null; org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Counters temp = job.getCounters(); if(temp != null) { result = Counters.downgrade(temp); } return result; } @Override public String[] getTaskDiagnostics(TaskAttemptID id) throws IOException { try { return job.getTaskDiagnostics(id); } catch (InterruptedException ie) { throw new IOException(ie); } } public String getHistoryUrl() throws IOException { try { return job.getHistoryUrl(); } catch (InterruptedException ie) { throw new IOException(ie); } } public boolean isRetired() throws IOException { try { return job.isRetired(); } catch (InterruptedException ie) { throw new IOException(ie); } } boolean monitorAndPrintJob() throws IOException, InterruptedException { return job.monitorAndPrintJob(); } @Override public String getFailureInfo() throws IOException { try { return job.getStatus().getFailureInfo(); } catch (InterruptedException ie) { throw new IOException(ie); } } @Override public JobStatus getJobStatus() throws IOException { try { return JobStatus.downgrade(job.getStatus()); } catch (InterruptedException ie) { throw new IOException(ie); } } } /** * Ugi of the client. We store this ugi when the client is created and * then make sure that the same ugi is used to run the various protocols. 
*/ UserGroupInformation clientUgi; /** * Create a job client. */ public JobClient() { } /** * Build a job client with the given {@link JobConf}, and connect to the * default cluster * * @param conf the job configuration. * @throws IOException */ public JobClient(JobConf conf) throws IOException { init(conf); } /** * Build a job client with the given {@link Configuration}, * and connect to the default cluster * * @param conf the configuration. * @throws IOException */ public JobClient(Configuration conf) throws IOException { init(new JobConf(conf)); } /** * Connect to the default cluster * @param conf the job configuration. * @throws IOException */ public void init(JobConf conf) throws IOException { setConf(conf); cluster = new Cluster(conf); clientUgi = UserGroupInformation.getCurrentUser(); maxRetry = conf.getInt(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES, MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES); retryInterval = conf.getLong(MRJobConfig.MR_CLIENT_JOB_RETRY_INTERVAL, MRJobConfig.DEFAULT_MR_CLIENT_JOB_RETRY_INTERVAL); } /** * Build a job client, connect to the indicated job tracker. * * @param jobTrackAddr the job tracker to connect to. * @param conf configuration. */ public JobClient(InetSocketAddress jobTrackAddr, Configuration conf) throws IOException { cluster = new Cluster(jobTrackAddr, conf); clientUgi = UserGroupInformation.getCurrentUser(); } /** * Close the JobClient. */ @Override public synchronized void close() throws IOException { cluster.close(); } /** * Get a filesystem handle. We need this to prepare jobs * for submission to the MapReduce system. * * @return the filesystem handle. */ public synchronized FileSystem getFs() throws IOException { try { return cluster.getFileSystem(); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Get a handle to the Cluster */ public Cluster getClusterHandle() { return cluster; } /** * Submit a job to the MR system. 
* * This returns a handle to the {@link RunningJob} which can be used to track * the running-job. * * @param jobFile the job configuration. * @return a handle to the {@link RunningJob} which can be used to track the * running-job. * @throws FileNotFoundException * @throws InvalidJobConfException * @throws IOException */ public RunningJob submitJob(String jobFile) throws FileNotFoundException, InvalidJobConfException, IOException { // Load in the submitted job details JobConf job = new JobConf(jobFile); return submitJob(job); } /** * Submit a job to the MR system. * This returns a handle to the {@link RunningJob} which can be used to track * the running-job. * * @param conf the job configuration. * @return a handle to the {@link RunningJob} which can be used to track the * running-job. * @throws FileNotFoundException * @throws IOException */ public RunningJob submitJob(final JobConf conf) throws FileNotFoundException, IOException { return submitJobInternal(conf); } @InterfaceAudience.Private public RunningJob submitJobInternal(final JobConf conf) throws FileNotFoundException, IOException { try { conf.setBooleanIfUnset("mapred.mapper.new-api", false); conf.setBooleanIfUnset("mapred.reducer.new-api", false); Job job = clientUgi.doAs(new PrivilegedExceptionAction () { @Override public Job run() throws IOException, ClassNotFoundException, InterruptedException { Job job = Job.getInstance(conf); job.submit(); return job; } }); Cluster prev = cluster; // update our Cluster instance with the one created by Job for submission // (we can't pass our Cluster instance to Job, since Job wraps the config // instance, and the two configs would then diverge) cluster = job.getCluster(); // It is important to close the previous cluster instance // to cleanup resources. 
if (prev != null) { prev.close(); } return new NetworkedJob(job); } catch (InterruptedException ie) { throw new IOException("interrupted", ie); } } private Job getJobUsingCluster(final JobID jobid) throws IOException, InterruptedException { return clientUgi.doAs(new PrivilegedExceptionAction() { public Job run() throws IOException, InterruptedException { return cluster.getJob(jobid); } }); } protected RunningJob getJobInner(final JobID jobid) throws IOException { try { Job job = getJobUsingCluster(jobid); if (job != null) { JobStatus status = JobStatus.downgrade(job.getStatus()); if (status != null) { return new NetworkedJob(status, cluster, new JobConf(job.getConfiguration())); } } } catch (InterruptedException ie) { throw new IOException(ie); } return null; } /** * Get an {@link RunningJob} object to track an ongoing job. Returns * null if the id does not correspond to any known job. * * @param jobid the jobid of the job. * @return the {@link RunningJob} handle to track the job, null if the * jobid doesn't correspond to any known job. * @throws IOException */ public RunningJob getJob(final JobID jobid) throws IOException { for (int i = 0;i <= maxRetry;i++) { if (i > 0) { try { Thread.sleep(retryInterval); } catch (Exception e) { } } RunningJob job = getJobInner(jobid); if (job != null) { return job; } } return null; } /**@deprecated Applications should rather use {@link #getJob(JobID)}. */ @Deprecated public RunningJob getJob(String jobid) throws IOException { return getJob(JobID.forName(jobid)); } private static final TaskReport[] EMPTY_TASK_REPORTS = new TaskReport[0]; /** * Get the information of the current state of the map tasks of a job. * * @param jobId the job to query. * @return the list of all of the map tips. 
* @throws IOException */ public TaskReport[] getMapTaskReports(JobID jobId) throws IOException { return getTaskReports(jobId, TaskType.MAP); } private TaskReport[] getTaskReports(final JobID jobId, TaskType type) throws IOException { try { Job j = getJobUsingCluster(jobId); if(j == null) { return EMPTY_TASK_REPORTS; } return TaskReport.downgradeArray(j.getTaskReports(type)); } catch (InterruptedException ie) { throw new IOException(ie); } } /**@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}*/ @Deprecated public TaskReport[] getMapTaskReports(String jobId) throws IOException { return getMapTaskReports(JobID.forName(jobId)); } /** * Get the information of the current state of the reduce tasks of a job. * * @param jobId the job to query. * @return the list of all of the reduce tips. * @throws IOException */ public TaskReport[] getReduceTaskReports(JobID jobId) throws IOException { return getTaskReports(jobId, TaskType.REDUCE); } /** * Get the information of the current state of the cleanup tasks of a job. * * @param jobId the job to query. * @return the list of all of the cleanup tips. * @throws IOException */ public TaskReport[] getCleanupTaskReports(JobID jobId) throws IOException { return getTaskReports(jobId, TaskType.JOB_CLEANUP); } /** * Get the information of the current state of the setup tasks of a job. * * @param jobId the job to query. * @return the list of all of the setup tips. 
* @throws IOException */ public TaskReport[] getSetupTaskReports(JobID jobId) throws IOException { return getTaskReports(jobId, TaskType.JOB_SETUP); } /**@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}*/ @Deprecated public TaskReport[] getReduceTaskReports(String jobId) throws IOException { return getReduceTaskReports(JobID.forName(jobId)); } /** * Display the information about a job's tasks, of a particular type and * in a particular state * * @param jobId the ID of the job * @param type the type of the task (map/reduce/setup/cleanup) * @param state the state of the task * (pending/running/org.apache.hadoop.shaded.com.leted/failed/killed) * @throws IOException when there is an error org.apache.hadoop.shaded.com.unicating with the master * @throws IllegalArgumentException if an invalid type/state is passed */ public void displayTasks(final JobID jobId, String type, String state) throws IOException { try { Job job = getJobUsingCluster(jobId); super.displayTasks(job, type, state); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Get status information about the Map-Reduce cluster. * * @return the status information about the Map-Reduce cluster as an object * of {@link ClusterStatus}. 
* @throws IOException */ public ClusterStatus getClusterStatus() throws IOException { try { return clientUgi.doAs(new PrivilegedExceptionAction() { public ClusterStatus run() throws IOException, InterruptedException { ClusterMetrics metrics = cluster.getClusterStatus(); return new ClusterStatus(metrics.getTaskTrackerCount(), metrics .getBlackListedTaskTrackerCount(), cluster .getTaskTrackerExpiryInterval(), metrics.getOccupiedMapSlots(), metrics.getOccupiedReduceSlots(), metrics.getMapSlotCapacity(), metrics.getReduceSlotCapacity(), cluster.getJobTrackerStatus(), metrics.getDecommissionedTaskTrackerCount(), metrics .getGrayListedTaskTrackerCount()); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } private Collection arrayToStringList(TaskTrackerInfo[] objs) { Collection list = new ArrayList(); for (TaskTrackerInfo info: objs) { list.add(info.getTaskTrackerName()); } return list; } private Collection arrayToBlackListInfo(TaskTrackerInfo[] objs) { Collection list = new ArrayList(); for (TaskTrackerInfo info: objs) { BlackListInfo binfo = new BlackListInfo(); binfo.setTrackerName(info.getTaskTrackerName()); binfo.setReasonForBlackListing(info.getReasonForBlacklist()); binfo.setBlackListReport(info.getBlacklistReport()); list.add(binfo); } return list; } /** * Get status information about the Map-Reduce cluster. * * @param detailed if true then get a detailed status including the * tracker names * @return the status information about the Map-Reduce cluster as an object * of {@link ClusterStatus}. 
* @throws IOException */ public ClusterStatus getClusterStatus(boolean detailed) throws IOException { try { return clientUgi.doAs(new PrivilegedExceptionAction() { public ClusterStatus run() throws IOException, InterruptedException { ClusterMetrics metrics = cluster.getClusterStatus(); return new ClusterStatus(arrayToStringList(cluster.getActiveTaskTrackers()), arrayToBlackListInfo(cluster.getBlackListedTaskTrackers()), cluster.getTaskTrackerExpiryInterval(), metrics.getOccupiedMapSlots(), metrics.getOccupiedReduceSlots(), metrics.getMapSlotCapacity(), metrics.getReduceSlotCapacity(), cluster.getJobTrackerStatus()); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Get the jobs that are not org.apache.hadoop.shaded.com.leted and not failed. * * @return array of {@link JobStatus} for the running/to-be-run jobs. * @throws IOException */ public JobStatus[] jobsToComplete() throws IOException { List stats = new ArrayList(); for (JobStatus stat : getAllJobs()) { if (!stat.isJobComplete()) { stats.add(stat); } } return stats.toArray(new JobStatus[0]); } /** * Get the jobs that are submitted. * * @return array of {@link JobStatus} for the submitted jobs. * @throws IOException */ public JobStatus[] getAllJobs() throws IOException { try { org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.JobStatus[] jobs = clientUgi.doAs(new PrivilegedExceptionAction< org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.JobStatus[]> () { public org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.JobStatus[] run() throws IOException, InterruptedException { return cluster.getAllJobStatuses(); } }); JobStatus[] stats = new JobStatus[jobs.length]; for (int i = 0; i < jobs.length; i++) { stats[i] = JobStatus.downgrade(jobs[i]); } return stats; } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Utility that submits a job, then polls for progress until the job is * org.apache.hadoop.shaded.com.lete. * * @param job the job configuration. 
* @throws IOException if the job fails */ public static RunningJob runJob(JobConf job) throws IOException { JobClient jc = new JobClient(job); RunningJob rj = jc.submitJob(job); try { if (!jc.monitorAndPrintJob(job, rj)) { throw new IOException("Job failed!"); } } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } return rj; } /** * Monitor a job and print status in real-time as progress is made and tasks * fail. * @param conf the job's configuration * @param job the job to track * @return true if the job succeeded * @throws IOException if org.apache.hadoop.shaded.com.unication to the JobTracker fails */ public boolean monitorAndPrintJob(JobConf conf, RunningJob job ) throws IOException, InterruptedException { return ((NetworkedJob)job).monitorAndPrintJob(); } static String getTaskLogURL(TaskAttemptID taskId, String baseUrl) { return (baseUrl + "/tasklog?plaintext=true&attemptid=" + taskId); } static Configuration getConfiguration(String jobTrackerSpec) { Configuration conf = new Configuration(); if (jobTrackerSpec != null) { if (jobTrackerSpec.indexOf(":") >= 0) { conf.set("mapred.job.tracker", jobTrackerSpec); } else { String classpathFile = "hadoop-" + jobTrackerSpec + ".xml"; URL validate = conf.getResource(classpathFile); if (validate == null) { throw new RuntimeException(classpathFile + " not found on CLASSPATH"); } conf.addResource(classpathFile); } } return conf; } /** * Sets the output filter for tasks. only those tasks are printed whose * output matches the filter. * @param newValue task filter. */ @Deprecated public void setTaskOutputFilter(TaskStatusFilter newValue){ this.taskOutputFilter = newValue; } /** * Get the task output filter out of the JobConf. * * @param job the JobConf to examine. * @return the filter level. */ public static TaskStatusFilter getTaskOutputFilter(JobConf job) { return TaskStatusFilter.valueOf(job.get("jobclient.output.filter", "FAILED")); } /** * Modify the JobConf to set the task output filter. 
* * @param job the JobConf to modify. * @param newValue the value to set. */ public static void setTaskOutputFilter(JobConf job, TaskStatusFilter newValue) { job.set("jobclient.output.filter", newValue.toString()); } /** * Returns task output filter. * @return task filter. */ @Deprecated public TaskStatusFilter getTaskOutputFilter(){ return this.taskOutputFilter; } protected long getCounter(org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Counters cntrs, String counterGroupName, String counterName) throws IOException { Counters counters = Counters.downgrade(cntrs); return counters.findCounter(counterGroupName, counterName).getValue(); } /** * Get status information about the max available Maps in the cluster. * * @return the max available Maps in the cluster * @throws IOException */ public int getDefaultMaps() throws IOException { try { return clientUgi.doAs(new PrivilegedExceptionAction() { @Override public Integer run() throws IOException, InterruptedException { return cluster.getClusterStatus().getMapSlotCapacity(); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Get status information about the max available Reduces in the cluster. * * @return the max available Reduces in the cluster * @throws IOException */ public int getDefaultReduces() throws IOException { try { return clientUgi.doAs(new PrivilegedExceptionAction() { @Override public Integer run() throws IOException, InterruptedException { return cluster.getClusterStatus().getReduceSlotCapacity(); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Grab the jobtracker system directory path where job-specific files are to be placed. * * @return the system directory where job-specific files are to be placed. 
*/ public Path getSystemDir() { try { return clientUgi.doAs(new PrivilegedExceptionAction() { @Override public Path run() throws IOException, InterruptedException { return cluster.getSystemDir(); } }); } catch (IOException org.apache.hadoop.shaded.io.) { return null; } catch (InterruptedException ie) { return null; } } /** * Checks if the job directory is clean and has all the required org.apache.hadoop.shaded.com.onents * for (re) starting the job */ public static boolean isJobDirValid(Path jobDirPath, FileSystem fs) throws IOException { FileStatus[] contents = fs.listStatus(jobDirPath); int matchCount = 0; if (contents != null && contents.length >= 2) { for (FileStatus status : contents) { if ("job.xml".equals(status.getPath().getName())) { ++matchCount; } if ("job.split".equals(status.getPath().getName())) { ++matchCount; } } if (matchCount == 2) { return true; } } return false; } /** * Fetch the staging area directory for the application * * @return path to staging area directory * @throws IOException */ public Path getStagingAreaDir() throws IOException { try { return clientUgi.doAs(new PrivilegedExceptionAction() { @Override public Path run() throws IOException, InterruptedException { return cluster.getStagingAreaDir(); } }); } catch (InterruptedException ie) { // throw RuntimeException instead for org.apache.hadoop.shaded.com.atibility reasons throw new RuntimeException(ie); } } private JobQueueInfo getJobQueueInfo(QueueInfo queue) { JobQueueInfo ret = new JobQueueInfo(queue); // make sure to convert any children if (queue.getQueueChildren().size() > 0) { List childQueues = new ArrayList(queue .getQueueChildren().size()); for (QueueInfo child : queue.getQueueChildren()) { childQueues.add(getJobQueueInfo(child)); } ret.setChildren(childQueues); } return ret; } private JobQueueInfo[] getJobQueueInfoArray(QueueInfo[] queues) throws IOException { JobQueueInfo[] ret = new JobQueueInfo[queues.length]; for (int i = 0; i < queues.length; i++) { ret[i] = 
getJobQueueInfo(queues[i]); } return ret; } /** * Returns an array of queue information objects about root level queues * configured * * @return the array of root level JobQueueInfo objects * @throws IOException */ public JobQueueInfo[] getRootQueues() throws IOException { try { return clientUgi.doAs(new PrivilegedExceptionAction() { public JobQueueInfo[] run() throws IOException, InterruptedException { return getJobQueueInfoArray(cluster.getRootQueues()); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Returns an array of queue information objects about immediate children * of queue queueName. * * @param queueName * @return the array of immediate children JobQueueInfo objects * @throws IOException */ public JobQueueInfo[] getChildQueues(final String queueName) throws IOException { try { return clientUgi.doAs(new PrivilegedExceptionAction() { public JobQueueInfo[] run() throws IOException, InterruptedException { return getJobQueueInfoArray(cluster.getChildQueues(queueName)); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Return an array of queue information objects about all the Job Queues * configured. 
* * @return Array of JobQueueInfo objects * @throws IOException */ public JobQueueInfo[] getQueues() throws IOException { try { return clientUgi.doAs(new PrivilegedExceptionAction() { public JobQueueInfo[] run() throws IOException, InterruptedException { return getJobQueueInfoArray(cluster.getQueues()); } }); } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Gets all the jobs which were added to particular Job Queue * * @param queueName name of the Job Queue * @return Array of jobs present in the job queue * @throws IOException */ public JobStatus[] getJobsFromQueue(final String queueName) throws IOException { try { QueueInfo queue = clientUgi.doAs(new PrivilegedExceptionAction() { @Override public QueueInfo run() throws IOException, InterruptedException { return cluster.getQueue(queueName); } }); if (queue == null) { return null; } org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.JobStatus[] stats = queue.getJobStatuses(); JobStatus[] ret = new JobStatus[stats.length]; for (int i = 0 ; i < stats.length; i++ ) { ret[i] = JobStatus.downgrade(stats[i]); } return ret; } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Gets the queue information associated to a particular Job Queue * * @param queueName name of the job queue. * @return Queue information associated to particular queue. * @throws IOException */ public JobQueueInfo getQueueInfo(final String queueName) throws IOException { try { QueueInfo queueInfo = clientUgi.doAs(new PrivilegedExceptionAction() { public QueueInfo run() throws IOException, InterruptedException { return cluster.getQueue(queueName); } }); if (queueInfo != null) { return new JobQueueInfo(queueInfo); } return null; } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Gets the Queue ACLs for current user * @return array of QueueAclsInfo object for current user. 
* @throws IOException */ public QueueAclsInfo[] getQueueAclsForCurrentUser() throws IOException { try { org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.QueueAclsInfo[] acls = clientUgi.doAs(new PrivilegedExceptionAction () { public org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.QueueAclsInfo[] run() throws IOException, InterruptedException { return cluster.getQueueAclsForCurrentUser(); } }); QueueAclsInfo[] ret = new QueueAclsInfo[acls.length]; for (int i = 0 ; i < acls.length; i++ ) { ret[i] = QueueAclsInfo.downgrade(acls[i]); } return ret; } catch (InterruptedException ie) { throw new IOException(ie); } } /** * Get a delegation token for the user from the JobTracker. * @param renewer the user who can renew the token * @return the new token * @throws IOException */ public Token getDelegationToken(final Text renewer) throws IOException, InterruptedException { return clientUgi.doAs(new PrivilegedExceptionAction>() { public Token run() throws IOException, InterruptedException { return cluster.getDelegationToken(renewer); } }); } /** * Renew a delegation token * @param token the token to renew * @return true if the renewal went well * @throws InvalidToken * @throws IOException * @deprecated Use {@link Token#renew} instead */ public long renewDelegationToken(Token token ) throws InvalidToken, IOException, InterruptedException { return token.renew(getConf()); } /** * Cancel a delegation token from the JobTracker * @param token the token to cancel * @throws IOException * @deprecated Use {@link Token#cancel} instead */ public void cancelDelegationToken(Token token ) throws InvalidToken, IOException, InterruptedException { token.cancel(getConf()); } /** */ public static void main(String argv[]) throws Exception { int res = ToolRunner.run(new JobClient(), argv); System.exit(res); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy