/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import java.io.IOException;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configuration.IntegerRanges;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.protocol.ClientProtocol;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.util.ConfigUtil;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ReservationId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The job submitter's view of the Job.
*
* <p>It allows the user to configure the
* job, submit it, control its execution, and query the state. The set methods
* only work until the job is submitted; afterwards they will throw an
* IllegalStateException.</p>
*
* <p>Normally the user creates the application, describes various facets of the
* job via {@link Job}, and then submits the job and monitors its progress.</p>
*
* <p>Here is an example on how to submit a job:</p>
* <blockquote><pre>
*     // Create a new Job
*     Job job = Job.getInstance();
*     job.setJarByClass(MyJob.class);
*
*     // Specify various job-specific parameters
*     job.setJobName("myjob");
*
*     FileInputFormat.addInputPath(job, new Path("in"));
*     FileOutputFormat.setOutputPath(job, new Path("out"));
*
*     job.setMapperClass(MyJob.MyMapper.class);
*     job.setReducerClass(MyJob.MyReducer.class);
*
*     // Submit the job, then poll for progress until the job is complete
*     job.waitForCompletion(true);
* </pre></blockquote>
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class Job extends JobContextImpl implements JobContext, AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
@InterfaceStability.Evolving
public enum JobState {DEFINE, RUNNING};
private static final long MAX_JOBSTATUS_AGE = 1000 * 2;
public static final String OUTPUT_FILTER = "mapreduce.client.output.filter";
/** Key in mapred-*.xml that sets completionPollIntervalMillis */
public static final String COMPLETION_POLL_INTERVAL_KEY =
"mapreduce.client.completion.pollinterval";
/** Default completionPollIntervalMillis is 5000 ms. */
static final int DEFAULT_COMPLETION_POLL_INTERVAL = 5000;
/** Key in mapred-*.xml that sets progMonitorPollIntervalMillis */
public static final String PROGRESS_MONITOR_POLL_INTERVAL_KEY =
"mapreduce.client.progressmonitor.pollinterval";
/** Default progMonitorPollIntervalMillis is 1000 ms. */
static final int DEFAULT_MONITOR_POLL_INTERVAL = 1000;
public static final String USED_GENERIC_PARSER =
"mapreduce.client.genericoptionsparser.used";
public static final String SUBMIT_REPLICATION =
"mapreduce.client.submit.file.replication";
public static final int DEFAULT_SUBMIT_REPLICATION = 10;
public static final String USE_WILDCARD_FOR_LIBJARS =
"mapreduce.client.libjars.wildcard";
public static final boolean DEFAULT_USE_WILDCARD_FOR_LIBJARS = true;
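/*
* Illustrative only: the client-side polling intervals above can be tuned
* through the configuration before the Job is created. The values below are
* examples, not recommendations.
*
*   Configuration conf = new Configuration();
*   conf.setInt(Job.COMPLETION_POLL_INTERVAL_KEY, 1000);
*   conf.setInt(Job.PROGRESS_MONITOR_POLL_INTERVAL_KEY, 500);
*   Job job = Job.getInstance(conf);
*/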
@InterfaceStability.Evolving
public enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }
static {
ConfigUtil.loadResources();
}
private JobState state = JobState.DEFINE;
private JobStatus status;
private long statustime;
private Cluster cluster;
private ReservationId reservationId;
/**
* @deprecated Use {@link #getInstance()}
*/
@Deprecated
public Job() throws IOException {
this(new JobConf(new Configuration()));
}
/**
* @deprecated Use {@link #getInstance(Configuration)}
*/
@Deprecated
public Job(Configuration conf) throws IOException {
this(new JobConf(conf));
}
/**
* @deprecated Use {@link #getInstance(Configuration, String)}
*/
@Deprecated
public Job(Configuration conf, String jobName) throws IOException {
this(new JobConf(conf));
setJobName(jobName);
}
Job(JobConf conf) throws IOException {
super(conf, null);
// propagate existing user credentials to job
this.credentials.mergeAll(this.ugi.getCredentials());
this.cluster = null;
}
Job(JobStatus status, JobConf conf) throws IOException {
this(conf);
setJobID(status.getJobID());
this.status = status;
state = JobState.RUNNING;
}
/**
* Creates a new {@link Job} with no particular {@link Cluster}.
* A Cluster will be created with a generic {@link Configuration}.
*
* @return the {@link Job}, with no connection to a cluster yet.
* @throws IOException
*/
public static Job getInstance() throws IOException {
// create with a null Cluster
return getInstance(new Configuration());
}
/**
* Creates a new {@link Job} with no particular {@link Cluster} and a
* given {@link Configuration}.
*
* The Job makes a copy of the Configuration so
* that any necessary internal modifications do not reflect on the incoming
* parameter.
*
* A Cluster will be created from the conf parameter only when it's needed.
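*
* <p>A minimal sketch of the copy semantics ({@code "example.key"} is a
* hypothetical property name, for illustration only):</p>
* <pre>{@code
*   Configuration conf = new Configuration();
*   Job job = Job.getInstance(conf);
*   conf.set("example.key", "changed");        // later change to the caller's conf
*   job.getConfiguration().get("example.key"); // job's copy is unaffected: null
* }</pre>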
*
* @param conf the configuration
* @return the {@link Job}, with no connection to a cluster yet.
* @throws IOException
*/
public static Job getInstance(Configuration conf) throws IOException {
// create with a null Cluster
JobConf jobConf = new JobConf(conf);
return new Job(jobConf);
}
/**
* Creates a new {@link Job} with no particular {@link Cluster} and a given jobName.
* A Cluster will be created from the conf parameter only when it's needed.
*
* The Job makes a copy of the Configuration so
* that any necessary internal modifications do not reflect on the incoming
* parameter.
*
* @param conf the configuration
* @param jobName the job name
* @return the {@link Job}, with no connection to a cluster yet.
* @throws IOException
*/
public static Job getInstance(Configuration conf, String jobName)
throws IOException {
// create with a null Cluster
Job result = getInstance(conf);
result.setJobName(jobName);
return result;
}
/**
* Creates a new {@link Job} with no particular {@link Cluster} and given
* {@link Configuration} and {@link JobStatus}.
* A Cluster will be created from the conf parameter only when it's needed.
*
* The Job makes a copy of the Configuration so
* that any necessary internal modifications do not reflect on the incoming
* parameter.
*
* @param status job status
* @param conf job configuration
* @return the {@link Job}, with no connection to a cluster yet.
* @throws IOException
*/
public static Job getInstance(JobStatus status, Configuration conf)
throws IOException {
return new Job(status, new JobConf(conf));
}
/**
* Creates a new {@link Job} with no particular {@link Cluster}.
* A Cluster will be created from the conf parameter only when it's needed.
*
* The Job makes a copy of the Configuration so
* that any necessary internal modifications do not reflect on the incoming
* parameter.
*
* @param ignored the ignored {@link Cluster} argument
* @return the {@link Job}, with no connection to a cluster yet.
* @throws IOException
* @deprecated Use {@link #getInstance()}
*/
@Deprecated
public static Job getInstance(Cluster ignored) throws IOException {
return getInstance();
}
/**
* Creates a new {@link Job} with no particular {@link Cluster} and given
* {@link Configuration}.
* A Cluster will be created from the conf parameter only when it's needed.
*
* The Job makes a copy of the Configuration so
* that any necessary internal modifications do not reflect on the incoming
* parameter.
*
* @param ignored the ignored {@link Cluster} argument
* @param conf job configuration
* @return the {@link Job}, with no connection to a cluster yet.
* @throws IOException
* @deprecated Use {@link #getInstance(Configuration)}
*/
@Deprecated
public static Job getInstance(Cluster ignored, Configuration conf)
throws IOException {
return getInstance(conf);
}
/**
* Creates a new {@link Job} with no particular {@link Cluster} and given
* {@link Configuration} and {@link JobStatus}.
* A Cluster will be created from the conf parameter only when it's needed.
*
* The Job makes a copy of the Configuration so
* that any necessary internal modifications do not reflect on the incoming
* parameter.
*
* @param cluster cluster
* @param status job status
* @param conf job configuration
* @return the {@link Job}, with no connection to a cluster yet.
* @throws IOException
*/
@Private
public static Job getInstance(Cluster cluster, JobStatus status,
Configuration conf) throws IOException {
Job job = getInstance(status, conf);
job.setCluster(cluster);
return job;
}
private void ensureState(JobState state) throws IllegalStateException {
if (state != this.state) {
throw new IllegalStateException("Job in state "+ this.state +
" instead of " + state);
}
if (state == JobState.RUNNING && cluster == null) {
throw new IllegalStateException("Job in state " + this.state
+ ", but it isn't attached to any job tracker!");
}
}
/**
* Some methods rely on having a recent job status object. Refresh
* it, if necessary
*/
synchronized void ensureFreshStatus()
throws IOException {
if (System.currentTimeMillis() - statustime > MAX_JOBSTATUS_AGE) {
updateStatus();
}
}
/**
* Some methods need an up-to-date status, so refresh it immediately.
* @throws IOException
*/
synchronized void updateStatus() throws IOException {
try {
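// fetch the latest status as the job's user (UGI) so that secure
// clusters authorize the RPC as the correct principal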
this.status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
@Override
public JobStatus run() throws IOException, InterruptedException {
return cluster.getClient().getJobStatus(getJobID());
}
});
}
catch (InterruptedException ie) {
throw new IOException(ie);
}
if (this.status == null) {
throw new IOException("Job status not available ");
}
this.statustime = System.currentTimeMillis();
}
public JobStatus getStatus() throws IOException, InterruptedException {
ensureState(JobState.RUNNING);
updateStatus();
return status;
}
/**
* Returns the current state of the Job.
*
* @return the {@link JobStatus.State} of the job
* @throws IOException
* @throws InterruptedException
*/
public JobStatus.State getJobState()
throws IOException, InterruptedException {
ensureState(JobState.RUNNING);
updateStatus();
return status.getState();
}
/**
* Get the URL where some job progress information will be displayed.
*
* @return the URL where some job progress information will be displayed.
*/
public String getTrackingURL() {
ensureState(JobState.RUNNING);
return status.getTrackingUrl().toString();
}
/**
* Get the path of the submitted job configuration.
*
* @return the path of the submitted job configuration.
*/
public String getJobFile() {
ensureState(JobState.RUNNING);
return status.getJobFile();
}
/**
* Get start time of the job.
*
* @return the start time of the job
*/
public long getStartTime() {
ensureState(JobState.RUNNING);
return status.getStartTime();
}
/**
* Get finish time of the job.
*
* @return the finish time of the job
*/
public long getFinishTime() throws IOException, InterruptedException {
ensureState(JobState.RUNNING);
updateStatus();
return status.getFinishTime();
}
/**
* Get scheduling info of the job.
*
* @return the scheduling info of the job
*/
public String getSchedulingInfo() {
ensureState(JobState.RUNNING);
return status.getSchedulingInfo();
}
/**
* Get the priority of the job.
*
* @return the priority info of the job
*/
public JobPriority getPriority() throws IOException, InterruptedException {
ensureState(JobState.RUNNING);
updateStatus();
return status.getPriority();
}
/**
* The user-specified job name.
*/
public String getJobName() {
if (state == JobState.DEFINE || status == null) {
return super.getJobName();
}
ensureState(JobState.RUNNING);
return status.getJobName();
}
public String getHistoryUrl() throws IOException, InterruptedException {
ensureState(JobState.RUNNING);
updateStatus();
return status.getHistoryFile();
}
public boolean isRetired() throws IOException, InterruptedException {
ensureState(JobState.RUNNING);
updateStatus();
return status.isRetired();
}
@Private
public Cluster getCluster() {
return cluster;
}
/** Only for mocks in unit tests. */
@Private
private void setCluster(Cluster cluster) {
this.cluster = cluster;
}
/**
* Return a human-readable summary of the job's status.
*/
@Override
public String toString() {
ensureState(JobState.RUNNING);
String reasonForFailure = " ";
int numMaps = 0;
int numReduces = 0;
try {
updateStatus();
if (status.getState().equals(JobStatus.State.FAILED))
reasonForFailure = getTaskFailureEventString();
numMaps = getTaskReports(TaskType.MAP).length;
numReduces = getTaskReports(TaskType.REDUCE).length;
} catch (IOException e) {
// ignore and report whatever status information we already have
} catch (InterruptedException ie) {
// ignore and report whatever status information we already have
}
StringBuilder sb = new StringBuilder();
sb.append("Job: ").append(status.getJobID()).append("\n");
sb.append("Job File: ").append(status.getJobFile()).append("\n");
sb.append("Job Tracking URL : ").append(status.getTrackingUrl());
sb.append("\n");
sb.append("Uber job : ").append(status.isUber()).append("\n");
sb.append("Number of maps: ").append(numMaps).append("\n");
sb.append("Number of reduces: ").append(numReduces).append("\n");
sb.append("map() completion: ");
sb.append(status.getMapProgress()).append("\n");
sb.append("reduce() completion: ");
sb.append(status.getReduceProgress()).append("\n");
sb.append("Job state: ");
sb.append(status.getState()).append("\n");
sb.append("retired: ").append(status.isRetired()).append("\n");
sb.append("reason for failure: ").append(reasonforFailure);
return sb.toString();
}
/**
* @return the id of the task that caused the job to fail
* @throws IOException
* @throws InterruptedException
*/
String getTaskFailureEventString() throws IOException,
InterruptedException {
int failCount = 1;
TaskCompletionEvent lastEvent = null;
TaskCompletionEvent[] events = ugi.doAs(
new PrivilegedExceptionAction<TaskCompletionEvent[]>() {
@Override
public TaskCompletionEvent[] run() throws IOException,
InterruptedException {
return cluster.getClient().getTaskCompletionEvents(
status.getJobID(), 0, 10);
}
});
for (TaskCompletionEvent event : events) {
if (event.getStatus().equals(TaskCompletionEvent.Status.FAILED)) {
failCount++;
lastEvent = event;
}
}
if (lastEvent == null) {
return "There are no failed tasks for the job. "
+ "Job is failed due to some other reason and reason "
+ "can be found in the logs.";
}
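// Derive the task ID from the attempt ID: split off the "attempt" prefix,
// then drop the trailing "_<attempt number>" suffix.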
String[] taskAttemptID = lastEvent.getTaskAttemptId().toString().split("_", 2);
String taskID = taskAttemptID[1].substring(0, taskAttemptID[1].length()-2);
return (" task " + taskID + " failed " +
failCount + " times " + "For details check tasktracker at: " +
lastEvent.getTaskTrackerHttp());
}
/**
* Get the information of the current state of the tasks of a job.
*
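* <p>A hedged usage sketch (assumes a submitted {@code job}; the output
* format is illustrative):</p>
* <pre>{@code
*   for (TaskReport report : job.getTaskReports(TaskType.MAP)) {
*     System.out.println(report.getState() + " " + report.getProgress());
*   }
* }</pre>
*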
* @param type Type of the task
* @return an array of reports for the tasks of the given type
* @throws IOException
* @throws InterruptedException
*/
public TaskReport[] getTaskReports(TaskType type)
throws IOException, InterruptedException {
ensureState(JobState.RUNNING);
final TaskType tmpType = type;
return ugi.doAs(new PrivilegedExceptionAction<TaskReport[]>() {
public TaskReport[] run() throws IOException, InterruptedException {
return cluster.getClient().getTaskReports(getJobID(), tmpType);
}
});
}
/**
* Get the progress of the job's map-tasks, as a float between 0.0
* and 1.0. When all map tasks have completed, the function returns 1.0.
*
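* <p>Illustrative polling loop (assumes a submitted {@code job}; the
* sleep interval is an example):</p>
* <pre>{@code
*   while (!job.isComplete()) {
*     System.out.printf("map %.0f%% reduce %.0f%%%n",
*         job.mapProgress() * 100, job.reduceProgress() * 100);
*     Thread.sleep(1000);
*   }
* }</pre>
*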
* @return the progress of the job's map-tasks.
* @throws IOException
*/
public float mapProgress() throws IOException {
ensureState(JobState.RUNNING);
ensureFreshStatus();
return status.getMapProgress();
}
/**
* Get the progress of the job's reduce-tasks, as a float between 0.0
* and 1.0. When all reduce tasks have completed, the function returns 1.0.
*
* @return the progress of the job's reduce-tasks.
* @throws IOException
*/
public float reduceProgress() throws IOException {
ensureState(JobState.RUNNING);
ensureFreshStatus();
return status.getReduceProgress();
}
/**
* Get the progress of the job's cleanup-tasks, as a float between 0.0
* and 1.0. When all cleanup tasks have completed, the function returns 1.0.
*
* @return the progress of the job's cleanup-tasks.
* @throws IOException
*/
public float cleanupProgress() throws IOException, InterruptedException {
ensureState(JobState.RUNNING);
ensureFreshStatus();
return status.getCleanupProgress();
}
/**
* Get the progress of the job's setup-tasks, as a float between 0.0
* and 1.0. When all setup tasks have completed, the function returns 1.0.
*
* @return the progress of the job's setup-tasks.
* @throws IOException
*/
public float setupProgress() throws IOException {
ensureState(JobState.RUNNING);
ensureFreshStatus();
return status.getSetupProgress();
}
/**
* Check if the job is finished or not.
* This is a non-blocking call.
*
* @return true if the job is complete, else false.
* @throws IOException
*/
public boolean isComplete() throws IOException {
ensureState(JobState.RUNNING);
updateStatus();
return status.isJobComplete();
}
/**
* Check if the job completed successfully.
*
* @return true if the job succeeded, else false.
* @throws IOException
*/
public boolean isSuccessful() throws IOException {
ensureState(JobState.RUNNING);
updateStatus();
return status.getState() == JobStatus.State.SUCCEEDED;
}
/**
* Kill the running job. Blocks until all job tasks have been
* killed as well. If the job is no longer running, it simply returns.
*
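* <p>A sketch (the {@code deadlineExceeded} flag is hypothetical):</p>
* <pre>{@code
*   if (!job.isComplete() && deadlineExceeded) {
*     job.killJob();
*   }
* }</pre>
*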
* @throws IOException
*/
public void killJob() throws IOException {
ensureState(JobState.RUNNING);
try {
cluster.getClient().killJob(getJobID());
}
catch (InterruptedException ie) {
throw new IOException(ie);
}
}
/**
* Set the priority of the job. May be called while the job is being
* defined or while it is running.
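* <p>A minimal sketch:</p>
* <pre>{@code
*   job.setPriority(JobPriority.HIGH);
* }</pre>
*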
* @param jobPriority the new priority for the job.
* @throws IOException
*/
public void setPriority(JobPriority jobPriority) throws IOException,
InterruptedException {
if (state == JobState.DEFINE) {
if (jobPriority == JobPriority.UNDEFINED_PRIORITY) {
conf.setJobPriorityAsInteger(convertPriorityToInteger(jobPriority));
} else {
conf.setJobPriority(org.apache.hadoop.mapred.JobPriority
.valueOf(jobPriority.name()));
}
} else {
ensureState(JobState.RUNNING);
final int tmpPriority = convertPriorityToInteger(jobPriority);
ugi.doAs(new PrivilegedExceptionAction