// org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl (source listing header: Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.lib.jobcontrol;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State;
import org.apache.hadoop.util.StringUtils;
/**
 * This class encapsulates a set of MapReduce jobs and their dependencies.
 *
 * Jobs that have not finished yet are kept in a single in-progress list;
 * once a job is seen to have succeeded or failed it is moved to the
 * successful or the failed list respectively.
 *
 * This class provides APIs for the client app to add a job to the group
 * and to get the jobs in the group in different states. When a job is
 * added, an ID unique to the group is assigned to the job.
 *
 * {@link #run()} is meant to be executed by a thread: it submits jobs when
 * they become ready and polls the state of every in-progress job. The class
 * provides APIs for suspending/resuming that loop, and for stopping it.
 *
 * The job lists are guarded by this object's monitor; the thread state is a
 * volatile field so that stop/suspend/resume requests issued from other
 * threads become visible to the running loop.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class JobControl implements Runnable {
  private static final Log LOG = LogFactory.getLog(JobControl.class);

  /** Pause, in milliseconds, between two polls and while suspended. */
  private static final long POLL_INTERVAL_MS = 5000;

  // The thread can be in one of the following states
  public static enum ThreadState {RUNNING, SUSPENDED, STOPPED, STOPPING, READY}

  // Written by client threads (stop/suspend/resume) and read by the run()
  // loop without holding a lock, hence volatile.
  private volatile ThreadState runnerState;

  // All three lists are guarded by synchronized(this).
  private LinkedList<ControlledJob> jobsInProgress =
      new LinkedList<ControlledJob>();
  private LinkedList<ControlledJob> successfulJobs =
      new LinkedList<ControlledJob>();
  private LinkedList<ControlledJob> failedJobs =
      new LinkedList<ControlledJob>();

  private long nextJobID;
  private String groupName;

  /**
   * Construct a job control for a group of jobs.
   * @param groupName a name identifying this group
   */
  public JobControl(String groupName) {
    this.nextJobID = -1;
    this.groupName = groupName;
    this.runnerState = ThreadState.READY;
  }

  /**
   * Copy the given job list into a new list.
   * Callers must hold this object's monitor, because that is the lock under
   * which the job lists are modified.
   */
  private static List<ControlledJob> toList(LinkedList<ControlledJob> jobs) {
    return new ArrayList<ControlledJob>(jobs);
  }

  /**
   * @param state the job state to select
   * @return a new list holding the in-progress jobs currently in that state
   */
  synchronized private List<ControlledJob> getJobsIn(State state) {
    LinkedList<ControlledJob> l = new LinkedList<ControlledJob>();
    for (ControlledJob j : jobsInProgress) {
      if (j.getJobState() == state) {
        l.add(j);
      }
    }
    return l;
  }

  /**
   * @return the jobs in the waiting state
   */
  public List<ControlledJob> getWaitingJobList() {
    return getJobsIn(State.WAITING);
  }

  /**
   * @return the jobs in the running state
   */
  public List<ControlledJob> getRunningJobList() {
    return getJobsIn(State.RUNNING);
  }

  /**
   * @return the jobs in the ready state
   */
  public List<ControlledJob> getReadyJobsList() {
    return getJobsIn(State.READY);
  }

  /**
   * @return the jobs in the success state
   */
  synchronized public List<ControlledJob> getSuccessfulJobList() {
    return toList(this.successfulJobs);
  }

  /**
   * @return the jobs that were moved to the failed list
   */
  synchronized public List<ControlledJob> getFailedJobList() {
    return toList(this.failedJobs);
  }

  /**
   * Produce the next job ID: the group name followed by a running counter.
   * Not synchronized itself; its only caller, addJob, holds the monitor.
   */
  private String getNextJobID() {
    nextJobID += 1;
    return this.groupName + this.nextJobID;
  }

  /**
   * Add a new job. The job is given a fresh ID, put into the WAITING state
   * and appended to the in-progress list.
   * @param aJob the new job
   * @return the ID assigned to the job
   */
  synchronized public String addJob(ControlledJob aJob) {
    String id = this.getNextJobID();
    aJob.setJobID(id);
    aJob.setJobState(State.WAITING);
    jobsInProgress.add(aJob);
    return id;
  }

  /**
   * Add a collection of jobs
   *
   * @param jobs the jobs to add; each one is passed to addJob in iteration
   *             order
   */
  public void addJobCollection(Collection<ControlledJob> jobs) {
    for (ControlledJob job : jobs) {
      addJob(job);
    }
  }

  /**
   * @return the thread state
   */
  public ThreadState getThreadState() {
    return this.runnerState;
  }

  /**
   * set the thread state to STOPPING so that the
   * thread will stop when it wakes up.
   */
  public void stop() {
    this.runnerState = ThreadState.STOPPING;
  }

  /**
   * suspend the running thread
   */
  public void suspend () {
    if (this.runnerState == ThreadState.RUNNING) {
      this.runnerState = ThreadState.SUSPENDED;
    }
  }

  /**
   * resume the suspended thread
   */
  public void resume () {
    if (this.runnerState == ThreadState.SUSPENDED) {
      this.runnerState = ThreadState.RUNNING;
    }
  }

  /**
   * @return true if no job is left in the in-progress list
   */
  synchronized public boolean allFinished() {
    return jobsInProgress.isEmpty();
  }

  /**
   * Sleep for one poll interval.
   */
  private static void pause() {
    try {
      Thread.sleep(POLL_INTERVAL_MS);
    } catch (InterruptedException ignored) {
      // Deliberately swallowed: an interrupt only cuts the pause short, the
      // loop is left exclusively through stop(). The interrupt flag is not
      // re-asserted because every following sleep would then return
      // immediately and the loop would spin.
    }
  }

  /**
   * @return true if the thread state is neither RUNNING nor SUSPENDED
   */
  private boolean shouldExit() {
    // Read the volatile field once: with two separate reads a concurrent
    // resume() (SUSPENDED -> RUNNING) could make both comparisons succeed
    // and terminate the loop by mistake.
    ThreadState state = this.runnerState;
    return state != ThreadState.RUNNING && state != ThreadState.SUSPENDED;
  }

  /**
   * The main loop for the thread.
   * The loop does the following:
   * Check the states of the running jobs
   * Update the states of waiting jobs
   * Submit the jobs in ready state
   *
   * Any Throwable escaping the loop is logged and all in-progress jobs are
   * marked as failed. The thread state is STOPPED when this method returns.
   */
  public void run() {
    try {
      this.runnerState = ThreadState.RUNNING;
      while (true) {
        while (this.runnerState == ThreadState.SUSPENDED) {
          pause();
        }
        synchronized (this) {
          Iterator<ControlledJob> it = jobsInProgress.iterator();
          while (it.hasNext()) {
            ControlledJob j = it.next();
            LOG.debug("Checking state of job "+j);
            switch (j.checkState()) {
            case SUCCESS:
              successfulJobs.add(j);
              it.remove();
              break;
            case FAILED:
            case DEPENDENT_FAILED:
              failedJobs.add(j);
              it.remove();
              break;
            case READY:
              j.submit();
              break;
            case RUNNING:
            case WAITING:
              //Do Nothing
              break;
            }
          }
        }
        // Checked both before and after the pause so that a stop request
        // is honoured without an extra poll of the jobs.
        if (shouldExit()) {
          break;
        }
        pause();
        if (shouldExit()) {
          break;
        }
      }
    } catch (Throwable t) {
      LOG.error("Error while trying to run jobs.",t);
      //Mark all jobs as failed because we got something bad.
      failAllJobs(t);
    }
    this.runnerState = ThreadState.STOPPED;
  }

  /**
   * Fail every in-progress job with a message describing the given error
   * and move it to the failed list. A job is moved even if failing it
   * throws; that exception is only logged.
   * @param t the error that aborted the main loop
   */
  synchronized private void failAllJobs(Throwable t) {
    String message = "Unexpected System Error Occured: "+
        StringUtils.stringifyException(t);
    Iterator<ControlledJob> it = jobsInProgress.iterator();
    while (it.hasNext()) {
      ControlledJob j = it.next();
      try {
        j.failJob(message);
      } catch (IOException e) {
        LOG.error("Error while tyring to clean up "+j.getJobName(), e);
      } catch (InterruptedException e) {
        LOG.error("Error while tyring to clean up "+j.getJobName(), e);
      } finally {
        failedJobs.add(j);
        it.remove();
      }
    }
  }
}