org.apache.hadoop.mapred.TaskScheduler Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hadoop-mapred
The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.IOException;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.server.jobtracker.TaskTracker;

/**
 * Used by a {@link JobTracker} to schedule {@link Task}s on
 * {@link TaskTracker}s.
 *
 * <p>{@link TaskScheduler}s typically use one or more
 * {@link JobInProgressListener}s to receive notifications about jobs.
 *
 * <p>It is the responsibility of the {@link TaskScheduler}
 * to initialize tasks for a job, by calling {@link JobInProgress#initTasks()}
 * between the job being added (when
 * {@link JobInProgressListener#jobAdded(JobInProgress)} is called)
 * and tasks for that job being assigned (by
 * {@link #assignTasks(TaskTracker)}).
 * @see EagerTaskInitializationListener
 */
abstract class TaskScheduler implements Configurable {

  protected Configuration conf;
  protected TaskTrackerManager taskTrackerManager;
  
  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  public synchronized void setTaskTrackerManager(
      TaskTrackerManager taskTrackerManager) {
    this.taskTrackerManager = taskTrackerManager;
  }
  
  /**
   * Lifecycle method to allow the scheduler to start any work in separate
   * threads.
   * @throws IOException
   */
  public void start() throws IOException {
    // do nothing
  }
  
  /**
   * Lifecycle method to allow the scheduler to stop any work it is doing.
   * @throws IOException
   */
  public void terminate() throws IOException {
    // do nothing
  }

  /**
   * Returns the tasks we'd like the TaskTracker to execute right now.
   * 
   * @param taskTracker The TaskTracker for which we're looking for tasks.
   * @return A list of tasks to run on that TaskTracker, possibly empty.
   */
  public abstract List assignTasks(TaskTracker taskTracker)
  throws IOException;

  /**
   * Returns a collection of jobs in an order which is specific to 
   * the particular scheduler.
   * @param queueName
   * @return
   */
  public abstract Collection getJobs(String queueName);

  /**
   * Abstract QueueRefresher class. Scheduler's can extend this and return an
   * instance of this in the {@link #getQueueRefresher()} method. The
   * {@link #refreshQueues(List)} method of this instance will be invoked by the
   * {@link QueueManager} whenever it gets a request from an administrator to
   * refresh its own queue-configuration. This method has a documented contract
   * between the {@link QueueManager} and the {@link TaskScheduler}.
   * 
   * Before calling QueueRefresher, the caller must hold the lock to the
   * corresponding {@link TaskScheduler} (generally in the {@link JobTracker}).
   */
  abstract class QueueRefresher {

    /**
     * Refresh the queue-configuration in the scheduler. This method has the
     * following contract.
     * 

     * Before this method, {@link QueueManager} does a validation of the new
     * queue-configuration. For e.g, currently addition of new queues, or
     * removal of queues at any level in the hierarchy is not supported by
     * {@link QueueManager} and so are not supported for schedulers too.
     * Schedulers will be passed a list of {@link JobQueueInfo}s of the root
     * queues i.e. the queues at the top level. All the descendants are properly
     * linked from these top-level queues.
     * Schedulers should use the scheduler specific queue properties from
     * the newRootQueues, validate the properties themselves and apply them
     * internally.
     * 
     * Once the method returns successfully from the schedulers, it is assumed
     * that the refresh of queue properties is successful throughout and will be
     * 'committed' internally to {@link QueueManager} too. It is guaranteed that
     * at no point, after successful return from the scheduler, is the queue
     * refresh in QueueManager failed. If ever, such abnormalities happen, the
     * queue framework will be inconsistent and will need a JT restart.
     * If scheduler throws an exception during {@link #refreshQueues()},
     * {@link QueueManager} throws away the newly read configuration, retains
     * the old (consistent) configuration and informs the request issuer about
     * the error appropriately.
     * 
     * 
     * @param newRootQueues
     */
    abstract void refreshQueues(List newRootQueues)
        throws Throwable;
  }

  /**
   * Get the {@link QueueRefresher} for this scheduler. By default, no
   * {@link QueueRefresher} exists for a scheduler and is set to null.
   * Schedulers need to return an instance of {@link QueueRefresher} if they
   * wish to refresh their queue-configuration when {@link QueueManager}
   * refreshes its own queue-configuration via an administrator request.
   * 
   * @return
   */
  QueueRefresher getQueueRefresher() {
    return null;
  }
}