/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.util.List;

import org.apache.hadoop.mapred.Counters.Group;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.metrics.Updater;
import org.apache.hadoop.metrics.jvm.JvmMetrics;
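
/**
 * Metrics instrumentation for the JobTracker, published through the
 * org.apache.hadoop.metrics (v1) framework as the "jobtracker" record of the
 * "mapred" context. In contemporaneous Apache Hadoop the JobTracker picks its
 * instrumentation class via the "mapred.jobtracker.instrumentation"
 * configuration property; that wiring lives in JobTracker, not here.
 */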

class JobTrackerMetricsInst extends JobTrackerInstrumentation implements Updater {
  private final MetricsRecord metricsRecord;

  private int numJobsSubmitted = 0;
  private int numJobsCompleted = 0;
  private int numWaitingMaps = 0;
  private int numWaitingReduces = 0;

  /**
   * Helper class for tracking additional statistics about speculated tasks.
   * Used to compare the effectiveness of speculating by processing rate
   * (e.g. bytes/s) versus progress rate (e.g. %/s).
   */
  private static class SpecStats {
    public static enum TaskType {MAP, REDUCE};
    // The type of speculation. Whether it's by progress rate (e.g. %/sec) or
    // processing rate (e.g. bytes/sec)
    public static enum SpecType {PROGRESS, PROCESSING};
    // Wasted time is in ms
    public static enum StatType {WASTED_TASKS, WASTED_TIME, LAUNCHED_TASKS,
      SUCCESFUL_TASKS};

    private final int taskTypeSize = TaskType.values().length;
    private final int specTypeSize = SpecType.values().length;
    private final int statTypeSize = StatType.values().length;

    private final long[][][] values =
      new long[taskTypeSize][specTypeSize][statTypeSize];

    public SpecStats() {}

    public void incStat(TaskType taskType, SpecType specType, StatType statType,
                        long value) {
      values[taskType.ordinal()][specType.ordinal()][statType.ordinal()] +=
        value;
    }

    public long getStat(TaskType taskType, SpecType specType,
                        StatType statType) {
      return values[taskType.ordinal()][specType.ordinal()][statType.ordinal()];
    }
  }
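
  /*
   * Illustrative sketch (not from the original code): SpecStats is a plain
   * three-dimensional accumulator indexed by enum ordinals. Recording one
   * wasted speculative map attempt chosen by processing rate, plus the
   * 1200 ms it burned, would look like:
   *
   *   SpecStats stats = new SpecStats();
   *   stats.incStat(SpecStats.TaskType.MAP, SpecStats.SpecType.PROCESSING,
   *       SpecStats.StatType.WASTED_TASKS, 1);
   *   stats.incStat(SpecStats.TaskType.MAP, SpecStats.SpecType.PROCESSING,
   *       SpecStats.StatType.WASTED_TIME, 1200);
   *   long wastedMs = stats.getStat(SpecStats.TaskType.MAP,
   *       SpecStats.SpecType.PROCESSING, SpecStats.StatType.WASTED_TIME);
   */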

  private final SpecStats specStats = new SpecStats();

  private final JobStats aggregateJobStats = new JobStats();

  private final Counters countersToMetrics = new Counters();

  //Cluster status fields.
  private volatile int numMapSlots = 0;
  private volatile int numReduceSlots = 0;
  private int numBlackListedMapSlots = 0;
  private int numBlackListedReduceSlots = 0;

  private int numReservedMapSlots = 0;
  private int numReservedReduceSlots = 0;
  private int numOccupiedMapSlots = 0;
  private int numOccupiedReduceSlots = 0;

  private int numJobsFailed = 0;
  private int numJobsKilled = 0;

  private int numJobsPreparing = 0;
  private int numJobsRunning = 0;

  private int numRunningMaps = 0;
  private int numRunningReduces = 0;

  private int numTrackers = 0;
  private int numTrackersBlackListed = 0;
  private int numTrackersDecommissioned = 0;
  private int numTrackersExcluded = 0;
  private int numTrackersDead = 0;

  private int numTasksInMemory = 0;

  //Extended JobTracker Metrics
  private long totalSubmitTime = 0;
  private long numJobsLaunched = 0;

  public JobTrackerMetricsInst(JobTracker tracker, JobConf conf) {
    super(tracker, conf);
    String sessionId = conf.getSessionId();
    // Initiate JVM Metrics
    JvmMetrics.init("JobTracker", sessionId);
    // Create a record for map-reduce metrics
    MetricsContext context = MetricsUtil.getContext("mapred");
    metricsRecord = MetricsUtil.createRecord(context, "jobtracker");
    metricsRecord.setTag("sessionId", sessionId);
    context.registerUpdater(this);
  }
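
  /*
   * Where these metrics are written is decided outside this class: the
   * "mapred" context obtained above is configured through the metrics v1
   * properties file (conventionally hadoop-metrics.properties). A minimal
   * sketch, assuming the stock file-based context is wanted; the file name
   * and period below are illustrative placeholders, not values taken from
   * this code:
   *
   *   mapred.class=org.apache.hadoop.metrics.file.FileContext
   *   mapred.period=10
   *   mapred.fileName=/tmp/jobtracker-metrics.log
   */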

  /**
   * Since this object is a registered updater, this method is called
   * periodically by the metrics system, at the period configured for the
   * "mapred" metrics context (e.g. every 5 seconds).
   */
  public void doUpdates(MetricsContext unused) {
    // In case of running in LocalMode tracker == null
    if (tracker != null) {
      synchronized (tracker) {
        synchronized (this) {
          numRunningMaps = 0;
          numRunningReduces = 0;

          numWaitingMaps = 0;
          numWaitingReduces = 0;
          numTasksInMemory = 0;

          List<JobInProgress> jobs = tracker.getRunningJobs();
          for (JobInProgress jip : jobs) {
            for (TaskInProgress tip : jip.maps) {
              if (tip.isRunning()) {
                numRunningMaps++;
              } else if (tip.isRunnable()) {
                numWaitingMaps++;
              }
            }
            for (TaskInProgress tip : jip.reduces) {
              if (tip.isRunning()) {
                numRunningReduces++;
              } else if (tip.isRunnable()) {
                numWaitingReduces++;
              }

            }
            numTasksInMemory += jip.getTasks(TaskType.MAP).length;
            numTasksInMemory += jip.getTasks(TaskType.REDUCE).length;
          }

          // Get tracker metrics
          numTrackersDead = tracker.getDeadNodes().size();
          ClusterStatus cs = tracker.getClusterStatus(false);
          numTrackersExcluded = cs.getNumExcludedNodes();
        }
      }
    }
    synchronized (this) {
      metricsRecord.setMetric("map_slots", numMapSlots);
      metricsRecord.setMetric("reduce_slots", numReduceSlots);
      metricsRecord.incrMetric("blacklisted_maps", numBlackListedMapSlots);
      metricsRecord.incrMetric("blacklisted_reduces",
          numBlackListedReduceSlots);
      metricsRecord.incrMetric("jobs_submitted", numJobsSubmitted);
      metricsRecord.incrMetric("jobs_completed", numJobsCompleted);
      metricsRecord.setMetric("waiting_maps", numWaitingMaps);
      metricsRecord.setMetric("waiting_reduces", numWaitingReduces);

      metricsRecord.incrMetric("reserved_map_slots", numReservedMapSlots);
      metricsRecord.incrMetric("reserved_reduce_slots", numReservedReduceSlots);
      metricsRecord.incrMetric("occupied_map_slots", numOccupiedMapSlots);
      metricsRecord.incrMetric("occupied_reduce_slots", numOccupiedReduceSlots);

      metricsRecord.incrMetric("jobs_failed", numJobsFailed);
      metricsRecord.incrMetric("jobs_killed", numJobsKilled);

      metricsRecord.incrMetric("jobs_preparing", numJobsPreparing);
      metricsRecord.incrMetric("jobs_running", numJobsRunning);

      metricsRecord.setMetric("running_maps", numRunningMaps);
      metricsRecord.setMetric("running_reduces", numRunningReduces);

      metricsRecord.setMetric("num_tasks_in_memory", numTasksInMemory);

      metricsRecord.setMetric("trackers", numTrackers);
      metricsRecord.setMetric("trackers_blacklisted", numTrackersBlackListed);
      metricsRecord.setMetric("trackers_decommissioned",
          numTrackersDecommissioned);
      metricsRecord.setMetric("trackers_excluded", numTrackersExcluded);
      metricsRecord.setMetric("trackers_dead", numTrackersDead);

      metricsRecord.incrMetric("num_launched_jobs", numJobsLaunched);
      metricsRecord.incrMetric("total_submit_time", totalSubmitTime);

      aggregateJobStats.incrementMetricsAndReset(metricsRecord);

      // Update additional speculation stats for measuring the performance
      // of different kinds of speculation
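      // For illustration only: with TaskType.MAP, SpecType.PROGRESS and
      // StatType.WASTED_TIME, the loop below builds the metric key
      // "speculation_by_progress_rate_map_wasted_time".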
      for (SpecStats.TaskType taskType : SpecStats.TaskType.values()) {
        for (SpecStats.SpecType specType : SpecStats.SpecType.values()) {
          for (SpecStats.StatType statType : SpecStats.StatType.values()) {
            String key = "speculation_by_" + specType.toString().toLowerCase()
                + "_rate_" + taskType.toString().toLowerCase() + "_"
                + statType.toString().toLowerCase();
            long value = specStats.getStat(taskType, specType, statType);
            metricsRecord.setMetric(key, value);
          }
        }
      }

      
      for (Group group: countersToMetrics) {
        String groupName = group.getName();
        for (Counter counter : group) {
          String name = groupName + "_" + counter.getName();
          name = name.replaceAll("[^a-zA-Z_]", "_").toLowerCase();
          metricsRecord.incrMetric(name, counter.getValue());
        }
      }
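      // For illustration only: in the loop above, a counter "HDFS_BYTES_READ"
      // in a group named "FileSystemCounters" would be published as
      // "filesystemcounters_hdfs_bytes_read"; the regex folds any digits,
      // punctuation or spaces in the names into underscores before
      // lowercasing.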
      clearCounters();
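
      // Reset per-interval state: the values reported through incrMetric()
      // above are deltas accumulated since the previous doUpdates() call, so
      // they restart at zero here; the task-level gauges are recomputed from
      // the running jobs on the next update.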

      numJobsSubmitted = 0;
      numJobsCompleted = 0;
      numWaitingMaps = 0;
      numWaitingReduces = 0;
      numBlackListedMapSlots = 0;
      numBlackListedReduceSlots = 0;

      numReservedMapSlots = 0;
      numReservedReduceSlots = 0;
      numOccupiedMapSlots = 0;
      numOccupiedReduceSlots = 0;

      numJobsFailed = 0;
      numJobsKilled = 0;

      numJobsPreparing = 0;
      numJobsRunning = 0;

      numRunningMaps = 0;
      numRunningReduces = 0;

      numTrackers = 0;
      numTrackersBlackListed = 0;

      totalSubmitTime = 0;
      numJobsLaunched = 0;
    }
    metricsRecord.update();
  }

  @Override
  public synchronized void launchMap(TaskAttemptID taskAttemptID) {
    aggregateJobStats.incNumMapTasksLaunched();
    decWaitingMaps(taskAttemptID.getJobID(), 1);
  }
  @Override
  public void launchDataLocalMap(TaskAttemptID taskAttemptID) {
    aggregateJobStats.incNumDataLocalMaps();
  }
  @Override
  public void launchRackLocalMap(TaskAttemptID taskAttemptID) {
    aggregateJobStats.incNumRackLocalMaps();
  }

  @Override
  public void completeMap(TaskAttemptID taskAttemptID) {
    aggregateJobStats.incNumMapTasksCompleted();
  }

  @Override
  public synchronized void speculateMap(TaskAttemptID taskAttemptID, 
      boolean isUsingProcessingRate) {
    aggregateJobStats.incNumSpeculativeMaps();
    SpecStats.SpecType specType = isUsingProcessingRate ?
        SpecStats.SpecType.PROCESSING : SpecStats.SpecType.PROGRESS;
    specStats.incStat(SpecStats.TaskType.MAP, specType, 
        SpecStats.StatType.LAUNCHED_TASKS, 1);
  }

  @Override
  public synchronized void speculativeSucceededMap(
          TaskAttemptID taskAttemptID, boolean isUsingProcessingRate) {
    aggregateJobStats.incNumSpeculativeSucceededMaps();
    SpecStats.SpecType specType = isUsingProcessingRate ?
        SpecStats.SpecType.PROCESSING : SpecStats.SpecType.PROGRESS;
    specStats.incStat(SpecStats.TaskType.MAP, specType,
        SpecStats.StatType.SUCCESFUL_TASKS, 1);
  }

  @Override
  public synchronized void speculativeSucceededReduce(
          TaskAttemptID taskAttemptID, boolean isUsingProcessingRate) {
    aggregateJobStats.incNumSpeculativeSucceededReduces();
    SpecStats.SpecType specType = isUsingProcessingRate ?
        SpecStats.SpecType.PROCESSING : SpecStats.SpecType.PROGRESS;
    specStats.incStat(SpecStats.TaskType.REDUCE, specType, 
        SpecStats.StatType.SUCCESFUL_TASKS, 1);
  }

  @Override
  public synchronized void failedMap(TaskAttemptID taskAttemptID,
      boolean wasFailed, boolean isSpeculative,
      boolean isUsingProcessingRate, long taskStartTime) {
    long timeSpent = JobTracker.getClock().getTime() - taskStartTime;
    if (wasFailed) {
      aggregateJobStats.incNumMapTasksFailed();
      aggregateJobStats.incFailedMapTime(timeSpent);
    } else {
      aggregateJobStats.incNumMapTasksKilled();
      aggregateJobStats.incKilledMapTime(timeSpent);
      if (isSpeculative) {
        aggregateJobStats.incNumSpeculativeWasteMaps();
        aggregateJobStats.incSpeculativeMapTimeWaste(timeSpent);
        // More detailed stats
        SpecStats.SpecType specType = isUsingProcessingRate ?
            SpecStats.SpecType.PROCESSING : SpecStats.SpecType.PROGRESS;
        specStats.incStat(SpecStats.TaskType.MAP, specType,
            SpecStats.StatType.WASTED_TASKS, 1);
        specStats.incStat(SpecStats.TaskType.MAP, specType,
            SpecStats.StatType.WASTED_TIME, timeSpent);
      }
    }
    addWaitingMaps(taskAttemptID.getJobID(), 1);
  }

  @Override
  public synchronized void launchReduce(TaskAttemptID taskAttemptID) {
    aggregateJobStats.incNumReduceTasksLaunched();
    decWaitingReduces(taskAttemptID.getJobID(), 1);
  }

  @Override
  public void completeReduce(TaskAttemptID taskAttemptID) {
    aggregateJobStats.incNumReduceTasksCompleted();
  }

  @Override
  public synchronized void speculateReduce(TaskAttemptID taskAttemptID,
      boolean isUsingProcessingRate) {
    aggregateJobStats.incNumSpeculativeReduces();
    SpecStats.SpecType specType = isUsingProcessingRate ? 
        SpecStats.SpecType.PROCESSING : SpecStats.SpecType.PROGRESS;
    specStats.incStat(SpecStats.TaskType.REDUCE, specType, 
        SpecStats.StatType.LAUNCHED_TASKS, 1);
  }

  @Override
  public synchronized void failedReduce(TaskAttemptID taskAttemptID,
      boolean wasFailed, boolean isSpeculative, boolean isUsingProcessingRate, 
      long taskStartTime) {
    long timeSpent = JobTracker.getClock().getTime() - taskStartTime;
    if (wasFailed) {
      aggregateJobStats.incNumReduceTasksFailed();
      aggregateJobStats.incFailedReduceTime(timeSpent);
    } else {
      aggregateJobStats.incNumReduceTasksKilled();
      aggregateJobStats.incKilledReduceTime(timeSpent);
      if (isSpeculative) {
        aggregateJobStats.incNumSpeculativeWasteReduces();
        aggregateJobStats.incSpeculativeReduceTimeWaste(timeSpent);
        // More detailed stats
        SpecStats.SpecType specType = isUsingProcessingRate ?
            SpecStats.SpecType.PROCESSING : SpecStats.SpecType.PROGRESS;
        specStats.incStat(SpecStats.TaskType.REDUCE, specType,
            SpecStats.StatType.WASTED_TASKS, 1);
        specStats.incStat(SpecStats.TaskType.REDUCE, specType,
            SpecStats.StatType.WASTED_TIME, timeSpent);
      }
    }
    addWaitingReduces(taskAttemptID.getJobID(), 1);
  }

  @Override
  public void mapFailedByFetchFailures() {
    aggregateJobStats.incNumMapTasksFailedByFetchFailures();
  }

  @Override
  public void mapFetchFailure() {
    aggregateJobStats.incNumMapFetchFailures();
  }

  @Override
  public synchronized void submitJob(JobConf conf, JobID id) {
    ++numJobsSubmitted;
  }

  @Override
  public synchronized void completeJob(JobConf conf, JobID id) {
    collectJobCounters(id);
    ++numJobsCompleted;
  }

  @Override
  public synchronized void addWaitingMaps(JobID id, int task) {
  }

  @Override
  public synchronized void decWaitingMaps(JobID id, int task) {
  }

  @Override
  public synchronized void addWaitingReduces(JobID id, int task) {
  }

  @Override
  public synchronized void decWaitingReduces(JobID id, int task) {
  }

  @Override
  public synchronized void setMapSlots(int slots) {
    numMapSlots = slots;
  }

  @Override
  public synchronized void setReduceSlots(int slots) {
    numReduceSlots = slots;
  }

  @Override
  public synchronized void addBlackListedMapSlots(int slots) {
    numBlackListedMapSlots += slots;
  }

  @Override
  public synchronized void decBlackListedMapSlots(int slots) {
    numBlackListedMapSlots -= slots;
  }

  @Override
  public synchronized void addBlackListedReduceSlots(int slots) {
    numBlackListedReduceSlots += slots;
  }

  @Override
  public synchronized void decBlackListedReduceSlots(int slots) {
    numBlackListedReduceSlots -= slots;
  }

  @Override
  public synchronized void addReservedMapSlots(int slots) {
    numReservedMapSlots += slots;
  }

  @Override
  public synchronized void decReservedMapSlots(int slots) {
    numReservedMapSlots -= slots;
  }

  @Override
  public synchronized void addReservedReduceSlots(int slots) {
    numReservedReduceSlots += slots;
  }

  @Override
  public synchronized void decReservedReduceSlots(int slots) {
    numReservedReduceSlots -= slots;
  }

  @Override
  public synchronized void addOccupiedMapSlots(int slots) {
    numOccupiedMapSlots += slots;
  }

  @Override
  public synchronized void decOccupiedMapSlots(int slots) {
    numOccupiedMapSlots -= slots;
  }

  @Override
  public synchronized void addOccupiedReduceSlots(int slots) {
    numOccupiedReduceSlots += slots;
  }

  @Override
  public synchronized void decOccupiedReduceSlots(int slots) {
    numOccupiedReduceSlots -= slots;
  }

  @Override
  public synchronized void failedJob(JobConf conf, JobID id) {
    numJobsFailed++;
  }

  @Override
  public synchronized void killedJob(JobConf conf, JobID id) {
    numJobsKilled++;
  }

  @Override
  public synchronized void addPrepJob(JobConf conf, JobID id) {
    numJobsPreparing++;
  }

  @Override
  public synchronized void decPrepJob(JobConf conf, JobID id) {
    numJobsPreparing--;
  }

  @Override
  public synchronized void addRunningJob(JobConf conf, JobID id) {
    numJobsRunning++;
  }

  @Override
  public synchronized void decRunningJob(JobConf conf, JobID id) {
    numJobsRunning--;
  }

  @Override
  public synchronized void addRunningMaps(int task) {
  }

  @Override
  public synchronized void decRunningMaps(int task) {
  }

  @Override
  public synchronized void addRunningReduces(int task) {
  }

  @Override
  public synchronized void decRunningReduces(int task) {
  }

  @Override
  public synchronized void killedMap(TaskAttemptID taskAttemptID) {
  }

  @Override
  public synchronized void killedReduce(TaskAttemptID taskAttemptID) {
  }

  @Override
  public synchronized void addTrackers(int trackers) {
    numTrackers += trackers;
  }

  @Override
  public synchronized void decTrackers(int trackers) {
    numTrackers -= trackers;
  }

  @Override
  public synchronized void addBlackListedTrackers(int trackers) {
    numTrackersBlackListed += trackers;
  }

  @Override
  public synchronized void decBlackListedTrackers(int trackers) {
    numTrackersBlackListed -= trackers;
  }

  @Override
  public synchronized void setDecommissionedTrackers(int trackers) {
    numTrackersDecommissioned = trackers;
  }

  @Override
  public synchronized void addLaunchedJobs(long submitTime) {
    ++numJobsLaunched;
    totalSubmitTime += submitTime;
  }

  @Override
  public void addMapInputBytes(long size) {
    aggregateJobStats.incTotalMapInputBytes(size);
  }

  @Override
  public void addLocalMapInputBytes(long size) {
    aggregateJobStats.incLocalMapInputBytes(size);
    addMapInputBytes(size);
  }

  @Override
  public void addRackMapInputBytes(long size) {
    aggregateJobStats.incRackMapInputBytes(size);
    addMapInputBytes(size);
  }

  @Override
  public void terminateJob(JobConf conf, JobID id) {
    collectJobCounters(id);
  }

  private synchronized void collectJobCounters(JobID id) {
    JobInProgress job = tracker.jobs.get(id);
    if (job == null) {
      return;
    }
    Counters jobCounter = job.getCounters();
    for (JobInProgress.Counter key : JobInProgress.Counter.values()) {
      countersToMetrics.findCounter(key)
          .increment(jobCounter.findCounter(key).getValue());
    }
    for (Task.Counter key : Task.Counter.values()) {
      countersToMetrics.findCounter(key)
          .increment(jobCounter.findCounter(key).getValue());
    }
    for (Counter counter : jobCounter.getGroup(Task.FILESYSTEM_COUNTER_GROUP)) {
      countersToMetrics.incrCounter(
          Task.FILESYSTEM_COUNTER_GROUP, counter.getName(), counter.getValue());
    }
  }

  /**
   * Set every counter in countersToMetrics back to zero.
   */
  private void clearCounters() {
    for (Group g : countersToMetrics) {
      for (Counter c : g) {
        c.setValue(0);
      }
    }
  }
}