All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tez.common.counters.TaskCounter Maven / Gradle / Ivy

There is a newer version: 0.10.4
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.common.counters;

import org.apache.hadoop.classification.InterfaceAudience.Private;

// Counters used by Task classes
// Keep in sync with tez-ui/src/main/webapp/config/default-app-conf.js
/**
 * Enumerates the counters recorded by Task classes.
 *
 * <p>Several constant names still carry MapReduce terminology; see the TODO at the top of the
 * enum body. The per-constant comments describe which Inputs/Outputs are stated to use each
 * counter.
 */
@Private
public enum TaskCounter {
  // TODO Eventually, rename counters to be non-MR specific and map them to MR equivalent.

  // NOTE(review): undocumented in this file; presumably the number of speculative attempts -
  // confirm against the code that increments it.
  NUM_SPECULATIONS,

  /**
   * Number of Input Groups seen by ShuffledMergedInput.
   * Alternatively, the number of Input Groups seen by a Reduce task.
   */
  REDUCE_INPUT_GROUPS,

  /**
   * Number of records (across all Groups) seen by ShuffledMergedInput.
   * Alternatively, the number of records seen by a ReduceProcessor.
   */
  REDUCE_INPUT_RECORDS,

  REDUCE_OUTPUT_RECORDS, // Not used at the moment.
  REDUCE_SKIPPED_GROUPS, // Not used at the moment.
  REDUCE_SKIPPED_RECORDS, // Not used at the moment.
  // NOTE(review): SPLIT_RAW_BYTES and COMBINE_INPUT_RECORDS have no description in this file;
  // their names appear to be inherited from MapReduce - confirm semantics with the callers.
  SPLIT_RAW_BYTES,

  COMBINE_INPUT_RECORDS,
  COMBINE_OUTPUT_RECORDS, // Not used at the moment.

  /**
   * Number of records written to disk in case of OnFileSortedOutput.
   *
   * Number of additional records written out to disk in case of
   * ShuffledMergedInput; this represents the number of unnecessary spills to
   * disk caused by lack of memory.
   */
  SPILLED_RECORDS,

  /**
   * Number of Inputs from which data is copied. Represents physical Inputs.
   */
  NUM_SHUFFLED_INPUTS,

  /**
   * Number of Inputs from which data was not copied - typically due to an empty Input.
   */
  NUM_SKIPPED_INPUTS,

  /**
   * Number of failed copy attempts (physical inputs).
   */
  NUM_FAILED_SHUFFLE_INPUTS,

  // NOTE(review): the counters from MERGED_MAP_OUTPUTS through COMMITTED_HEAP_BYTES are mostly
  // undocumented here. The time and memory ones carry a unit in their name (millis / bytes);
  // how each value is measured is not shown in this file - confirm before relying on it.
  MERGED_MAP_OUTPUTS,
  GC_TIME_MILLIS,
  CPU_MILLISECONDS,
  /** Wall clock time taken by the task initialization and execution. */
  WALL_CLOCK_MILLISECONDS,
  PHYSICAL_MEMORY_BYTES,
  VIRTUAL_MEMORY_BYTES,
  COMMITTED_HEAP_BYTES,

  /**
   * Represents the number of Input Records that were actually processed.
   * Used by MRInput and ShuffledUnorderedKVInput.
   */
  INPUT_RECORDS_PROCESSED,

  /**
   * Number of bytes for a task context, currently used by MRInput.
   */
  INPUT_SPLIT_LENGTH_BYTES,

  /**
   * Represents the number of actual output records.
   * Used by MROutput, OnFileSortedOutput, and OnFileUnorderedKVOutput.
   */
  OUTPUT_RECORDS,

  /**
   * Approximate number of input records that should be processed as the events keep arriving
   * from inputs.
   *
   * TODO: As of now supporting broadcast data only.
   */
  APPROXIMATE_INPUT_RECORDS,

  /**
   * Represents the number of large records in the output - typically, records which are
   * spilled directly.
   */
  OUTPUT_LARGE_RECORDS,

  SKIPPED_RECORDS, // Not used at the moment.

  /**
   * Represents the serialized output size (uncompressed) of data being written.
   */
  OUTPUT_BYTES,

  /**
   * Represents serialized output size (uncompressed) along with any overhead
   * added by the format being used.
   */
  OUTPUT_BYTES_WITH_OVERHEAD,

  /**
   * Represents the actual physical size of the Output generated. This factors
   * in Compression if it is enabled. (Will include actual serialized output
   * size + overhead)
   */
  OUTPUT_BYTES_PHYSICAL,

  /**
   * Bytes written to disk due to unnecessary spills (lack of adequate memory).
   * Used by OnFileSortedOutput and ShuffledMergedInput.
   */
  ADDITIONAL_SPILLS_BYTES_WRITTEN,

  /**
   * Bytes read from disk due to previous spills (lack of adequate memory).
   * Used by OnFileSortedOutput and ShuffledMergedInput.
   */
  ADDITIONAL_SPILLS_BYTES_READ,

  /**
   * Spills that were generated and read by the same task (unnecessary spills due to lack of
   * adequate memory).
   *
   * Used by OnFileSortedOutput.
   */
  ADDITIONAL_SPILL_COUNT,

  /**
   * Number of spill files being offered via shuffle-handler.
   * e.g. without pipelined shuffle, this would be 1. With pipelined shuffle, this could be many
   * as final merge is avoided.
   */
  SHUFFLE_CHUNK_COUNT,

  INPUT_GROUPS, // Not used at the moment. Will eventually replace REDUCE_INPUT_GROUPS

  /**
   * Amount of physical data moved over the wire. Used by Shuffled*Input. Should
   * be a combination of SHUFFLE_BYTES_TO_MEM and SHUFFLE_BYTES_TO_DISK.
   */
  SHUFFLE_BYTES,

  /**
   * Uncompressed size of the data being processed by the relevant Shuffle.
   * Includes serialization, file format etc. overheads.
   */
  SHUFFLE_BYTES_DECOMPRESSED,

  /**
   * Number of bytes which were shuffled directly to memory.
   */
  SHUFFLE_BYTES_TO_MEM,

  /**
   * Number of bytes which were shuffled directly to disk.
   */
  SHUFFLE_BYTES_TO_DISK,

  /**
   * Number of bytes which were read directly from local disk.
   */
  SHUFFLE_BYTES_DISK_DIRECT,

  /**
   * Number of Memory to Disk merges performed during sort-merge.
   * Used by ShuffledMergedInput.
   */
  NUM_MEM_TO_DISK_MERGES,

  /**
   * Number of disk to disk merges performed during the sort-merge.
   */
  NUM_DISK_TO_DISK_MERGES,

  /**
   * Time taken to shuffle data. This includes time taken to fetch the data
   * and to merge the data in parallel to fetching when needed. This also includes any
   * waiting time related to event delays from source.
   *
   * Represented in milliseconds.
   */
  SHUFFLE_PHASE_TIME,

  /**
   * Time taken to merge data retrieved during shuffle.
   *
   * Relative to task start time and expressed in milliseconds.
   */
  MERGE_PHASE_TIME,

  /**
   * First event received from source relative to task start time.
   *
   * Represented in milliseconds.
   */
  FIRST_EVENT_RECEIVED,

  /**
   * Last event received from source relative to task start time.
   *
   * Represented in milliseconds.
   */
  LAST_EVENT_RECEIVED,


  /**
   * The size of the data that is transmitted via event.
   *
   * Represented in number of bytes.
   */
  DATA_BYTES_VIA_EVENT
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy