All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.nvidia.spark.rapids.tool.profiling.GenerateTimeline.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2021-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.nvidia.spark.rapids.tool.profiling

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import com.nvidia.spark.rapids.tool.ToolTextFileWriter

import org.apache.spark.sql.rapids.tool.profiling.ApplicationInfo
import org.apache.spark.sql.rapids.tool.store.{AccumInfo, AccumNameRef}

/**
 * Common base for everything that is laid out on the timeline: an interval described by
 * the time it starts and the time it ends.
 *
 * @param startTime when the interval begins
 * @param endTime when the interval ends
 */
abstract class TimelineTiming(val startTime: Long, val endTime: Long)

/**
 * Timeline interval for a single task together with the sub-timings that are drawn
 * inside the task's box.
 *
 * @param stageId id of the stage the task belongs to
 * @param taskId id of the task
 * @param startTime when the task starts
 * @param endTime when the task ends
 * @param duration task duration shown as the box label
 * @param deserTime time attributed to executor deserialization
 * @param readTime time attributed to reading
 * @param semTime time attributed to waiting on the semaphore
 * @param opTime time attributed to operators
 * @param writeTime time attributed to writing
 */
class TimelineTaskInfo(
    val stageId: Int,
    val taskId: Long,
    startTime: Long,
    endTime: Long,
    val duration: Long,
    val deserTime: Long,
    val readTime: Long,
    val semTime: Long,
    val opTime: Long,
    val writeTime: Long) extends TimelineTiming(startTime, endTime)

/**
 * Timeline interval for a stage.
 *
 * @param stageId id of the stage
 * @param startTime when the interval begins
 * @param endTime when the interval ends
 * @param duration duration shown in the label of the stage's box
 */
class TimelineStageInfo(
    val stageId: Int,
    startTime: Long,
    endTime: Long,
    val duration: Long) extends TimelineTiming(startTime, endTime)

/**
 * Timeline interval for a job.
 *
 * @param jobId id of the job
 * @param startTime when the job starts
 * @param endTime when the job ends
 * @param duration duration shown in the label of the job's box
 */
class TimelineJobInfo(
    val jobId: Int,
    startTime: Long,
    endTime: Long,
    val duration: Long) extends TimelineTiming(startTime, endTime)

/**
 * Timeline interval for a SQL execution.
 *
 * @param sqlId id of the SQL execution
 * @param startTime when the SQL execution starts
 * @param endTime when the SQL execution ends
 * @param duration duration shown in the label of the SQL execution's box
 */
class TimelineSqlInfo(
    val sqlId: Long,
    startTime: Long,
    endTime: Long,
    val duration: Long) extends TimelineTiming(startTime, endTime)

/**
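 * Generates an SVG graph that shows the timeline of an application: the tasks run on each
 * executor, the stages, the stage ranges, the jobs and the SQL executions.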
 */
object GenerateTimeline {
  // Height of one layout slot (one row of timing boxes).
  private val TASK_HEIGHT = 20
  // Width reserved on the left for the section title boxes; timings start to the right of it.
  private val TITLE_BOX_WIDTH = 200
  // Generic spacing used around titles, footers and the image border.
  private val PADDING = 5
  // Font size; the title and footer heights are derived from it.
  private val FONT_SIZE = 14
  private val TITLE_HEIGHT = FONT_SIZE + (PADDING * 2)
  private val FOOTER_HEIGHT = FONT_SIZE + (PADDING * 2)
  // Horizontal scale: time differences are divided by this value to get an x offset/width.
  private val MS_PER_PIXEL = 5.0

  // Palette from which stage colors are picked; it is cycled through (index modulo length)
  // when there are more stages than colors.
  // Generated using https://mokole.com/palette.html
  private val COLORS = Array(
    "#696969",
    "#dcdcdc",
    "#556b2f",
    "#8b4513",
    "#483d8b",
    "#008000",
    "#3cb371",
    "#008b8b",
    "#000080",
    "#800080",
    "#b03060",
    "#ff4500",
    "#ffa500",
    // Going to be used by lines/etc "#00ff00",
    "#8a2be2",
    "#00ff7f",
    "#dc143c",
    "#00ffff",
    "#00bfff",
    "#f4a460",
    "#0000ff",
    "#f08080",
    "#adff2f",
    "#da70d6",
    "#ff00ff",
    "#1e90ff",
    "#eee8aa",
    "#ffff54",
    "#ff1493",
    "#7b68ee")

  /**
   * Runs the layout without drawing anything and reports how many slots it had to create
   * to place every timing.
   *
   * @param toSchedule the timings to lay out
   * @return the number of slots the layout ended up with
   */
  def calcLayoutSlotsNeeded[A <: TimelineTiming](toSchedule: Iterable[A]): Int = {
    // Start without any slot; computeLayout appends one each time nothing is free.
    val grownSlots = ArrayBuffer.empty[Long]
    val ignorePlacement: (A, Int) => Unit = (_, _) => ()
    computeLayout(toSchedule, ignorePlacement, errorOnMissingSlot = false,
      slotsFreeUntil = grownSlots)
    grownSlots.length
  }

  /**
   * Lays the timings out over a fixed number of slots and reports each placement.
   *
   * @param toSchedule the timings to lay out
   * @param numSlots number of slots available; the layout fails if they are not enough
   * @param scheduleCallback invoked with each timing and the index of the slot it was given
   */
  def doLayout[A <: TimelineTiming](
      toSchedule: Iterable[A],
      numSlots: Int)(scheduleCallback: (A, Int) => Unit): Unit = {
    // Every slot starts out free (free-until value of 0).
    val zeroed = new Array[Long](numSlots)
    val fixedSlots: mutable.Buffer[Long] = zeroed.toBuffer
    computeLayout(toSchedule, scheduleCallback, errorOnMissingSlot = true,
      slotsFreeUntil = fixedSlots)
  }

  /**
   * Assigns every timing to a slot, processing the timings in order of start time and
   * always picking the lowest-indexed slot that is free again by the time the timing starts.
   *
   * @param toSchedule the timings to lay out
   * @param scheduleCallback invoked with each timing and the index of the slot it was given
   * @param errorOnMissingSlot when true, running out of slots throws; when false, a new
   *                           slot is appended instead
   * @param slotsFreeUntil per-slot end time of the last timing placed there; updated in place
   * @throws IllegalStateException if no slot is free and `errorOnMissingSlot` is true
   */
  def computeLayout[A <: TimelineTiming](
      toSchedule: Iterable[A],
      scheduleCallback: (A, Int) => Unit,
      errorOnMissingSlot: Boolean,
      slotsFreeUntil: mutable.Buffer[Long]): Unit = {
    val ordered = toSchedule.toSeq.sortBy(_.startTime)
    for (item <- ordered) {
      // There is some slop in how Spark reports this, so allow 1 unit of overlap.
      val latestAcceptableFree = item.startTime + 1
      val freeIdx = slotsFreeUntil.indexWhere(freeAt => latestAcceptableFree >= freeAt)
      val chosen =
        if (freeIdx >= 0) {
          freeIdx
        } else if (errorOnMissingSlot) {
          throw new IllegalStateException("Not enough slots to schedule")
        } else {
          // Nothing is free: grow the layout by one slot and use it.
          slotsFreeUntil += 0L
          slotsFreeUntil.length - 1
        }
      slotsFreeUntil(chosen) = item.endTime
      scheduleCallback(item, chosen)
    }
  }

  /**
   * Writes an SVG `text` element whose vertical middle is at `y`.
   *
   * @param text the label to draw; XML special characters are escaped
   * @param x x coordinate where the text starts
   * @param y y coordinate of the vertical center of the text
   * @param fileWriter writer the element is appended to
   */
  private def textBoxVirtCentered(
      text: String,
      x: Number,
      y: Long,
      fileWriter: ToolTextFileWriter): Unit = {
    // Escape the XML special characters so an arbitrary label cannot break the document.
    val escaped = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    fileWriter.write(
      s"""<text x="$x" y="$y" dominant-baseline="middle"
         | font-family="Courier,monospace" font-size="$FONT_SIZE">$escaped</text>
         |""".stripMargin)
  }

  /**
   * Draws the title box of a section (an outlined rectangle in the title column) with the
   * section name vertically centered inside it.
   *
   * @param text the section name
   * @param yStart y coordinate of the top of the section
   * @param numElements number of slots in the section; the box is that many rows tall
   * @param fileWriter writer the elements are appended to
   */
  private def sectionBox(
      text: String,
      yStart: Long,
      numElements: Int,
      fileWriter: ToolTextFileWriter): Unit = {
    val boxHeight = numElements * TASK_HEIGHT
    val boxMiddleY = boxHeight/2 + yStart
    // Draw a box for the Host
    fileWriter.write(
      s"""<rect x="$PADDING" y="$yStart" width="$TITLE_BOX_WIDTH" height="$boxHeight"
         | style="fill:white;fill-opacity:0.0;stroke:black;stroke-width:2"/>
         |""".stripMargin)
    textBoxVirtCentered(text, PADDING * 2, boxMiddleY, fileWriter)
  }

  /**
   * Draws a sub-timing mark: a filled rectangle covering the lower half of a slot row for
   * the given time interval.
   *
   * @param color fill and stroke color of the mark
   * @param startTime start of the interval
   * @param endTime end of the interval
   * @param slot index of the slot row the mark belongs to
   * @param xStart x coordinate that corresponds to `minStart`
   * @param yStart y coordinate of the top of the section
   * @param minStart time that maps to `xStart`
   * @param fileWriter writer the element is appended to
   */
  private def subTimingMark(
      color: String,
      startTime: Long,
      endTime: Long,
      slot: Int,
      xStart: Long,
      yStart: Long,
      minStart: Long,
      fileWriter: ToolTextFileWriter): Unit = {
    val x = xStart + (startTime - minStart) / MS_PER_PIXEL
    // The mark only occupies the bottom half of the row.
    val y = (slot * TASK_HEIGHT) + yStart + TASK_HEIGHT / 2
    val width = (endTime - startTime) / MS_PER_PIXEL
    val height = TASK_HEIGHT / 2
    fileWriter.write(
      s"""<rect x="$x" y="$y" width="$width" height="$height"
         | style="fill:$color;fill-opacity:1.0;stroke:$color;stroke-width:1"/>
         |""".stripMargin)
  }

  /**
   * Draws the box for one timing: a filled rectangle spanning the timing's interval in the
   * given slot row, the optional sub-timing marks laid end to end from the start of the
   * box, and finally the label.
   *
   * @param text label drawn at the start of the box
   * @param color fill color of the box
   * @param timing the interval to draw
   * @param slot index of the slot row the box belongs to
   * @param xStart x coordinate that corresponds to `minStart`
   * @param yStart y coordinate of the top of the section
   * @param minStart time that maps to `xStart`
   * @param fileWriter writer the elements are appended to
   * @param subMarks (color, length) pairs drawn consecutively starting at the timing's start
   */
  private def timingBox[A <: TimelineTiming](
      text: String,
      color: String,
      timing: A,
      slot: Int,
      xStart: Long,
      yStart: Long,
      minStart: Long,
      fileWriter: ToolTextFileWriter,
      subMarks: Seq[(String, Long)] = Seq.empty): Unit = {
    val startTime = timing.startTime
    val endTime = timing.endTime
    val x = xStart + (startTime - minStart) / MS_PER_PIXEL
    val y = (slot * TASK_HEIGHT) + yStart
    val width = (endTime - startTime) / MS_PER_PIXEL
    // The outline uses #00ff00, which is deliberately kept out of the fill palette.
    fileWriter.write(
      s"""<rect x="$x" y="$y" width="$width" height="$TASK_HEIGHT"
         | style="fill:$color;fill-opacity:1.0;stroke:#00ff00;stroke-width:1"/>
         |""".stripMargin)
    // Each sub-mark starts where the previous one ended.
    var subStart = startTime
    subMarks.foreach {
      case (subColor, subTimeLength) =>
        subTimingMark(subColor,
          subStart,
          subStart + subTimeLength,
          slot,
          xStart,
          yStart,
          minStart,
          fileWriter)
        subStart = subStart + subTimeLength
    }
    textBoxVirtCentered(text, x, y + TASK_HEIGHT / 2, fileWriter)
  }

  /**
   * Draws the time scale of a section: a horizontal line at the top and at the bottom of
   * the section, a faint vertical grid line every 100 ms, and below the section a tick
   * with a label every 1000 ms.
   *
   * @param x x coordinate that corresponds to `minStart`
   * @param y y coordinate of the top of the section
   * @param minStart time at the left edge of the scale
   * @param maxFinish time at the right edge of the scale
   * @param height height of the section (without its footer)
   * @param fileWriter writer the elements are appended to
   */
  private def scaleWithLines(x: Long,
      y: Long,
      minStart: Long,
      maxFinish: Long,
      height: Long,
      fileWriter: ToolTextFileWriter): Unit = {
    val timeRange = maxFinish - minStart
    val xEnd = x + timeRange / MS_PER_PIXEL
    val yEnd = y + height
    fileWriter.write(
      s"""<line x1="$x" y1="$yEnd" x2="$xEnd" y2="$yEnd" style="stroke:black;stroke-width:1"/>
         |<line x1="$x" y1="$y" x2="$xEnd" y2="$y" style="stroke:black;stroke-width:1"/>
         |""".stripMargin)
    (0L until timeRange).by(100L).foreach { timeForTick =>
      val xTick = timeForTick / MS_PER_PIXEL + x
      fileWriter.write(
        s"""<line x1="$xTick" y1="$y" x2="$xTick" y2="$yEnd"
           | style="stroke:black;stroke-width:1;opacity:0.5"/>
           |""".stripMargin)
      if (timeForTick % 1000 == 0) {
        // The tick and its label go into the footer area below the section.
        fileWriter.write(
          s"""<line x1="$xTick" y1="$yEnd" x2="$xTick" y2="${yEnd + PADDING}"
             | style="stroke:black;stroke-width:1"/>
             |<text x="$xTick" y="${yEnd + PADDING + FONT_SIZE}"
             | font-family="Courier,monospace" font-size="$FONT_SIZE">$timeForTick ms</text>
             |""".stripMargin)
      }
    }
  }

  /** Height of a section that holds `slots` rows, each `TASK_HEIGHT` tall. */
  private def calcTimingHeights(slots: Int): Int = {
    TASK_HEIGHT * slots
  }

  /**
   * Builds the timeline of `app` and writes it as `timeline.svg` into `outputDirectory`.
   *
   * The image consists of a title followed by one section per executor/host (task boxes
   * with sub-timing marks), then the STAGES, STAGE RANGES, JOBS and SQL sections, each with
   * its own time scale.
   *
   * Entries without the data needed to draw them are left out: stages missing a submission
   * or completion time, jobs missing an end time or duration, and SQL executions missing
   * an end time.
   *
   * @param app the application to draw
   * @param outputDirectory directory the `timeline.svg` file is written to
   */
  def generateFor(app: ApplicationInfo, outputDirectory: String): Unit = {
    // Gather the data
    val execHostToTaskList = new mutable.TreeMap[String, ArrayBuffer[TimelineTaskInfo]]()
    val stageIdToColor = mutable.HashMap[Int, String]()
    var colorIndex = 0
    var minStartTime = Long.MaxValue
    var maxEndTime = 0L

    // Returns the color of a stage, assigning the next palette color on first use so that
    // a stage id unknown to the stage manager cannot fail the lookup.
    def colorForStage(stageId: Int): String =
      stageIdToColor.getOrElseUpdate(stageId, {
        val color = COLORS(colorIndex % COLORS.length)
        colorIndex += 1
        color
      })

    // We want to show sub-timings here too.
    // semaphore wait time
    // scan time
    // op time
    val semWaitIds = new mutable.HashSet[Long]()
    val readTimeIds = new mutable.HashSet[Long]()
    val opTimeIds = new mutable.HashSet[Long]()
    val writeTimeIds = new mutable.HashSet[Long]()
    app.planMetricProcessor.allSQLMetrics.foreach { f =>
      f.name match {
        case "op time" | "GPU decode time" | "GPU Time" if f.metricType == "nsTiming" =>
          opTimeIds += f.accumulatorId
        case "GPU semaphore wait time" if f.metricType == "nsTiming" =>
          semWaitIds += f.accumulatorId
        case "buffer time" if f.metricType == "nsTiming" =>
          readTimeIds += f.accumulatorId
        case "write time" if f.metricType == "nsTiming" =>
          writeTimeIds += f.accumulatorId
        case _ =>
      }
    }

    // Keep the accumulators themselves (not a grand total) so the semaphore wait can be
    // looked up per task below, the same way as the read/op/write timings.
    val semMetricsNs = semWaitIds.toList.flatMap(app.accumManager.accumInfoMap.get)

    val semMetricsMs = app.accumManager.accumInfoMap.collect {
      case (_, accumInfo: AccumInfo)
          if accumInfo.infoRef.name == AccumNameRef.NAMES_TABLE.get("gpuSemaphoreWait") =>
        accumInfo
    }.toList

    val readMetrics = readTimeIds.toList.flatMap(app.accumManager.accumInfoMap.get)

    val opMetrics = opTimeIds.toList.flatMap(app.accumManager.accumInfoMap.get)

    val writeMetrics = writeTimeIds.toList.flatMap(app.accumManager.accumInfoMap.get)

    app.taskManager.getAllTasks().foreach { tc =>
      val host = tc.host
      val execId = tc.executorId
      val stageId = tc.stageId
      val taskId = tc.taskId
      val launchTime = tc.launchTime
      val finishTime = tc.finishTime
      val duration = tc.duration
      // nsTiming accumulators are converted to ms; the "gpuSemaphoreWait" accumulators
      // are added unconverted.
      val semTimeMs = semMetricsNs.flatMap(_.taskUpdatesMap.get(taskId)).sum / 1000000 +
        semMetricsMs.flatMap(_.taskUpdatesMap.get(taskId)).sum
      val readTimeMs = readMetrics.flatMap(_.taskUpdatesMap.get(taskId)).sum / 1000000 +
        tc.sr_fetchWaitTime
      val opTimeMs = opMetrics.flatMap(_.taskUpdatesMap.get(taskId)).sum / 1000000
      val writeTimeMs = writeMetrics.flatMap(_.taskUpdatesMap.get(taskId)).sum / 1000000 +
        tc.sw_writeTime
      val taskInfo = new TimelineTaskInfo(stageId, taskId, launchTime, finishTime, duration,
        tc.executorDeserializeTime, readTimeMs, semTimeMs, opTimeMs, writeTimeMs)
      val execHost = s"$execId/$host"
      execHostToTaskList.getOrElseUpdate(execHost, ArrayBuffer.empty) += taskInfo
      minStartTime = Math.min(launchTime, minStartTime)
      maxEndTime = Math.max(finishTime, maxEndTime)
    }

    // Assign colors in stage id order so the mapping is deterministic.
    val allStageIds = app.stageManager.getAllStageIds.toSeq
    allStageIds.sorted.foreach(colorForStage)

    val stageRangeInfo = execHostToTaskList.values.flatMap { taskList =>
      taskList
    }.groupBy { taskInfo =>
      taskInfo.stageId
    }.map {
      case (stageId, iter) =>
        val start = iter.map(_.startTime).min
        val end = iter.map(_.endTime).max
        new TimelineStageInfo(stageId, start, end, end-start)
    }

    // A stage is only drawn when both its submission and completion times are known.
    val stageInfo = app.stageManager.getAllStages.flatMap { sm =>
      for {
        submissionTime <- sm.stageInfo.submissionTime
        completionTime <- sm.stageInfo.completionTime
      } yield {
        val stageId = sm.stageInfo.stageId
        val duration = sm.getDuration
        minStartTime = Math.min(minStartTime, submissionTime)
        maxEndTime = Math.max(maxEndTime, completionTime)
        new TimelineStageInfo(stageId, submissionTime, completionTime, duration)
      }
    }

    val execHostToSlots = execHostToTaskList.map {
      case (execHost, taskList) =>
        (execHost, calcLayoutSlotsNeeded(taskList))
    }.toMap

    val jobInfo = app.jobIdToInfo.flatMap { case (_, jc) =>
      for {
        endTime <- jc.endTime
        duration <- jc.duration
      } yield {
        val jobId = jc.jobID
        val startTime = jc.startTime
        minStartTime = Math.min(minStartTime, startTime)
        maxEndTime = Math.max(maxEndTime, endTime)
        new TimelineJobInfo(jobId, startTime, endTime, duration)
      }
    }

    val sqlInfo = app.sqlIdToInfo.flatMap { case (_, sc) =>
      // If a SQL op fails, it may not have an end-time with it (So remove it from the graph)
      sc.endTime.map { endTime =>
        val sqlId = sc.sqlID
        val startTime = sc.startTime
        // Fall back to the start/end difference if no duration was recorded.
        val duration = sc.duration.getOrElse(endTime - startTime)
        minStartTime = Math.min(minStartTime, startTime)
        maxEndTime = Math.max(maxEndTime, endTime)
        new TimelineSqlInfo(sqlId, startTime, endTime, duration)
      }
    }

    // With no timing data at all minStartTime is still Long.MaxValue; collapse the range
    // so the image does not end up with a huge negative width.
    if (minStartTime > maxEndTime) {
      minStartTime = maxEndTime
    }

    // Add 1 second for padding at the end...
    maxEndTime += 1000

    // Do the high level layout of what the output page should look like
    // TITLE
    // EXEC(s)      | TASK TIMING
    // STAGES       | STAGE TIMING (Scheduled Stage to completed Stage)
    // STAGE RANGES | STAGE RANGE TIMING (Start of first task to end of the last task in stage)
    // JOBS         | JOB TIMING
    // SQLS         | SQL TIMING
    val titleStartX = PADDING
    val titleStartY = 0
    val titleEndY = titleStartY + TITLE_HEIGHT

    // All of the timings start at the same place
    val titleBoxStartX = PADDING
    val titleBoxWidth = TITLE_BOX_WIDTH
    val timingsStartX = titleBoxStartX + titleBoxWidth
    val timingsWidth = (maxEndTime - minStartTime)/MS_PER_PIXEL
    val timingsEndX = timingsStartX + timingsWidth

    // EXEC(s)
    val execsStartY = titleEndY
    val numExecTaskSlotsTotal = execHostToSlots.values.sum
    val execsHeight = calcTimingHeights(numExecTaskSlotsTotal)
    val execsWithFooterHeight = execsHeight + FOOTER_HEIGHT
    val execsEndY = execsStartY + execsWithFooterHeight

    // STAGES
    val stagesStartY = execsEndY
    val numStageSlots = calcLayoutSlotsNeeded(stageInfo)
    val stagesHeight = calcTimingHeights(numStageSlots)
    val stagesWithFooterHeight = stagesHeight + FOOTER_HEIGHT
    val stagesEndY = stagesStartY + stagesWithFooterHeight

    // STAGE RANGES
    val stageRangesStartY = stagesEndY
    val numStageRangeSlots = calcLayoutSlotsNeeded(stageRangeInfo)
    val stageRangesHeight = calcTimingHeights(numStageRangeSlots)
    val stageRangesWithFooterHeight = stageRangesHeight + FOOTER_HEIGHT
    val stageRangesEndY = stageRangesStartY + stageRangesWithFooterHeight

    // JOBS
    val jobsStartY = stageRangesEndY
    val numJobsSlots = calcLayoutSlotsNeeded(jobInfo)
    val jobsHeight = calcTimingHeights(numJobsSlots)
    val jobsWithFooterHeight = jobsHeight + FOOTER_HEIGHT
    val jobsEndY = jobsStartY + jobsWithFooterHeight

    // SQLS
    val sqlsStartY = jobsEndY
    val numSqlsSlots = calcLayoutSlotsNeeded(sqlInfo)
    val sqlsHeight = calcTimingHeights(numSqlsSlots)
    val sqlsWithFooterHeight = sqlsHeight + FOOTER_HEIGHT
    val sqlsEndY = sqlsStartY + sqlsWithFooterHeight

    // TOTAL IMAGE
    val imageHeight = sqlsEndY + PADDING
    val imageWidth = timingsEndX

    val fileWriter = new ToolTextFileWriter(outputDirectory,
      "timeline.svg", "Timeline file")
    try {
      fileWriter.write(
        s"""<?xml version="1.0" encoding="UTF-8" standalone="no"?>
           |<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
           | "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
           |<svg xmlns="http://www.w3.org/2000/svg" width="$imageWidth" height="$imageHeight">
           | <title>${app.appId} Timeline</title>
           |""".stripMargin)
      // TITLE
      textBoxVirtCentered(s"${app.appId} Timeline",
        titleStartX,
        titleStartY + TITLE_HEIGHT/2,
        fileWriter)

      // EXEC(s)
      var currentExecsStartY = execsStartY
      execHostToTaskList.foreach {
        case (execHost, taskList) =>
          val numElements = execHostToSlots(execHost)
          val execHostHeight = calcTimingHeights(numElements)

          sectionBox(execHost, currentExecsStartY, numElements, fileWriter)
          doLayout(taskList, numElements) {
            case (taskInfo, slot) =>
              val subTimings = new ArrayBuffer[(String, Long)]()
              if (taskInfo.deserTime > 0) {
                subTimings += (("yellow", taskInfo.deserTime))
              }
              if (taskInfo.readTime > 0) {
                subTimings += (("white", taskInfo.readTime))
              }
              if (taskInfo.semTime > 0) {
                subTimings += (("red", taskInfo.semTime))
              }
              if (taskInfo.opTime > 0) {
                subTimings += (("green", taskInfo.opTime))
              }
              if (taskInfo.writeTime > 0) {
                subTimings += (("blue", taskInfo.writeTime))
              }
              timingBox(s"${taskInfo.duration} ms",
                colorForStage(taskInfo.stageId),
                taskInfo,
                slot,
                timingsStartX,
                currentExecsStartY,
                minStartTime,
                fileWriter,
                subTimings)
          }
          currentExecsStartY += execHostHeight

          // Add a line to show different executors from each other
          fileWriter.write(
            s"""<line x1="$timingsStartX" y1="$currentExecsStartY" x2="$timingsEndX" y2="$currentExecsStartY" style="stroke:black;stroke-width:1"/>
               |""".stripMargin)
      }

      scaleWithLines(timingsStartX,
        execsStartY,
        minStartTime,
        maxEndTime,
        execsHeight,
        fileWriter)

      // STAGES
      sectionBox("STAGES", stagesStartY, numStageSlots, fileWriter)

      doLayout(stageInfo, numStageSlots) {
        case (si, slot) =>
          timingBox(s"STAGE ${si.stageId} ${si.duration} ms",
            colorForStage(si.stageId),
            si,
            slot,
            timingsStartX,
            stagesStartY,
            minStartTime,
            fileWriter)
      }

      scaleWithLines(timingsStartX,
        stagesStartY,
        minStartTime,
        maxEndTime,
        stagesHeight,
        fileWriter)

      // STAGE RANGES
      sectionBox("STAGE RANGES", stageRangesStartY, numStageRangeSlots, fileWriter)

      doLayout(stageRangeInfo, numStageRangeSlots) {
        case (si, slot) =>
          timingBox(s"STAGE RANGE ${si.stageId} ${si.duration} ms",
            colorForStage(si.stageId),
            si,
            slot,
            timingsStartX,
            stageRangesStartY,
            minStartTime,
            fileWriter)
      }

      scaleWithLines(timingsStartX,
        stageRangesStartY,
        minStartTime,
        maxEndTime,
        stageRangesHeight,
        fileWriter)

      // JOBS
      sectionBox("JOBS", jobsStartY, numJobsSlots, fileWriter)

      doLayout(jobInfo, numJobsSlots) {
        case (ji, slot) =>
          timingBox(s"JOB ${ji.jobId} ${ji.duration} ms",
            "green",
            ji,
            slot,
            timingsStartX,
            jobsStartY,
            minStartTime,
            fileWriter)
      }
      scaleWithLines(timingsStartX,
        jobsStartY,
        minStartTime,
        maxEndTime,
        jobsHeight,
        fileWriter)

      // SQLS
      sectionBox("SQL", sqlsStartY, numSqlsSlots, fileWriter)

      doLayout(sqlInfo, numSqlsSlots) {
        case (sql, slot) =>
          timingBox(s"SQL ${sql.sqlId} ${sql.duration} ms",
            "blue",
            sql,
            slot,
            timingsStartX,
            sqlsStartY,
            minStartTime,
            fileWriter)
      }
      scaleWithLines(timingsStartX,
        sqlsStartY,
        minStartTime,
        maxEndTime,
        sqlsHeight,
        fileWriter)

      fileWriter.write("</svg>")
    } finally {
      fileWriter.close()
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy