// com.nvidia.spark.rapids.tool.profiling.GenerateTimeline.scala
// Part of the rapids-4-spark-tools_2.12 artifact (RAPIDS Accelerator for Apache Spark tools).
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.nvidia.spark.rapids.tool.profiling
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import com.nvidia.spark.rapids.tool.ToolTextFileWriter
import org.apache.spark.sql.rapids.tool.profiling.ApplicationInfo
import org.apache.spark.sql.rapids.tool.store.{AccumInfo, AccumNameRef}
/**
 * Common base for every entry that occupies a span on the timeline.
 *
 * The layout code in this file only relies on these two values: entries are ordered by
 * `startTime` and a slot is considered busy until `endTime`.
 *
 * @param startTime when the entry begins
 * @param endTime when the entry ends
 */
abstract class TimelineTiming(val startTime: Long, val endTime: Long)
/**
 * Timeline entry for a single task, including the sub-timings that can be marked inside
 * the task's box.
 *
 * @param stageId id of the stage the task belongs to
 * @param taskId id of the task
 * @param startTime when the task begins (forwarded to [[TimelineTiming]])
 * @param endTime when the task ends (forwarded to [[TimelineTiming]])
 * @param duration duration reported for the task
 * @param deserTime deserialization time attributed to the task
 * @param readTime read time attributed to the task
 * @param semTime semaphore wait time attributed to the task
 * @param opTime operator time attributed to the task
 * @param writeTime write time attributed to the task
 */
class TimelineTaskInfo(
    val stageId: Int,
    val taskId: Long,
    startTime: Long,
    endTime: Long,
    val duration: Long,
    val deserTime: Long,
    val readTime: Long,
    val semTime: Long,
    val opTime: Long,
    val writeTime: Long)
  extends TimelineTiming(startTime, endTime)
/**
 * Timeline entry for a stage.
 *
 * @param stageId id of the stage
 * @param startTime when the stage span begins (forwarded to [[TimelineTiming]])
 * @param endTime when the stage span ends (forwarded to [[TimelineTiming]])
 * @param duration duration associated with the span
 */
class TimelineStageInfo(
    val stageId: Int,
    startTime: Long,
    endTime: Long,
    val duration: Long)
  extends TimelineTiming(startTime, endTime)
/**
 * Timeline entry for a job.
 *
 * @param jobId id of the job
 * @param startTime when the job begins (forwarded to [[TimelineTiming]])
 * @param endTime when the job ends (forwarded to [[TimelineTiming]])
 * @param duration duration reported for the job
 */
class TimelineJobInfo(
    val jobId: Int,
    startTime: Long,
    endTime: Long,
    val duration: Long)
  extends TimelineTiming(startTime, endTime)
/**
 * Timeline entry for a SQL execution.
 *
 * @param sqlId id of the SQL execution
 * @param startTime when the execution begins (forwarded to [[TimelineTiming]])
 * @param endTime when the execution ends (forwarded to [[TimelineTiming]])
 * @param duration duration reported for the execution
 */
class TimelineSqlInfo(
    val sqlId: Long,
    startTime: Long,
    endTime: Long,
    val duration: Long)
  extends TimelineTiming(startTime, endTime)
/**
* Generates an SVG graph that is used to show cluster timeline.
*/
object GenerateTimeline {
// Vertical space given to one layout slot; section heights are slot counts times this value.
private val TASK_HEIGHT = 20
// Width reserved for the title column; the timings start to the right of it.
private val TITLE_BOX_WIDTH = 200
// Generic spacing used around text and at the image edges.
private val PADDING = 5
// Font size that the title and footer heights are derived from.
private val FONT_SIZE = 14
// Title and footer rows are one line of text plus padding above and below.
private val TITLE_HEIGHT = FONT_SIZE + (PADDING * 2)
private val FOOTER_HEIGHT = FONT_SIZE + (PADDING * 2)
// Horizontal scale: time deltas are divided by this to obtain pixel offsets and widths.
private val MS_PER_PIXEL = 5.0
// Palette that stage ids are mapped onto. The order matters: colors are picked by index
// (modulo the palette length), so reordering entries changes which stage gets which color.
// Generated using https://mokole.com/palette.html
private val COLORS = Array(
"#696969",
"#dcdcdc",
"#556b2f",
"#8b4513",
"#483d8b",
"#008000",
"#3cb371",
"#008b8b",
"#000080",
"#800080",
"#b03060",
"#ff4500",
"#ffa500",
// Going to be used by lines/etc "#00ff00",
"#8a2be2",
"#00ff7f",
"#dc143c",
"#00ffff",
"#00bfff",
"#f4a460",
"#0000ff",
"#f08080",
"#adff2f",
"#da70d6",
"#ff00ff",
"#1e90ff",
"#eee8aa",
"#ffff54",
"#ff1493",
"#7b68ee")
/**
 * Counts how many parallel slots are required to lay out the given timings without overlap.
 *
 * Runs the layout with an initially empty slot list that is allowed to grow, ignores where
 * each timing lands, and reports how many slots ended up being created.
 *
 * @param toSchedule the timings to lay out
 * @return the number of slots the layout needed
 */
def calcLayoutSlotsNeeded[A <: TimelineTiming](toSchedule: Iterable[A]): Int = {
  val grownSlots = ArrayBuffer.empty[Long]
  // Only the final slot count matters here, so the placement itself is dropped.
  val ignorePlacement: (A, Int) => Unit = (_, _) => ()
  computeLayout(
    toSchedule,
    ignorePlacement,
    errorOnMissingSlot = false,
    slotsFreeUntil = grownSlots)
  grownSlots.length
}
/**
 * Lays out the timings into a fixed number of slots and reports each placement.
 *
 * @param toSchedule the timings to lay out
 * @param numSlots how many slots are available; the layout fails with an
 *                 `IllegalStateException` if a timing does not fit into any of them
 * @param scheduleCallback invoked once per timing with the timing and its slot index
 */
def doLayout[A <: TimelineTiming](
    toSchedule: Iterable[A],
    numSlots: Int)(scheduleCallback: (A, Int) => Unit): Unit = {
  // Every slot starts out free from time 0.
  val fixedSlots = Array.ofDim[Long](numSlots).toBuffer
  computeLayout(
    toSchedule,
    scheduleCallback,
    errorOnMissingSlot = true,
    slotsFreeUntil = fixedSlots)
}
/**
 * Greedy first-fit layout: visits the timings in order of start time and puts each one
 * into the lowest-numbered slot that is free again by the time it starts.
 *
 * @param toSchedule the timings to lay out
 * @param scheduleCallback invoked once per timing with the timing and its slot index
 * @param errorOnMissingSlot when true, an `IllegalStateException` is thrown if no existing
 *                           slot can take a timing; when false, a new slot is appended
 * @param slotsFreeUntil per-slot end time of the last timing placed there; updated in place
 */
def computeLayout[A <: TimelineTiming](
    toSchedule: Iterable[A],
    scheduleCallback: (A, Int) => Unit,
    errorOnMissingSlot: Boolean,
    slotsFreeUntil: mutable.Buffer[Long]): Unit = {
  val inStartOrder = toSchedule.toSeq.sortBy(_.startTime)
  for (item <- inStartOrder) {
    // There is some slop in how Spark reports this. Not sure why...
    // so a slot counts as free if it ends up to 1 past the start of the new item.
    val reusable = slotsFreeUntil.indexWhere(freeAt => (item.startTime + 1) >= freeAt)
    val chosen =
      if (reusable >= 0) {
        reusable
      } else if (errorOnMissingSlot) {
        throw new IllegalStateException("Not enough slots to schedule")
      } else {
        // Add a slot
        slotsFreeUntil += 0L
        slotsFreeUntil.length - 1
      }
    slotsFreeUntil(chosen) = item.endTime
    scheduleCallback(item, chosen)
  }
}
/**
 * Writes `text` followed by a newline to the output file.
 *
 * NOTE(review): `x` and `y` are accepted but not referenced by the template as it appears
 * here; the template looks like it has lost its surrounding markup - confirm against the
 * upstream file.
 *
 * @param text the text to emit
 * @param x horizontal position for the text (currently unused in this body)
 * @param y vertical position for the text (currently unused in this body)
 * @param fileWriter destination for the output
 */
private def textBoxVirtCentered(
    text: String,
    x: Number,
    y: Long,
    fileWriter: ToolTextFileWriter): Unit = {
  val rendered =
    s"""$text
       |""".stripMargin
  fileWriter.write(rendered)
}
/**
 * Emits the title box of one section and then its label, vertically centered in the section.
 *
 * NOTE(review): the box template below contains only a newline in this view; it looks like
 * its markup is missing - confirm against the upstream file.
 *
 * @param text label of the section
 * @param yStart top of the section
 * @param numElements number of slots in the section; each one is `TASK_HEIGHT` tall
 * @param fileWriter destination for the output
 */
private def sectionBox(
    text: String,
    yStart: Long,
    numElements: Int,
    fileWriter: ToolTextFileWriter): Unit = {
  val sectionHeight = TASK_HEIGHT * numElements
  val labelY = yStart + sectionHeight / 2
  // Draw a box for the Host
  val box =
    s"""
       |""".stripMargin
  fileWriter.write(box)
  textBoxVirtCentered(text, PADDING * 2, labelY, fileWriter)
}
// Emits the mark for one sub-timing (a portion of a task's time) inside a timing box.
//
// Parameters:
//   color     - color for the mark
//   startTime - start of the sub-timing
//   endTime   - end of the sub-timing
//   slot      - layout slot of the enclosing timing box
//   xStart    - x position that corresponds to minStart
//   yStart    - y position of slot 0
//   minStart  - the time that maps to xStart
//   fileWriter - destination for the output
//
// NOTE(review): x, y, width and height are computed but not referenced by the template
// below, which contains only a newline in this view. The template looks like it has lost
// its markup - confirm against the upstream file before changing anything here.
private def subTimingMark(
color: String,
startTime: Long,
endTime: Long,
slot: Int,
xStart: Long,
yStart: Long,
minStart: Long,
fileWriter: ToolTextFileWriter): Unit = {
// Horizontal offset: time since minStart scaled by MS_PER_PIXEL.
val x = xStart + (startTime - minStart) / MS_PER_PIXEL
// The mark starts half a slot below the top of the slot ...
val y = (slot * TASK_HEIGHT) + yStart + TASK_HEIGHT / 2
val width = (endTime - startTime) / MS_PER_PIXEL
// ... and is half a slot tall.
val height = TASK_HEIGHT / 2
fileWriter.write(
s"""
|""".stripMargin)
}
/**
 * Emits the box for one timing, then its sub-timing marks laid end to end starting at the
 * timing's start, and finally the label for the box.
 *
 * NOTE(review): `color` and the computed width are not referenced by the box template as it
 * appears here; the template looks like it has lost its markup - confirm against upstream.
 *
 * @param text label for the box
 * @param color color for the box
 * @param timing the timing being drawn
 * @param slot layout slot the timing was assigned to
 * @param xStart x position that corresponds to `minStart`
 * @param yStart y position of slot 0
 * @param minStart the time that maps to `xStart`
 * @param fileWriter destination for the output
 * @param subMarks `(color, length)` pairs; each mark starts where the previous one ended
 */
private def timingBox[A <: TimelineTiming](
    text: String,
    color: String,
    timing: A,
    slot: Int,
    xStart: Long,
    yStart: Long,
    minStart: Long,
    fileWriter: ToolTextFileWriter,
    subMarks: Seq[(String, Long)] = Seq.empty): Unit = {
  val boxStart = timing.startTime
  val boxEnd = timing.endTime
  val boxX = xStart + (boxStart - minStart) / MS_PER_PIXEL
  val boxY = yStart + (slot * TASK_HEIGHT)
  val boxWidth = (boxEnd - boxStart) / MS_PER_PIXEL
  fileWriter.write(
    s"""
       |""".stripMargin)
  // Running start of each mark: the box start plus the lengths of all preceding marks.
  val markStarts = subMarks.scanLeft(boxStart) { case (soFar, (_, length)) => soFar + length }
  subMarks.zip(markStarts).foreach { case ((subColor, subTimeLength), markStart) =>
    subTimingMark(
      subColor,
      markStart,
      markStart + subTimeLength,
      slot,
      xStart,
      yStart,
      minStart,
      fileWriter)
  }
  textBoxVirtCentered(text, boxX, boxY + TASK_HEIGHT / 2, fileWriter)
}
/**
 * Emits the time scale for a section: one write for the scale itself, then one write per
 * 100 units of time for a tick, plus a `"<time> ms"` label on every tick that falls on a
 * multiple of 1000.
 *
 * NOTE(review): `xEnd`, `yEnd` and the per-tick x position are computed but not referenced
 * by the templates as they appear here; the templates look like they have lost their
 * markup - confirm against the upstream file.
 *
 * @param x x position that corresponds to `minStart`
 * @param y top of the scale
 * @param minStart earliest time on the scale
 * @param maxFinish latest time on the scale
 * @param height height of the area the scale spans
 * @param fileWriter destination for the output
 */
private def scaleWithLines(x: Long,
    y: Long,
    minStart: Long,
    maxFinish: Long,
    height: Long,
    fileWriter: ToolTextFileWriter): Unit = {
  val timeRange = maxFinish - minStart
  val xEnd = x + timeRange / MS_PER_PIXEL
  val yEnd = y + height
  fileWriter.write(
    s"""
       |
       |""".stripMargin)
  for (tickTime <- 0L until timeRange by 100L) {
    val xTick = tickTime / MS_PER_PIXEL + x
    fileWriter.write(
      s"""
         |""".stripMargin)
    val isLabelledTick = tickTime % 1000 == 0
    if (isLabelledTick) {
      fileWriter.write(
        s"""
           |$tickTime ms
           |""".stripMargin)
    }
  }
}
/** Height of a section that holds `slots` layout slots, each `TASK_HEIGHT` tall. */
private def calcTimingHeights(slots: Int): Int = {
  TASK_HEIGHT * slots
}
/**
 * Collects the task, stage, job and SQL timings of an application, works out the vertical
 * layout of the timeline sections, and writes `timeline.svg` into `outputDirectory`.
 *
 * Per-task sub-timings (read, semaphore wait, op and write time) are taken from the SQL
 * metric accumulators by looking up each task's own update. Stages, jobs and SQL executions
 * that lack the values needed to draw them (for example no end time) are left out.
 *
 * @param app the application whose events are drawn
 * @param outputDirectory directory in which `timeline.svg` is created
 */
def generateFor(app: ApplicationInfo, outputDirectory: String): Unit = {
  // Gather the data
  val execHostToTaskList = new mutable.TreeMap[String, ArrayBuffer[TimelineTaskInfo]]()
  val stageIdToColor = mutable.HashMap[Int, String]()
  var colorIndex = 0
  var minStartTime = Long.MaxValue
  var maxEndTime = 0L
  // We want to show sub-timings here too.
  // semaphore wait time
  // scan time
  // op time
  val semWaitIds = new mutable.HashSet[Long]()
  val readTimeIds = new mutable.HashSet[Long]()
  val opTimeIds = new mutable.HashSet[Long]()
  val writeTimeIds = new mutable.HashSet[Long]()
  // Bucket the nanosecond timing metrics by what they measure.
  app.planMetricProcessor.allSQLMetrics.foreach { f =>
    f.name match {
      case "op time" | "GPU decode time" | "GPU Time" if f.metricType == "nsTiming" =>
        opTimeIds += f.accumulatorId
      case "GPU semaphore wait time" if f.metricType == "nsTiming" =>
        semWaitIds += f.accumulatorId
      case "buffer time" if f.metricType == "nsTiming" =>
        readTimeIds += f.accumulatorId
      case "write time" if f.metricType == "nsTiming" =>
        writeTimeIds += f.accumulatorId
      case _ =>
    }
  }
  // Keep the accumulators themselves (not a grand total) so that each task can be charged
  // only its own semaphore wait, the same way read/op/write time are handled below.
  val semMetricsNs = semWaitIds.toList.flatMap(app.accumManager.accumInfoMap.get)
  val semMetricsMs = app.accumManager.accumInfoMap.collect {
    case (_, accumInfo: AccumInfo)
        if accumInfo.infoRef.name == AccumNameRef.NAMES_TABLE.get("gpuSemaphoreWait") =>
      accumInfo
  }.toList
  val readMetrics = readTimeIds.toList.flatMap(app.accumManager.accumInfoMap.get)
  val opMetrics = opTimeIds.toList.flatMap(app.accumManager.accumInfoMap.get)
  val writeMetrics = writeTimeIds.toList.flatMap(app.accumManager.accumInfoMap.get)
  app.taskManager.getAllTasks().foreach { tc =>
    val host = tc.host
    val execId = tc.executorId
    val stageId = tc.stageId
    val taskId = tc.taskId
    val launchTime = tc.launchTime
    val finishTime = tc.finishTime
    val duration = tc.duration
    // The SQL-metric accumulators are in nanoseconds (hence the division); the
    // "gpuSemaphoreWait" accumulators are added as-is.
    val semTimeMs = semMetricsNs.flatMap(_.taskUpdatesMap.get(taskId)).sum / 1000000 +
      semMetricsMs.flatMap(_.taskUpdatesMap.get(taskId)).sum
    val readTimeMs = readMetrics.flatMap(_.taskUpdatesMap.get(taskId)).sum / 1000000 +
      tc.sr_fetchWaitTime
    val opTimeMs = opMetrics.flatMap(_.taskUpdatesMap.get(taskId)).sum / 1000000
    val writeTimeMs = writeMetrics.flatMap(_.taskUpdatesMap.get(taskId)).sum / 1000000 +
      tc.sw_writeTime
    val taskInfo = new TimelineTaskInfo(stageId, taskId, launchTime, finishTime, duration,
      tc.executorDeserializeTime, readTimeMs, semTimeMs, opTimeMs, writeTimeMs)
    val execHost = s"$execId/$host"
    execHostToTaskList.getOrElseUpdate(execHost, ArrayBuffer.empty) += taskInfo
    minStartTime = Math.min(launchTime, minStartTime)
    maxEndTime = Math.max(finishTime, maxEndTime)
  }
  // Assign palette colors in ascending stage-id order, wrapping around when there are more
  // stages than colors.
  val allStageIds = app.stageManager.getAllStageIds.toSeq
  allStageIds.sorted.foreach { stageId =>
    stageIdToColor.getOrElseUpdate(stageId, {
      val color = COLORS(colorIndex % COLORS.length)
      colorIndex += 1
      color
    })
  }
  // Stage ranges: from the start of a stage's first task to the end of its last task.
  val stageRangeInfo = execHostToTaskList.values.flatMap { taskList =>
    taskList
  }.groupBy { taskInfo =>
    taskInfo.stageId
  }.map {
    case (stageId, iter) =>
      val start = iter.map(_.startTime).min
      val end = iter.map(_.endTime).max
      new TimelineStageInfo(stageId, start, end, end-start)
  }
  // Only stages that have both a submission and a completion time can be drawn.
  val stageInfo = app.stageManager.getAllStages.flatMap { sm =>
    for {
      submissionTime <- sm.stageInfo.submissionTime
      completionTime <- sm.stageInfo.completionTime
    } yield {
      val stageId = sm.stageInfo.stageId
      val duration = sm.getDuration
      minStartTime = Math.min(minStartTime, submissionTime)
      maxEndTime = Math.max(maxEndTime, completionTime)
      new TimelineStageInfo(stageId, submissionTime, completionTime, duration)
    }
  }
  val execHostToSlots = execHostToTaskList.map {
    case (execHost, taskList) =>
      (execHost, calcLayoutSlotsNeeded(taskList))
  }.toMap
  val jobInfo = app.jobIdToInfo.flatMap { case (_, jc) =>
    for {
      endTime <- jc.endTime
      duration <- jc.duration
    } yield {
      val jobId = jc.jobID
      val startTime = jc.startTime
      minStartTime = Math.min(minStartTime, startTime)
      maxEndTime = Math.max(maxEndTime, endTime)
      new TimelineJobInfo(jobId, startTime, endTime, duration)
    }
  }
  val sqlInfo = app.sqlIdToInfo.flatMap { case (_, sc) =>
    // If a SQL op fails, it may not have an end-time with it (So remove it from the graph).
    // The duration is required as well, so an entry without one is skipped instead of
    // failing on an empty Option.
    for {
      endTime <- sc.endTime
      duration <- sc.duration
    } yield {
      val sqlId = sc.sqlID
      val startTime = sc.startTime
      minStartTime = Math.min(minStartTime, startTime)
      maxEndTime = Math.max(maxEndTime, endTime)
      new TimelineSqlInfo(sqlId, startTime, endTime, duration)
    }
  }
  // Nothing was recorded at all: anchor the (empty) timeline at maxEndTime so that the
  // width computed below does not go hugely negative.
  if (minStartTime == Long.MaxValue) {
    minStartTime = maxEndTime
  }
  // Add 1 second for padding at the end...
  maxEndTime += 1000
  // Do the high level layout of what the output page should look like
  // TITLE
  // EXEC(s) | TASK TIMING
  // STAGES | STAGE TIMING (Scheduled Stage to completed Stage)
  // STAGE RANGES | STAGE RANGE TIMING (Start of first task to end of the last task in stage)
  // JOBS | JOB TIMING
  // SQLS | SQL TIMING
  val titleStartX = PADDING
  val titleStartY = 0
  val titleEndY = titleStartY + TITLE_HEIGHT
  // All of the timings start at the same place
  val titleBoxStartX = PADDING
  val titleBoxWidth = TITLE_BOX_WIDTH
  val timingsStartX = titleBoxStartX + titleBoxWidth
  val timingsWidth = (maxEndTime - minStartTime)/MS_PER_PIXEL
  val timingsEndX = timingsStartX + timingsWidth
  // EXEC(s)
  val execsStartY = titleEndY
  val numExecTaskSlotsTotal = execHostToSlots.values.sum
  val execsHeight = calcTimingHeights(numExecTaskSlotsTotal)
  val execsWithFooterHeight = execsHeight + FOOTER_HEIGHT
  val execsEndY = execsStartY + execsWithFooterHeight
  // STAGES
  val stagesStartY = execsEndY
  val numStageSlots = calcLayoutSlotsNeeded(stageInfo)
  val stagesHeight = calcTimingHeights(numStageSlots)
  val stagesWithFooterHeight = stagesHeight + FOOTER_HEIGHT
  val stagesEndY = stagesStartY + stagesWithFooterHeight
  // STAGE RANGES
  val stageRangesStartY = stagesEndY
  val numStageRangeSlots = calcLayoutSlotsNeeded(stageRangeInfo)
  val stageRangesHeight = calcTimingHeights(numStageRangeSlots)
  val stageRangesWithFooterHeight = stageRangesHeight + FOOTER_HEIGHT
  val stageRangesEndY = stageRangesStartY + stageRangesWithFooterHeight
  // JOBS
  val jobsStartY = stageRangesEndY
  val numJobsSlots = calcLayoutSlotsNeeded(jobInfo)
  val jobsHeight = calcTimingHeights(numJobsSlots)
  val jobsWithFooterHeight = jobsHeight + FOOTER_HEIGHT
  val jobsEndY = jobsStartY + jobsWithFooterHeight
  // SQLS
  val sqlsStartY = jobsEndY
  val numSqlsSlots = calcLayoutSlotsNeeded(sqlInfo)
  val sqlsHeight = calcTimingHeights(numSqlsSlots)
  val sqlsWithFooterHeight = sqlsHeight + FOOTER_HEIGHT
  val sqlsEndY = sqlsStartY + sqlsWithFooterHeight
  // TOTAL IMAGE
  val imageHeight = sqlsEndY + PADDING
  val imageWidth = timingsEndX
  val fileWriter = new ToolTextFileWriter(outputDirectory,
    s"timeline.svg", "Timeline file")
  // NOTE(review): the literal below is reproduced exactly as found (margin bars at column 0
  // and no stripMargin), and nothing after it draws the sections laid out above. It looks
  // like markup and drawing code are missing from this view - confirm against upstream.
  try {
    fileWriter.write(
      s"""
|
|
|""")
  } finally {
    // Always release the file, even if writing fails.
    fileWriter.close()
  }
}
}