org.apache.spark.ui.jobs.JobPage.scala Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.ui.jobs
import java.util.Locale
import javax.servlet.http.HttpServletRequest
import scala.collection.mutable.{Buffer, ListBuffer}
import scala.xml.{Node, NodeSeq, Unparsed, Utility}
import org.apache.commons.text.StringEscapeUtils
import org.apache.spark.JobExecutionStatus
import org.apache.spark.resource.ResourceProfile
import org.apache.spark.status.AppStatusStore
import org.apache.spark.status.api.v1
import org.apache.spark.ui._
/** Page showing statistics and stage list for a given job */
private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIPage("job") {
private val STAGES_LEGEND =
.toString.filter(_ != '\n')
private val EXECUTORS_LEGEND =
.toString.filter(_ != '\n')
private def makeStageEvent(stageInfos: Seq[v1.StageData]): Seq[String] = {
stageInfos.map { stage =>
val stageId = stage.stageId
val attemptId = stage.attemptId
val name = stage.name
val status = stage.status.toString.toLowerCase(Locale.ROOT)
val submissionTime = stage.submissionTime.get.getTime()
val completionTime = stage.completionTime.map(_.getTime())
.getOrElse(System.currentTimeMillis())
// The timeline library treats contents as HTML, so we have to escape them. We need to add
// extra layers of escaping in order to embed this in a JavaScript string literal.
val escapedName = Utility.escape(name)
val jsEscapedNameForTooltip = StringEscapeUtils.escapeEcmaScript(Utility.escape(escapedName))
val jsEscapedNameForLabel = StringEscapeUtils.escapeEcmaScript(escapedName)
s"""
|{
| 'className': 'stage job-timeline-object ${status}',
| 'group': 'stages',
| 'start': new Date(${submissionTime}),
| 'end': new Date(${completionTime}),
| 'content': '' +
| 'Status: ${status.toUpperCase(Locale.ROOT)}
' +
| 'Submitted: ${UIUtils.formatDate(submissionTime)}' +
| '${
if (status != " running") { s"""
Completed: ${UIUtils.formatDate(completionTime)}"""
} else {
""
}
}">' +
| '${jsEscapedNameForLabel} (Stage ${stageId}.${attemptId})',
|}
""".stripMargin
}
}
def makeExecutorEvent(executors: Seq[v1.ExecutorSummary]): Seq[String] = {
val events = ListBuffer[String]()
executors.foreach { e =>
val addedEvent =
s"""
|{
| 'className': 'executor added',
| 'group': 'executors',
| 'start': new Date(${e.addTime.getTime()}),
| 'content': '' +
| 'Added at ${UIUtils.formatDate(e.addTime)}" ' + | 'data-html="true">Executor ${e.id} added'
|}
""".stripMargin
events += addedEvent
e.removeTime.foreach { removeTime =>
val removedEvent =
s"""
|{
| 'className': 'executor removed',
| 'group': 'executors',
| 'start': new Date(${removeTime.getTime()}),
| 'content': '' +
| 'Removed at ${UIUtils.formatDate(removeTime)}' +
| '${
e.removeReason.map { reason =>
s" ""
Reason: ${StringEscapeUtils.escapeEcmaScript(
reason.replace("\n", " "))}"""
}.getOrElse("")
}"' +
| 'data-html="true">Executor ${e.id} removed'
|}
""".stripMargin
events += removedEvent
}
}
events.toSeq
}
private def makeTimeline(
stages: Seq[v1.StageData],
executors: Seq[v1.ExecutorSummary],
appStartTime: Long): Seq[Node] = {
val stageEventJsonAsStrSeq = makeStageEvent(stages)
val executorsJsonAsStrSeq = makeExecutorEvent(executors)
val groupJsonArrayAsStr =
s"""
|[
| {
| 'id': 'executors',
| 'content': 'Executors${EXECUTORS_LEGEND}',
| },
| {
| 'id': 'stages',
| 'content': 'Stages${STAGES_LEGEND}',
| }
|]
""".stripMargin
val eventArrayAsStr =
(stageEventJsonAsStrSeq ++ executorsJsonAsStrSeq).mkString("[", ",", "]")
++
Enable zooming
++
}
def render(request: HttpServletRequest): Seq[Node] = {
val parameterId = request.getParameter("id")
require(parameterId != null && parameterId.nonEmpty, "Missing id parameter")
val jobId = parameterId.toInt
val (jobData, sqlExecutionId) = store.asOption(store.jobWithAssociatedSql(jobId)).getOrElse {
val content =
No information to display for job {jobId}
return UIUtils.headerSparkPage(
request, s"Details for Job $jobId", content, parent)
}
val isComplete = jobData.status != JobExecutionStatus.RUNNING
val stages = jobData.stageIds.map { stageId =>
// This could be empty if the listener hasn't received information about the
// stage or if the stage information has been garbage collected
store.asOption(store.lastStageAttempt(stageId)).getOrElse {
new v1.StageData(
status = v1.StageStatus.PENDING,
stageId = stageId,
attemptId = 0,
numTasks = 0,
numActiveTasks = 0,
numCompleteTasks = 0,
numFailedTasks = 0,
numKilledTasks = 0,
numCompletedIndices = 0,
submissionTime = None,
firstTaskLaunchedTime = None,
completionTime = None,
failureReason = None,
executorDeserializeTime = 0L,
executorDeserializeCpuTime = 0L,
executorRunTime = 0L,
executorCpuTime = 0L,
resultSize = 0L,
jvmGcTime = 0L,
resultSerializationTime = 0L,
memoryBytesSpilled = 0L,
diskBytesSpilled = 0L,
peakExecutionMemory = 0L,
inputBytes = 0L,
inputRecords = 0L,
outputBytes = 0L,
outputRecords = 0L,
shuffleRemoteBlocksFetched = 0L,
shuffleLocalBlocksFetched = 0L,
shuffleFetchWaitTime = 0L,
shuffleRemoteBytesRead = 0L,
shuffleRemoteBytesReadToDisk = 0L,
shuffleLocalBytesRead = 0L,
shuffleReadBytes = 0L,
shuffleReadRecords = 0L,
shuffleWriteBytes = 0L,
shuffleWriteTime = 0L,
shuffleWriteRecords = 0L,
name = "Unknown",
description = None,
details = "Unknown",
schedulingPool = null,
rddIds = Nil,
accumulatorUpdates = Nil,
tasks = None,
executorSummary = None,
killedTasksSummary = Map(),
ResourceProfile.UNKNOWN_RESOURCE_PROFILE_ID,
peakExecutorMetrics = None)
}
}
val activeStages = Buffer[v1.StageData]()
val completedStages = Buffer[v1.StageData]()
// If the job is completed, then any pending stages are displayed as "skipped":
val pendingOrSkippedStages = Buffer[v1.StageData]()
val failedStages = Buffer[v1.StageData]()
for (stage <- stages) {
if (stage.submissionTime.isEmpty) {
pendingOrSkippedStages += stage
} else if (stage.completionTime.isDefined) {
if (stage.status == v1.StageStatus.FAILED) {
failedStages += stage
} else {
completedStages += stage
}
} else {
activeStages += stage
}
}
val basePath = "jobs/job"
val pendingOrSkippedTableId =
if (isComplete) {
"skipped"
} else {
"pending"
}
val activeStagesTable =
new StageTableBase(store, request, activeStages.toSeq, "active", "activeStage",
parent.basePath, basePath, parent.isFairScheduler,
killEnabled = parent.killEnabled, isFailedStage = false)
val pendingOrSkippedStagesTable =
new StageTableBase(store, request, pendingOrSkippedStages.toSeq, pendingOrSkippedTableId,
"pendingStage", parent.basePath, basePath, parent.isFairScheduler,
killEnabled = false, isFailedStage = false)
val completedStagesTable =
new StageTableBase(store, request, completedStages.toSeq, "completed", "completedStage",
parent.basePath, basePath, parent.isFairScheduler,
killEnabled = false, isFailedStage = false)
val failedStagesTable =
new StageTableBase(store, request, failedStages.toSeq, "failed", "failedStage",
parent.basePath, basePath, parent.isFairScheduler,
killEnabled = false, isFailedStage = true)
val shouldShowActiveStages = activeStages.nonEmpty
val shouldShowPendingStages = !isComplete && pendingOrSkippedStages.nonEmpty
val shouldShowCompletedStages = completedStages.nonEmpty
val shouldShowSkippedStages = isComplete && pendingOrSkippedStages.nonEmpty
val shouldShowFailedStages = failedStages.nonEmpty
val summary: NodeSeq =
-
Status:
{jobData.status}
-
Submitted:
{JobDataUtil.getFormattedSubmissionTime(jobData)}
-
Duration:
{JobDataUtil.getFormattedDuration(jobData)}
{
if (sqlExecutionId.isDefined) {
-
Associated SQL Query:
{{sqlExecutionId.get}}
}
}
{
if (jobData.jobGroup.isDefined) {
-
Job Group:
{jobData.jobGroup.get}
}
}
{
if (shouldShowActiveStages) {
-
Active Stages:
{activeStages.size}
}
}
{
if (shouldShowPendingStages) {
-
Pending Stages:
{pendingOrSkippedStages.size}
}
}
{
if (shouldShowCompletedStages) {
-
Completed Stages:
{completedStages.size}
}
}
{
if (shouldShowSkippedStages) {
-
Skipped Stages:
{pendingOrSkippedStages.size}
}
}
{
if (shouldShowFailedStages) {
-
Failed Stages:
{failedStages.size}
}
}
var content = summary
val appStartTime = store.applicationInfo().attempts.head.startTime.getTime()
content ++= makeTimeline((activeStages ++ completedStages ++ failedStages).toSeq,
store.executorList(false), appStartTime)
val operationGraphContent = store.asOption(store.operationGraphForJob(jobId)) match {
case Some(operationGraph) => UIUtils.showDagVizForJob(jobId, operationGraph)
case None =>
No DAG visualization information to display for job {jobId}
}
content ++= operationGraphContent
if (shouldShowActiveStages) {
content ++=
Active Stages ({activeStages.size})
++
{activeStagesTable.toNodeSeq}
}
if (shouldShowPendingStages) {
content ++=
Pending Stages ({pendingOrSkippedStages.size})
++
{pendingOrSkippedStagesTable.toNodeSeq}
}
if (shouldShowCompletedStages) {
content ++=
Completed Stages ({completedStages.size})
++
{completedStagesTable.toNodeSeq}
}
if (shouldShowSkippedStages) {
content ++=
Skipped Stages ({pendingOrSkippedStages.size})
++
{pendingOrSkippedStagesTable.toNodeSeq}
}
if (shouldShowFailedStages) {
content ++=
Failed Stages ({failedStages.size})
++
{failedStagesTable.toNodeSeq}
}
UIUtils.headerSparkPage(
request, s"Details for Job $jobId", content, parent, showVisualization = true)
}
}