// org.apache.spark.streaming.ui.BatchPage.scala (Maven / Gradle / Ivy artifact listing; the newest version)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.streaming.ui
import javax.servlet.http.HttpServletRequest
import scala.xml._
import org.apache.commons.text.StringEscapeUtils
import org.apache.spark.status.api.v1.{JobData, StageData}
import org.apache.spark.streaming.Time
import org.apache.spark.streaming.ui.StreamingJobProgressListener.SparkJobId
import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage}
/**
 * Pairs a Spark job id with the job's UI data, when that data could be found.
 *
 * @param sparkJobId id of the Spark job
 * @param jobData    the job's data, or `None` when it is not available
 */
private[ui] case class SparkJobIdWithUIData(
    sparkJobId: SparkJobId,
    jobData: Option[JobData])
/**
 * Web UI page, registered under the "batch" prefix, that shows the details of a single
 * streaming batch: a summary followed by a table of its output operations and Spark jobs.
 *
 * @param parent the streaming tab this page belongs to
 */
private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
// Listener that provides the batch data; `render` synchronizes on it while building the page.
private val streamingListener = parent.listener
// Store queried for job data and for the last attempt of a stage.
private val store = parent.parent.store
/**
 * Header cells of the job table. The labels, in order, are: Output Op Id, Description,
 * Output Op Duration, Status, Job Id, Job Duration, Stages: Succeeded/Total,
 * Tasks (for all stages): Succeeded/Total and Error. The two duration headers carry a
 * tooltip placed at "top".
 *
 * NOTE(review): the body contains bare text where scala.xml markup is expected; the
 * element tags look stripped in this copy. Confirm against upstream before compiling.
 */
private def columns: Seq[Node] = {
Output Op Id
Description
Output Op Duration {SparkUIUtils.tooltip("Time taken for all the jobs of this batch to" +
" finish processing from the time they were submitted.",
"top")}
Status
Job Id
Job Duration {SparkUIUtils.tooltip("Time taken from submission time to completion " +
"time of the job", "top")}
Stages: Succeeded/Total
Tasks (for all stages): Succeeded/Total
Error
}
/**
 * Builds the table row for one Spark job of an output operation.
 *
 * When the job's data is available the row is produced by `generateNormalJobRow`;
 * otherwise only the job id is known and `generateDroppedJobRow` is used.
 *
 * @param request                   the HTTP request being served
 * @param outputOpData              the output operation the job belongs to
 * @param outputOpDescription       pre-rendered description of the output operation
 * @param formattedOutputOpDuration pre-formatted duration of the output operation
 * @param numSparkJobRowsInOutputOp number of job rows of this output operation
 * @param isFirstRow                whether this is the first job row of the output operation
 * @param jobIdWithData             the job id together with its optional data
 * @return the nodes making up the row
 */
private def generateJobRow(
    request: HttpServletRequest,
    outputOpData: OutputOperationUIData,
    outputOpDescription: Seq[Node],
    formattedOutputOpDuration: String,
    numSparkJobRowsInOutputOp: Int,
    isFirstRow: Boolean,
    jobIdWithData: SparkJobIdWithUIData): Seq[Node] = {
  jobIdWithData.jobData match {
    case Some(sparkJob) =>
      generateNormalJobRow(
        request,
        outputOpData,
        outputOpDescription,
        formattedOutputOpDuration,
        numSparkJobRowsInOutputOp,
        isFirstRow,
        sparkJob)
    case None =>
      generateDroppedJobRow(
        outputOpData,
        outputOpDescription,
        formattedOutputOpDuration,
        numSparkJobRowsInOutputOp,
        isFirstRow,
        jobIdWithData.sparkJobId)
  }
}
/**
 * Builds the single row shown for an output operation that has no Spark jobs: its id,
 * description, duration and status cell (rowspan 1), followed by "-" placeholders for
 * the remaining columns.
 *
 * NOTE(review): the body contains bare text where scala.xml markup is expected; the
 * element tags look stripped in this copy. Confirm against upstream before compiling.
 *
 * @param outputOpData              the output operation to show
 * @param outputOpDescription       pre-rendered description of the output operation
 * @param formattedOutputOpDuration pre-formatted duration of the output operation
 */
private def generateOutputOpRowWithoutSparkJobs(
outputOpData: OutputOperationUIData,
outputOpDescription: Seq[Node],
formattedOutputOpDuration: String): Seq[Node] = {
{outputOpData.id.toString}
{outputOpDescription}
{formattedOutputOpDuration}
{outputOpStatusCell(outputOpData, rowspan = 1)}
-
-
-
-
-
}
/**
 * Generate a row for a Spark job whose data is available. Because the output op
 * information is the same for every job of that output op, it is collapsed into one set
 * of cells: only the first row of an output op emits them (using "rowspan").
 *
 * NOTE(review): the cell/row markup below contains bare expressions where scala.xml
 * literals are expected; the element tags look stripped in this copy. Confirm against
 * upstream before compiling.
 *
 * @param request                   the HTTP request, used to build the job detail URL
 * @param outputOpData              the output operation the job belongs to
 * @param outputOpDescription       pre-rendered description of the output operation
 * @param formattedOutputOpDuration pre-formatted duration of the output operation
 * @param numSparkJobRowsInOutputOp number of job rows of this output op, passed to the status cell
 * @param isFirstRow                whether the output op cells must be emitted in this row
 * @param sparkJob                  the data of the job to show
 */
private def generateNormalJobRow(
request: HttpServletRequest,
outputOpData: OutputOperationUIData,
outputOpDescription: Seq[Node],
formattedOutputOpDuration: String,
numSparkJobRowsInOutputOp: Int,
isFirstRow: Boolean,
sparkJob: JobData): Seq[Node] = {
// Elapsed time since submission: up to the completion time when there is one, otherwise
// up to the current time. Empty when the job has no submission time.
val duration: Option[Long] = {
sparkJob.submissionTime.map { start =>
val end = sparkJob.completionTime.map(_.getTime()).getOrElse(System.currentTimeMillis())
end - start.getTime()
}
}
// Walk the job's stages from the highest stage id down, skipping stages that cannot be
// looked up, and keep the first failure reason found; empty string when there is none.
val lastFailureReason =
sparkJob.stageIds.sorted(Ordering.Int.reverse).flatMap(getStageData).
dropWhile(_.failureReason == None).take(1). // get the first info that contains failure
flatMap(info => info.failureReason).headOption.getOrElse("")
// "-" is shown when the duration could not be computed.
val formattedDuration = duration.map(d => SparkUIUtils.formatDuration(d)).getOrElse("-")
// Address of the job's detail page, relative to the UI base URI.
val detailUrl = s"${SparkUIUtils.prependBaseUri(
request, parent.basePath)}/jobs/job/?id=${sparkJob.jobId}"
// In the first row, output op id and its information needs to be shown. In other rows, these
// cells will be taken up due to "rowspan".
// scalastyle:off
val prefixCells =
if (isFirstRow) {
{outputOpData.id.toString}
{outputOpDescription}
{formattedOutputOpDuration} ++
{outputOpStatusCell(outputOpData, numSparkJobRowsInOutputOp)}
} else {
Nil
}
// scalastyle:on
{prefixCells}
{sparkJob.jobId}{sparkJob.jobGroup.map(id => s"($id)").getOrElse("")}
{formattedDuration}
{sparkJob.numCompletedStages}/{sparkJob.stageIds.size - sparkJob.numSkippedStages}
{if (sparkJob.numFailedStages > 0) s"(${sparkJob.numFailedStages} failed)"}
{if (sparkJob.numSkippedStages > 0) s"(${sparkJob.numSkippedStages} skipped)"}
{
SparkUIUtils.makeProgressBar(
started = sparkJob.numActiveTasks,
completed = sparkJob.numCompletedTasks,
failed = sparkJob.numFailedTasks,
skipped = sparkJob.numSkippedTasks,
reasonToNumKilled = sparkJob.killedTasksSummary,
total = sparkJob.numTasks - sparkJob.numSkippedTasks)
}
{UIUtils.failureReasonCell(lastFailureReason)}
}
/**
 * Generate a row for a Spark job whose data is not available, for example because the
 * job was dropped by the Spark listener after exceeding its retention limit. Only the
 * job id is shown (or "-" when the id is negative); the remaining job cells are "-".
 *
 * NOTE(review): the cell/row markup below contains bare expressions where scala.xml
 * literals are expected; the element tags look stripped in this copy. Confirm against
 * upstream before compiling.
 *
 * @param outputOpData              the output operation the job belongs to
 * @param outputOpDescription       pre-rendered description of the output operation
 * @param formattedOutputOpDuration pre-formatted duration of the output operation
 * @param numSparkJobRowsInOutputOp number of job rows of this output op, passed to the status cell
 * @param isFirstRow                whether the output op cells must be emitted in this row
 * @param jobId                     id of the job
 */
private def generateDroppedJobRow(
outputOpData: OutputOperationUIData,
outputOpDescription: Seq[Node],
formattedOutputOpDuration: String,
numSparkJobRowsInOutputOp: Int,
isFirstRow: Boolean,
jobId: Int): Seq[Node] = {
// In the first row, output op id and its information needs to be shown. In other rows, these
// cells will be taken up due to "rowspan".
// scalastyle:off
val prefixCells =
if (isFirstRow) {
{outputOpData.id.toString}
{outputOpDescription}
{formattedOutputOpDuration} ++
{outputOpStatusCell(outputOpData, numSparkJobRowsInOutputOp)}
} else {
Nil
}
// scalastyle:on
{prefixCells}
{if (jobId >= 0) jobId.toString else "-"}
-
-
-
-
}
/**
 * Produces every table row that belongs to a single output operation.
 *
 * An output operation without Spark jobs yields one placeholder row. Otherwise one row
 * is generated per job; only the first of them is flagged as the first row, so that the
 * cells shared by the whole output operation are emitted once.
 *
 * @param request      the HTTP request being served
 * @param outputOpData the output operation to render
 * @param sparkJobs    the jobs of the output operation, with their optional data
 * @return the nodes of all rows, in job order
 */
private def generateOutputOpIdRow(
    request: HttpServletRequest,
    outputOpData: OutputOperationUIData,
    sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = {
  // "-" stands in for a duration that is not known.
  val durationText =
    outputOpData.duration.map(millis => SparkUIUtils.formatDuration(millis)).getOrElse("-")
  val opDescription = generateOutputOpDescription(outputOpData)

  // Row for one job; every row is told the total number of job rows of the output op.
  def rowFor(job: SparkJobIdWithUIData, first: Boolean): Seq[Node] =
    generateJobRow(
      request, outputOpData, opDescription, durationText, sparkJobs.size, first, job)

  sparkJobs.headOption match {
    case None =>
      generateOutputOpRowWithoutSparkJobs(outputOpData, opDescription, durationText)
    case Some(leadingJob) =>
      val leadingRow = rowFor(leadingJob, first = true)
      val remainingRows = sparkJobs.tail.map(job => rowFor(job, first = false))
      (leadingRow ++ remainingRows).flatten
  }
}
/**
 * Renders the description of an output operation from its name and its description text.
 *
 * NOTE(review): the body contains bare expressions where scala.xml markup is expected;
 * the element tags look stripped in this copy. Confirm against upstream before compiling.
 *
 * @param outputOp the output operation to describe
 */
private def generateOutputOpDescription(outputOp: OutputOperationUIData): Seq[Node] = {
{outputOp.name}
{outputOp.description}
}
/**
 * Fetches the data of a Spark job from the store.
 *
 * @param sparkJobId id of the job to look up
 * @return the job data, or `None` when the lookup throws `NoSuchElementException`
 */
private def getJobData(sparkJobId: SparkJobId): Option[JobData] =
  try Some(store.job(sparkJobId))
  catch {
    case _: NoSuchElementException => None
  }
/**
 * Fetches the last attempt of a stage from the store.
 *
 * @param stageId id of the stage to look up
 * @return the stage data, or `None` when the lookup throws `NoSuchElementException`
 */
private def getStageData(stageId: Int): Option[StageData] =
  try Some(store.lastStageAttempt(stageId))
  catch {
    case _: NoSuchElementException => None
  }
/**
 * Turns a failure message into the status text shown for a failed output operation.
 *
 * A message starting with "org.apache.spark.SparkException" is reported as a Spark job
 * error. Any other message is reported with its first line as a headline. In both cases
 * the complete message follows on the next line.
 *
 * @param failure the full failure message
 * @return the status text
 */
private def generateOutputOperationStatusForUI(failure: String): String = {
  val isSparkJobError = failure.startsWith("org.apache.spark.SparkException")
  if (isSparkJobError) {
    "Failed due to Spark job error\n" + failure
  } else {
    // Everything before the first line break, or the whole message when there is none.
    val headline = failure.takeWhile(_ != '\n')
    s"Failed due to error: $headline\n$failure"
  }
}
/**
 * Generate the job table for the batch: the header columns followed by the rows of every
 * output operation, ordered by output operation id.
 *
 * NOTE(review): the table markup at the end of the body contains bare expressions where
 * scala.xml literals are expected; the element tags look stripped in this copy. Confirm
 * against upstream before compiling.
 *
 * @param request     the HTTP request being served
 * @param batchUIData the data of the batch to show
 */
private def generateJobTable(
request: HttpServletRequest,
batchUIData: BatchUIData): Seq[Node] = {
// Output operation id -> ids of its Spark jobs.
val outputOpIdToSparkJobIds = batchUIData.outputOpIdSparkJobIdPairs.groupBy(_.outputOpId).
map { case (outputOpId, outputOpIdAndSparkJobIds) =>
// sort SparkJobIds for each OutputOpId
(outputOpId, outputOpIdAndSparkJobIds.map(_.sparkJobId).toSeq.sorted)
}
// Every output operation paired with its job ids (none when it has no entry above),
// ordered by output operation id.
val outputOps: Seq[(OutputOperationUIData, Seq[SparkJobId])] =
batchUIData.outputOperations.map { case (outputOpId, outputOperation) =>
val sparkJobIds = outputOpIdToSparkJobIds.getOrElse(outputOpId, Seq.empty)
(outputOperation, sparkJobIds)
}.toSeq.sortBy(_._1.id)
// Attach the job data to each job id; the data is empty when the job cannot be looked up.
val outputOpWithJobs = outputOps.map { case (outputOpData, sparkJobIds) =>
(outputOpData, sparkJobIds.map { jobId => SparkJobIdWithUIData(jobId, getJobData(jobId)) })
}
{columns}
{
outputOpWithJobs.map { case (outputOpData, sparkJobs) =>
generateOutputOpIdRow(request, outputOpData, sparkJobs)
}
}
}
/**
 * Renders the page for the batch named by the "id" request parameter: a summary (batch
 * duration, input size, scheduling delay, processing time, total delay and, when present,
 * input metadata) followed by the job table. The whole body runs while holding the
 * streaming listener's monitor.
 *
 * NOTE(review): the summary markup below contains bare text where scala.xml literals are
 * expected; the element tags look stripped in this copy. Confirm against upstream before
 * compiling.
 *
 * @param request the HTTP request; its "id" parameter is parsed as a Long batch time
 * @return the nodes of the complete page
 * @throws IllegalArgumentException if the "id" parameter is missing or the batch does not exist
 */
def render(request: HttpServletRequest): Seq[Node] = streamingListener.synchronized {
// NOTE(review): `id.toLong` throws NumberFormatException for a non-numeric id; presumably
// acceptable here, confirm how the caller reports it.
val batchTime = Option(request.getParameter("id")).map(id => Time(id.toLong))
.getOrElse {
throw new IllegalArgumentException(s"Missing id parameter")
}
val formattedBatchTime =
SparkUIUtils.formatBatchTime(batchTime.milliseconds, streamingListener.batchDuration)
val batchUIData = streamingListener.getBatchUIData(batchTime).getOrElse {
throw new IllegalArgumentException(s"Batch $formattedBatchTime does not exist")
}
// Each delay is shown as "-" when it is not available.
val formattedSchedulingDelay =
batchUIData.schedulingDelay.map(SparkUIUtils.formatDuration).getOrElse("-")
val formattedProcessingTime =
batchUIData.processingDelay.map(SparkUIUtils.formatDuration).getOrElse("-")
val formattedTotalDelay = batchUIData.totalDelay.map(SparkUIUtils.formatDuration).getOrElse("-")
// (input stream id, metadata description) for every input that has a description.
val inputMetadatas = batchUIData.streamIdToInputInfo.values.flatMap { inputInfo =>
inputInfo.metadataDescription.map(desc => inputInfo.inputStreamId -> desc)
}.toSeq
val summary: NodeSeq =
-
Batch Duration:
{SparkUIUtils.formatDuration(streamingListener.batchDuration)}
-
Input data size:
{batchUIData.numRecords} records
-
Scheduling delay:
{formattedSchedulingDelay}
-
Processing time:
{formattedProcessingTime}
-
Total delay:
{formattedTotalDelay}
{
if (inputMetadatas.nonEmpty) {
-
Input Metadata:{generateInputMetadataTable(inputMetadatas)}
}
}
val content = summary ++ generateJobTable(request, batchUIData)
SparkUIUtils.headerSparkPage(
request, s"Details of batch at $formattedBatchTime", content, parent)
}
/**
 * Builds the input metadata table: an "Input" and a "Metadata" header (the latter with a
 * "Batch Input Details" tooltip placed at "right"), followed by one row per entry.
 *
 * NOTE(review): the body contains bare text where scala.xml markup is expected; the
 * element tags look stripped in this copy. Confirm against upstream before compiling.
 *
 * @param inputMetadatas pairs of input stream id and metadata description
 */
def generateInputMetadataTable(inputMetadatas: Seq[(Int, String)]): Seq[Node] = {
Input
Metadata {SparkUIUtils.tooltip("Batch Input Details", "right")}
{inputMetadatas.flatMap(generateInputMetadataRow)}
}
/**
 * Builds one row of the input metadata table: the stream's name (or "Stream-<id>" when
 * the listener has no name for it) and the metadata description converted to HTML.
 *
 * NOTE(review): the body contains bare expressions where scala.xml markup is expected;
 * the element tags look stripped in this copy. Confirm against upstream before compiling.
 *
 * @param inputMetadata pair of input stream id and metadata description
 */
def generateInputMetadataRow(inputMetadata: (Int, String)): Seq[Node] = {
val streamId = inputMetadata._1
{streamingListener.streamName(streamId).getOrElse(s"Stream-$streamId")}
{metadataDescriptionToHTML(inputMetadata._2)}
}
/**
 * Converts a plain-text metadata description into an HTML fragment.
 *
 * The text is HTML-escaped first, so any markup it contains is shown literally. After
 * that, each tab becomes four non-breaking spaces and each line feed becomes a line
 * break element. The result is wrapped in `Unparsed` so that the entities and the break
 * elements inserted here are emitted as-is instead of being escaped a second time.
 *
 * NOTE(review): the two replacement literals were broken across lines in the copy this
 * was restored from; "&nbsp;" x 4 and "<br/>" are reconstructed from the original
 * comment ("tab to 4 spaces", line feed to a line break). Confirm against upstream.
 *
 * @param metadataDescription the raw description text
 * @return a single unparsed node holding the HTML fragment
 */
private def metadataDescriptionToHTML(metadataDescription: String): Seq[Node] = {
  // tab to 4 spaces and "\n" to "<br/>"
  Unparsed(StringEscapeUtils.escapeHtml4(metadataDescription).
    replaceAllLiterally("\t", "&nbsp;&nbsp;&nbsp;&nbsp;").replaceAllLiterally("\n", "<br/>"))
}
/**
 * Builds the status cell of an output operation. A failed operation gets a failure
 * reason cell (built from the failure text prepared by
 * `UIUtils.createOutputOperationFailureForUI`, spanning `rowspan` rows, without the
 * first line in the expandable details). Otherwise the cell shows "-" while the
 * operation has no end time and "Succeeded" once it has one.
 *
 * NOTE(review): the `None` branch contains bare text where scala.xml markup is expected;
 * the element tags (which presumably carried `rowspan`) look stripped in this copy.
 * Confirm against upstream before compiling.
 *
 * @param outputOp the output operation whose status is shown
 * @param rowspan  number of table rows the cell should span
 */
private def outputOpStatusCell(outputOp: OutputOperationUIData, rowspan: Int): Seq[Node] = {
outputOp.failureReason match {
case Some(failureReason) =>
val failureReasonForUI = UIUtils.createOutputOperationFailureForUI(failureReason)
UIUtils.failureReasonCell(
failureReasonForUI, rowspan, includeFirstLineInExpandDetails = false)
case None =>
if (outputOp.endTime.isEmpty) {
-
} else {
Succeeded
}
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy