/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.streaming.ui

import javax.servlet.http.HttpServletRequest

import scala.xml._

import org.apache.commons.text.StringEscapeUtils

import org.apache.spark.status.api.v1.{JobData, StageData}
import org.apache.spark.streaming.Time
import org.apache.spark.streaming.ui.StreamingJobProgressListener.SparkJobId
import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage}

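// Pairs a Spark job id with the job's UI data; `jobData` is None when the job has already been
// dropped from the status store (e.g. because the retained-job limit was exceeded).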
private[ui] case class SparkJobIdWithUIData(sparkJobId: SparkJobId, jobData: Option[JobData])

private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
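  // `parent` is the streaming tab; `parent.parent` is the enclosing SparkUI, whose status store
  // is used below to look up Spark job and stage data for the jobs of this batch.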
  private val streamingListener = parent.listener
  private val store = parent.parent.store

  private def columns: Seq[Node] = {
    <th>Output Op Id</th>
      <th>Description</th>
      <th>Output Op Duration {SparkUIUtils.tooltip("Time taken for all the jobs of this batch to" +
        " finish processing from the time they were submitted.",
        "top")}</th>
      <th>Status</th>
      <th>Job Id</th>
      <th>Job Duration {SparkUIUtils.tooltip("Time taken from submission time to completion " +
        "time of the job", "top")}</th>
      <th class="sorttable_nosort">Stages: Succeeded/Total</th>
      <th class="sorttable_nosort">Tasks (for all stages): Succeeded/Total</th>
      <th>Error</th>
  }

  private def generateJobRow(
      request: HttpServletRequest,
      outputOpData: OutputOperationUIData,
      outputOpDescription: Seq[Node],
      formattedOutputOpDuration: String,
      numSparkJobRowsInOutputOp: Int,
      isFirstRow: Boolean,
      jobIdWithData: SparkJobIdWithUIData): Seq[Node] = {
    if (jobIdWithData.jobData.isDefined) {
      generateNormalJobRow(request, outputOpData, outputOpDescription, formattedOutputOpDuration,
        numSparkJobRowsInOutputOp, isFirstRow, jobIdWithData.jobData.get)
    } else {
      generateDroppedJobRow(outputOpData, outputOpDescription, formattedOutputOpDuration,
        numSparkJobRowsInOutputOp, isFirstRow, jobIdWithData.sparkJobId)
    }
  }

  private def generateOutputOpRowWithoutSparkJobs(
    outputOpData: OutputOperationUIData,
    outputOpDescription: Seq[Node],
    formattedOutputOpDuration: String): Seq[Node] = {
    <tr>
      <td class="output-op-id-cell">{outputOpData.id.toString}</td>
      <td>{outputOpDescription}</td>
      <td>{formattedOutputOpDuration}</td>
      {outputOpStatusCell(outputOpData, rowspan = 1)}
      <!-- Job Id -->
      <td>-</td>
      <!-- Duration -->
      <td>-</td>
      <!-- Stages: Succeeded/Total -->
      <td>-</td>
      <!-- Tasks (for all stages): Succeeded/Total -->
      <td>-</td>
      <!-- Error -->
      <td>-</td>
    </tr>
  }

  /**
   * Generate a row for a Spark Job. Because duplicated output op info needs to be collapsed into
   * one cell, we use "rowspan" for the first row of an output op.
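   *
   * For example (an illustrative sketch, not the exact markup): an output op with two Spark jobs
   * renders as two rows, and only the first row carries the output op cells:
   *
   *   <tr> <td rowspan="2">op id</td> <td rowspan="2">description</td> ... <td>job 0</td> </tr>
   *   <tr> <td>job 1</td> </tr>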
   */
  private def generateNormalJobRow(
      request: HttpServletRequest,
      outputOpData: OutputOperationUIData,
      outputOpDescription: Seq[Node],
      formattedOutputOpDuration: String,
      numSparkJobRowsInOutputOp: Int,
      isFirstRow: Boolean,
      sparkJob: JobData): Seq[Node] = {
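    // Job duration: from submission to completion, or to "now" if the job is still running.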
    val duration: Option[Long] = {
      sparkJob.submissionTime.map { start =>
        val end = sparkJob.completionTime.map(_.getTime()).getOrElse(System.currentTimeMillis())
        end - start.getTime()
      }
    }
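    // Scan this job's stages from the highest (most recently submitted) stage id downwards and
    // surface the failure reason of the first failed stage found, if any.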
    val lastFailureReason =
      sparkJob.stageIds.sorted(Ordering.Int.reverse).flatMap(getStageData).
      dropWhile(_.failureReason == None).take(1). // get the first info that contains failure
      flatMap(info => info.failureReason).headOption.getOrElse("")
    val formattedDuration = duration.map(d => SparkUIUtils.formatDuration(d)).getOrElse("-")
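    // Link to this job's detail page under the Spark UI "Jobs" tab.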
    val detailUrl = s"${SparkUIUtils.prependBaseUri(
      request, parent.basePath)}/jobs/job/?id=${sparkJob.jobId}"

    // In the first row, output op id and its information needs to be shown. In other rows, these
    // cells will be taken up due to "rowspan".
    // scalastyle:off
    val prefixCells =
      if (isFirstRow) {
        <td class="output-op-id-cell" rowspan={numSparkJobRowsInOutputOp.toString}>{outputOpData.id.toString}</td>
        <td rowspan={numSparkJobRowsInOutputOp.toString}>
          {outputOpDescription}
        </td>
        <td rowspan={numSparkJobRowsInOutputOp.toString}>{formattedOutputOpDuration}</td> ++
        {outputOpStatusCell(outputOpData, numSparkJobRowsInOutputOp)}
      } else {
        Nil
      }
    // scalastyle:on

    <tr>
      {prefixCells}
      <td sorttable_customkey={sparkJob.jobId.toString}>
        <a href={detailUrl}>
          {sparkJob.jobId}{sparkJob.jobGroup.map(id => s"($id)").getOrElse("")}
        </a>
      </td>
      <td sorttable_customkey={duration.getOrElse(Long.MaxValue).toString}>
        {formattedDuration}
      </td>
      <td class="stage-progress-cell">
        {sparkJob.numCompletedStages}/{sparkJob.stageIds.size - sparkJob.numSkippedStages}
        {if (sparkJob.numFailedStages > 0) s"(${sparkJob.numFailedStages} failed)"}
        {if (sparkJob.numSkippedStages > 0) s"(${sparkJob.numSkippedStages} skipped)"}
      </td>
      <td class="progress-cell">
        {
          SparkUIUtils.makeProgressBar(
            started = sparkJob.numActiveTasks,
            completed = sparkJob.numCompletedTasks,
            failed = sparkJob.numFailedTasks,
            skipped = sparkJob.numSkippedTasks,
            reasonToNumKilled = sparkJob.killedTasksSummary,
            total = sparkJob.numTasks - sparkJob.numSkippedTasks)
        }
      </td>
      {UIUtils.failureReasonCell(lastFailureReason)}
    </tr>
  }

  /**
   * If a job was dropped by the Spark listener because it exceeded the retained-job limit, we
   * only show the job id, with "-" in the remaining cells.
   */
  private def generateDroppedJobRow(
      outputOpData: OutputOperationUIData,
      outputOpDescription: Seq[Node],
      formattedOutputOpDuration: String,
      numSparkJobRowsInOutputOp: Int,
      isFirstRow: Boolean,
      jobId: Int): Seq[Node] = {
    // In the first row, output op id and its information needs to be shown. In other rows, these
    // cells will be taken up due to "rowspan".
    // scalastyle:off
    val prefixCells =
      if (isFirstRow) {
        <td class="output-op-id-cell" rowspan={numSparkJobRowsInOutputOp.toString}>{outputOpData.id.toString}</td>
          <td rowspan={numSparkJobRowsInOutputOp.toString}>{outputOpDescription}</td>
          <td rowspan={numSparkJobRowsInOutputOp.toString}>{formattedOutputOpDuration}</td> ++
          {outputOpStatusCell(outputOpData, numSparkJobRowsInOutputOp)}
      } else {
        Nil
      }
    // scalastyle:on

    <tr>
      {prefixCells}
      <td sorttable_customkey={jobId.toString}>
        {if (jobId >= 0) jobId.toString else "-"}
      </td>
      <!-- Duration -->
      <td>-</td>
      <!-- Stages: Succeeded/Total -->
      <td>-</td>
      <!-- Tasks (for all stages): Succeeded/Total -->
      <td>-</td>
      <!-- Error -->
      <td>-</td>
    </tr>
  }

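  /**
   * Generate the row(s) for one output operation: a single row of "-" cells when the op has no
   * Spark jobs, otherwise one row per job with the output op cells spanning all of them.
   */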
  private def generateOutputOpIdRow(
      request: HttpServletRequest,
      outputOpData: OutputOperationUIData,
      sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = {
    val formattedOutputOpDuration =
      if (outputOpData.duration.isEmpty) {
        "-"
      } else {
        SparkUIUtils.formatDuration(outputOpData.duration.get)
      }

    val description = generateOutputOpDescription(outputOpData)

    if (sparkJobs.isEmpty) {
      generateOutputOpRowWithoutSparkJobs(outputOpData, description, formattedOutputOpDuration)
    } else {
      val firstRow =
        generateJobRow(
          request,
          outputOpData,
          description,
          formattedOutputOpDuration,
          sparkJobs.size,
          true,
          sparkJobs.head)
      val tailRows =
        sparkJobs.tail.map { sparkJob =>
          generateJobRow(
            request,
            outputOpData,
            description,
            formattedOutputOpDuration,
            sparkJobs.size,
            false,
            sparkJob)
        }
      (firstRow ++ tailRows).flatten
    }
  }

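  /**
   * Render the output op's name together with an expandable "+details" section that reveals the
   * full op description.
   */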
  private def generateOutputOpDescription(outputOp: OutputOperationUIData): Seq[Node] = {
    <div>
      {outputOp.name}
      <span onclick="this.parentNode.querySelector('.stage-details').classList.toggle('collapsed')"
            class="expand-details">
        +details
      </span>
      <div class="stage-details collapsed">
        <pre>{outputOp.description}</pre>
      </div>
    </div>
  }

  private def getJobData(sparkJobId: SparkJobId): Option[JobData] = {
    try {
      Some(store.job(sparkJobId))
    } catch {
      case _: NoSuchElementException => None
    }
  }

  private def getStageData(stageId: Int): Option[StageData] = {
    try {
      Some(store.lastStageAttempt(stageId))
    } catch {
      case _: NoSuchElementException => None
    }
  }

  private def generateOutputOperationStatusForUI(failure: String): String = {
    if (failure.startsWith("org.apache.spark.SparkException")) {
      "Failed due to Spark job error\n" + failure
    } else {
      var nextLineIndex = failure.indexOf("\n")
      if (nextLineIndex < 0) {
        nextLineIndex = failure.length
      }
      val firstLine = failure.substring(0, nextLineIndex)
      s"Failed due to error: $firstLine\n$failure"
    }
  }

  /**
   * Generate the job table for the batch.
   */
  private def generateJobTable(
      request: HttpServletRequest,
      batchUIData: BatchUIData): Seq[Node] = {
    val outputOpIdToSparkJobIds = batchUIData.outputOpIdSparkJobIdPairs.groupBy(_.outputOpId).
      map { case (outputOpId, outputOpIdAndSparkJobIds) =>
        // sort SparkJobIds for each OutputOpId
        (outputOpId, outputOpIdAndSparkJobIds.map(_.sparkJobId).toSeq.sorted)
      }

    val outputOps: Seq[(OutputOperationUIData, Seq[SparkJobId])] =
      batchUIData.outputOperations.map { case (outputOpId, outputOperation) =>
        val sparkJobIds = outputOpIdToSparkJobIds.getOrElse(outputOpId, Seq.empty)
        (outputOperation, sparkJobIds)
      }.toSeq.sortBy(_._1.id)
    val outputOpWithJobs = outputOps.map { case (outputOpData, sparkJobIds) =>
      (outputOpData, sparkJobIds.map { jobId => SparkJobIdWithUIData(jobId, getJobData(jobId)) })
    }

    <table id="batch-job-table" class="table table-bordered table-striped table-condensed">
      <thead>
        {columns}
      </thead>
      <tbody>
        {
          outputOpWithJobs.map { case (outputOpData, sparkJobs) =>
            generateOutputOpIdRow(request, outputOpData, sparkJobs)
          }
        }
      </tbody>
    </table>
  }
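
  // Entry point for the page: parses the "id" request parameter as a batch time, looks up that
  // batch's UI data under the listener's lock for a consistent snapshot, and renders the summary
  // list followed by the job table.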
  def render(request: HttpServletRequest): Seq[Node] = streamingListener.synchronized {
    val batchTime = Option(request.getParameter("id")).map(id => Time(id.toLong)).getOrElse {
      throw new IllegalArgumentException(s"Missing id parameter")
    }
    val formattedBatchTime =
      SparkUIUtils.formatBatchTime(batchTime.milliseconds, streamingListener.batchDuration)

    val batchUIData = streamingListener.getBatchUIData(batchTime).getOrElse {
      throw new IllegalArgumentException(s"Batch $formattedBatchTime does not exist")
    }

    val formattedSchedulingDelay =
      batchUIData.schedulingDelay.map(SparkUIUtils.formatDuration).getOrElse("-")
    val formattedProcessingTime =
      batchUIData.processingDelay.map(SparkUIUtils.formatDuration).getOrElse("-")
    val formattedTotalDelay = batchUIData.totalDelay.map(SparkUIUtils.formatDuration).getOrElse("-")
    val inputMetadatas = batchUIData.streamIdToInputInfo.values.flatMap { inputInfo =>
      inputInfo.metadataDescription.map(desc => inputInfo.inputStreamId -> desc)
    }.toSeq
    val summary: NodeSeq =
      <div>
        <ul class="unstyled">
          <li>
            <strong>Batch Duration: </strong>
            {SparkUIUtils.formatDuration(streamingListener.batchDuration)}
          </li>
          <li>
            <strong>Input data size: </strong>
            {batchUIData.numRecords} records
          </li>
          <li>
            <strong>Scheduling delay: </strong>
            {formattedSchedulingDelay}
          </li>
          <li>
            <strong>Processing time: </strong>
            {formattedProcessingTime}
          </li>
          <li>
            <strong>Total delay: </strong>
            {formattedTotalDelay}
          </li>
          {
            if (inputMetadatas.nonEmpty) {
              <li>
                <strong>Input Metadata:</strong>{generateInputMetadataTable(inputMetadatas)}
              </li>
            }
          }
        </ul>
      </div>

    val content = summary ++ generateJobTable(request, batchUIData)

    SparkUIUtils.headerSparkPage(
      request, s"Details of batch at $formattedBatchTime", content, parent)
  }

  def generateInputMetadataTable(inputMetadatas: Seq[(Int, String)]): Seq[Node] = {
    <table class={SparkUIUtils.TABLE_CLASS_STRIPED_SORTABLE}>
      <thead>
        <tr>
          <th>Input</th>
          <th>Metadata {SparkUIUtils.tooltip("Batch Input Details", "right")}</th>
        </tr>
      </thead>
      <tbody>
        {inputMetadatas.flatMap(generateInputMetadataRow)}
      </tbody>
    </table>
  }

  def generateInputMetadataRow(inputMetadata: (Int, String)): Seq[Node] = {
    val streamId = inputMetadata._1

    <tr>
      <td>{streamingListener.streamName(streamId).getOrElse(s"Stream-$streamId")}</td>
      <td>{metadataDescriptionToHTML(inputMetadata._2)}</td>
    </tr>
  }

  private def metadataDescriptionToHTML(metadataDescription: String): Seq[Node] = {
    // tab to 4 spaces and "\n" to "<br/>"
    Unparsed(StringEscapeUtils.escapeHtml4(metadataDescription).
      replaceAllLiterally("\t", "&nbsp;&nbsp;&nbsp;&nbsp;").replaceAllLiterally("\n", "<br/>"))
  }

  private def outputOpStatusCell(outputOp: OutputOperationUIData, rowspan: Int): Seq[Node] = {
    outputOp.failureReason match {
      case Some(failureReason) =>
        val failureReasonForUI = UIUtils.createOutputOperationFailureForUI(failureReason)
        UIUtils.failureReasonCell(
          failureReasonForUI, rowspan, includeFirstLineInExpandDetails = false)
      case None =>
        if (outputOp.endTime.isEmpty) {
          <td rowspan={rowspan.toString}>-</td>
        } else {
          <td rowspan={rowspan.toString}>Succeeded</td>
        }
    }
  }
}