All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.streaming.ui.StreamingPage.scala Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.streaming.ui

import java.util.concurrent.TimeUnit
import javax.servlet.http.HttpServletRequest

import scala.collection.mutable
import scala.xml.{Node, Unparsed}

import org.apache.spark.internal.Logging
import org.apache.spark.ui.{GraphUIData, JsCollector, UIUtils => SparkUIUtils, WebUIPage}
import org.apache.spark.util.Utils

/**
 * A helper class for "scheduling delay", "processing time" and "total delay" to generate data that
 * will be used in the timeline and histogram graphs.
 *
 * @param data (batchTime, milliseconds). "milliseconds" is something like "processing time".
 */
private[ui] class MillisecondsStatUIData(data: Seq[(Long, Long)]) {

  /**
   * Converting the original data as per `unit`.
   */
  def timelineData(unit: TimeUnit): Seq[(Long, Double)] =
    data.map(x => x._1 -> UIUtils.convertToTimeUnit(x._2, unit))

  /**
   * Converting the original data as per `unit`.
   */
  def histogramData(unit: TimeUnit): Seq[Double] =
    data.map(x => UIUtils.convertToTimeUnit(x._2, unit))

  val avg: Option[Long] = if (data.isEmpty) None else Some(data.map(_._2).sum / data.size)

  val formattedAvg: String = StreamingPage.formatDurationOption(avg)

  val max: Option[Long] = if (data.isEmpty) None else Some(data.map(_._2).max)
}

/**
 * A helper class for "input rate" to generate data that will be used in the timeline and histogram
 * graphs.
 *
 * @param data (batch time, record rate).
 */
private[ui] class RecordRateUIData(val data: Seq[(Long, Double)]) {

  val avg: Option[Double] = if (data.isEmpty) None else Some(data.map(_._2).sum / data.size)

  val formattedAvg: String = avg.map("%.2f".format(_)).getOrElse("-")

  val max: Option[Double] = if (data.isEmpty) None else Some(data.map(_._2).max)
}

/** Page for Spark Web UI that shows statistics of a streaming job */
private[ui] class StreamingPage(parent: StreamingTab)
  extends WebUIPage("") with Logging {

  import StreamingPage._

  private val listener = parent.listener

  private def startTime: Long = listener.startTime

  /** Render the page */
  def render(request: HttpServletRequest): Seq[Node] = {
    val resources = generateLoadResources(request)
    val onClickTimelineFunc = generateOnClickTimelineFunction()
    val basicInfo = generateBasicInfo()
    val content = resources ++
      onClickTimelineFunc ++ basicInfo ++
      listener.synchronized {
        generateStatTable() ++
          generateBatchListTables(request)
      }
    SparkUIUtils.headerSparkPage(request, "Streaming Statistics", content, parent)
  }

  /**
   * Generate html that will load css/js files for StreamingPage
   */
  private def generateLoadResources(request: HttpServletRequest): Seq[Node] = {
    // scalastyle:off
    
      
      
    // scalastyle:on
  }

  /** Generate html that will set onClickTimeline declared in streaming-page.js */
  private def generateOnClickTimelineFunction(): Seq[Node] = {
    val js = "onClickTimeline = getOnClickTimelineFunction();"
    
  }

  /** Generate basic information of the streaming program */
  private def generateBasicInfo(): Seq[Node] = {
    val timeSinceStart = System.currentTimeMillis() - startTime
    
Running batches of {SparkUIUtils.formatDurationVerbose(listener.batchDuration)} for {SparkUIUtils.formatDurationVerbose(timeSinceStart)} since {SparkUIUtils.formatDate(startTime)} ({listener.numTotalCompletedBatches} completed batches, {listener.numTotalReceivedRecords} records)

} /** * Generate a global "timeFormat" dictionary in the JavaScript to store the time and its formatted * string. Because we cannot specify a timezone in JavaScript, to make sure the server and client * use the same timezone, we use the "timeFormat" dictionary to format all time values used in the * graphs. * * @param times all time values that will be used in the graphs. */ private def generateTimeMap(times: Seq[Long]): Seq[Node] = { val js = "var timeFormat = {};\n" + times.map { time => val formattedTime = SparkUIUtils.formatBatchTime(time, listener.batchDuration, showYYYYMMSS = false) s"timeFormat[$time] = '$formattedTime';" }.mkString("\n") } private def generateTimeTipStrings(times: Seq[Long]): Seq[Node] = { // We leverage timeFormat as the value would be same as timeFormat. This means it is // sensitive to the order - generateTimeMap should be called earlier than this. val js = "var timeTipStrings = {};\n" + times.map { time => s"timeTipStrings[$time] = timeFormat[$time];" }.mkString("\n") } private def generateStatTable(): Seq[Node] = { val batches = listener.retainedBatches val batchTimes = batches.map(_.batchTime.milliseconds) val minBatchTime = if (batchTimes.isEmpty) startTime else batchTimes.min val maxBatchTime = if (batchTimes.isEmpty) startTime else batchTimes.max val recordRateForAllStreams = new RecordRateUIData(batches.map { batchInfo => (batchInfo.batchTime.milliseconds, batchInfo.numRecords * 1000.0 / listener.batchDuration) }) val schedulingDelay = new MillisecondsStatUIData(batches.flatMap { batchInfo => batchInfo.schedulingDelay.map(batchInfo.batchTime.milliseconds -> _) }) val processingTime = new MillisecondsStatUIData(batches.flatMap { batchInfo => batchInfo.processingDelay.map(batchInfo.batchTime.milliseconds -> _) }) val totalDelay = new MillisecondsStatUIData(batches.flatMap { batchInfo => batchInfo.totalDelay.map(batchInfo.batchTime.milliseconds -> _) }) // Use the max value of "schedulingDelay", "processingTime", and "totalDelay" to make the // Y axis ranges same. val _maxTime = (for (m1 <- schedulingDelay.max; m2 <- processingTime.max; m3 <- totalDelay.max) yield m1 max m2 max m3).getOrElse(0L) // Should start at 0 val minTime = 0L val (maxTime, normalizedUnit) = UIUtils.normalizeDuration(_maxTime) val formattedUnit = UIUtils.shortTimeUnitString(normalizedUnit) // Use the max input rate for all InputDStreams' graphs to make the Y axis ranges same. // If it's not an integral number, just use its ceil integral number. val maxRecordRate = recordRateForAllStreams.max.map(_.ceil.toLong).getOrElse(0L) val minRecordRate = 0L val batchInterval = UIUtils.convertToTimeUnit(listener.batchDuration, normalizedUnit) val jsCollector = new JsCollector val graphUIDataForRecordRateOfAllStreams = new GraphUIData( "all-stream-records-timeline", "all-stream-records-histogram", recordRateForAllStreams.data, minBatchTime, maxBatchTime, minRecordRate, maxRecordRate, "records/sec") graphUIDataForRecordRateOfAllStreams.generateDataJs(jsCollector) val graphUIDataForSchedulingDelay = new GraphUIData( "scheduling-delay-timeline", "scheduling-delay-histogram", schedulingDelay.timelineData(normalizedUnit), minBatchTime, maxBatchTime, minTime, maxTime, formattedUnit) graphUIDataForSchedulingDelay.generateDataJs(jsCollector) val graphUIDataForProcessingTime = new GraphUIData( "processing-time-timeline", "processing-time-histogram", processingTime.timelineData(normalizedUnit), minBatchTime, maxBatchTime, minTime, maxTime, formattedUnit, Some(batchInterval)) graphUIDataForProcessingTime.generateDataJs(jsCollector) val graphUIDataForTotalDelay = new GraphUIData( "total-delay-timeline", "total-delay-histogram", totalDelay.timelineData(normalizedUnit), minBatchTime, maxBatchTime, minTime, maxTime, formattedUnit) graphUIDataForTotalDelay.generateDataJs(jsCollector) // It's false before the user registers the first InputDStream val hasStream = listener.streamIds.nonEmpty val numCompletedBatches = listener.retainedCompletedBatches.size val numActiveBatches = batchTimes.length - numCompletedBatches val numReceivers = listener.numInactiveReceivers + listener.numActiveReceivers val table = // scalastyle:off {if (hasStream) { }}
Timelines (Last {batchTimes.length} batches, {numActiveBatches} active, {numCompletedBatches} completed) Histograms
{ if (hasStream) { Input Rate } else { Input Rate } }
{ if (numReceivers > 0) {
Receivers: {listener.numActiveReceivers} / {numReceivers} active
} }
Avg: {recordRateForAllStreams.formattedAvg} records/sec
{graphUIDataForRecordRateOfAllStreams.generateTimelineHtml(jsCollector)} {graphUIDataForRecordRateOfAllStreams.generateHistogramHtml(jsCollector)}
Scheduling Delay {SparkUIUtils.tooltip("Time taken by Streaming scheduler to submit jobs of a batch", "top")}
Avg: {schedulingDelay.formattedAvg}
{graphUIDataForSchedulingDelay.generateTimelineHtml(jsCollector)} {graphUIDataForSchedulingDelay.generateHistogramHtml(jsCollector)}
Processing Time {SparkUIUtils.tooltip("Time taken to process all jobs of a batch", "top")}
Avg: {processingTime.formattedAvg}
{graphUIDataForProcessingTime.generateTimelineHtml(jsCollector)} {graphUIDataForProcessingTime.generateHistogramHtml(jsCollector)}
Total Delay {SparkUIUtils.tooltip("Total time taken to handle a batch", "top")}
Avg: {totalDelay.formattedAvg}
{graphUIDataForTotalDelay.generateTimelineHtml(jsCollector)} {graphUIDataForTotalDelay.generateHistogramHtml(jsCollector)}
// scalastyle:on generateTimeMap(batchTimes) ++ generateTimeTipStrings(batchTimes) ++ table ++ jsCollector.toHtml } private def generateInputDStreamsTable( jsCollector: JsCollector, minX: Long, maxX: Long, minY: Double): Seq[Node] = { val maxYCalculated = listener.receivedRecordRateWithBatchTime.values .flatMap { case streamAndRates => streamAndRates.map { case (_, recordRate) => recordRate } } .reduceOption[Double](math.max) .map(_.ceil.toLong) .getOrElse(0L) val content: Seq[Node] = listener.receivedRecordRateWithBatchTime.toList.sortBy(_._1).flatMap { case (streamId, recordRates) => generateInputDStreamRow( jsCollector, streamId, recordRates, minX, maxX, minY, maxYCalculated) } // scalastyle:off {content}
Status
Executor ID / Host
Last Error Time
Last Error Message
// scalastyle:on } private def generateInputDStreamRow( jsCollector: JsCollector, streamId: Int, recordRates: Seq[(Long, Double)], minX: Long, maxX: Long, minY: Double, maxY: Double): Seq[Node] = { // If this is a ReceiverInputDStream, we need to show the receiver info. Or we only need the // InputDStream name. val receiverInfo = listener.receiverInfo(streamId) val receiverName = receiverInfo.map(_.name). orElse(listener.streamName(streamId)).getOrElse(s"Stream-$streamId") val receiverActive = receiverInfo.map { info => if (info.active) "ACTIVE" else "INACTIVE" }.getOrElse(emptyCell) val receiverLocation = receiverInfo.map { info => val executorId = if (info.executorId.isEmpty) emptyCell else info.executorId val location = if (info.location.isEmpty) emptyCell else info.location s"$executorId / $location" }.getOrElse(emptyCell) val receiverLastError = receiverInfo.map { info => val msg = s"${info.lastErrorMessage} - ${info.lastError}" if (msg.length > 100) msg.take(97) + "..." else msg }.getOrElse(emptyCell) val receiverLastErrorTime = receiverInfo.map { r => if (r.lastErrorTime < 0) "-" else SparkUIUtils.formatDate(r.lastErrorTime) }.getOrElse(emptyCell) val receivedRecords = new RecordRateUIData(recordRates) val graphUIDataForRecordRate = new GraphUIData( s"stream-$streamId-records-timeline", s"stream-$streamId-records-histogram", receivedRecords.data, minX, maxX, minY, maxY, "records/sec") graphUIDataForRecordRate.generateDataJs(jsCollector)
{receiverName}
Avg: {receivedRecords.formattedAvg} records/sec
{receiverActive} {receiverLocation} {receiverLastErrorTime}
{receiverLastError}
{graphUIDataForRecordRate.generateTimelineHtml(jsCollector)} {graphUIDataForRecordRate.generateHistogramHtml(jsCollector)} } private def streamingTable(request: HttpServletRequest, batches: Seq[BatchUIData], tableTag: String): Seq[Node] = { val interval: Long = listener.batchDuration val streamingPage = Option(request.getParameter(s"$tableTag.page")).map(_.toInt).getOrElse(1) try { new StreamingPagedTable( request, tableTag, batches, SparkUIUtils.prependBaseUri(request, parent.basePath), "streaming", interval ).table(streamingPage) } catch { case e @ (_: IllegalArgumentException | _: IndexOutOfBoundsException) =>

Error while rendering streaming table:

            {Utils.exceptionString(e)}
          
} } private def generateBatchListTables(request: HttpServletRequest): Seq[Node] = { val runningBatches = listener.runningBatches.sortBy(_.batchTime.milliseconds).reverse val waitingBatches = listener.waitingBatches.sortBy(_.batchTime.milliseconds).reverse val completedBatches = listener.retainedCompletedBatches. sortBy(_.batchTime.milliseconds).reverse val content = mutable.ListBuffer[Node]() if (runningBatches.nonEmpty) { content ++=

Running Batches ({runningBatches.size})

{ streamingTable(request, runningBatches, "runningBatches") }
} if (waitingBatches.nonEmpty) { content ++=

Waiting Batches ({waitingBatches.size})

{ streamingTable(request, waitingBatches, "waitingBatches") }
} if (completedBatches.nonEmpty) { content ++=

Completed Batches (last {completedBatches.size} out of {listener.numTotalCompletedBatches})

{ streamingTable(request, completedBatches, "completedBatches") }
} content } } private[ui] object StreamingPage { val BLACK_RIGHT_TRIANGLE_HTML = "▶" val BLACK_DOWN_TRIANGLE_HTML = "▼" val emptyCell = "-" /** * Returns a human-readable string representing a duration such as "5 second 35 ms" */ def formatDurationOption(msOption: Option[Long]): String = { msOption.map(SparkUIUtils.formatDurationVerbose).getOrElse(emptyCell) } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy