All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.sql.streaming.ui.StreamingQueryStatisticsPage.scala Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.streaming.ui

import java.{util => ju}
import java.lang.{Long => JLong}
import java.util.UUID
import javax.servlet.http.HttpServletRequest

import scala.xml.{Node, Unparsed}

import org.apache.spark.internal.Logging
import org.apache.spark.sql.streaming.ui.UIUtils._
import org.apache.spark.ui.{GraphUIData, JsCollector, UIUtils => SparkUIUtils, WebUIPage}

private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
  extends WebUIPage("statistics") with Logging {

  def generateLoadResources(request: HttpServletRequest): Seq[Node] = {
    // scalastyle:off
    
        
      
      
    // scalastyle:on
  }

  override def render(request: HttpServletRequest): Seq[Node] = {
    val parameterId = request.getParameter("id")
    require(parameterId != null && parameterId.nonEmpty, "Missing id parameter")

    val query = parent.statusListener.allQueryStatus.find { case q =>
      q.runId.equals(UUID.fromString(parameterId))
    }.getOrElse(throw new IllegalArgumentException(s"Failed to find streaming query $parameterId"))

    val resources = generateLoadResources(request)
    val basicInfo = generateBasicInfo(query)
    val content =
      resources ++
        basicInfo ++
        generateStatTable(query)
    SparkUIUtils.headerSparkPage(request, "Streaming Query Statistics", content, parent)
  }

  def generateTimeMap(times: Seq[Long]): Seq[Node] = {
    val js = "var timeFormat = {};\n" + times.map { time =>
      val formattedTime = SparkUIUtils.formatBatchTime(time, 1, showYYYYMMSS = false)
      s"timeFormat[$time] = '$formattedTime';"
    }.mkString("\n")

    
  }

  def generateTimeTipStrings(values: Array[(Long, Long)]): Seq[Node] = {
    val js = "var timeTipStrings = {};\n" + values.map { case (batchId, time) =>
      val formattedTime = SparkUIUtils.formatBatchTime(time, 1, showYYYYMMSS = false)
      s"timeTipStrings[$time] = 'batch $batchId ($formattedTime)';"
    }.mkString("\n")

    
  }

  def generateFormattedTimeTipStrings(values: Array[(Long, Long)]): Seq[Node] = {
    val js = "var formattedTimeTipStrings = {};\n" + values.map { case (batchId, time) =>
      val formattedTime = SparkUIUtils.formatBatchTime(time, 1, showYYYYMMSS = false)
      s"""formattedTimeTipStrings["$formattedTime"] = 'batch $batchId ($formattedTime)';"""
    }.mkString("\n")

    
  }

  def generateTimeToValues(values: Array[(Long, ju.Map[String, JLong])]): Seq[Node] = {
    val durationDataPadding = SparkUIUtils.durationDataPadding(values)
    val js = "var formattedTimeToValues = {};\n" + durationDataPadding.map { case (x, y) =>
      val s = y.toSeq.sortBy(_._1).map(e => s""""${e._2}"""").mkString("[", ",", "]")
      val formattedTime = SparkUIUtils.formatBatchTime(x, 1, showYYYYMMSS = false)
      s"""formattedTimeToValues["$formattedTime"] = $s;"""
    }.mkString("\n")

    
  }

  def generateBasicInfo(query: StreamingQueryUIData): Seq[Node] = {
    val duration = if (query.isActive) {
      SparkUIUtils.formatDurationVerbose(System.currentTimeMillis() - query.startTimestamp)
    } else {
      withNoProgress(query, {
        val end = query.lastProgress.timestamp
        val start = query.recentProgress.head.timestamp
        SparkUIUtils.formatDurationVerbose(
          parseProgressTimestamp(end) - parseProgressTimestamp(start))
      }, "-")
    }

    val name = UIUtils.getQueryName(query)
    val numBatches = withNoProgress(query, { query.lastProgress.batchId + 1L }, 0)
    
Running batches for {duration} since {SparkUIUtils.formatDate(query.startTimestamp)} ({numBatches} completed batches)

Name: {name}
Id: {query.id}
RunId: {query.runId}

} def generateStatTable(query: StreamingQueryUIData): Seq[Node] = { val batchToTimestamps = withNoProgress(query, query.recentProgress.map(p => (p.batchId, parseProgressTimestamp(p.timestamp))), Array.empty[(Long, Long)]) val batchTimes = batchToTimestamps.map(_._2) val minBatchTime = withNoProgress(query, parseProgressTimestamp(query.recentProgress.head.timestamp), 0L) val maxBatchTime = withNoProgress(query, parseProgressTimestamp(query.lastProgress.timestamp), 0L) val maxRecordRate = withNoProgress(query, query.recentProgress.map(_.inputRowsPerSecond).max, 0L) val minRecordRate = 0L val maxProcessRate = withNoProgress(query, query.recentProgress.map(_.processedRowsPerSecond).max, 0L) val minProcessRate = 0L val maxRows = withNoProgress(query, query.recentProgress.map(_.numInputRows).max, 0L) val minRows = 0L val maxBatchDuration = withNoProgress(query, query.recentProgress.map(_.batchDuration).max, 0L) val minBatchDuration = 0L val inputRateData = withNoProgress(query, query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), withNumberInvalid { p.inputRowsPerSecond })), Array.empty[(Long, Double)]) val processRateData = withNoProgress(query, query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), withNumberInvalid { p.processedRowsPerSecond })), Array.empty[(Long, Double)]) val inputRowsData = withNoProgress(query, query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), withNumberInvalid { p.numInputRows })), Array.empty[(Long, Double)]) val batchDurations = withNoProgress(query, query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), withNumberInvalid { p.batchDuration })), Array.empty[(Long, Double)]) val operationDurationData = withNoProgress( query, query.recentProgress.map { p => val durationMs = p.durationMs // remove "triggerExecution" as it count the other operation duration. durationMs.remove("triggerExecution") (parseProgressTimestamp(p.timestamp), durationMs) }, Array.empty[(Long, ju.Map[String, JLong])]) val jsCollector = new JsCollector val graphUIDataForInputRate = new GraphUIData( "input-rate-timeline", "input-rate-histogram", inputRateData, minBatchTime, maxBatchTime, minRecordRate, maxRecordRate, "records/sec") graphUIDataForInputRate.generateDataJs(jsCollector) val graphUIDataForProcessRate = new GraphUIData( "process-rate-timeline", "process-rate-histogram", processRateData, minBatchTime, maxBatchTime, minProcessRate, maxProcessRate, "records/sec") graphUIDataForProcessRate.generateDataJs(jsCollector) val graphUIDataForInputRows = new GraphUIData( "input-rows-timeline", "input-rows-histogram", inputRowsData, minBatchTime, maxBatchTime, minRows, maxRows, "records") graphUIDataForInputRows.generateDataJs(jsCollector) val graphUIDataForBatchDuration = new GraphUIData( "batch-duration-timeline", "batch-duration-histogram", batchDurations, minBatchTime, maxBatchTime, minBatchDuration, maxBatchDuration, "ms") graphUIDataForBatchDuration.generateDataJs(jsCollector) val graphUIDataForDuration = new GraphUIData( "duration-area-stack", "", Seq.empty[(Long, Double)], 0L, 0L, 0L, 0L, "ms") val table = if (query.lastProgress != null) { // scalastyle:off
Timelines Histograms
Input Rate {SparkUIUtils.tooltip("The aggregate (across all sources) rate of data arriving.", "right")}
{graphUIDataForInputRate.generateTimelineHtml(jsCollector)} {graphUIDataForInputRate.generateHistogramHtml(jsCollector)}
Process Rate {SparkUIUtils.tooltip("The aggregate (across all sources) rate at which Spark is processing data.", "right")}
{graphUIDataForProcessRate.generateTimelineHtml(jsCollector)} {graphUIDataForProcessRate.generateHistogramHtml(jsCollector)}
Input Rows {SparkUIUtils.tooltip("The aggregate (across all sources) number of records processed in a trigger.", "right")}
{graphUIDataForInputRows.generateTimelineHtml(jsCollector)} {graphUIDataForInputRows.generateHistogramHtml(jsCollector)}
Batch Duration {SparkUIUtils.tooltip("The process duration of each batch.", "right")}
{graphUIDataForBatchDuration.generateTimelineHtml(jsCollector)} {graphUIDataForBatchDuration.generateHistogramHtml(jsCollector)}
Operation Duration {SparkUIUtils.tooltip("The amount of time taken to perform various operations in milliseconds.", "right")}
{graphUIDataForDuration.generateAreaStackHtmlWithData(jsCollector, operationDurationData)}
} else {
No visualization information available.
// scalastyle:on } generateTimeToValues(operationDurationData) ++ generateFormattedTimeTipStrings(batchToTimestamps) ++ generateTimeMap(batchTimes) ++ generateTimeTipStrings(batchToTimestamps) ++ table ++ jsCollector.toHtml } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy