All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.sql.execution.ui.SparkPlanGraph.scala Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.ui

import java.util.concurrent.atomic.AtomicLong

import scala.collection.mutable

import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.metric.{SQLMetricParam, SQLMetricValue}

/**
 * A graph used for storing information of an executionPlan of DataFrame.
 *
 * Each graph is defined with a set of nodes and a set of edges. Each node represents a node in the
 * SparkPlan tree, and each edge represents a parent-child relationship between two nodes.
 */
private[ui] case class SparkPlanGraph(
    nodes: Seq[SparkPlanGraphNode], edges: Seq[SparkPlanGraphEdge]) {

  def makeDotFile(metrics: Map[Long, String]): String = {
    val dotFile = new StringBuilder
    dotFile.append("digraph G {\n")
    nodes.foreach(node => dotFile.append(node.makeDotNode(metrics) + "\n"))
    edges.foreach(edge => dotFile.append(edge.makeDotEdge + "\n"))
    dotFile.append("}")
    dotFile.toString()
  }
}

private[sql] object SparkPlanGraph {

  /**
   * Build a SparkPlanGraph from the root of a SparkPlan tree.
   */
  def apply(plan: SparkPlan): SparkPlanGraph = {
    val nodeIdGenerator = new AtomicLong(0)
    val nodes = mutable.ArrayBuffer[SparkPlanGraphNode]()
    val edges = mutable.ArrayBuffer[SparkPlanGraphEdge]()
    buildSparkPlanGraphNode(plan, nodeIdGenerator, nodes, edges)
    new SparkPlanGraph(nodes, edges)
  }

  private def buildSparkPlanGraphNode(
      plan: SparkPlan,
      nodeIdGenerator: AtomicLong,
      nodes: mutable.ArrayBuffer[SparkPlanGraphNode],
      edges: mutable.ArrayBuffer[SparkPlanGraphEdge]): SparkPlanGraphNode = {
    val metrics = plan.metrics.toSeq.map { case (key, metric) =>
      SQLPlanMetric(metric.name.getOrElse(key), metric.id,
        metric.param.asInstanceOf[SQLMetricParam[SQLMetricValue[Any], Any]])
    }
    val node = SparkPlanGraphNode(
      nodeIdGenerator.getAndIncrement(), plan.nodeName, plan.simpleString, plan.metadata, metrics)
    nodes += node
    val childrenNodes = plan.children.map(
      child => buildSparkPlanGraphNode(child, nodeIdGenerator, nodes, edges))
    for (child <- childrenNodes) {
      edges += SparkPlanGraphEdge(child.id, node.id)
    }
    node
  }
}

/**
 * Represent a node in the SparkPlan tree, along with its metrics.
 *
 * @param id generated by "SparkPlanGraph". There is no duplicate id in a graph
 * @param name the name of this SparkPlan node
 * @param metrics metrics that this SparkPlan node will track
 */
private[ui] case class SparkPlanGraphNode(
    id: Long,
    name: String,
    desc: String,
    metadata: Map[String, String],
    metrics: Seq[SQLPlanMetric]) {

  def makeDotNode(metricsValue: Map[Long, String]): String = {
    val builder = new mutable.StringBuilder(name)

    val values = for {
      metric <- metrics
      value <- metricsValue.get(metric.accumulatorId)
    } yield {
      metric.name + ": " + value
    }

    if (values.nonEmpty) {
      // If there are metrics, display each entry in a separate line. We should use an escaped
      // "\n" here to follow the dot syntax.
      //
      // Note: whitespace between two "\n"s is to create an empty line between the name of
      // SparkPlan and metrics. If removing it, it won't display the empty line in UI.
      builder ++= "\\n \\n"
      builder ++= values.mkString("\\n")
    }

    s"""  $id [label="${builder.toString()}"];"""
  }
}

/**
 * Represent an edge in the SparkPlan tree. `fromId` is the parent node id, and `toId` is the child
 * node id.
 */
private[ui] case class SparkPlanGraphEdge(fromId: Long, toId: Long) {

  def makeDotEdge: String = s"""  $fromId->$toId;\n"""
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy