All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.nvidia.spark.rapids.tool.profiling.ProfSQLPlanClassifier.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.nvidia.spark.rapids.tool.profiling

import scala.collection.mutable

import com.nvidia.spark.rapids.tool.analysis.{AppAnalysisBase, SparkSQLPlanInfoVisitor, SQLPlanInfoContext}
import com.nvidia.spark.rapids.tool.planparser.DeltaLakeHelper

import org.apache.spark.sql.execution.ui
import org.apache.spark.sql.rapids.tool.SqlPlanInfoGraphEntry
import org.apache.spark.sql.rapids.tool.profiling.ApplicationInfo


/**
 * Per-plan state carried along while a single SQLPlan is being classified.
 *
 * When `deltaOpsNode` is omitted, each construction receives its own fresh, empty buffer
 * because Scala evaluates default argument expressions on every call.
 *
 * @param sqlPIGEntry the SqlPlanInfoGraphEntry of the SQLPlan being classified
 * @param deltaOpsNode IDs of the nodes that are classified as Delta metadata
 */
case class SQLPlanClassifierCtxt(
    sqlPIGEntry: SqlPlanInfoGraphEntry,
    deltaOpsNode: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.empty[Long])
  extends SQLPlanInfoContext(sqlPIGEntry)


/**
 * A SparkSQLPlanInfoVisitor implementation that walks every node of a SQLPlanInfo and derives
 * the classifications of each SQLPlan from the nodes it visited.
 * @param app the application whose SQLPlans are classified
 */
class SQLPlanClassifier(app: ApplicationInfo)
  extends AppAnalysisBase(app) with SparkSQLPlanInfoVisitor[SQLPlanClassifierCtxt] {
  // Maps a category name to the insertion-ordered set of SQLIDs assigned to that category.
  // "deltaOp" is the only category for now and is registered eagerly, so its entry always
  // exists (possibly with an empty set). More categories may be added later to classify the
  // SQLPlans further.
  val sqlCategories: mutable.HashMap[String, mutable.LinkedHashSet[Long]] =
    mutable.HashMap("deltaOp" -> mutable.LinkedHashSet.empty[Long])

  // Records the ID of `node` in the plan context when the node is a Delta metadata operation.
  override def visitNode(sqlPlanCtxt: SQLPlanClassifierCtxt, node: ui.SparkPlanGraphNode): Unit = {
    // The physical plan description is looked up by the SQLID of the plan being walked.
    // NOTE(review): the lookup presumably fails if no description is stored for this SQLID --
    // confirm against the definition of physicalPlanDescription.
    val sqlId = sqlPlanCtxt.getSQLPIGEntry.sqlID
    if (DeltaLakeHelper.isDeltaOpNode(
        sqlPlanCtxt.sqlPIGEntry, app.physicalPlanDescription(sqlId), node)) {
      sqlPlanCtxt.deltaOpsNode += node.id
    }
  }

  // Creates the context for one SQLPlan; it starts with no Delta nodes recorded.
  override def createPlanCtxtFromPIGEntry(
      sqlPIGEntry: SqlPlanInfoGraphEntry): SQLPlanClassifierCtxt =
    SQLPlanClassifierCtxt(sqlPIGEntry)

  // Decides on the classifications of a SQLPlan after all of its nodes have been visited.
  override def postWalkPlan(planCtxt: SQLPlanClassifierCtxt): Unit = {
    // A single Delta node is enough to label the entire SQLPlan as a Delta operation.
    val hasDeltaNodes = planCtxt.deltaOpsNode.nonEmpty
    if (hasDeltaNodes) {
      // Only the SQLID is stored globally; the node IDs stay in the per-plan context because
      // they are not needed anywhere else for now.
      sqlCategories("deltaOp") += planCtxt.getSQLPIGEntry.sqlID
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy