com.nvidia.spark.rapids.tool.tuning.QualAppSummaryInfoProvider.scala
RAPIDS Accelerator for Apache Spark tools
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.nvidia.spark.rapids.tool.tuning

import com.nvidia.spark.rapids.tool.AppSummaryInfoBaseProvider
import com.nvidia.spark.rapids.tool.analysis.AggRawMetricsResult
import com.nvidia.spark.rapids.tool.profiling.DataSourceProfileResult

import org.apache.spark.internal.Logging
import org.apache.spark.sql.rapids.tool.qualification.{QualificationAppInfo, QualificationSummaryInfo}

/**
* Implementation of AppInfoPropertyGetter to wrap the output of the Qualification analysis.
* @param appInfo the main QualificationAppInfo object representing the CPU application.
 * @param appAggStats optional aggregated stats, included for future improvements since
 *                    we may need to feed the AutoTuner with values from the aggregates.
* @param rawAggMetrics the raw profiler aggregation metrics
* @param dsInfo Data source information
*/
class QualAppSummaryInfoProvider(
    val appInfo: QualificationAppInfo,
    val appAggStats: Option[QualificationSummaryInfo],
    val rawAggMetrics: AggRawMetricsResult,
    val dsInfo: Seq[DataSourceProfileResult]) extends AppSummaryInfoBaseProvider with Logging {
private lazy val distinctLocations = dsInfo.groupBy(_.location)
  override def isAppInfoAvailable: Boolean = true
private def findPropertyInternal(
key: String, props: collection.Map[String, String]): Option[String] = {
props.get(key)
}
override def getAllProperties: Map[String, String] = {
appInfo.sparkProperties
}
override def getSparkProperty(propKey: String): Option[String] = {
findPropertyInternal(propKey, appInfo.sparkProperties)
}
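  // For CPU event logs, any RAPIDS plugin settings live in the same Spark properties
  // map, so this lookup simply delegates to getSparkProperty.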
override def getRapidsProperty(propKey: String): Option[String] = {
getSparkProperty(propKey)
}
override def getSystemProperty(propKey: String): Option[String] = {
findPropertyInternal(propKey, appInfo.systemProperties)
}
override def getSparkVersion: Option[String] = {
Option(appInfo.sparkVersion)
}
def getAppID: String = appInfo.appId
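  // Fraction of executor CPU time spent in JVM garbage collection, one entry per
  // SQL-level task-metrics aggregate. For example, a GC time sum of 2,000 against an
  // executor CPU time of 10,000 yields 0.2. Note that a zero executorCpuTime produces
  // Infinity/NaN under Double division rather than throwing.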
override def getJvmGCFractions: Seq[Double] = {
rawAggMetrics.sqlAggs.map {
taskMetrics => taskMetrics.jvmGCTimeSum * 1.0 / taskMetrics.executorCpuTime
}
}
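  // Total spilled bytes (disk plus memory) per SQL-level aggregate; non-zero values
  // are a signal that tasks are running under memory pressure.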
override def getSpilledMetrics: Seq[Long] = {
rawAggMetrics.sqlAggs.map { task =>
task.diskBytesSpilledSum + task.memoryBytesSpilledSum
}
}
  // Returns the shuffle stage IDs that have positive spilling metrics.
  // The heuristic below assumes that these are CPU event logs and only looks at
  // the size of disk bytes spilled.
  override def getShuffleStagesWithPosSpilling: Set[Long] = {
    // TODO: Should this be the same as SingleAppSummaryInfoProvider.getShuffleStagesWithPosSpilling?
    rawAggMetrics.stageAggs.collect {
      case row if row.srTotalBytesReadSum + row.swBytesWrittenSum > 0 &&
        row.diskBytesSpilledSum > 0 => row.id
    }.toSet
  }
override def getShuffleSkewStages: Set[Long] = {
rawAggMetrics.taskShuffleSkew.map { row => row.stageId }.toSet
}
override def getMaxInput: Double = {
if (rawAggMetrics.maxTaskInputSizes.nonEmpty) {
rawAggMetrics.maxTaskInputSizes.head.maxTaskInputBytesRead
} else {
0.0
}
}
  // The list of RAPIDS jars is empty since these are CPU event logs.
override def getRapidsJars: Seq[String] = {
Seq.empty
}
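  // Percentage of distinct read locations among all scanned data sources.
  // For example, 4 scan operations spread over 2 distinct paths yield 50.0.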
  override def getDistinctLocationPct: Double = {
    // Guard against an empty data-source list, which would otherwise produce NaN.
    if (dsInfo.isEmpty) {
      0.0
    } else {
      100.0 * distinctLocations.size / dsInfo.size
    }
  }
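  // Sums the data sizes of locations that are read more than once, provided at least
  // one of the reads is Parquet, approximating the bytes re-read redundantly.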
override def getRedundantReadSize: Long = {
distinctLocations
.filter {
case (_, objects) => objects.size > 1 && objects.exists(_.format.contains("Parquet"))
}
.mapValues(_.map(_.data_size).sum)
.values
.sum
}
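  // The two means below average over the IO-level aggregates, defaulting to 0.0 when
  // no aggregates exist so downstream heuristics never see NaN.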
override def getMeanInput: Double = {
if (rawAggMetrics.ioAggs.nonEmpty) {
rawAggMetrics.ioAggs.map(_.inputBytesReadSum).sum * 1.0 / rawAggMetrics.ioAggs.size
} else {
0.0
}
}
override def getMeanShuffleRead: Double = {
if (rawAggMetrics.ioAggs.nonEmpty) {
rawAggMetrics.ioAggs.map(_.srTotalBytesReadSum).sum * 1.0 / rawAggMetrics.ioAggs.size
} else {
0.0
}
}
}
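
// A minimal usage sketch, not part of the original file: the object and method names
// below are hypothetical, and the provider instance is assumed to be built upstream by
// the qualification pipeline. It only calls methods defined on the provider above.
object QualAppSummaryInfoProviderExample {
  def describe(provider: QualAppSummaryInfoProvider): Unit = {
    println(s"App ID: ${provider.getAppID}")
    println(s"Spark version: ${provider.getSparkVersion.getOrElse("unknown")}")
    println(s"Max task input bytes: ${provider.getMaxInput}")
    println(s"Mean shuffle read bytes: ${provider.getMeanShuffleRead}")
    provider.getSparkProperty("spark.executor.memory").foreach { mem =>
      println(s"Executor memory: $mem")
    }
  }
}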