com.nvidia.spark.rapids.tool.qualification.QualificationMain.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rapids-4-spark-tools_2.12 Show documentation
RAPIDS Accelerator for Apache Spark tools
The newest version!
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.nvidia.spark.rapids.tool.qualification
import scala.util.control.NonFatal
import com.nvidia.spark.rapids.tool.{EventLogPathProcessor, PlatformFactory}
import com.nvidia.spark.rapids.tool.profiling.AutoTuner.loadClusterProps
import com.nvidia.spark.rapids.tool.tuning.TunerContext
import org.apache.spark.internal.Logging
import org.apache.spark.sql.rapids.tool.AppFilterImpl
import org.apache.spark.sql.rapids.tool.qualification.QualificationSummaryInfo
import org.apache.spark.sql.rapids.tool.util.RapidsToolsConfUtil
/**
 * A tool to analyze Spark event logs and determine if
 * they might be a good fit for running on the GPU.
 */
object QualificationMain extends Logging {

  /**
   * CLI entry point. Parses the arguments, runs the qualification, and exits
   * the JVM with a non-zero status code if the run failed.
   */
  def main(args: Array[String]): Unit = {
    val (exitCode, _) =
      mainInternal(new QualificationArgs(args), printStdout = true, enablePB = true)
    if (exitCode != 0) {
      System.exit(exitCode)
    }
  }

  /**
   * Entry point for tests.
   *
   * @param appArgs     parsed command-line arguments
   * @param printStdout whether the qualification report is also echoed to stdout
   * @param enablePB    whether a progress bar is shown while processing event logs
   * @return (exit code, per-application qualification summaries). Exit code 0 on
   *         success — including the "no event logs to process" case, which returns
   *         an empty sequence; 1 when platform or plugin-type-checker setup fails.
   */
  def mainInternal(appArgs: QualificationArgs,
      printStdout: Boolean = false,
      enablePB: Boolean = false): (Int, Seq[QualificationSummaryInfo]) = {
    // Unpack CLI options, applying the tool's defaults where a flag was not set.
    val eventlogPaths = appArgs.eventlog()
    val filterN = appArgs.filterCriteria
    val minEventLogSize = appArgs.minEventLogSize.toOption
    val maxEventLogSize = appArgs.maxEventLogSize.toOption
    val matchEventLogs = appArgs.matchEventLogs
    val outputDirectory = appArgs.outputDirectory().stripSuffix("/")
    val numOutputRows = appArgs.numOutputRows.getOrElse(1000)
    val maxSQLDescLength = appArgs.maxSqlDescLength.getOrElse(100)
    // Default the worker pool to a quarter of the available cores, rounded up.
    val nThreads = appArgs.numThreads.getOrElse(
      Math.ceil(Runtime.getRuntime.availableProcessors() / 4f).toInt)
    val timeout = appArgs.timeout.toOption
    val reportReadSchema = appArgs.reportReadSchema.getOrElse(false)
    val order = appArgs.order.getOrElse("desc")
    val reportSqlLevel = appArgs.perSql.getOrElse(false)
    val mlOpsEnabled = appArgs.mlFunctions.getOrElse(false)
    val penalizeTransitions = appArgs.penalizeTransitions.getOrElse(true)
    val recursiveSearchEnabled = !appArgs.noRecursion()
    val hadoopConf = RapidsToolsConfUtil.newHadoopConf

    // Resolve the target platform (with optional worker-info cluster properties)
    // up front; any failure here is a setup error, so exit early with code 1.
    val platform = try {
      val clusterPropsOpt = loadClusterProps(appArgs.workerInfo())
      PlatformFactory.createInstance(appArgs.platform(), clusterPropsOpt)
    } catch {
      case NonFatal(e) =>
        logError("Error creating the platform", e)
        return (1, Seq[QualificationSummaryInfo]())
    }
    val pluginTypeChecker = try {
      new PluginTypeChecker(
        platform,
        appArgs.speedupFactorFile.toOption)
    } catch {
      case ie: IllegalStateException =>
        logError("Error creating the plugin type checker!", ie)
        return (1, Seq[QualificationSummaryInfo]())
    }

    // First pass: filesystem-level filtering of candidate event log paths
    // (match expressions, size bounds, modification-time bounds).
    val (eventLogFsFiltered, allEventLogs) = EventLogPathProcessor.processAllPaths(
      filterN.toOption, matchEventLogs.toOption, eventlogPaths, hadoopConf, recursiveSearchEnabled,
      maxEventLogSize, minEventLogSize, appArgs.fsStartTime.toOption, appArgs.fsEndTime.toOption)

    // Second pass: application-level filters (app name, user, start time, ...),
    // which require partially reading each event log.
    val filteredLogs = if (argsContainsAppFilters(appArgs)) {
      val appFilter = new AppFilterImpl(numOutputRows, hadoopConf, timeout, nThreads)
      val finalEventLogs = if (appArgs.any() && argsContainsFSFilters(appArgs)) {
        // With --any, a log qualifies if it matches EITHER the app-level OR the
        // filesystem-level filters; de-dup via toSet since the two sides overlap.
        (appFilter.filterEventLogs(allEventLogs, appArgs) ++ eventLogFsFiltered).toSet.toSeq
      } else {
        appFilter.filterEventLogs(eventLogFsFiltered, appArgs)
      }
      finalEventLogs
    } else {
      eventLogFsFiltered
    }

    if (filteredLogs.isEmpty) {
      logWarning("No event logs to process after checking paths, exiting!")
      return (0, Seq[QualificationSummaryInfo]())
    }

    // create the AutoTuner context object (only when --auto-tuner was requested)
    val tunerContext = if (appArgs.autoTuner()) {
      TunerContext(platform, appArgs.workerInfo(), outputDirectory, Option(hadoopConf))
    } else {
      None
    }
    val qual = new Qualification(outputDirectory, numOutputRows, hadoopConf, timeout,
      nThreads, order, pluginTypeChecker, reportReadSchema, printStdout,
      enablePB, reportSqlLevel, maxSQLDescLength, mlOpsEnabled, penalizeTransitions,
      tunerContext, appArgs.clusterReport())
    val res = qual.qualifyApps(filteredLogs)
    (0, res)
  }

  /**
   * Returns true if any filesystem-level filter option was supplied: a match
   * expression, a "*-filesystem" filter criteria, an event log size bound, or a
   * filesystem timestamp bound.
   */
  def argsContainsFSFilters(appArgs: QualificationArgs): Boolean = {
    val filterCriteria = appArgs.filterCriteria.toOption
    appArgs.matchEventLogs.isSupplied ||
      filterCriteria.exists(_.endsWith("-filesystem")) ||
      appArgs.maxEventLogSize.toOption.isDefined ||
      appArgs.minEventLogSize.toOption.isDefined ||
      appArgs.fsStartTime.toOption.isDefined ||
      appArgs.fsEndTime.toOption.isDefined
  }

  /**
   * Returns true if any application-level filter option was supplied: app name,
   * app start time, user name, Spark property, or a "*-newest" / "*-oldest" /
   * "*-per-app-name" filter criteria.
   */
  def argsContainsAppFilters(appArgs: QualificationArgs): Boolean = {
    val filterCriteria = appArgs.filterCriteria.toOption
    appArgs.applicationName.isSupplied || appArgs.startAppTime.isSupplied ||
      appArgs.userName.isSupplied || appArgs.sparkProperty.isSupplied ||
      filterCriteria.exists { c =>
        c.endsWith("-newest") || c.endsWith("-oldest") || c.endsWith("-per-app-name")
      }
  }
}