/*
 * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.nvidia.spark.rapids.tool.profiling

import java.io.{BufferedReader, InputStreamReader, IOException}
import java.util

import scala.beans.BeanProperty
import scala.collection.{mutable, Seq}
import scala.collection.JavaConverters.mapAsScalaMapConverter
import scala.collection.mutable.ListBuffer
import scala.util.control.NonFatal
import scala.util.matching.Regex

import com.nvidia.spark.rapids.tool.{AppSummaryInfoBaseProvider, GpuDevice, Platform, PlatformFactory}
import com.nvidia.spark.rapids.tool.planparser.DatabricksParseHelper
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, Path}
import org.yaml.snakeyaml.{DumperOptions, LoaderOptions, Yaml}
import org.yaml.snakeyaml.constructor.{Constructor, ConstructorException}
import org.yaml.snakeyaml.representer.Representer

import org.apache.spark.internal.Logging
import org.apache.spark.sql.rapids.tool.ToolUtils
import org.apache.spark.sql.rapids.tool.util.{StringUtils, WebCrawlerUtil}

/**
 * A wrapper class that stores all the GPU properties.
 * The BeanProperty enables loading and parsing the YAML formatted content using the
 * Constructor SnakeYaml approach.
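 *
 * For example, the `gpu` section of the worker-info YAML (see the example in the
 * [[AutoTuner]] scaladoc) maps onto this bean:
 * {{{
 *   gpu:
 *     count: 8
 *     memory: 32gb
 *     name: NVIDIA V100
 * }}}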
 */
class GpuWorkerProps(
    @BeanProperty var memory: String,
    @BeanProperty var count: Int,
    @BeanProperty var name: String) {
  def this() {
    this("0m", 0, "None")
  }
  def isMissingInfo: Boolean = {
    memory == null || memory.isEmpty || name == null || name.isEmpty ||
       count == 0 || memory.startsWith("0") || name == "None"
  }
  def isEmpty: Boolean = {
    count == 0 && (memory == null || memory.isEmpty || memory.startsWith("0")) &&
      (name == null || name.isEmpty || name == "None")
  }
  /**
   * If the GPU count is missing, sets 1 as the default value.
   *
   * @return true if the value has been updated.
   */
  def setDefaultGpuCountIfMissing(): Boolean = {
    // TODO - do we want to recommend 1 or base it on core count? 32 cores to 1 gpu may be too much.
    if (count == 0) {
      count = AutoTuner.DEF_WORKER_GPU_COUNT
      true
    } else {
      false
    }
  }
  def setDefaultGpuNameIfMissing(platform: Platform): Boolean = {
    if (!GpuDevice.deviceMap.contains(name)) {
      name = platform.gpuDevice.getOrElse(platform.defaultGpuDevice).toString
      true
    } else {
      false
    }
  }

  /**
   * If the GPU memory is missing, it sets a default value based on the GPU device type.
   * If the device type is unknown, it defaults to 15109m (T4).
   *
   * @return true if the value has been updated.
   */
  def setDefaultGpuMemIfMissing(): Boolean = {
    if (memory == null || memory.isEmpty || memory.startsWith("0")) {
      memory = try {
        GpuDevice.createInstance(getName).getOrElse(GpuDevice.DEFAULT).getMemory
      } catch {
        case _: IllegalArgumentException => GpuDevice.DEFAULT.getMemory
      }
      true
    } else {
      false
    }
  }

  /**
   * Sets any missing fields and returns a list of messages to indicate what has been updated.
   * @return a list containing information of what was missing and the default value that has been
   *         used to initialize the field.
   */
  def setMissingFields(platform: Platform): Seq[String] = {
    val res = new ListBuffer[String]()
    if (setDefaultGpuCountIfMissing()) {
      res += s"GPU count is missing. Setting default to $getCount."
    }
    if (setDefaultGpuNameIfMissing(platform)) {
      res += s"GPU device is missing. Setting default to $getName."
    }
    if (setDefaultGpuMemIfMissing()) {
      res += s"GPU memory is missing. Setting default to $getMemory."
    }
    res
  }

  override def toString: String =
    s"{count: $count, memory: $memory, name: $name}"
}

/**
 * A wrapper class that stores all the system properties.
 * The BeanProperty enables loading and parsing the YAML formatted content using the
 * Constructor SnakeYaml approach.
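 *
 * For example, a `system` section shaped like the following (a sketch; the keys are assumed
 * to match the BeanProperty names) maps onto this bean:
 * {{{
 *   system:
 *     numCores: 64
 *     memory: 512gb
 *     numWorkers: 4
 * }}}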
 */
class SystemClusterProps(
    @BeanProperty var numCores: Int,
    @BeanProperty var memory: String,
    @BeanProperty var numWorkers: Int) {
  def this() {
    this(0, "0m", 0)
  }
  def isMissingInfo: Boolean = {
    // keep for future expansion as we may add more fields later.
    numWorkers <= 0
  }
  def isEmpty: Boolean = {
    // consider the object incorrect if either numCores or memory is not set.
    memory == null || memory.isEmpty || numCores <= 0 || memory.startsWith("0")
  }
  def setDefaultNumWorkersIfMissing(): Boolean = {
    if (numWorkers <= 0) {
      numWorkers = AutoTuner.DEF_NUM_WORKERS
      true
    } else {
      false
    }
  }
  /**
   * Sets any missing fields and returns a list of messages to indicate what has been updated.
   * @return a list containing information of what was missing and the default value that has been
   *         used to initialize the field.
   */
  def setMissingFields(): Seq[String] = {
    val res = new ListBuffer[String]()
    if (setDefaultNumWorkersIfMissing()) {
      res += s"Number of workers is missing. Setting default to $getNumWorkers."
    }
    res
  }
  override def toString: String =
    s"{numCores: $numCores, memory: $memory, numWorkers: $numWorkers}"
}

/**
 * A wrapper class that stores all the properties of the cluster.
 * The BeanProperty enables loading and parsing the YAML formatted content using the
 * Constructor SnakeYaml approach.
 *
 * @param system wrapper that includes the properties related to system information like cores and
 *               memory.
 * @param gpu wrapper that includes the properties related to GPU.
 * @param softwareProperties a set of software properties such as Spark properties.
 *                           The properties are typically loaded from the default cluster
 *                           configurations.
 */
class ClusterProperties(
    @BeanProperty var system: SystemClusterProps,
    @BeanProperty var gpu: GpuWorkerProps,
    @BeanProperty var softwareProperties: util.LinkedHashMap[String, String]) {

  import AutoTuner._

  def this() {
    this(new SystemClusterProps(), new GpuWorkerProps(), new util.LinkedHashMap[String, String]())
  }
  def isEmpty: Boolean = {
    system.isEmpty && gpu.isEmpty
  }
  def getTargetProperties: mutable.Map[String, String] = {
    softwareProperties.asScala.filter(entry => recommendationsTarget.contains(entry._1))
  }
  override def toString: String =
    s"{${system.toString}, ${gpu.toString}, $softwareProperties}"
}

/**
 * Wrapper to hold the recommendation of a given criterion.
 *
 * @param name the property label.
 * @param original the value loaded from the spark properties.
 * @param recommended the recommended value by the AutoTuner.
 */
class RecommendationEntry(val name: String,
    val original: Option[String],
    var recommended: Option[String]) {

  def setRecommendedValue(value: String): Unit = {
    recommended = Option(value)
  }

  /**
   * Used to compare two property values by converting memory units to
   * an equivalent representation.
   * @param propValue property to be processed.
   * @return the uniform representation of the property.
   *         For Memory, the value is converted to bytes.
   */
  private def getRawValue(propValue: Option[String]): Option[String] = {
    propValue match {
      case None => None
      case Some(value) =>
        if (StringUtils.isMemorySize(value)) {
          // if it is memory return the bytes unit
          Some(s"${StringUtils.convertMemorySizeToBytes(value)}")
        } else {
          propValue
        }
    }
  }

  /**
   * Returns true when the recommendation is different than the original.
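   * For example, an original value of "2g" and a recommended value of "2048m" normalize to the
   * same byte count, so this method returns false.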
   */
  private def recommendsNewValue(): Boolean = {
    val originalVal = getRawValue(original)
    val recommendedVal = getRawValue(recommended)
    (originalVal, recommendedVal) match {
      case (None, None) => false
      case (Some(orig), Some(rec)) =>
        orig != rec
      case _ => true
    }
  }

  /**
   * Returns whether the recommendation is valid. For example, a recommendation that does not
   * change the original value returns false if the filter is enabled.
   * @param filterByUpdated flag to pick only the properties that would be updated by the
   *                        recommendations
   */
  def isValid(filterByUpdated: Boolean): Boolean = {
    recommended match {
      case None => false
      case _ =>
        if (filterByUpdated) { // filter enabled
          recommendsNewValue()
        } else {
          true
        }
    }
  }
}

/**
 * AutoTuner module that uses event logs and worker's system properties to recommend Spark
 * RAPIDS configuration based on heuristics.
 *
 * Example:
 * a. Success:
 *    Input:
 *      system:
 *        num_cores: 64
 *        cpu_arch: x86_64
 *        memory: 512gb
 *        free_disk_space: 800gb
 *        time_zone: America/Los_Angeles
 *        num_workers: 4
 *      gpu:
 *        count: 8
 *        memory: 32gb
 *        name: NVIDIA V100
 *      softwareProperties:
 *        spark.driver.maxResultSize: 7680m
 *        spark.driver.memory: 15360m
 *        spark.executor.cores: '8'
 *        spark.executor.instances: '2'
 *        spark.executor.memory: 47222m
 *        spark.executorEnv.OPENBLAS_NUM_THREADS: '1'
 *        spark.extraListeners: com.google.cloud.spark.performance.DataprocMetricsListener
 *        spark.scheduler.mode: FAIR
 *        spark.sql.cbo.enabled: 'true'
 *        spark.ui.port: '0'
 *        spark.yarn.am.memory: 640m
 *
 *    Output:
 *       Spark Properties:
 *       --conf spark.executor.cores=8
 *       --conf spark.executor.instances=20
 *       --conf spark.executor.memory=16384m
 *       --conf spark.executor.memoryOverhead=5734m
 *       --conf spark.rapids.memory.pinnedPool.size=4096m
 *       --conf spark.rapids.sql.concurrentGpuTasks=2
 *       --conf spark.sql.files.maxPartitionBytes=4096m
 *       --conf spark.task.resource.gpu.amount=0.125
 *
 *       Comments:
 *       - 'spark.rapids.sql.concurrentGpuTasks' was not set.
 *       - 'spark.executor.memoryOverhead' was not set.
 *       - 'spark.rapids.memory.pinnedPool.size' was not set.
 *       - 'spark.sql.adaptive.enabled' should be enabled for better performance.
 *
 * b. Failure:
 *    Input: Incorrect File
 *    Output:
 *      Cannot recommend properties. See Comments.
 *
 *      Comments:
 *      - java.io.FileNotFoundException: File worker_info.yaml does not exist
 *      - 'spark.executor.memory' should be set to at least 2GB/core.
 *      - 'spark.executor.instances' should be set to (gpuCount * numWorkers).
 *      - 'spark.task.resource.gpu.amount' should be set to Max(1, (numCores / gpuCount)).
 *      - 'spark.rapids.sql.concurrentGpuTasks' should be set to Min(4, (gpuMemory / 7.5G)).
 *      - 'spark.rapids.memory.pinnedPool.size' should be set to 2048m.
 *      - 'spark.sql.adaptive.enabled' should be enabled for better performance.
 *
 * @param clusterProps The cluster properties including cores, mem, GPU, and software
 *                     (see [[ClusterProperties]]).
 * @param appInfoProvider the container holding the profiling result.
 */
class AutoTuner(
    val clusterProps: ClusterProperties,
    val appInfoProvider: AppSummaryInfoBaseProvider,
    val platform: Platform,
    val driverInfoProvider: DriverLogInfoProvider)
  extends Logging {

  import AutoTuner._

  var comments = new ListBuffer[String]()
  var recommendations: mutable.LinkedHashMap[String, RecommendationEntry] =
    mutable.LinkedHashMap[String, RecommendationEntry]()
  // set of recommendations to be skipped from the output
  // Note that the recommendations will be computed anyway to avoid breaking dependencies.
  private val skippedRecommendations: mutable.HashSet[String] = mutable.HashSet[String]()
  // set of recommendations whose calculations are disabled and which only rely on default values
  private val limitedLogicRecommendations: mutable.HashSet[String] = mutable.HashSet[String]()
  // When enabled, the profiler recommendations should only include updated settings.
  private var filterByUpdatedPropertiesEnabled: Boolean = true

  private def isCalculationEnabled(prop: String) : Boolean = {
    !limitedLogicRecommendations.contains(prop)
  }

  def getPropertyValue(key: String): Option[String] = {
    val fromProfile = appInfoProvider.getProperty(key)
    // If the value is not found above, fallback to cluster properties
    fromProfile.orElse(Option(clusterProps.softwareProperties.get(key)))
  }

  def getAllProperties: collection.Map[String, String] = {
    // the app properties override the cluster properties
    clusterProps.getSoftwareProperties.asScala ++ appInfoProvider.getAllProperties
  }

  def initRecommendations(): Unit = {
    recommendationsTarget.foreach { key =>
      // no need to add new records if they are missing from props
      getPropertyValue(key).foreach { propVal =>
        val recommendationVal = new RecommendationEntry(key, Option(propVal), None)
        recommendations(key) = recommendationVal
      }
    }
  }

  def appendRecommendation(key: String, value: String): Unit = {
    if (!skippedRecommendations.contains(key)) {
      val recomRecord = recommendations.getOrElseUpdate(key,
        new RecommendationEntry(key, getPropertyValue(key), None))
      if (value != null) {
        recomRecord.setRecommendedValue(value)
        if (recomRecord.original.isEmpty) {
          // add a comment that the value was missing in the cluster properties
          appendComment(s"'$key' was not set.")
        }
      }
    }
  }

  /**
   * Safely appends the recommendation to the given key.
   * It skips if the value is 0.
   */
  def appendRecommendation(key: String, value: Int): Unit = {
    if (value > 0) {
      appendRecommendation(key: String, s"$value")
    }
  }

  /**
   * Safely appends the recommendation to the given key.
   * It skips if the value is 0.0.
   */
  def appendRecommendation(key: String, value: Double): Unit = {
    if (value > 0.0) {
      appendRecommendation(key: String, s"$value")
    }
  }
  /**
   * Safely appends the recommendation to the given key.
   * It appends "m" to the string value. It skips if the value is 0 or null.
   */
  def appendRecommendationForMemoryMB(key: String, value: String): Unit = {
    if (value != null && value.toDouble > 0.0) {
      appendRecommendation(key, s"${value}m")
    }
  }

  /**
   * Try to figure out the recommended instance type to use and set
   * the executor cores and instances based on that instance type.
   * Does nothing if the platform doesn't support specific instance types.
   */
  private def configureGPURecommendedInstanceType(): Unit = {
    val gpuClusterRec = platform.getGPUInstanceTypeRecommendation(getAllProperties.toMap)
    if (gpuClusterRec.isDefined) {
      appendRecommendation("spark.executor.cores", gpuClusterRec.get.coresPerExecutor)
      if (gpuClusterRec.get.numExecutors > 0) {
        appendRecommendation("spark.executor.instances", gpuClusterRec.get.numExecutors)
      }
    }
  }

  def calcNumExecutorCores: Int = {
    val executorCores = platform.recommendedClusterInfo.map(_.coresPerExecutor).getOrElse(1)
    Math.max(1, executorCores)
  }

  /**
   * Recommendation for 'spark.task.resource.gpu.amount' based on num of cpu cores.
   */
  def calcTaskGPUAmount: Double = {
    val numExecutorCores = calcNumExecutorCores
    // can never be 0 since numExecutorCores has to be at least 1
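    // e.g., 16 cores per executor yields a recommendation of 1.0 / 16 = 0.0625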
    1.0 / numExecutorCores
  }

  /**
   * Recommendation for 'spark.rapids.sql.concurrentGpuTasks' based on gpu memory.
   * Assumption - cluster properties were updated to have default values if missing.
   */
  def calcGpuConcTasks(): Long = {
    Math.min(MAX_CONC_GPU_TASKS, platform.getGpuOrDefault.getGpuConcTasks)
  }

  /**
   * Calculates the available memory for each executor on the worker based on the number of
   * executors per node and the memory.
   * Assumption - cluster properties were updated to have default values if missing.
   */
  private def calcAvailableMemPerExec(): Double = {
    val memMBPerNode = platform.recommendedNodeInstanceInfo.map(_.memoryMB).getOrElse(0L)
    val gpusPerExec = platform.getNumGPUsPerNode
    Math.max(0, memMBPerNode / gpusPerExec)
  }

  /**
   * Recommendation for the initial heap size based on a certain amount of memory per core.
   * Note that we will later reduce this if needed for off heap memory.
   */
  def calcInitialExecutorHeap(executorContainerMemCalculator: () => Double,
      numExecCores: Int): Long = {
    val maxExecutorHeap = Math.max(0, executorContainerMemCalculator()).toInt
    // give up to 2GB of heap to each executor core
    // TODO - revisit this in future as we could let heap be bigger
    Math.min(maxExecutorHeap, DEF_HEAP_PER_CORE_MB * numExecCores)
  }

  /**
   * Recommendation of memory settings for executor.
   * Returns:
   * (pinned memory size,
   *  executor memory overhead size,
   *  executor heap size,
   *  boolean if should set MaxBytesInFlight)
   */
  def calcOverallMemory(
      execHeapCalculator: () => Long,
      numExecutorCores: Int,
      containerMemCalculator: () => Double): (Long, Long, Long, Boolean) = {
    val executorHeap = execHeapCalculator()
    val containerMem = containerMemCalculator.apply()
    var setMaxBytesInFlight = false
    // reserve 10% of heap as memory overhead
    var executorMemOverhead = (executorHeap * DEF_HEAP_OVERHEAD_FRACTION).toLong
    executorMemOverhead += DEF_PAGEABLE_POOL_MB
    val containerMemLeftOverOffHeap = containerMem - executorHeap
    val minOverhead = executorMemOverhead + (MIN_PINNED_MEMORY_MB + MIN_SPILL_MEMORY_MB)
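    // Rough worked example (assuming the default 2 GB/core heap, 8 cores, non-CSP,
    // 32768 MB container): executorHeap = 16384, executorMemOverhead = 1638 + 2048 = 3686,
    // off-heap leftover = 16384, pinned = spill = 4096, so the returned overhead
    // becomes 3686 + 4096 + 4096 = 11878.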
    logDebug("containerMem " + containerMem + " executorHeap: " + executorHeap +
      " executorMemOverhead: " + executorMemOverhead + " minOverhead " + minOverhead)
    if (containerMemLeftOverOffHeap >= minOverhead) {
      // this is hopefully the path taken in the majority of cases because CSPs generally have a
      // good memory-to-core ratio
      if (numExecutorCores >= 16 && platform.isPlatformCSP &&
        containerMemLeftOverOffHeap >
          executorMemOverhead + 4096L + MIN_PINNED_MEMORY_MB + MIN_SPILL_MEMORY_MB) {
        // Account for the setting of:
        // appendRecommendation("spark.rapids.shuffle.multiThreaded.maxBytesInFlight", "4g")
        executorMemOverhead += 4096L
        setMaxBytesInFlight = true
      }
      // Pinned memory uses any unused space up to 4GB. Spill memory is same size as pinned.
      val pinnedMem = Math.min(MAX_PINNED_MEMORY_MB,
        (containerMemLeftOverOffHeap - executorMemOverhead) / 2).toLong
      // Spill storage is set to the pinned size by default. It is not guaranteed to use only
      // pinned memory though, so in the worst case it uses no pinned memory and relies entirely
      // on off-heap memory.
      val spillMem = pinnedMem
      if (containerMemLeftOverOffHeap >= executorMemOverhead + pinnedMem + spillMem) {
        executorMemOverhead += pinnedMem + spillMem
      } else {
        // use min pinned and spill mem
        executorMemOverhead += MIN_PINNED_MEMORY_MB + MIN_SPILL_MEMORY_MB
      }
      (pinnedMem, executorMemOverhead, executorHeap, setMaxBytesInFlight)
    } else {
      // Otherwise we have to adjust the heuristic for the executor heap size
      // (ideally recommendedHeap = DEF_HEAP_PER_CORE_MB * numExecutorCores).
      // First calculate what we think the minimum overhead is and make sure we have enough
      // memory for that, then calculate the minimum heap size.
      val minExecHeapMem = MIN_HEAP_PER_CORE_MB * numExecutorCores
      if ((containerMem - minOverhead) < minExecHeapMem) {
        // For now just throw so we don't produce any tunings and it's obvious to the user that
        // this isn't a good setup. In the future we may just recommend larger nodes. That would be
        // more ideal once we hook up the actual executor heap from an event log vs what the user
        // passes in.
        throwNotEnoughMemException(minExecHeapMem + minOverhead)
        (0, 0, 0, false)
      } else {
        val leftOverMemUsingMinHeap = containerMem - minExecHeapMem
        if (leftOverMemUsingMinHeap < 0) {
          throwNotEnoughMemException(minExecHeapMem + minOverhead)
        }
        // Pinned memory uses any unused space up to 4GB. Spill memory is same size as pinned.
        val pinnedMem = Math.min(MAX_PINNED_MEMORY_MB, (leftOverMemUsingMinHeap / 2)).toLong
        val spillMem = pinnedMem
        // spill memory is by default same size as pinned memory
        executorMemOverhead += pinnedMem + spillMem
        (pinnedMem, executorMemOverhead, minExecHeapMem, setMaxBytesInFlight)
      }
    }
  }

  private def throwNotEnoughMemException(minSize: Long): Unit = {
    // in the future it would be nice to enhance the error message with a recommendation of size
    val msg = "This node/worker configuration is not ideal for using the Spark Rapids " +
      "Accelerator because it doesn't have enough memory for the executors. " +
      s"We recommend using nodes/workers with more memory. Need at least ${minSize}MB memory."
    logError(msg)
    throw new IllegalArgumentException(msg)
  }

  /**
   * Finds the label of the memory overhead property based on the Spark master configuration and
   * the Spark version.
   * @return "spark.executor.memoryOverhead", "spark.kubernetes.memoryOverheadFactor",
   *         or "spark.executor.memoryOverheadFactor".
   */
  def memoryOverheadLabel: String = {
    val sparkMasterConf = getPropertyValue("spark.master")
    val defaultLabel = "spark.executor.memoryOverhead"
    sparkMasterConf match {
      case None => defaultLabel
      case Some(sparkMaster) =>
        if (sparkMaster.contains("yarn")) {
          defaultLabel
        } else if (sparkMaster.contains("k8s")) {
          appInfoProvider.getSparkVersion match {
            case Some(version) =>
              if (ToolUtils.isSpark330OrLater(version)) {
                "spark.executor.memoryOverheadFactor"
              } else {
                "spark.kubernetes.memoryOverheadFactor"
              }
            case None => defaultLabel
          }
        } else {
          defaultLabel
        }
    }
  }

  /**
   * Flow:
   *   if "spark.master" is standalone => Do Nothing
   *   if "spark.rapids.memory.pinnedPool.size" is set
   *     if yarn -> recommend "spark.executor.memoryOverhead"
   *     if using k8s ->
   *         if version >= 3.3.0 recommend "spark.executor.memoryOverheadFactor" and add comment
   *         else recommend "spark.kubernetes.memoryOverheadFactor" and add comment if missing
   */
  def addRecommendationForMemoryOverhead(recomValue: String): Unit = {
    if (enableMemoryOverheadRecommendation(getPropertyValue("spark.master"))) {
      val memOverheadLookup = memoryOverheadLabel
      appendRecommendationForMemoryMB(memOverheadLookup, recomValue)
      getPropertyValue("spark.rapids.memory.pinnedPool.size").foreach { lookup =>
        if (memOverheadLookup != "spark.executor.memoryOverhead") {
          if (getPropertyValue(memOverheadLookup).isEmpty) {
            appendComment(s"'$memOverheadLookup' must be set if using " +
              "'spark.rapids.memory.pinnedPool.size'.")
          }
        }
      }
    }
  }

  private def configureShuffleReaderWriterNumThreads(numExecutorCores: Int): Unit = {
    // If on a CSP using a blob store, recommend more threads for certain sizes. This is based on
    // testing on customer jobs on Databricks.
    // We didn't test with > 16 threads, so leave those based on numExecutorCores.
    if (numExecutorCores < 4) {
      // leave as defaults - should we reduce below the default of 20? Needs more testing.
    } else if (numExecutorCores >= 4 && numExecutorCores < 16) {
      appendRecommendation("spark.rapids.shuffle.multiThreaded.reader.threads", 20)
      appendRecommendation("spark.rapids.shuffle.multiThreaded.writer.threads", 20)
    } else if (numExecutorCores >= 16 && numExecutorCores < 20 && platform.isPlatformCSP) {
      appendRecommendation("spark.rapids.shuffle.multiThreaded.reader.threads", 28)
      appendRecommendation("spark.rapids.shuffle.multiThreaded.writer.threads", 28)
    } else {
      val numThreads = (numExecutorCores * 1.5).toLong
      appendRecommendation("spark.rapids.shuffle.multiThreaded.reader.threads", numThreads.toInt)
      appendRecommendation("spark.rapids.shuffle.multiThreaded.writer.threads", numThreads.toInt)
    }
  }

  // Currently this only applies many of these configs for CSPs, where we have an idea of what the
  // network/disk configuration is like. On-prem we don't know, so don't set these for now.
  private def configureMultiThreadedReaders(numExecutorCores: Int,
      setMaxBytesInFlight: Boolean): Unit = {
    if (numExecutorCores < 4) {
      appendRecommendation("spark.rapids.sql.multiThreadedRead.numThreads",
        Math.max(20, numExecutorCores))
    } else if (numExecutorCores >= 4 && numExecutorCores < 8 && platform.isPlatformCSP) {
      appendRecommendation("spark.rapids.sql.multiThreadedRead.numThreads",
        Math.max(20, numExecutorCores))
    } else if (numExecutorCores >= 8 && numExecutorCores < 16 && platform.isPlatformCSP) {
      appendRecommendation("spark.rapids.sql.multiThreadedRead.numThreads",
        Math.max(40, numExecutorCores))
    } else if (numExecutorCores >= 16 && numExecutorCores < 20 && platform.isPlatformCSP) {
      appendRecommendation("spark.rapids.sql.multiThreadedRead.numThreads",
        Math.max(80, numExecutorCores))
      if (setMaxBytesInFlight) {
        appendRecommendation("spark.rapids.shuffle.multiThreaded.maxBytesInFlight", "4g")
      }
      appendRecommendation("spark.rapids.sql.reader.multithreaded.combine.sizeBytes",
        10 * 1024 * 1024)
      appendRecommendation("spark.rapids.sql.format.parquet.multithreaded.combine.waitTime", 1000)
    } else {
      val numThreads = (numExecutorCores * 2).toInt
      appendRecommendation("spark.rapids.sql.multiThreadedRead.numThreads",
        Math.max(20, numThreads).toInt)
      if (platform.isPlatformCSP) {
        if (setMaxBytesInFlight) {
          appendRecommendation("spark.rapids.shuffle.multiThreaded.maxBytesInFlight", "4g")
        }
        appendRecommendation("spark.rapids.sql.reader.multithreaded.combine.sizeBytes",
          10 * 1024 * 1024)
        appendRecommendation("spark.rapids.sql.format.parquet.multithreaded.combine.waitTime", 1000)
      }
    }
  }


  def calculateClusterLevelRecommendations(): Unit = {
    // only if we were able to figure out a node type to recommend do we make
    // specific recommendations
    if (platform.recommendedClusterInfo.isDefined) {
      val execCores = platform.recommendedClusterInfo.map(_.coresPerExecutor).getOrElse(1)
      appendRecommendation("spark.task.resource.gpu.amount", calcTaskGPUAmount)
      appendRecommendation("spark.rapids.sql.concurrentGpuTasks",
        calcGpuConcTasks().toInt)
      val availableMemPerExec = calcAvailableMemPerExec()
      val shouldSetMaxBytesInFlight = if (availableMemPerExec > 0.0) {
        val availableMemPerExecExpr = () => availableMemPerExec
        val executorHeap = calcInitialExecutorHeap(availableMemPerExecExpr, execCores)
        val executorHeapExpr = () => executorHeap
        val (pinnedMemory, memoryOverhead, finalExecutorHeap, setMaxBytesInFlight) =
          calcOverallMemory(executorHeapExpr, execCores, availableMemPerExecExpr)
        appendRecommendationForMemoryMB("spark.rapids.memory.pinnedPool.size", s"$pinnedMemory")
        addRecommendationForMemoryOverhead(s"$memoryOverhead")
        appendRecommendationForMemoryMB("spark.executor.memory", s"$finalExecutorHeap")
        setMaxBytesInFlight
      } else {
        logInfo("Available memory per exec is not specified")
        addMissingMemoryComments()
        false
      }
      configureShuffleReaderWriterNumThreads(execCores)
      configureMultiThreadedReaders(execCores, shouldSetMaxBytesInFlight)
      recommendAQEProperties()
    } else {
      addDefaultComments()
    }
    appendRecommendation("spark.rapids.sql.batchSizeBytes", BATCH_SIZE_BYTES)
    appendRecommendation("spark.locality.wait", 0)
  }

  def calculateJobLevelRecommendations(): Unit = {
    // TODO - do we do anything with 200 shuffle partitions or maybe, if it's close,
    // set the Spark config spark.shuffle.sort.bypassMergeThreshold
    getShuffleManagerClassName match {
      case Some(smClassName) => appendRecommendation("spark.shuffle.manager", smClassName)
      case None => appendComment("Could not define the Spark Version")
    }
    appendComment(classPathComments("rapids.shuffle.jars"))
    recommendFileCache()
    recommendMaxPartitionBytes()
    recommendShufflePartitions()
    recommendKryoSerializerSetting()
    recommendGCProperty()
    recommendClassPathEntries()
    recommendSystemProperties()
  }

  // if the user set the serializer to use Kryo, make sure we recommend using the GPU version
  // of it.
  def recommendKryoSerializerSetting(): Unit = {
    getPropertyValue("spark.serializer") match {
      case Some(f) if f.contains("org.apache.spark.serializer.KryoSerializer") =>
        val existingRegistrars = getPropertyValue("spark.kryo.registrator")
        val regToUse = if (existingRegistrars.isDefined && !existingRegistrars.get.isEmpty) {
          // spark.kryo.registrator is a comma separated list. If the user set some, then
          // we need to append our GpuKryoRegistrator to the ones they specified.
          existingRegistrars.get + ",com.nvidia.spark.rapids.GpuKryoRegistrator"
        } else {
          "com.nvidia.spark.rapids.GpuKryoRegistrator"
        }
        appendRecommendation("spark.kryo.registrator", regToUse)
      case _ =>
        // either the serializer is not set or it is not Kryo; do nothing
    }
  }

  def getShuffleManagerClassName(): Option[String] = {
    appInfoProvider.getSparkVersion.map { sparkVersion =>
      val shuffleManagerVersion = sparkVersion.filterNot("().".toSet)
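      // e.g., a Spark version of "3.3.0" becomes "330", producing
      // "com.nvidia.spark.rapids.spark330.RapidsShuffleManager" below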
      val dbVersion = getPropertyValue(
        DatabricksParseHelper.PROP_TAG_CLUSTER_SPARK_VERSION_KEY).getOrElse("")
      val finalShuffleVersion : String = if (dbVersion.nonEmpty) {
        dbVersion match {
          case ver if ver.contains("10.4") => "321db"
          case ver if ver.contains("11.3") => "330db"
          case _ => "332db"
        }
      } else {
        shuffleManagerVersion
      }
      "com.nvidia.spark.rapids.spark" + finalShuffleVersion + ".RapidsShuffleManager"
    }
  }

  /**
   * If the cluster worker-info is missing entries (e.g., CPU and GPU count), it sets the entries
   * to default values. For each default value, a comment is added to the [[comments]].
   */
  def configureClusterPropDefaults: Unit = {
    if (!clusterProps.system.isEmpty) {
      if (clusterProps.system.isMissingInfo) {
        clusterProps.system.setMissingFields().foreach(m => appendComment(m))
      }
      if (clusterProps.gpu.isMissingInfo) {
        clusterProps.gpu.setMissingFields(platform).foreach(m => appendComment(m))
      }
    }
  }

  private def recommendGCProperty(): Unit = {
    val jvmGCFraction = appInfoProvider.getJvmGCFractions
    if (jvmGCFraction.nonEmpty) { // avoid zero division
      if ((jvmGCFraction.sum / jvmGCFraction.size) > MAX_JVM_GCTIME_FRACTION) {
        // TODO - or other cores/memory ratio
        appendComment("Average JVM GC time is very high. " +
          "Other Garbage Collectors can be used for better performance.")
      }
    }
  }

  private def recommendAQEProperties(): Unit = {
    val aqeEnabled = getPropertyValue("spark.sql.adaptive.enabled")
      .getOrElse("false").toLowerCase
    if (aqeEnabled == "false") {
      appendComment(commentsForMissingProps("spark.sql.adaptive.enabled"))
    }
    appInfoProvider.getSparkVersion match {
      case Some(version) =>
        if (ToolUtils.isSpark320OrLater(version)) {
          // AQE configs changed in 3.2.0
          if (getPropertyValue("spark.sql.adaptive.coalescePartitions.minPartitionSize").isEmpty) {
            // the default is 1m, but 4m is slightly better for the GPU as we have a higher
            // per task overhead
            appendRecommendation("spark.sql.adaptive.coalescePartitions.minPartitionSize", "4m")
          }
        } else {
          if (getPropertyValue("spark.sql.adaptive.coalescePartitions.minPartitionNum").isEmpty) {
            // The ideal setting is the total parallelism of the cluster
            val numCoresPerExec = calcNumExecutorCores
            val numExecutorsPerWorker = clusterProps.gpu.getCount
            val numWorkers = clusterProps.system.getNumWorkers
            if (numExecutorsPerWorker != 0 && numWorkers != 0) {
              val total = numWorkers * numExecutorsPerWorker * numCoresPerExec
              appendRecommendation("spark.sql.adaptive.coalescePartitions.minPartitionNum",
                total.toString)
            }
          }
        }
      case None =>
    }

    val advisoryPartitionSizeProperty =
      getPropertyValue("spark.sql.adaptive.advisoryPartitionSizeInBytes")
    if (appInfoProvider.getMeanInput < AQE_INPUT_SIZE_BYTES_THRESHOLD) {
      if (advisoryPartitionSizeProperty.isEmpty) {
        // The default is 64m, but 128m is slightly better for the GPU, as the GPU has sub-linear
        // scaling until it is full and 128m keeps it fuller. Too large can be slightly
        // problematic because this is the compressed shuffle size.
        appendRecommendation("spark.sql.adaptive.advisoryPartitionSizeInBytes", "128m")
      }
    }
    if (appInfoProvider.getMeanInput > AQE_INPUT_SIZE_BYTES_THRESHOLD &&
      appInfoProvider.getMeanShuffleRead > AQE_SHUFFLE_READ_BYTES_THRESHOLD) {
      // AQE Recommendations for large input and large shuffle reads
      platform.getGpuOrDefault.getAdvisoryPartitionSizeInBytes.foreach { size =>
        appendRecommendation("spark.sql.adaptive.advisoryPartitionSizeInBytes", size)
      }
      val initialPartitionNumProperty =
        getPropertyValue("spark.sql.adaptive.coalescePartitions.initialPartitionNum").map(_.toInt)
      if (initialPartitionNumProperty.getOrElse(0) <= AQE_MIN_INITIAL_PARTITION_NUM) {
        platform.getGpuOrDefault.getInitialPartitionNum.foreach { initialPartitionNum =>
          appendRecommendation(
            "spark.sql.adaptive.coalescePartitions.initialPartitionNum", initialPartitionNum)
        }
      }
      // We need to set this to false, else Spark ignores the target size specified by
      // spark.sql.adaptive.advisoryPartitionSizeInBytes.
      // Reference: https://spark.apache.org/docs/latest/sql-performance-tuning.html
      appendRecommendation("spark.sql.adaptive.coalescePartitions.parallelismFirst", "false")
    }

    // TODO - can we set spark.sql.autoBroadcastJoinThreshold ???
    val autoBroadcastJoinThresholdProperty =
      getPropertyValue("spark.sql.adaptive.autoBroadcastJoinThreshold").map(StringUtils.convertToMB)
    if (autoBroadcastJoinThresholdProperty.isEmpty) {
      appendComment("'spark.sql.adaptive.autoBroadcastJoinThreshold' was not set.")
    } else if (autoBroadcastJoinThresholdProperty.get >
        StringUtils.convertToMB(AQE_AUTOBROADCAST_JOIN_THRESHOLD)) {
      appendComment("Setting 'spark.sql.adaptive.autoBroadcastJoinThreshold' > " +
        s"$AQE_AUTOBROADCAST_JOIN_THRESHOLD could lead to performance\n" +
        "  regression. Should be set to a lower number.")
    }
  }

  /**
   * Checks the system properties and gives feedback to the user.
   * For example, file.encoding=UTF-8 is required for some ops like GpuRegEX.
   */
  private def recommendSystemProperties(): Unit = {
    appInfoProvider.getSystemProperty("file.encoding").collect {
      case encoding if !ToolUtils.isFileEncodingRecommended(encoding) =>
        appendComment(s"file.encoding should be [${ToolUtils.SUPPORTED_ENCODINGS.mkString}]" +
            " because GPU only supports the charset when using some expressions.")
    }
  }

  /**
   * Check the classpath entries with the following rules:
   * 1- If ".*rapids-4-spark.*jar" is missing, add a comment that the latest jar should be
   *    included in the classpath unless it is bundled as part of the Spark distribution.
   * 2- If there is more than one entry for ".*rapids-4-spark.*jar", add a comment that
   *    there should be only one jar on the classpath.
   * 3- If there are cudf jars, ignore them for now.
   * 4- If there is a newer release, recommend it to the user.
   */
  private def recommendClassPathEntries(): Unit = {
    val missingRapidsJarsEntry = classPathComments("rapids.jars.missing")
    val multipleRapidsJarsEntry = classPathComments("rapids.jars.multiple")

    appInfoProvider.getRapidsJars match {
      case Seq() =>
        // No rapids jars
        appendComment(missingRapidsJarsEntry)
      case s: Seq[String] =>
        s.flatMap(e => pluginJarRegEx.findAllMatchIn(e).map(_.group(1))) match {
          case Seq() => appendComment(missingRapidsJarsEntry)
          case v: Seq[String] if v.length > 1 =>
            val comment = s"$multipleRapidsJarsEntry [${v.mkString(", ")}]"
            appendComment(comment)
          case Seq(jarVer) =>
            // compare jarVersion to the latest release
            val latestPluginVersion = WebCrawlerUtil.getLatestPluginRelease
            latestPluginVersion match {
              case Some(ver) =>
                if (ToolUtils.compareVersions(jarVer, ver) < 0) {
                  val jarURL = WebCrawlerUtil.getPluginMvnDownloadLink(ver)
                  appendComment(
                    "A newer RAPIDS Accelerator for Apache Spark plugin is available:\n" +
                      s"  $jarURL\n" +
                      s"  Version used in application is $jarVer.")
                }
              case None =>
                logError("Could not pull the latest release of RAPIDS-plugin jar.")
                val pluginRepoUrl = WebCrawlerUtil.getMVNArtifactURL("rapids.plugin")
                appendComment(
                  "Failed to validate the latest release of Apache Spark plugin.\n" +
                    s"  Verify that the version used in application ($jarVer) is the latest on:\n" +
                    s"  $pluginRepoUrl")

            }
        }
    }
  }

  /**
   * Calculate max partition bytes using the max task input size and the existing setting
   * for maxPartitionBytes. Note that this does not apply the same way to Iceberg.
   * The max bytes here does not distinguish between GPU and CPU reads, so we could
   * improve that in the future.
   * E.g.,
   * MIN_PARTITION_BYTES_RANGE = 128m, MAX_PARTITION_BYTES_RANGE = 256m
   * (1) Input:  maxPartitionBytes = 512m
   *             taskInputSize = 12m
   *     Output: newMaxPartitionBytes = 512m * (128m/12m) = 4g (hit max value)
   * (2) Input:  maxPartitionBytes = 2g
   *             taskInputSize = 512m,
   *     Output: newMaxPartitionBytes = 2g / (512m/128m) = 512m
   */
  private def calculateMaxPartitionBytes(maxPartitionBytes: String): String = {
    // AutoTuner only supports a single app right now, so we get whatever value is here
    val inputBytesMax = appInfoProvider.getMaxInput / 1024 / 1024
    val maxPartitionBytesNum = StringUtils.convertToMB(maxPartitionBytes)
    if (inputBytesMax == 0.0) {
      maxPartitionBytesNum.toString
    } else {
      if (inputBytesMax > 0 &&
        inputBytesMax < MIN_PARTITION_BYTES_RANGE_MB) {
        // Increase partition size
        val calculatedMaxPartitionBytes = Math.min(
          maxPartitionBytesNum *
            (MIN_PARTITION_BYTES_RANGE_MB / inputBytesMax),
          MAX_PARTITION_BYTES_BOUND_MB)
        calculatedMaxPartitionBytes.toLong.toString
      } else if (inputBytesMax > MAX_PARTITION_BYTES_RANGE_MB) {
        // Decrease partition size
        val calculatedMaxPartitionBytes = Math.min(
          maxPartitionBytesNum /
            (inputBytesMax / MAX_PARTITION_BYTES_RANGE_MB),
          MAX_PARTITION_BYTES_BOUND_MB)
        calculatedMaxPartitionBytes.toLong.toString
      } else {
        // Do not recommend maxPartitionBytes
        null
      }
    }
  }

  /**
   * Recommendation for 'spark.rapids.file.cache' based on read characteristics of job.
   */
  private def recommendFileCache(): Unit = {
    if (appInfoProvider.getDistinctLocationPct < DEF_DISTINCT_READ_THRESHOLD
        && appInfoProvider.getRedundantReadSize > DEF_READ_SIZE_THRESHOLD) {
      appendRecommendation("spark.rapids.filecache.enabled", "true")
      appendComment("Enable file cache only if Spark local disk bandwidth is > 1 GB/s" +
        " and you have sufficient disk space available to fit both cache and normal Spark" +
        " temporary data.")
    }
  }

  /**
   * Recommendation for 'spark.sql.files.maxPartitionBytes' based on input size for each task.
   * Note that the logic can be disabled by adding the property to "limitedLogicRecommendations"
   * which is one of the arguments of [[getRecommendedProperties]].
   */
  private def recommendMaxPartitionBytes(): Unit = {
    val maxPartitionProp =
      getPropertyValue("spark.sql.files.maxPartitionBytes").getOrElse(MAX_PARTITION_BYTES)
    val recommended =
      if (isCalculationEnabled("spark.sql.files.maxPartitionBytes")) {
        calculateMaxPartitionBytes(maxPartitionProp)
      } else {
        s"${StringUtils.convertToMB(maxPartitionProp)}"
      }
    appendRecommendationForMemoryMB("spark.sql.files.maxPartitionBytes", recommended)
  }

  /**
   * Recommendations for 'spark.sql.shuffle.partitions' based on spills and skew in shuffle stages.
   * Note that the logic can be disabled by adding the property to "limitedLogicRecommendations"
   * which is one of the arguments of [[getRecommendedProperties]].
   */
  def recommendShufflePartitions(): Unit = {
    val lookup = "spark.sql.shuffle.partitions"
    var shufflePartitions =
      getPropertyValue(lookup).getOrElse(DEF_SHUFFLE_PARTITIONS).toInt
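    // e.g., the default of 200 partitions is doubled to 400 below when spilling without skew
    // is detected in shuffle stages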
    val shuffleStagesWithPosSpilling = appInfoProvider.getShuffleStagesWithPosSpilling

    // TODO: Need to look at other metrics for GPU spills (DEBUG mode), and batch sizes metric
    if (isCalculationEnabled(lookup)) {
      if (shuffleStagesWithPosSpilling.nonEmpty) {
        val shuffleSkewStages = appInfoProvider.getShuffleSkewStages
        if (shuffleSkewStages.exists(id => shuffleStagesWithPosSpilling.contains(id))) {
          appendOptionalComment(lookup,
            "Shuffle skew exists (when task's Shuffle Read Size > 3 * Avg Stage-level size) in\n" +
            s"  stages with spilling. Increasing shuffle partitions is not recommended in this\n" +
            s"  case since keys will still hash to the same task.")
        } else {
          shufflePartitions *= DEF_SHUFFLE_PARTITION_MULTIPLIER
          // Could be memory instead of partitions
          appendOptionalComment(lookup,
            s"'$lookup' should be increased since spilling occurred in shuffle stages.")
        }
      }
    }
    // If the user has enabled AQE auto shuffle, the auto-tuner should recommend disabling this
    // feature before recommending shuffle partitions.
    val aqeAutoShuffle = getPropertyValue("spark.databricks.adaptive.autoOptimizeShuffle.enabled")
    if (aqeAutoShuffle.isDefined) {
      appendRecommendation("spark.databricks.adaptive.autoOptimizeShuffle.enabled", "false")
    }
    appendRecommendation("spark.sql.shuffle.partitions", s"$shufflePartitions")
  }

  /**
   * Analyzes unsupported driver logs and generates recommendations for configuration properties.
   */
  private def recommendFromDriverLogs(): Unit = {
    // Iterate through unsupported operators' reasons and check for matching properties
    driverInfoProvider.getUnsupportedOperators.map(_.reason).foreach { operatorReason =>
      recommendationsFromDriverLogs.collect {
        case (config, recommendedValue) if operatorReason.contains(config) =>
          appendRecommendation(config, recommendedValue)
          appendComment(commentForExperimentalConfig(config))
      }
    }
  }

  private def recommendPluginProps(): Unit = {
    val isPluginLoaded = getPropertyValue("spark.plugins") match {
      case Some(f) => f.contains("com.nvidia.spark.SQLPlugin")
      case None => false
    }
    val rapidsEnabled = getPropertyValue("spark.rapids.sql.enabled") match {
      case Some(f) => f.toBoolean
      case None => true
    }
    if (!rapidsEnabled) {
      appendRecommendation("spark.rapids.sql.enabled", "true")
    }
    if (!isPluginLoaded) {
      appendComment("RAPIDS Accelerator for Apache Spark jar is missing in \"spark.plugins\". " +
        "Please refer to " +
        "https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html")
    }
  }

  def appendOptionalComment(lookup: String, comment: String): Unit = {
    if (!skippedRecommendations.contains(lookup)) {
      appendComment(comment)
    }
  }

  def appendComment(comment: String): Unit = {
    comments += comment
  }

  def convertClusterPropsToString(): String = {
    clusterProps.toString
  }

  /**
   * Add default comments for missing properties except the ones
   * which should be skipped.
   */
  private def addDefaultComments(): Unit = {
    appendComment("Could not infer the cluster configuration, recommendations " +
      "are generated using default values!")
    commentsForMissingProps.foreach {
      case (key, value) =>
        if (!skippedRecommendations.contains(key)) {
          appendComment(value)
        }
    }
  }

  private def addMissingMemoryComments(): Unit = {
    commentsForMissingMemoryProps.foreach {
      case (key, value) =>
        if (!skippedRecommendations.contains(key)) {
          appendComment(value)
        }
    }
  }

  private def toCommentProfileResult: Seq[RecommendedCommentResult] = {
    comments.map(RecommendedCommentResult).sortBy(_.comment)
  }

  private def toRecommendationsProfileResult: Seq[RecommendedPropertyResult] = {
    val finalRecommendations =
      recommendations.filter(elem => elem._2.isValid(filterByUpdatedPropertiesEnabled))
    finalRecommendations.collect {
      case (key, record) => RecommendedPropertyResult(key, record.recommended.get)
    }.toSeq.sortBy(_.property)
  }

  /**
   * The AutoTuner loads the Spark properties from either the ClusterProperties or the event log.
   * 1- Runs the calculation for each criterion and saves it as a [[RecommendationEntry]].
   * 2- The final list of recommendations includes any [[RecommendationEntry]] that has a
   *    recommendation different from the original property.
   * 3- Null values are excluded.
   * 4- A comment is added for each missing property in the Spark properties.
   *
   * @param skipList a list of properties to be skipped. If none, all recommendations are
   *                 returned. Note that the recommendations will be computed anyway internally
   *                 in case there are dependencies between the recommendations.
   *                 Default is empty.
   * @param limitedLogicList a list of properties that will do simple recommendations based on
   *                         static default values.
   * @param showOnlyUpdatedProps When enabled, the profiler recommendations should only include
   *                             updated settings.
   * @return pair of recommendations and comments. Both sequences can be empty.
   */
  def getRecommendedProperties(
      skipList: Option[Seq[String]] = Some(Seq()),
      limitedLogicList: Option[Seq[String]] = Some(Seq()),
      showOnlyUpdatedProps: Boolean = true):
      (Seq[RecommendedPropertyResult], Seq[RecommendedCommentResult]) = {
    if (appInfoProvider.isAppInfoAvailable) {
      limitedLogicList.foreach(limitedSeq => limitedLogicRecommendations ++= limitedSeq)
      skipList.foreach(skipSeq => skippedRecommendations ++= skipSeq)
      skippedRecommendations ++= platform.recommendationsToExclude
      initRecommendations()
      // update GPU device of platform based on cluster properties if it is not already set.
      // if the GPU device cannot be inferred from cluster properties, do not make any updates.
      if (platform.gpuDevice.isEmpty && !clusterProps.isEmpty && !clusterProps.gpu.isEmpty) {
        GpuDevice.createInstance(clusterProps.gpu.getName)
          .foreach(platform.setGpuDevice)
        platform.setNumGpus(clusterProps.gpu.getCount)
      }
      // Configuring the GPU recommended instance type NEEDS to happen before any of the other
      // recommendations as they are based on the instance type.
      configureGPURecommendedInstanceType
      configureClusterPropDefaults
      // Makes recommendations based on information extracted from the AppInfoProvider
      filterByUpdatedPropertiesEnabled = showOnlyUpdatedProps
      recommendPluginProps
      calculateJobLevelRecommendations()
      calculateClusterLevelRecommendations()

      // add all platform specific recommendations
      platform.recommendationsToInclude.foreach {
        case (property, value) => appendRecommendation(property, value)
      }
    }
    recommendFromDriverLogs()
    (toRecommendationsProfileResult, toCommentProfileResult)
  }

  // Process the property keys. This is needed in case there are some properties that should not
  // be listed in the final combined results. For example:
  // - The UUID of the app is not part of the submitted spark configurations
  // - make sure that we exclude the skipped list
  private def processPropKeys(
      srcMap: collection.Map[String, String]): collection.Map[String, String] = {
    (srcMap -- skippedRecommendations) -- filteredPropKeys
  }

  // Combines the original Spark properties with the recommended ones.
  def combineSparkProperties(
      recommendedSet: Seq[RecommendedPropertyResult]): Seq[RecommendedPropertyResult] = {
    // get the original properties after filtering and removing unnecessary keys
    val originalPropsFiltered = processPropKeys(getAllProperties)
    // Combine the original properties with the recommended properties.
    // The recommendations should always override the original ones
    val combinedProps = (originalPropsFiltered
      ++ recommendedSet.map(r => r.property -> r.value).toMap).toSeq.sortBy(_._1)
    combinedProps.collect {
      case (pK, pV) => RecommendedPropertyResult(pK, pV)
    }
  }
}

object AutoTuner extends Logging {
  // Maximum number of concurrent tasks to run on the GPU
  val MAX_CONC_GPU_TASKS = 4L
  // Amount of CPU memory to reserve for system overhead (kernel, buffers, etc.) in megabytes
  val DEF_SYSTEM_RESERVE_MB: Long = 2 * 1024L
  // Fraction of the executor JVM heap size that should be additionally reserved
  // for JVM off-heap overhead (thread stacks, native libraries, etc.)
  val DEF_HEAP_OVERHEAD_FRACTION = 0.1
  val MAX_JVM_GCTIME_FRACTION = 0.3
  // Minimum amount of JVM heap memory to request per CPU core in megabytes
  val MIN_HEAP_PER_CORE_MB: Long = 750L
  // Ideal amount of JVM heap memory to request per CPU core in megabytes
  val DEF_HEAP_PER_CORE_MB: Long = 2 * 1024L
  // Minimum amount of pinned memory to use per executor in MB
  val MIN_PINNED_MEMORY_MB: Long = 1024L
  val MIN_SPILL_MEMORY_MB: Long = MIN_PINNED_MEMORY_MB
  // Maximum amount of pinned memory to use per executor in MB
  val MAX_PINNED_MEMORY_MB: Long = 4 * 1024L
  // Default pinned memory to use per executor in MB
  val DEF_PINNED_MEMORY_MB: Long = 2 * 1024L
  // The pageable pool doesn't exist anymore, but by default we don't have any hard limits, so
  // leave this for now to account for off-heap memory usage.
  val DEF_PAGEABLE_POOL_MB: Long = 2 * 1024L
  // value in MB
  val MIN_PARTITION_BYTES_RANGE_MB = 128L
  // value in MB
  val MAX_PARTITION_BYTES_RANGE_MB = 256L
  // value in MB
  val MAX_PARTITION_BYTES_BOUND_MB: Int = 4 * 1024
  val MAX_PARTITION_BYTES: String = "512m"
  val DEF_SHUFFLE_PARTITIONS = "200"
  val DEF_SHUFFLE_PARTITION_MULTIPLIER: Int = 2
  // GPU count defaults to 1 if it is missing.
  val DEF_WORKER_GPU_COUNT = 1
  // Default Number of Workers 1
  val DEF_NUM_WORKERS = 1
  // Default distinct read location threshold is 50%
  val DEF_DISTINCT_READ_THRESHOLD = 50.0
  // Default file cache size minimum is 100 GB
  val DEF_READ_SIZE_THRESHOLD = 100 * 1024L * 1024L * 1024L
  val DEFAULT_WORKER_INFO_PATH = "./worker_info.yaml"
  val SUPPORTED_SIZE_UNITS: Seq[String] = Seq("b", "k", "m", "g", "t", "p")
  private val DOC_URL: String = "https://nvidia.github.io/spark-rapids/docs/" +
    "additional-functionality/advanced_configs.html#advanced-configuration"
  // Value of batchSizeBytes that performs best overall
  private val BATCH_SIZE_BYTES = 2147483647
  private val AQE_INPUT_SIZE_BYTES_THRESHOLD = 35000
  private val AQE_SHUFFLE_READ_BYTES_THRESHOLD = 50000
  private val AQE_MIN_INITIAL_PARTITION_NUM = 200
  private val AQE_AUTOBROADCAST_JOIN_THRESHOLD = "100m"
  // Set of spark properties to be filtered out from the combined Spark properties.
  private val filteredPropKeys: Set[String] = Set(
    "spark.app.id"
  )

  val commentsForMissingMemoryProps: Map[String, String] = Map(
    "spark.executor.memory" ->
      "'spark.executor.memory' should be set to at least 2GB/core.",
    "spark.rapids.memory.pinnedPool.size" ->
      s"'spark.rapids.memory.pinnedPool.size' should be set to ${DEF_PINNED_MEMORY_MB}m.")

  val commentsForMissingProps: Map[String, String] = Map(
    "spark.executor.instances" ->
      "'spark.executor.instances' should be set to (gpuCount * numWorkers).",
    "spark.task.resource.gpu.amount" ->
      "'spark.task.resource.gpu.amount' should be set to Min(1, (gpuCount / numCores)).",
    "spark.rapids.sql.concurrentGpuTasks" ->
      s"'spark.rapids.sql.concurrentGpuTasks' should be set to Min(4, (gpuMemory / 7.5G)).",
    "spark.rapids.sql.enabled" ->
      "'spark.rapids.sql.enabled' should be true to enable SQL operations on the GPU.",
    "spark.sql.adaptive.enabled" ->
      "'spark.sql.adaptive.enabled' should be enabled for better performance."
  ) ++ commentsForMissingMemoryProps

  val recommendationsTarget: Seq[String] = Seq[String](
    "spark.executor.instances",
    "spark.rapids.sql.enabled",
    "spark.executor.cores",
    "spark.executor.memory",
    "spark.rapids.sql.concurrentGpuTasks",
    "spark.task.resource.gpu.amount",
    "spark.sql.shuffle.partitions",
    "spark.sql.files.maxPartitionBytes",
    "spark.rapids.memory.pinnedPool.size",
    "spark.executor.memoryOverhead",
    "spark.executor.memoryOverheadFactor",
    "spark.kubernetes.memoryOverheadFactor")

  val classPathComments: Map[String, String] = Map(
    "rapids.jars.missing" ->
      ("RAPIDS Accelerator for Apache Spark plugin jar is missing\n" +
        "  from the classpath entries.\n" +
        "  If the Spark RAPIDS jar is being bundled with your\n" +
        "  Spark distribution, this step is not needed."),
    "rapids.jars.multiple" ->
      ("Multiple RAPIDS Accelerator for Apache Spark plugin jar\n" +
        "  exist on the classpath.\n" +
        "  Make sure to keep only a single jar."),
    "rapids.shuffle.jars" ->
      ("The RAPIDS Shuffle Manager requires spark.driver.extraClassPath\n" +
        "  and spark.executor.extraClassPath settings to include the\n" +
        "  path to the Spark RAPIDS plugin jar.\n" +
        "  If the Spark RAPIDS jar is being bundled with your Spark\n" +
        "  distribution, this step is not needed.")
  )

  // Recommended values for specific unsupported configurations
  private val recommendationsFromDriverLogs: Map[String, String] = Map(
    "spark.rapids.sql.incompatibleDateFormats.enabled" -> "true"
  )

  def commentForExperimentalConfig(config: String): String = {
    s"Using $config does not guarantee to produce the same results as CPU. " +
      s"Please refer to $DOC_URL."
  }

  // the plugin jar is in the form of rapids-4-spark_scala_binary-(version)-*.jar
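  // e.g., "rapids-4-spark_2.12-24.02.0.jar" matches below and captures version "24.02.0"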
  val pluginJarRegEx: Regex = "rapids-4-spark_\\d\\.\\d+-(\\d{2}\\.\\d{2}\\.\\d+).*\\.jar".r

  private def handleException(
      ex: Throwable,
      appInfo: AppSummaryInfoBaseProvider,
      platform: Platform,
      driverInfoProvider: DriverLogInfoProvider): AutoTuner = {
    logError("Exception: " + ex.getStackTrace.mkString("Array(", ", ", ")"))
    val tuning = new AutoTuner(new ClusterProperties(), appInfo, platform, driverInfoProvider)
    val msg = ex match {
      case cEx: ConstructorException => cEx.getContext
      case _ => if (ex.getCause != null) ex.getCause.toString else ex.toString
    }
    tuning.appendComment(msg)
    tuning
  }

  def loadClusterPropertiesFromContent(clusterProps: String): Option[ClusterProperties] = {
    val representer = new Representer(new DumperOptions())
    representer.getPropertyUtils.setSkipMissingProperties(true)
    val constructor = new Constructor(classOf[ClusterProperties], new LoaderOptions())
    val yamlObjNested = new Yaml(constructor, representer)
    val loadedClusterProps = yamlObjNested.load(clusterProps).asInstanceOf[ClusterProperties]
    if (loadedClusterProps != null && loadedClusterProps.softwareProperties == null) {
      logInfo("softwareProperties is empty from input worker_info file")
      loadedClusterProps.softwareProperties = new util.LinkedHashMap[String, String]()
    }
    Option(loadedClusterProps)
  }

  def loadClusterProps(filePath: String): Option[ClusterProperties] = {
    val path = new Path(filePath)
    var fsIs: FSDataInputStream = null
    try {
      val fs = FileSystem.get(path.toUri, new Configuration())
      fsIs = fs.open(path)
      val reader = new BufferedReader(new InputStreamReader(fsIs))
      val fileContent = Stream.continually(reader.readLine()).takeWhile(_ != null).mkString("\n")
      loadClusterPropertiesFromContent(fileContent)
    } catch {
      // In case of missing file/malformed for cluster properties, default properties are used.
      // Hence, catching and logging as a warning
      case _: IOException =>
        logWarning(s"No file found for input workerInfo path: $filePath")
        None
    } finally {
      if (fsIs != null) {
        fsIs.close()
      }
    }
  }

  /**
   * Similar to [[buildAutoTuner]] but it allows constructing the AutoTuner without an
   * existing file. This can be used in testing.
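   *
   * A minimal usage sketch (assuming an [[AppSummaryInfoBaseProvider]] named `appProvider`
   * constructed elsewhere; the YAML keys follow the [[ClusterProperties]] bean names):
   * {{{
   *   val workerInfo =
   *     """|system:
   *        |  numCores: 64
   *        |  memory: 512gb
   *        |  numWorkers: 4
   *        |gpu:
   *        |  count: 8
   *        |  memory: 32gb
   *        |  name: NVIDIA V100
   *        |""".stripMargin
   *   val autoTuner = AutoTuner.buildAutoTunerFromProps(workerInfo, appProvider)
   *   val (recommendations, comments) = autoTuner.getRecommendedProperties()
   * }}}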
   *
   * @param clusterProps the cluster properties as string.
   * @param singleAppProvider the wrapper implementation that accesses the properties of the profile
   *                          results.
   * @param platform represents the environment created as a target for recommendations.
   * @param driverInfoProvider wrapper implementation that accesses the information from driver log.
   * @return a new AutoTuner object.
   */
  def buildAutoTunerFromProps(
      clusterProps: String,
      singleAppProvider: AppSummaryInfoBaseProvider,
      platform: Platform = PlatformFactory.createInstance(clusterProperties = None),
      driverInfoProvider: DriverLogInfoProvider = BaseDriverLogInfoProvider.noneDriverLog
  ): AutoTuner = {
    try {
      val clusterPropsOpt = loadClusterPropertiesFromContent(clusterProps)
      new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()), singleAppProvider, platform,
        driverInfoProvider)
    } catch {
      case NonFatal(e) =>
        handleException(e, singleAppProvider, platform, driverInfoProvider)
    }
  }

  def buildAutoTuner(
      workerInfoFilePath: String,
      singleAppProvider: AppSummaryInfoBaseProvider,
      platform: Platform = PlatformFactory.createInstance(clusterProperties = None),
      driverInfoProvider: DriverLogInfoProvider = BaseDriverLogInfoProvider.noneDriverLog
  ): AutoTuner = {
    try {
      val clusterPropsOpt = loadClusterProps(workerInfoFilePath)
      val autoT = new AutoTuner(clusterPropsOpt.getOrElse(new ClusterProperties()),
        singleAppProvider, platform, driverInfoProvider)
      autoT
    } catch {
      case NonFatal(e) =>
        handleException(e, singleAppProvider, platform, driverInfoProvider)
    }
  }

  /**
   * Given the Spark property "spark.master", checks whether memoryOverhead should be
   * enabled/disabled. For Spark Standalone mode, the memoryOverhead property is skipped.
   * @param confValue the value of property "spark.master"
   * @return false if the value indicates Spark Standalone mode; true if the value is not defined
   *         or is set for YARN/Kubernetes/Mesos
   */
  def enableMemoryOverheadRecommendation(confValue: Option[String]): Boolean = {
    confValue match {
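      // e.g., "spark://master:7077" indicates Spark Standalone mode, so the recommendation
      // is skipped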
      case Some(sparkMaster) if sparkMaster.startsWith("spark:") => false
      case _ => true
    }
  }
}



