All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.Plugins.scala Maven / Gradle / Ivy

There is a newer version: 0.4.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.spark

import java.{util => ju}
import java.util.Collections

import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, PluginContext, SparkPlugin}
import org.apache.spark.comet.shims.ShimCometDriverPlugin
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config.{EXECUTOR_MEMORY, EXECUTOR_MEMORY_OVERHEAD}
import org.apache.spark.sql.internal.StaticSQLConf

import org.apache.comet.{CometConf, CometSparkSessionExtensions}

/**
 * Comet driver plugin. This class is loaded by Spark's plugin framework. It will be instantiated
 * on driver side only. It will update the SparkConf with the extra configuration provided by
 * Comet, e.g., Comet memory configurations.
 *
 * Note that `SparkContext.conf` is spark package only. So this plugin must be in spark package.
 * Although `SparkContext.getConf` is public, it returns a copy of the SparkConf, so it cannot
 * actually change Spark configs at runtime.
 *
 * To enable this plugin, set the config "spark.plugins" to `org.apache.spark.CometPlugin`.
 */
class CometDriverPlugin extends DriverPlugin with Logging with ShimCometDriverPlugin {
  override def init(sc: SparkContext, pluginContext: PluginContext): ju.Map[String, String] = {
    logInfo("CometDriverPlugin init")

    // register CometSparkSessionExtensions if it isn't already registered
    CometDriverPlugin.registerCometSessionExtension(sc.conf)

    if (shouldOverrideMemoryConf(sc.getConf)) {
      val execMemOverhead = if (sc.getConf.contains(EXECUTOR_MEMORY_OVERHEAD.key)) {
        sc.getConf.getSizeAsMb(EXECUTOR_MEMORY_OVERHEAD.key)
      } else {
        // By default, executorMemory * spark.executor.memoryOverheadFactor, with minimum of 384MB
        val executorMemory = sc.getConf.getSizeAsMb(EXECUTOR_MEMORY.key)
        val memoryOverheadFactor = getMemoryOverheadFactor(sc.getConf)
        val memoryOverheadMinMib = getMemoryOverheadMinMib(sc.getConf)

        Math.max((executorMemory * memoryOverheadFactor).toLong, memoryOverheadMinMib)
      }

      val cometMemOverhead = CometSparkSessionExtensions.getCometMemoryOverheadInMiB(sc.getConf)
      sc.conf.set(EXECUTOR_MEMORY_OVERHEAD.key, s"${execMemOverhead + cometMemOverhead}M")
      val newExecMemOverhead = sc.getConf.getSizeAsMb(EXECUTOR_MEMORY_OVERHEAD.key)

      logInfo(s"""
         Overriding Spark memory configuration for Comet:
           - Spark executor memory overhead: ${execMemOverhead}MB
           - Comet memory overhead: ${cometMemOverhead}MB
           - Updated Spark executor memory overhead: ${newExecMemOverhead}MB
         """)
    }

    Collections.emptyMap[String, String]
  }

  override def receive(message: Any): AnyRef = super.receive(message)

  override def shutdown(): Unit = {
    logInfo("CometDriverPlugin shutdown")

    super.shutdown()
  }

  override def registerMetrics(appId: String, pluginContext: PluginContext): Unit =
    super.registerMetrics(appId, pluginContext)

  /**
   * Whether we should override Spark memory configuration for Comet. This only returns true when
   * Comet native execution is enabled and/or Comet shuffle is enabled
   */
  private def shouldOverrideMemoryConf(conf: SparkConf): Boolean = {
    conf.getBoolean(CometConf.COMET_ENABLED.key, true) && (
      conf.getBoolean(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key, false) ||
        conf.getBoolean(CometConf.COMET_EXEC_ENABLED.key, false)
    )
  }
}

object CometDriverPlugin extends Logging {
  def registerCometSessionExtension(conf: SparkConf): Unit = {
    val extensionKey = StaticSQLConf.SPARK_SESSION_EXTENSIONS.key
    val extensionClass = classOf[CometSparkSessionExtensions].getName
    val extensions = conf.get(extensionKey, "")
    if (extensions.isEmpty) {
      logInfo(s"Setting $extensionKey=$extensionClass")
      conf.set(extensionKey, extensionClass)
    } else {
      val currentExtensions = extensions.split(",").map(_.trim)
      if (!currentExtensions.contains(extensionClass)) {
        val newValue = s"$extensions,$extensionClass"
        logInfo(s"Setting $extensionKey=$newValue")
        conf.set(extensionKey, newValue)
      }
    }
  }
}

/**
 * The Comet plugin for Spark. To enable this plugin, set the config "spark.plugins" to
 * `org.apache.spark.CometPlugin`
 */
class CometPlugin extends SparkPlugin with Logging {
  override def driverPlugin(): DriverPlugin = new CometDriverPlugin

  override def executorPlugin(): ExecutorPlugin = null
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy