All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.metrics.InputMetricsUpdater.scala Maven / Gradle / Ivy

The newest version!
package org.apache.spark.metrics

import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.LongAdder

import scala.concurrent.duration.{FiniteDuration, _}
import org.apache.spark.TaskContext
import org.apache.spark.executor.InputMetrics
import org.apache.spark.util.ThreadUtils
import com.datastax.spark.connector.cql._

import com.datastax.oss.driver.api.core.cql.Row
import com.datastax.spark.connector.rdd.ReadConf

/** A trait that provides a method to update read metrics which are collected for connector related tasks.
  * The appropriate instance is created by the companion object.
  *
  * Instances of this trait are not thread-safe. They do not need to because a single instance should be
  * created for each Cassandra read task. This remains valid as long as Cassandra read tasks are
  * single-threaded.
  */
sealed trait InputMetricsUpdater extends MetricsUpdater {
  /** Updates the metrics being collected for the connector after reading each single row. This method
    * is not thread-safe.
    *
    * @param row the row which has just been read
    */
  def updateMetrics(row: Row): Row = row

  /** For internal use only */
  private[metrics] def updateTaskMetrics(count: Long, dataLength: Long): Unit = {}

  /** For internal use only */
  private[metrics] def updateCodahaleMetrics(count: Long, dataLength: Long): Unit = {}
}

object InputMetricsUpdater {
  val DefaultInterval: FiniteDuration = 1.second

  /** Creates the appropriate instance of `InputMetricsUpdater`.
    *
    * If [[com.datastax.spark.connector.rdd.ReadConf.taskMetricsEnabled ReadConf.taskMetricsEnabled]]
    * is `true`, the created instance will be updating task metrics so
    * that Spark will report them in the UI. Remember that this is supported for Spark 1.2+.
    *
    * If [[org.apache.spark.metrics.CassandraConnectorSource CassandraConnectorSource]]
    * is registered in Spark metrics system, the created instance will be
    * updating the included Codahale metrics. In order to register `CassandraConnectorSource` you need
    * to add it to the metrics configuration file.
    *
    * @param taskContext task context of a task for which this metrics updater is created
    * @param readConf read configuration
    * @param interval allows to update Codahale metrics every the given interval in order to
    *                 decrease overhead
    */
  def apply(
    taskContext: TaskContext,
    readConf: ReadConf,
    interval: FiniteDuration = DefaultInterval
  ): InputMetricsUpdater = {

    val source = MetricsUpdater.getSource(taskContext)

    if (readConf.taskMetricsEnabled) {
      val tm = taskContext.taskMetrics()
      val inputMetrics = tm.inputMetrics

      if (source.isDefined)
        new CodahaleAndTaskMetricsUpdater(interval, source.get, inputMetrics)
      else
        new TaskMetricsUpdater(interval, inputMetrics)

    } else {
      if (source.isDefined)
        new CodahaleMetricsUpdater(interval, source.get)
      else
        new DummyInputMetricsUpdater()
    }
  }

  private abstract class CumulativeInputMetricsUpdater(interval: FiniteDuration)
    extends InputMetricsUpdater with Timer {

    require(interval.length > 0)

    private val cnt = new LongAdder()
    private val dataLength = new LongAdder()
    private val scheduledExecutor = ThreadUtils.newDaemonSingleThreadScheduledExecutor("input-metrics-updater")

    private val updateMetricsCmd = new Runnable {
      private var lastCnt: Long = 0
      private var lastDataLength: Long = 0
      
      override def run(): Unit = {
        // Codahale metrics introduce some overhead so in order to minimize it we can update them not
        // that often
        val (_cnt, _dataLength) = (cnt.sum(), dataLength.sum())
        updateCodahaleMetrics(_cnt - lastCnt, _dataLength - lastDataLength)
        lastCnt = _cnt
        lastDataLength = _dataLength
      }
    }
    
    private val schedule = scheduledExecutor
        .scheduleAtFixedRate(updateMetricsCmd, interval.toMillis, interval.toMillis, TimeUnit.MILLISECONDS)

    override def updateMetrics(row: Row): Row = {
      cnt.increment()
      dataLength.add(getRowBinarySize(row))
      row
    }

    def finish(): Long = {
      val t = stopTimer()
      scheduledExecutor.shutdown()
      scheduledExecutor.awaitTermination(interval.toMillis, TimeUnit.MILLISECONDS)
      updateMetricsCmd.run()
      updateTaskMetrics(cnt.sum(), dataLength.sum())
      t
    }
  }

  private trait CodahaleMetricsSupport extends InputMetricsUpdater {
    val source: CassandraConnectorSource

    @inline
    override def updateCodahaleMetrics(count: Long, dataLength: Long): Unit = {
      source.readByteMeter.mark(dataLength)
      source.readRowMeter.mark(count)
    }

    val timer: com.codahale.metrics.Timer.Context = source.readTaskTimer.time()
  }

  private trait TaskMetricsSupport extends InputMetricsUpdater {
    val inputMetrics: InputMetrics

    @inline
    override def updateTaskMetrics(count: Long, dataLength: Long): Unit = {
      inputMetrics.incBytesRead(dataLength)
      inputMetrics.incRecordsRead(count)
    }
  }

  /** The implementation of [[InputMetricsUpdater]] which does not update anything. */
  private class DummyInputMetricsUpdater extends InputMetricsUpdater with SimpleTimer {
    def finish(): Long = stopTimer()
  }

  /** The implementation of [[InputMetricsUpdater]] which updates only task metrics. */
  private class TaskMetricsUpdater(interval: FiniteDuration, val inputMetrics: InputMetrics)
    extends CumulativeInputMetricsUpdater(interval) with TaskMetricsSupport with SimpleTimer

  /** The implementation of [[InputMetricsUpdater]] which updates only Codahale metrics defined in
    * [[CassandraConnectorSource]]. */
  private class CodahaleMetricsUpdater(interval: FiniteDuration, val source: CassandraConnectorSource)
    extends CumulativeInputMetricsUpdater(interval) with CodahaleMetricsSupport with CCSTimer

  /** The implementation of [[InputMetricsUpdater]] which updates both Codahale and task metrics. */
  private class CodahaleAndTaskMetricsUpdater(
      interval: FiniteDuration,
      val source: CassandraConnectorSource,
      val inputMetrics: InputMetrics)
    extends CumulativeInputMetricsUpdater(interval)
    with TaskMetricsSupport
    with CodahaleMetricsSupport
    with CCSTimer

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy