
org.apache.spark.sql.execution.metric.SQLMetrics.scala

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.metric

import org.apache.spark.util.Utils
import org.apache.spark.{Accumulable, AccumulableParam, SparkContext}

/**
 * Create a layer for specialized metrics. We cannot add `@specialized` to
 * `Accumulable/AccumulableParam` because it will break Java source compatibility.
 *
 * An implementation of SQLMetric should override `+=` and `add` to avoid boxing.
 */
private[sql] abstract class SQLMetric[R <: SQLMetricValue[T], T](
    name: String, val param: SQLMetricParam[R, T])
  extends Accumulable[R, T](param.zero, param, Some(name), true) {

  def reset(): Unit = {
    this.value = param.zero
  }
}

/**
 * Create a layer for specialized metrics. We cannot add `@specialized` to
 * `Accumulable/AccumulableParam` because it will break Java source compatibility.
 */
private[sql] trait SQLMetricParam[R <: SQLMetricValue[T], T] extends AccumulableParam[R, T] {

  /**
   * A function that defines how we aggregate the final accumulator results among all tasks,
   * and represents them as a string for a SQL physical operator (e.g. `_.sum.toString` for a
   * plain counter).
   */
  val stringValue: Seq[T] => String

  def zero: R
}

/**
 * Create a layer for specialized metrics. We cannot add `@specialized` to
 * `Accumulable/AccumulableParam` because it will break Java source compatibility.
 */
private[sql] trait SQLMetricValue[T] extends Serializable {

  def value: T

  override def toString: String = value.toString
}

/**
 * A wrapper of Long to avoid boxing and unboxing when using an Accumulator.
 */
private[sql] class LongSQLMetricValue(private var _value : Long) extends SQLMetricValue[Long] {

  def add(incr: Long): LongSQLMetricValue = {
    _value += incr
    this
  }

  // Although there is boxing here, it's fine because this is only called in SQLListener
  override def value: Long = _value
}

/**
 * A specialized long Accumulable to avoid boxing and unboxing when using Accumulator's
 * `+=` and `add`.
 */
private[sql] class LongSQLMetric private[metric](name: String, param: LongSQLMetricParam)
  extends SQLMetric[LongSQLMetricValue, Long](name, param) {

  override def +=(term: Long): Unit = {
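    // Update the task-local LongSQLMetricValue in place. The Long parameter stays primitive
    // here, so this avoids the boxing done by the generic `Accumulable.+=`; the same applies
    // to `add` below.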
    localValue.add(term)
  }

  override def add(term: Long): Unit = {
    localValue.add(term)
  }
}

private class LongSQLMetricParam(val stringValue: Seq[Long] => String, initialValue: Long)
  extends SQLMetricParam[LongSQLMetricValue, Long] {

  override def addAccumulator(r: LongSQLMetricValue, t: Long): LongSQLMetricValue = r.add(t)

  override def addInPlace(r1: LongSQLMetricValue, r2: LongSQLMetricValue): LongSQLMetricValue =
    r1.add(r2.value)

  // The per-task zero: ignore the passed-in value and start from this param's configured
  // initial value (e.g. -1 for size metrics, see the SPARK-11013 workaround below).
  override def zero(initialValue: LongSQLMetricValue): LongSQLMetricValue = zero

  override def zero: LongSQLMetricValue = new LongSQLMetricValue(initialValue)
}

private[sql] object SQLMetrics {

  private def createLongMetric(
      sc: SparkContext,
      name: String,
      stringValue: Seq[Long] => String,
      initialValue: Long): LongSQLMetric = {
    val param = new LongSQLMetricParam(stringValue, initialValue)
    val acc = new LongSQLMetric(name, param)
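    // Register with the ContextCleaner (if enabled) so the accumulator is cleaned up once it
    // is no longer referenced.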
    sc.cleaner.foreach(_.registerAccumulatorForCleanup(acc))
    acc
  }

  def createLongMetric(sc: SparkContext, name: String): LongSQLMetric = {
    createLongMetric(sc, name, _.sum.toString, 0L)
  }

  /**
   * Create a metric to report size information (total, min, med, max), such as data size or
   * spill size.
   */
  def createSizeMetric(sc: SparkContext, name: String): LongSQLMetric = {
    val stringValue = (values: Seq[Long]) => {
      // This is a workaround for SPARK-11013.
      // We use -1 as the initial value of the accumulator; if the accumulator is valid, it will
      // be updated at the end of the task and its value will be at least 0.
      val validValues = values.filter(_ >= 0)
      val Seq(sum, min, med, max) = {
        val metric = if (validValues.length == 0) {
          Seq.fill(4)(0L)
        } else {
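          // Report the sum together with the min, median (middle element of the sorted
          // per-task values) and max.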
          val sorted = validValues.sorted
          Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1))
        }
        metric.map(Utils.bytesToString)
      }
      s"\n$sum ($min, $med, $max)"
    }
    // The final result of this metric in the physical operator UI may look like:
    // data size total (min, med, max):
    // 100GB (100MB, 1GB, 10GB)
    createLongMetric(sc, s"$name total (min, med, max)", stringValue, -1L)
  }

  /**
   * A metric whose value will be ignored. Use this when we need a metric parameter but do not
   * care about the value.
   */
  val nullLongMetric = new LongSQLMetric("null", new LongSQLMetricParam(_.sum.toString, 0L))
}
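
A minimal usage sketch (not part of the original file): it shows how a metric created by
SQLMetrics.createLongMetric might be incremented inside tasks and read back on the driver.
The object name MetricsUsageExample, the local SparkContext setup, and the metric name
"number of output rows" are illustrative assumptions; the sketch lives in the same package
because the API above is private[sql], and it targets the Spark 1.5/1.6-era accumulator API
shown in this file.

package org.apache.spark.sql.execution.metric

import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical example object, not part of Spark.
object MetricsUsageExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("sql-metrics-example"))
    try {
      // Driver side: create an accumulator-backed metric (also registered for cleanup).
      val numOutputRows = SQLMetrics.createLongMetric(sc, "number of output rows")
      // Task side: the specialized `+=` takes a primitive Long, avoiding boxing.
      sc.parallelize(1 to 1000, numSlices = 4).foreach { _ => numOutputRows += 1L }
      // Driver side: `value` is the merged LongSQLMetricValue; boxing here is acceptable,
      // as noted above. This should print 1000 once the job finishes.
      println(numOutputRows.value.value)
    } finally {
      sc.stop()
    }
  }
}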



