All Downloads are FREE. Search and download functionalities are using the official Maven repository.

kamon.metric.Distribution.scala Maven / Gradle / Ivy

There is a newer version: 2.7.5
Show newest version
/*
 * Copyright 2013-2021 The Kamon Project 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package kamon.metric

import java.nio.ByteBuffer

import kamon.metric.Histogram.DistributionSnapshotBuilder
import org.HdrHistogram.{BaseLocalHdrHistogram, ZigZag}
import org.slf4j.LoggerFactory


/**
  * A distribution of values observed by an instrument. All Kamon distributions are based on the HdrHistogram and as
  * such, they represent a distribution of values within a configured range and precision. By default, all instruments
  * that generate distributions are configured to accept 1% error margin, meaning that all recorded values are adjusted
  * to the bucket whose value is up to 1% away from the original value.
  *
  * Distributions only expose data for non-empty buckets/percentiles.
  */
trait Distribution {

  /**
    * Describes the range and precision settings of the instrument from which this distribution was taken.
    */
  def dynamicRange: DynamicRange

  /**
    * Minimum value in this distribution.
    */
  def min: Long

  /**
    * Maximum value in this distribution.
    */
  def max: Long

  /**
    * Sum of all values in this distribution.
    */
  def sum: Long

  /**
    * Number of values stored in this distribution.
    */
  def count: Long

  /**
    * Returns the percentile at the specified rank.
    *
    * NOTE(review): the implementation in this file computes ranks as percentages (count at or under a value, times
    * 100, divided by the total count), so the rank is presumably expected on that same 0 to 100 scale. Confirm
    * against other implementations before relying on it.
    *
    * @param rank the percentile rank to look up.
    * @return the percentile found for the requested rank.
    */
  def percentile(rank: Double): Distribution.Percentile

  /**
    * Returns an immutable list of all percentiles on this distribution. Every call allocates new percentile
    * instances; if you just need to read the values to transfer them into another medium, consider using
    * [[percentilesIterator]] instead.
    */
  def percentiles: Seq[Distribution.Percentile]

  /**
    * Returns an iterator of all percentile values on this distribution. The iterator uses a mutable percentile
    * instance that gets updated as the user iterates, which avoids allocating intermediary objects. As a consequence,
    * storing the instances returned by the iterator into other structures might not work as you expect: copy the
    * values you need instead of keeping a reference to the returned instance.
    */
  def percentilesIterator: Iterator[Distribution.Percentile]

  /**
    * Returns an immutable list of all buckets on this distribution. Every call allocates new bucket instances; if
    * you just need to read the values to transfer them into another medium, consider using [[bucketsIterator]]
    * instead.
    */
  def buckets: Seq[Distribution.Bucket]

  /**
    * Returns an iterator of all buckets on this distribution. The iterator uses a mutable bucket instance that gets
    * updated as the user iterates, which avoids allocating intermediary objects. As a consequence, storing the
    * instances returned by the iterator into other structures might not work as you expect: copy the values you need
    * instead of keeping a reference to the returned instance.
    */
  def bucketsIterator: Iterator[Distribution.Bucket]
}

object Distribution {

  // Logger used by convert to warn when a conversion across different unit dimensions is requested.
  private val _logger = LoggerFactory.getLogger(classOf[Distribution])

  /**
    * Describes a single bucket within a distribution: a value paired with the number of times it was observed.
    */
  trait Bucket {
    /**
      * Value at which the bucket starts.
      */
    def value: Long

    /**
      * Number of times the value of this bucket was observed.
      */
    def frequency: Long
  }

  /**
    * Indicates the value at or below which a given percentage (or rank) of the entire distribution falls.
    */
  trait Percentile {

    /**
      * Percentile rank for a given percentile. E.g. a rank of 99.95 expresses that 99.95% of all values on a
      * distribution are at or below the value of this percentile.
      */
    def rank: Double

    /**
      * The cutoff value for this percentile in a distribution. E.g. a value of 742ms with a rank of 95.0 expresses
      * that 95% of all values on a distribution are at 742ms or smaller.
      */
    def value: Long

    /**
      * Expresses how many values are at or under this percentile's rank. E.g. a percentile with a value of 742ms and
      * a count at rank of 500 expresses that there are 500 values at or under 742ms on a distribution.
      */
    def countAtRank: Long
  }

  /**
    * Combines two distributions into a new one containing the values of both. The result always uses the dynamic
    * range of the `left` distribution; use the three-argument overload to pick a different one.
    *
    * @param left  first distribution to merge; its dynamic range is used for the result.
    * @param right second distribution to merge.
    * @return a distribution holding the values from both inputs.
    */
  def merge(left: Distribution, right: Distribution): Distribution = {
    val targetRange = left.dynamicRange
    merge(left, right, targetRange)
  }


  /**
    * Combines two distributions into a new one containing the values of both. Every bucket of `left` and then every
    * bucket of `right` is recorded, with its frequency, into a local histogram obtained for the provided dynamic
    * range, and a snapshot of that histogram is returned.
    *
    * @param left         first distribution to merge.
    * @param right        second distribution to merge.
    * @param dynamicRange dynamic range of the resulting distribution.
    * @return a distribution holding the values from both inputs.
    */
  def merge(left: Distribution, right: Distribution, dynamicRange: DynamicRange): Distribution = {
    val target = Histogram.Local.get(dynamicRange)

    // Replays all the buckets of one distribution into the target histogram.
    def recordAll(source: Distribution): Unit = {
      val sourceBuckets = source.bucketsIterator
      while (sourceBuckets.hasNext) {
        val bucket = sourceBuckets.next()
        target.recordValueWithCount(bucket.value, bucket.frequency)
      }
    }

    recordAll(left)
    recordAll(right)

    // NOTE(review): the `true` argument presumably asks the local histogram to reset its state; confirm.
    target.snapshot(true)
  }

  /**
    * Tries to convert a distribution to the provided unit, keeping the distribution's own dynamic range. Since
    * distributions are based on buckets with integer boundaries, converting from greater to lower magnitudes (e.g.
    * from seconds to milliseconds) preserves precision, but the same is not true the other way around (e.g. from
    * milliseconds to seconds): fractional results are rounded to the nearest integer, and any result that rounds to
    * zero is recorded as 1. For example, 3200 milliseconds become 3 seconds and 300 milliseconds become 1 second.
    *
    * If the distribution and target unit dimensions are not the same then a warning will be logged and the
    * distribution will be returned unchanged.
    *
    * @param distribution the distribution to convert.
    * @param unit         unit in which the distribution's values are currently expressed.
    * @param toUnit       unit to convert the values to.
    * @return the converted distribution, or the same instance when nothing needs to change.
    */
  def convert(distribution: Distribution, unit: MeasurementUnit, toUnit: MeasurementUnit): Distribution = {
    val sameRange = distribution.dynamicRange
    convert(distribution, unit, toUnit, sameRange)
  }

  /**
    * Tries to convert a distribution to the provided unit and dynamic range. Since distributions are based on buckets
    * with integer boundaries, converting from greater to lower magnitudes (e.g. from seconds to milliseconds)
    * preserves precision, but the same is not true the other way around (e.g. from milliseconds to seconds):
    * fractional results are rounded to the nearest integer, and any result that rounds to zero is recorded as 1.
    * For example, 3200 milliseconds become 3 seconds and 300 milliseconds become 1 second.
    *
    * If the distribution and target unit dimensions are not the same then a warning is logged and the values are
    * left in their original unit: the distribution is returned unchanged when the dynamic range already matches,
    * otherwise the unconverted values are re-recorded into a distribution with the requested dynamic range.
    *
    * @param distribution   the distribution to convert.
    * @param unit           unit in which the distribution's values are currently expressed.
    * @param toUnit         unit to convert the values to.
    * @param toDynamicRange dynamic range of the resulting distribution.
    * @return the converted distribution, or the same instance when neither the unit nor the range change.
    */
  def convert(distribution: Distribution, unit: MeasurementUnit, toUnit: MeasurementUnit, toDynamicRange: DynamicRange): Distribution = {

    if(unit == toUnit && distribution.dynamicRange == toDynamicRange)
      distribution
    else {
      // Units can only be converted within the same dimension; on a mismatch fall back to the original unit.
      val actualToUnit = if(unit.dimension == toUnit.dimension) toUnit else {
        _logger.warn(
          s"Can't convert distributions from the [${unit.dimension.name}] dimension into the " +
          s"[${toUnit.dimension.name}] dimension."
        )

        unit
      }

      if(unit == actualToUnit && distribution.dynamicRange == toDynamicRange)
        distribution
      else {
        val scaledHistogram = Histogram.Local.get(toDynamicRange)
        distribution.bucketsIterator.foreach(bucket => {
          // Convert with the effective target unit (not the requested one) so that a dimension mismatch never
          // triggers the cross-dimension conversion that was just reported as impossible.
          val roundValue = Math.round(MeasurementUnit.convert(bucket.value, unit, actualToUnit))

          // Values that round to zero are recorded as 1.
          val convertedValue = if(roundValue == 0L) 1L else roundValue
          scaledHistogram.recordValueWithCount(convertedValue, bucket.frequency)
        })

        scaledHistogram.snapshot(true)
      }
    }
  }

  /**
    * Immutable [[Distribution]] for a given range and precision, backed by a ZigZag-encoded, zero run-length
    * compressed version of a histogram's counts array. This implementation is closely coupled with the HdrHistogram
    * internal mechanics: it relies on several bits of the histogram's internal state (unit magnitude and sub-bucket
    * sizes) to translate positions in the counts array into the bucket values and percentiles seen by users.
    *
    * Every iterator works on its own duplicate of the counts buffer, so several of them can be consumed
    * independently.
    */
  private[kamon] class ZigZagCounts(val count: Long, minIndex: Int, maxIndex: Int, zigZagCounts: ByteBuffer, unitMagnitude: Int,
      subBucketHalfCount: Int, subBucketHalfCountMagnitude: Int, val dynamicRange: DynamicRange) extends Distribution {

    /** Value of the bucket at `minIndex`, or zero when nothing was recorded. */
    val min: Long =
      if (count == 0) 0
      else bucketValueAtIndex(minIndex)

    /** Value of the bucket at `maxIndex`. */
    val max: Long =
      bucketValueAtIndex(maxIndex)

    /** Sum of `value * frequency` over all buckets, computed on first access. */
    lazy val sum: Long =
      bucketsIterator.foldLeft(0L) { (total, bucket) =>
        total + (bucket.value * bucket.frequency)
      }

    /** Materializes every bucket into a new immutable instance. */
    def buckets: Seq[Bucket] =
      bucketsIterator
        .map(current => immutable.Bucket(current.value, current.frequency))
        .toList

    /**
      * Walks the encoded counts. A positive entry is the frequency of the bucket at the current position; any other
      * entry is a (negated) number of positions to skip and is followed by the frequency of the bucket found after
      * the skip. The same mutable bucket instance is returned on every call to `next()`.
      */
    def bucketsIterator: Iterator[Bucket] = new Iterator[Bucket] {
      private val encodedCounts = zigZagCounts.duplicate()
      private val reusableBucket = mutable.Bucket(0, 0)
      private var position = 0

      def hasNext: Boolean =
        encodedCounts.remaining() > 0

      def next(): Bucket = {
        val entry = ZigZag.getLong(encodedCounts)
        val frequency =
          if (entry > 0) entry
          else {
            val skipped = -entry.toInt
            position += skipped
            ZigZag.getLong(encodedCounts)
          }

        reusableBucket.value = bucketValueAtIndex(position)
        reusableBucket.frequency = frequency
        position += 1
        reusableBucket
      }
    }

    /**
      * Produces one percentile per bucket, accumulating bucket frequencies to compute the rank (as a percentage of
      * the total count) and the count at rank. The same mutable percentile instance is returned on every call to
      * `next()`.
      */
    def percentilesIterator: Iterator[Percentile] = new Iterator[Percentile] {
      private val source = bucketsIterator
      private val reusablePercentile = mutable.Percentile(0D, 0, 0)
      private var accumulated = 0L

      def hasNext: Boolean =
        source.hasNext

      def next(): Percentile = {
        val current = source.next()
        accumulated += current.frequency

        // Qualified access is required: a bare `count` would resolve to the iterator's own `count` method.
        reusablePercentile.rank = (accumulated * 100D) / ZigZagCounts.this.count
        reusablePercentile.countAtRank = accumulated
        reusablePercentile.value = current.value
        reusablePercentile
      }
    }

    /**
      * Finds the first percentile whose rank is not below `p`, falling back to the last available percentile when
      * none reaches it. The returned instance always carries the requested rank `p`; an empty distribution yields
      * a percentile with zero value and zero count.
      */
    def percentile(p: Double): Percentile = {
      val candidates = percentilesIterator

      if (!candidates.hasNext) immutable.Percentile(p, 0, 0)
      else {
        var reached = candidates.next()
        while (reached.rank < p && candidates.hasNext)
          reached = candidates.next()

        immutable.Percentile(p, reached.value, reached.countAtRank)
      }
    }

    /** Materializes every percentile into a new immutable instance. */
    def percentiles: Seq[Percentile] =
      percentilesIterator
        .map(current => immutable.Percentile(current.rank, current.value, current.countAtRank))
        .toList

    /** Returns a duplicate of the encoded counts buffer. */
    def countsArray(): ByteBuffer =
      zigZagCounts.duplicate()

    /**
      * Translates a position in the counts array into the value of the corresponding bucket, using the sub-bucket
      * layout parameters received by this class.
      */
    @inline private def bucketValueAtIndex(index: Int): Long = {
      val rawBucketIndex = (index >> subBucketHalfCountMagnitude) - 1
      val upperHalfSubBucketIndex = (index & (subBucketHalfCount - 1)) + subBucketHalfCount

      if (rawBucketIndex < 0) {
        // Positions that map before the first bucket use the lower half of the sub-buckets of bucket zero.
        val lowerHalfSubBucketIndex = upperHalfSubBucketIndex - subBucketHalfCount
        lowerHalfSubBucketIndex.toLong << (0 + unitMagnitude)
      } else
        upperHalfSubBucketIndex.toLong << (rawBucketIndex + unitMagnitude)
    }

    override def toString(): String =
      s"Distribution{count=$count,min=$min,max=$max}"
  }

  /**
    * Mutable versions of buckets and percentiles. The iterators in this file reuse a single instance of these and
    * update its fields on every step, which avoids allocating one object per returned element.
    */
  private object mutable {
    case class Bucket(var value: Long, var frequency: Long) extends Distribution.Bucket
    case class Percentile(var rank: Double, var value: Long, var countAtRank: Long) extends Distribution.Percentile
  }

  /**
    * Immutable versions of buckets and percentiles, used when fully materialized views are needed (e.g. the
    * sequences returned by `buckets` and `percentiles`, and the result of `percentile`).
    */
  private object immutable {
    case class Bucket(value: Long, frequency: Long) extends Distribution.Bucket
    case class Percentile(rank: Double, value: Long, countAtRank: Long) extends Distribution.Percentile
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy