All downloads are free. Search and download functionality uses the official Maven repository.

geotrellis.statistics.Histogram.scala Maven / Gradle / Ivy

The newest version!
package geotrellis.statistics

import math.{abs, round, sqrt}

/**
  * Data object representing a histogram of values.
  */
trait Histogram extends Serializable {
  /**
   * Return the number of occurrences for 'item'.
   */
  def getItemCount(item:Int): Int

  /**
   * Return the total number of occurrences for all items.
   */
  def getTotalCount(): Int

  /**
   * Return the smallest item seen.
   */
  def getMinValue(): Int

  /**
   * Return the largest item seen.
   */
  def getMaxValue(): Int

  /**
   * Return the smallest and largest items seen as a tuple.
   */
  def getMinMaxValues():(Int, Int) = (getMinValue(), getMaxValue())

  /**
   * Return a mutable copy of this histogram.
   */
  def mutable(): MutableHistogram

  /**
   * Return the items tracked by this histogram.
   *
   * NOTE(review): getMedian and generateStatistics walk this array as if it
   * were sorted in ascending order (first element is used as the minimum,
   * last as the maximum) -- confirm that every implementation guarantees it.
   */
  def getValues():Array[Int]

  /**
   * Return the items tracked by this histogram.
   *
   * NOTE(review): presumably this differs from getValues only in ordering
   * and/or filtering guarantees; only getMean uses it here -- confirm
   * against the implementations.
   */
  def rawValues():Array[Int]

  /**
   * Invoke 'f' with (item, count) for every item returned by getValues.
   */
  def foreach(f:(Int, Int) => Unit): Unit = {
    getValues().foreach(z => f(z, getItemCount(z)))
  }

  /**
   * Invoke 'f' with each item; iteration details are left to implementations.
   */
  def foreachValue(f:Int => Unit): Unit

  /**
   * Return break values for 'num' quantiles.
   *
   * NOTE(review): the exact semantics are defined by the implementations,
   * which are not visible from this trait.
   */
  def getQuantileBreaks(num:Int):Array[Int]

  /**
   * Return the item with the highest count, or NODATA when the histogram
   * is empty. On ties the item appearing first in getValues wins.
   */
  def getMode():Int = {
    if (getTotalCount() == 0) {
      geotrellis.NODATA
    } else {
      val values = getValues()
      var mode = values(0)
      var count = getItemCount(mode)
      var i = 1
      val len = values.length
      while (i < len) {
        val z = values(i)
        val c = getItemCount(z)
        // strict comparison keeps the earliest item on ties
        if (c > count) {
          count = c
          mode = z
        }
        i += 1
      }
      mode
    }
  }

  /**
   * Return the median item, or NODATA when the histogram is empty.
   *
   * Walks getValues in order, accumulating counts, and returns the first
   * item at which the running count exceeds half of the total count.
   */
  def getMedian(): Int = if (getTotalCount() == 0) {
    geotrellis.NODATA
  } else {
    val values = getValues()
    val middle = getTotalCount() / 2
    var total = 0
    var i = 0
    while (total <= middle) {
      total += getItemCount(values(i))
      i += 1
    }
    // 'i' was advanced past the item that pushed us over the middle
    values(i-1)
  }

  /**
   * Return the count-weighted mean of all items, or NODATA (widened to
   * Double) when the histogram is empty.
   *
   * The mean is computed incrementally so that no large intermediate sum
   * is required.
   */
  def getMean():Double = {
    if (getTotalCount() == 0) {
      geotrellis.NODATA
    } else {
      val values = rawValues()
      var mean = 0.0
      var total = 0.0
      var i = 0
      val len = values.length

      while (i < len) {
        val value = values(i)
        val count = getItemCount(value)
        // Skip items with no occurrences: they do not affect the mean, and
        // a leading zero-count item would otherwise evaluate 0.0 / 0.0 and
        // poison the result with NaN. generateStatistics has the same guard.
        if (count != 0) {
          val delta = value - mean
          total += count
          mean += (count * delta) / total
        }

        i += 1
      }
      mean
    }
  }

  /**
   * Compute mean, median, mode, standard deviation, minimum and maximum.
   *
   * Returns Statistics.EMPTY when getValues is empty. The first pass
   * derives mode, mean and median together; a second pass derives the
   * standard deviation as the square root of the count-weighted mean of
   * squared deviations from the mean.
   */
  def generateStatistics() = {
    val values = getValues()
    if (values.length == 0) {
      Statistics.EMPTY
    } else {

      var mode = 0
      var modeCount = 0

      var mean = 0.0
      var total = 0

      var median = 0
      var needMedian = true
      val limit = getTotalCount() / 2

      var i = 0
      val len = values.length

      while (i < len) {
        val value = values(i)
        val count = getItemCount(value)
        if (count != 0) {
          // update the mode
          if (count > modeCount) {
            mode = value
            modeCount = count
          }

          // update the mean (incremental weighted mean)
          val delta = value - mean
          total += count
          mean += (count * delta) / total

          // update median if needed: first item whose running count
          // exceeds half of the total count
          if (needMedian && total > limit) {
            median = value
            needMedian = false
          }
        }
        i += 1
      }

      // find the min value (relies on getValues being in ascending order)
      val zmin = values(0)

      // find the max value (relies on getValues being in ascending order)
      val zmax = values(len - 1)

      // find stddev: incremental weighted mean of squared deviations
      i = 0
      total = 0
      var mean2 = 0.0
      while (i < len) {
        val value = values(i)
        val count = getItemCount(value)

        if (count > 0) {
          val x = value - mean
          val y = x * x

          val delta = y - mean2
          total += count
          mean2 += (count * delta) / total
        }

        i += 1
      }
      val stddev = sqrt(mean2)

      Statistics(mean, median, mode, stddev, zmin, zmax)
    }
  }

  /**
   * Render the histogram as a JSON array of [item,count] pairs, in the
   * order returned by getValues, e.g. "[[1,3],[2,5]]".
   */
  def toJSON: String = {
    val counts = getValues().map(v => s"[$v,${getItemCount(v)}]").mkString(",")
    s"[$counts]"
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy