org.dianahep.histogrammar.primitives.bin.scala Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of histogrammar_2.10 Show documentation
Histogram abstraction to simplify complex aggregations in distributed environments.
The newest version!
// Copyright 2016 DIANA-HEP
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.dianahep

import scala.language.existentials

import org.dianahep.histogrammar.json._
import org.dianahep.histogrammar.util._

package histogrammar {
  //////////////////////////////////////////////////////////////// Bin/Binned/Binning

  /** Split a quantity into equally spaced bins between a low and high threshold and fill exactly one bin per datum.
    * 
    * When composed with [[org.dianahep.histogrammar.Count]], this produces a standard histogram:
    * 
    * {{{Bin.ing(100, 0, 10, fill_x, Count.ing())}}}
    * 
    * and when nested, it produces a two-dimensional histogram:
    * 
    * {{{Bin.ing(100, 0, 10, fill_x,
    *   Bin.ing(100, 0, 10, fill_y, Count.ing()))}}}
    * 
    * Combining with [[org.dianahep.histogrammar.Deviate]] produces a physicist's "profile plot:"
    * 
    * {{{Bin.ing(100, 0, 10, fill_x, Deviate.ing(fill_y))}}}
    * 
    * and so on.
    * 
    * Factory produces mutable [[org.dianahep.histogrammar.Binning]] and immutable [[org.dianahep.histogrammar.Binned]] objects.
    */
  object Bin extends Factory {
    val name = "Bin"
    val help = "Split a quantity into equally spaced bins between a low and high threshold and fill exactly one bin per datum."
    val detailedHelp ="""When composed with [[org.dianahep.histogrammar.Count]], this produces a standard histogram:

{{{Bin.ing(100, 0, 10, fill_x, Count.ing())}}}

and when nested, it produces a two-dimensional histogram:

{{{Bin.ing(100, 0, 10, fill_x,
  Bin.ing(100, 0, 10, fill_y, Count.ing()))}}}

Combining with [[org.dianahep.histogrammar.Deviate]] produces a physicist's "profile plot:"

{{{Bin.ing(100, 0, 10, fill_x, Deviate.ing(fill_y))}}}

and so on."""

    /** Create an immutable [[org.dianahep.histogrammar.Binned]] from arguments (instead of JSON).
      * 
      * @param low Minimum-value edge of the first bin.
      * @param high Maximum-value edge of the last bin.
      * @param entries Weighted number of entries (sum of all observed weights).
      * @param values Containers for data sent to each bin.
      * @param underflow Container for data below the first bin.
      * @param overflow Container for data above the last bin.
      * @param nanflow Container for data that resulted in `NaN`.
      */
    def ed[V <: Container[V] with NoAggregation, U <: Container[U] with NoAggregation, O <: Container[O] with NoAggregation, N <: Container[N] with NoAggregation]
      (low: Double,
       high: Double,
       entries: Double,
       values: Seq[V],
       underflow: U,
       overflow: O,
       nanflow: N) = new Binned[V, U, O, N](low, high, entries, None, values, underflow, overflow, nanflow)

    /** Create an empty, mutable [[org.dianahep.histogrammar.Binning]].
      * 
      * @param number Of bins.
      * @param low Minimum-value edge of the first bin.
      * @param high Maximum-value edge of the last bin.
      * @param quantity Numerical function to split into bins.
      * @param value Template used to create zero values (by calling this `value`'s `zero` method).
      * @param underflow Container for data below the first bin.
      * @param overflow Container for data above the last bin.
      * @param nanflow Container for data that resulted in `NaN`.
      */
    def apply[DATUM, V <: Container[V] with Aggregation{type Datum >: DATUM}, U <: Container[U] with Aggregation{type Datum >: DATUM}, O <: Container[O] with Aggregation{type Datum >: DATUM}, N <: Container[N] with Aggregation{type Datum >: DATUM}]
      (num: Int,
       low: Double,
       high: Double,
       quantity: UserFcn[DATUM, Double],
       value: => V = Count(),
       underflow: U = Count(),
       overflow: O = Count(),
       nanflow: N = Count()) =
      new Binning[DATUM, V, U, O, N](low, high, quantity, 0.0, Seq.fill(num)(value.zero), underflow, overflow, nanflow)

    /** Synonym for `apply`. */
    def ing[DATUM, V <: Container[V] with Aggregation{type Datum >: DATUM}, U <: Container[U] with Aggregation{type Datum >: DATUM}, O <: Container[O] with Aggregation{type Datum >: DATUM}, N <: Container[N] with Aggregation{type Datum >: DATUM}]
      (num: Int,
       low: Double,
       high: Double,
       quantity: UserFcn[DATUM, Double],
       value: => V = Count(),
       underflow: U = Count(),
       overflow: O = Count(),
       nanflow: N = Count()) = apply(num, low, high, quantity, value, underflow, overflow, nanflow)

    trait Methods {
      /** Number of bins. */
      def num: Int
      def low: Double
      def high: Double

      /** Find the bin index associated with numerical value `x`.
        * 
        * @return -1 if `x` is out of range; the bin index otherwise.
        */
      def bin(x: Double): Int =
        if (under(x)  ||  over(x)  ||  nan(x))
          -1
        else
          Math.floor(num * (x - low) / (high - low)).toInt

      /** Return `true` iff `x` is in the underflow region (less than `low`). */
      def under(x: Double): Boolean = !x.isNaN  &&  x < low
      /** Return `true` iff `x` is in the overflow region (greater than `high`). */
      def over(x: Double): Boolean = !x.isNaN  &&  x >= high
      /** Return `true` iff `x` is in the nanflow region (equal to `NaN`). */
      def nan(x: Double): Boolean = x.isNaN

      /** Get a sequence of valid indexes. */
      def indexes: Seq[Int] = 0 until num
      /** Get the low and high edge of a bin (given by index number). */
      def range(index: Int): (Double, Double) = ((high - low) * index / num + low, (high - low) * (index + 1) / num + low)
    }

    import KeySetComparisons._
    def fromJsonFragment(json: Json, nameFromParent: Option[String]): Container[_] with NoAggregation = json match {
      case JsonObject(pairs @ _*) if (pairs.keySet has Set("low", "high", "entries", "values:type", "values", "underflow:type", "underflow", "overflow:type", "overflow", "nanflow:type", "nanflow").maybe("name").maybe("values:name")) =>
        val get = pairs.toMap

        val low = get("low") match {
          case JsonNumber(x) => x
          case x => throw new JsonFormatException(x, name + ".low")
        }

        val high = get("high") match {
          case JsonNumber(x) => x
          case x => throw new JsonFormatException(x, name + ".high")
        }

        val entries = get("entries") match {
          case JsonNumber(x) => x
          case x => throw new JsonFormatException(x, name + ".entries")
        }

        val quantityName = get.getOrElse("name", JsonNull) match {
          case JsonString(x) => Some(x)
          case JsonNull => None
          case x => throw new JsonFormatException(x, name + ".name")
        }

        val valuesFactory = get("values:type") match {
          case JsonString(name) => Factory(name)
          case x => throw new JsonFormatException(x, name + ".values:type")
        }
        val valuesName = get.getOrElse("values:name", JsonNull) match {
          case JsonString(x) => Some(x)
          case JsonNull => None
          case x => throw new JsonFormatException(x, name + ".values:name")
        }
        val values = get("values") match {
          case JsonArray(sub @ _*) => sub.map(valuesFactory.fromJsonFragment(_, valuesName))
          case x => throw new JsonFormatException(x, name + ".values")
        }

        val underflowFactory = get("underflow:type") match {
          case JsonString(name) => Factory(name)
          case x => throw new JsonFormatException(x, name + ".underflow:type")
        }
        val underflow = underflowFactory.fromJsonFragment(get("underflow"), None)

        val overflowFactory = get("overflow:type") match {
          case JsonString(name) => Factory(name)
          case x => throw new JsonFormatException(x, name + ".overflow:type")
        }
        val overflow = overflowFactory.fromJsonFragment(get("overflow"), None)

        val nanflowFactory = get("nanflow:type") match {
          case JsonString(name) => Factory(name)
          case x => throw new JsonFormatException(x, name + ".nanflow:type")
        }
        val nanflow = nanflowFactory.fromJsonFragment(get("nanflow"), None)

        new Binned(low, high, entries, (nameFromParent ++ quantityName).lastOption, values.asInstanceOf[Seq[C] forSome {type C <: Container[C] with NoAggregation}], underflow.asInstanceOf[C forSome {type C <: Container[C] with NoAggregation}], overflow.asInstanceOf[C forSome {type C <: Container[C] with NoAggregation}], nanflow.asInstanceOf[C forSome {type C <: Container[C] with NoAggregation}])

      case _ => throw new JsonFormatException(json, name)
    }
  }

  /** An accumulated quantity that was split into equally spaced bins between specified limits and filling only one bin per datum.
    * 
    * Use the factory [[org.dianahep.histogrammar.Bin]] to construct an instance.
    * 
    * @param low Minimum-value edge of the first bin.
    * @param high Maximum-value edge of the last bin.
    * @param entries Weighted number of entries (sum of all observed weights).
    * @param quantityName Optional name given to the quantity function, passed for bookkeeping.
    * @param values Containers for data sent to each bin.
    * @param underflow Container for data below the first bin.
    * @param overflow Container for data above the last bin.
    * @param nanflow Container for data that resulted in `NaN`.
    */
  class Binned[V <: Container[V] with NoAggregation, U <: Container[U] with NoAggregation, O <: Container[O] with NoAggregation, N <: Container[N] with NoAggregation] private[histogrammar](
    val low: Double,
    val high: Double,
    val entries: Double,
    val quantityName: Option[String],
    val values: Seq[V],
    val underflow: U,
    val overflow: O,
    val nanflow: N) extends Container[Binned[V, U, O, N]] with NoAggregation with QuantityName with Bin.Methods {

    type Type = Binned[V, U, O, N]
    type EdType = Binned[V, U, O, N]
    def factory = Bin

    if (low >= high)
      throw new ContainerException(s"low ($low) must be less than high ($high)")
    if (values.size < 1)
      throw new ContainerException(s"values ($values) must have at least one element")
    if (entries < 0.0)
      throw new ContainerException(s"entries ($entries) cannot be negative")
    def num = values.size

    /** Extract the container at a given index. */
    def at(index: Int) = values(index)

    def zero = new Binned[V, U, O, N](low, high, 0.0, quantityName, Seq.fill(values.size)(values.head.zero), underflow.zero, overflow.zero, nanflow.zero)
    def +(that: Binned[V, U, O, N]): Binned[V, U, O, N] = {
      if (this.quantityName != that.quantityName)
        throw new ContainerException(s"cannot add ${getClass.getName} because quantityName differs (${this.quantityName} vs ${that.quantityName})")
      if (this.low != that.low)
        throw new ContainerException(s"cannot add ${getClass.getName} because low differs (${this.low} vs ${that.low})")
      if (this.high != that.high)
        throw new ContainerException(s"cannot add ${getClass.getName} because high differs (${this.high} vs ${that.high})")
      if (this.values.size != that.values.size)
        throw new ContainerException(s"cannot add ${getClass.getName} because number of values differs (${this.values.size} vs ${that.values.size})")

      new Binned(
        low,
        high,
        this.entries + that.entries,
        this.quantityName,
        this.values zip that.values map {case (me, you) => me + you},
        this.underflow + that.underflow,
        this.overflow + that.overflow,
        this.nanflow + that.nanflow)
    }
    def *(factor: Double) =
      if (factor.isNaN  ||  factor <= 0.0)
        zero
      else
        new Binned[V, U, O, N](low, high, factor * entries, quantityName, values.map(_ * factor), underflow * factor, overflow * factor, nanflow * factor)

    def children = underflow :: overflow :: nanflow :: values.toList

    def toJsonFragment(suppressName: Boolean) = JsonObject(
      "low" -> JsonFloat(low),
      "high" -> JsonFloat(high),
      "entries" -> JsonFloat(entries),
      "values:type" -> JsonString(values.head.factory.name),
      "values" -> JsonArray(values.map(_.toJsonFragment(true)): _*),
      "underflow:type" -> JsonString(underflow.factory.name),
      "underflow" -> underflow.toJsonFragment(false),
      "overflow:type" -> JsonString(overflow.factory.name),
      "overflow" -> overflow.toJsonFragment(false),
      "nanflow:type" -> JsonString(nanflow.factory.name),
      "nanflow" -> nanflow.toJsonFragment(false)).
      maybe(JsonString("name") -> (if (suppressName) None else quantityName.map(JsonString(_)))).
      maybe(JsonString("values:name") -> (values.head match {case x: QuantityName => x.quantityName.map(JsonString(_)); case _ => None}))

    override def toString() = s""""""
    override def equals(that: Any) = that match {
      case that: Binned[V, U, O, N] => this.low === that.low  &&  this.high === that.high  &&  this.entries === that.entries  &&  this.quantityName == that.quantityName  &&  this.values == that.values  &&  this.underflow == that.underflow  &&  this.overflow == that.overflow  &&  this.nanflow == that.nanflow
      case _ => false
    }
    override def hashCode() = (low, high, entries, quantityName, values, underflow, overflow, nanflow).hashCode
  }

  /** Accumulating a quantity by splitting it into equally spaced bins between specified limits and filling only one bin per datum.
    * 
    * Use the factory [[org.dianahep.histogrammar.Bin]] to construct an instance.
    * 
    * @param low Minimum-value edge of the first bin.
    * @param high Maximum-value edge of the last bin.
    * @param quantity Numerical function to track.
    * @param entries Weighted number of entries (sum of all observed weights).
    * @param values Containers for data sent to each bin.
    * @param underflow Container for data below the first bin.
    * @param overflow Container for data above the last bin.
    * @param nanflow Container for data that resulted in `NaN`.
    */
  class Binning[DATUM, V <: Container[V] with Aggregation{type Datum >: DATUM}, U <: Container[U] with Aggregation{type Datum >: DATUM}, O <: Container[O] with Aggregation{type Datum >: DATUM}, N <: Container[N] with Aggregation{type Datum >: DATUM}] private[histogrammar](
    val low: Double,
    val high: Double,
    val quantity: UserFcn[DATUM, Double],
    var entries: Double,
    val values: Seq[V],
    val underflow: U,
    val overflow: O,
    val nanflow: N) extends Container[Binning[DATUM, V, U, O, N]] with AggregationOnData with NumericalQuantity[DATUM] with Bin.Methods {

    if (low >= high)
      throw new ContainerException(s"low ($low) must be less than high ($high)")
    if (values.size < 1)
      throw new ContainerException(s"values ($values) must have at least one element")
    if (entries < 0.0)
      throw new ContainerException(s"entries ($entries) cannot be negative")
    def num = values.size

    protected val v = values.head
    type Type = Binning[DATUM, V, U, O, N]
    type EdType = Binned[v.EdType, underflow.EdType, overflow.EdType, nanflow.EdType]
    type Datum = DATUM
    def factory = Bin

    /** Extract the container at a given index. */
    def at(index: Int) = values(index)

    def zero = new Binning[DATUM, V, U, O, N](low, high, quantity, 0.0, values.map(_.zero), underflow.zero, overflow.zero, nanflow.zero)
    def +(that: Binning[DATUM, V, U, O, N]): Binning[DATUM, V, U, O, N] = {
      if (this.quantity.name != that.quantity.name)
        throw new ContainerException(s"cannot add ${getClass.getName} because quantity name differs (${this.quantity.name} vs ${that.quantity.name})")
      if (this.low != that.low)
        throw new ContainerException(s"cannot add ${getClass.getName} because low differs (${this.low} vs ${that.low})")
      if (this.high != that.high)
        throw new ContainerException(s"cannot add ${getClass.getName} because high differs (${this.high} vs ${that.high})")
      if (this.values.size != that.values.size)
        throw new ContainerException(s"cannot add ${getClass.getName} because number of values differs (${this.values.size} vs ${that.values.size})")

      new Binning[DATUM, V, U, O, N](
        low,
        high,
        this.quantity,
        this.entries + that.entries,
        this.values zip that.values map {case (me, you) => me + you},
        this.underflow + that.underflow,
        this.overflow + that.overflow,
        this.nanflow + that.nanflow)
    }
    def *(factor: Double) =
      if (factor.isNaN  ||  factor <= 0.0)
        zero
      else
        new Binning[DATUM, V, U, O, N](low, high, quantity, factor * entries, values.map(_ * factor), underflow * factor, overflow * factor, nanflow * factor)

    def fill[SUB <: Datum](datum: SUB, weight: Double = 1.0) {
      checkForCrossReferences()
      if (weight > 0.0) {
        val q = quantity(datum)

        if (under(q))
          underflow.fill(datum, weight)
        else if (over(q))
          overflow.fill(datum, weight)
        else if (nan(q))
          nanflow.fill(datum, weight)
        else
          values(bin(q)).fill(datum, weight)

        // no possibility of exception from here on out (for rollback)
        entries += weight
      }
    }

    def children = underflow :: overflow :: nanflow :: values.toList

    def toJsonFragment(suppressName: Boolean) = JsonObject(
      "low" -> JsonFloat(low),
      "high" -> JsonFloat(high),
      "entries" -> JsonFloat(entries),
      "values:type" -> JsonString(values.head.factory.name),
      "values" -> JsonArray(values.map(_.toJsonFragment(true)): _*),
      "underflow:type" -> JsonString(underflow.factory.name),
      "underflow" -> underflow.toJsonFragment(false),
      "overflow:type" -> JsonString(overflow.factory.name),
      "overflow" -> overflow.toJsonFragment(false),
      "nanflow:type" -> JsonString(nanflow.factory.name),
      "nanflow" -> nanflow.toJsonFragment(false)).
      maybe(JsonString("name") -> (if (suppressName) None else quantity.name.map(JsonString(_)))).
      maybe(JsonString("values:name") -> (values.head match {case x: AnyQuantity[_, _] => x.quantity.name.map(JsonString(_)); case _ => None}))

    override def toString() = s""""""
    override def equals(that: Any) = that match {
      case that: Binning[DATUM, V, U, O, N] => this.low === that.low  &&  this.high === that.high  &&  this.quantity == that.quantity  &&  this.entries === that.entries  &&  this.values == that.values  &&  this.underflow == that.underflow  &&  this.overflow == that.overflow  &&  this.nanflow == that.nanflow
      case _ => false
    }
    override def hashCode() = (low, high, quantity, entries, values, underflow, overflow, nanflow).hashCode
  }
}