net.opentsdb.stats.Histogram Maven / Gradle / Ivy
Show all versions of opentsdb Show documentation
// This file is part of OpenTSDB.
// Copyright (C) 2010-2012 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see .
package net.opentsdb.stats;
import java.util.Arrays;
/**
* A histogram to keep track of the approximation of a distribution of values.
*
* This is not a general purpose implementation of histogram. It's
* specifically designed for "small" values (close to 0) as the primary
* use case is latency histograms.
*
* All values must be positive ({@code >= 0}).
*
* The histogram is linear (fixed size buckets) up to a given cutoff
* point. Beyond that point, the histogram becomes exponential (each
* bucket is twice as large as the previous one). This gives good
* granularity for lower values while still allowing a rough
* classification for the "long tail" of larger values.
*
* Note that this implementation doesn't allow you to directly control
* the number of buckets in the histogram. The number will depend on
* the arguments given to the constructor.
*
* This class is not synchronized.
*/
public final class Histogram {
/** Interval between each bucket for the linear part of the histogram. */
private final short interval;
/** Inclusive value beyond which we switch to exponential buckets. */
private final int cutoff;
/**
* How many linear buckets we have.
* Technically we don't need to store this value but we do in order to
* avoid having to re-compute it in the fast path each time we add a
* new value.
*/
private final short num_linear_buckets;
/**
* The power of 2 used by the first exponential bucket.
* Technically we don't need to store this value but we do in order to
* avoid having to re-compute it in the fast path each time we add a
* new value.
*/
private final short exp_bucket_shift;
/** Buckets where we actually store the values. */
private final int[] buckets;
/**
* Constructor.
* @param max The maximum value of the histogram. Any value greater
* than this will be considered to be "infinity".
* @param interval The interval (size) of each linear bucket.
* @param cutoff The value beyond which to switch to exponential
* buckets. The histogram may actually use this value or a value up
* to {@code interval} greater.
* @throws IllegalArgumentException if any of following conditions are
* not met:
*
* 0 < interval <= max
* 0 <= cutoff <= max
*
*/
public Histogram(final int max,
final short interval, final int cutoff) {
if (interval > max) {
throw new IllegalArgumentException("interval > max! interval="
+ interval + ", max=" + max);
} else if (cutoff > max) {
throw new IllegalArgumentException("cutoff > max! cutoff="
+ cutoff + ", max=" + max);
} else if (interval < 1) {
throw new IllegalArgumentException("interval < 1! interval=" + interval);
} else if (cutoff < 0) {
throw new IllegalArgumentException("cutoff < 0! interval=" + cutoff);
}
this.interval = interval;
// One linear bucket every `interval' up to `cutoff'.
num_linear_buckets = (short) (cutoff / interval);
this.cutoff = num_linear_buckets * interval;
this.exp_bucket_shift = (short) log2rounddown(interval);
this.buckets = new int[num_linear_buckets
// Find how many exponential buckets we need, starting from the
// first power of 2 that's less than or equal to `interval'.
+ log2roundup((max - cutoff) >> exp_bucket_shift)
// Add an extra overflow bucket at the end.
+ 1];
}
/**
* Computes the logarithm base 2 (rounded up) of an integer.
*
* This is essentially equivalent to
* {@code Math.ceil(Math.log(n) / Math.log(2))}
* except it's 3 times faster.
* @param n A strictly positive integer.
* @return The logarithm base 2. As a special case, if the integer
* given in argument is 0, this function returns 0. If the integer
* given in argument is negative, the return value is undefined.
* @see #log2rounddown
*/
static final int log2roundup(final int n) {
int log2 = 0;
while (n > 1 << log2) {
log2++;
}
return log2;
}
/**
* Computes the logarithm base 2 (rounded down) of an integer.
*
* This is essentially equivalent to
* {@code Math.floor(Math.log(n) / Math.log(2))}
* except it's 4.5 times faster. This function is also almost 70%
* faster than {@link #log2roundup}.
* @param n A strictly positive integer.
* @return The logarithm base 2. As a special case, if the integer
* given in argument is 0, this function returns 0. If the integer
* given in argument is negative, the return value is undefined.
* @see #log2roundup
*/
static final int log2rounddown(int n) {
int log2 = 0;
while (n > 1) {
n >>>= 1;
log2++;
}
return log2;
}
/** Returns the number of buckets in this histogram. */
public int buckets() {
return buckets.length;
}
/**
* Adds a value to the histogram.
*
* This method works in {@code O(1)}.
* @param value The value to add.
* @throws IllegalArgumentException if the value given is negative.
*/
public void add(final int value) {
if (value < 0) {
throw new IllegalArgumentException("negative value: " + value);
}
buckets[bucketIndexFor(value)]++;
}
/**
* Returns the value of the pth percentile in this histogram.
*
* This method works in {@code O(N)} where {@code N} is the number of
* {@link #buckets buckets}.
* @param p A strictly positive integer in the range {@code [1; 100]}
* @throws IllegalArgumentException if {@code p} is not valid.
*/
public int percentile(int p) {
if (p < 1 || p > 100) {
throw new IllegalArgumentException("invalid percentile: " + p);
}
int count = 0; // Count of values in the histogram.
for (int i = 0; i < buckets.length; i++) {
count += buckets[i];
}
if (count == 0) { // Empty histogram. Need to special-case it, otherwise
return 0; // the `if (count <= p)' below will be erroneously true.
}
// Find the number of elements at or below which the pth percentile is.
p = count * p / 100;
// Now walk the array backwards and decrement the count until it reaches p.
for (int i = buckets.length - 1; i >= 0; i--) {
count -= buckets[i];
if (count <= p) {
return bucketHighInterval(i);
}
}
return 0;
}
/**
* Prints this histogram in a human readable ASCII format.
*
* This is equivalent to calling {@link #printAsciiBucket} on every
* bucket.
* @param out The buffer to which to write the output.
*/
public void printAscii(final StringBuilder out) {
for (int i = 0; i < buckets.length; i++) {
printAsciiBucket(out, i);
}
}
/**
* Prints a bucket of this histogram in a human readable ASCII format.
* @param out The buffer to which to write the output.
* @see #printAscii
*/
final void printAsciiBucket(final StringBuilder out, final int i) {
out.append('[')
.append(bucketLowInterval(i))
.append('-')
.append(i == buckets.length - 1 ? "Inf" : bucketHighInterval(i))
.append("): ")
.append(buckets[i])
.append('\n');
}
/** Helper for unit tests that returns the value in the given bucket. */
final int valueInBucket(final int index) {
return buckets[index];
}
/** Finds the index of the bucket in which the given value should be. */
private int bucketIndexFor(final int value) {
if (value < cutoff) {
return value / interval;
}
int bucket = num_linear_buckets // Skip all linear buckets.
// And find which bucket the rest (after `cutoff') should be in.
// Reminder: the first exponential bucket ends at 2^exp_bucket_shift.
+ log2rounddown((value - cutoff) >> exp_bucket_shift);
if (bucket >= buckets.length) {
return buckets.length - 1;
}
return bucket;
}
/** Returns the low interval (inclusive) of the given bucket. */
private int bucketLowInterval(final int index) {
if (index <= num_linear_buckets) {
return index * interval;
} else {
return cutoff + (1 << (index - num_linear_buckets + exp_bucket_shift));
}
}
/** Returns the high interval (exclusive) of the given bucket. */
private int bucketHighInterval(final int index) {
if (index == buckets.length - 1) {
return Integer.MAX_VALUE;
} else {
return bucketLowInterval(index + 1);
}
}
public String toString() {
return "Histogram(interval=" + interval + ", cutoff=" + cutoff
+ ", num_linear_buckets=" + num_linear_buckets
+ ", exp_bucket_shift=" + exp_bucket_shift
+ ", buckets=" + Arrays.toString(buckets) + ')';
}
}