smile.stat.distribution.PoissonDistribution Maven / Gradle / Ivy
/******************************************************************************
* Confidential Proprietary *
* (c) Copyright Haifeng Li 2011, All Rights Reserved *
******************************************************************************/
package smile.stat.distribution;
import smile.math.special.Gamma;
import smile.math.Math;
/**
* Poisson distribution expresses the probability of a number of events
* occurring in a fixed period of time if these events occur with a known
* average rate and independently of the time since the last event. The Poisson
* distribution can also be used for the number of events in other specified
* intervals such as distance, area or volume. If the expected number of
* occurrences in this interval is λ, then the probability that there
* are exactly n occurrences (n = 0, 1, 2, ...) is equal to
*
* &lambdan e-λ
* f(n; λ) = ---------
* n!
*
* For sufficiently large values of λ, (say λ > 1000), the normal
* distribution with mean λ and variance λ, is an excellent
* approximation to the Poisson distribution. If λ is greater than about
* 10, then the normal distribution is a good approximation if an appropriate
* continuity correction is performed, i.e., P(X ≤ x), where (lower-case) x
* is a non-negative integer, is replaced by P(X ≤ x + 0.5).
*
* When a variable is Poisson distributed, its square root is approximately
* normally distributed with expected value of about λ1/2
* and variance of about 1/4.
*
* @author Haifeng Li
*/
public class PoissonDistribution extends DiscreteDistribution implements DiscreteExponentialFamily {
private double lambda;
private double entropy;
private RandomNumberGenerator rng;
/**
* Constructor.
*/
public PoissonDistribution(double lambda) {
if (lambda < 0.0) {
throw new IllegalArgumentException("Invalid lambda: " + lambda);
}
this.lambda = lambda;
entropy = (Math.log(2 * Math.PI * Math.E) + Math.log(lambda)) / 2 - 1 / (12 * lambda) - 1 / (24 * lambda * lambda) - 19 / (360 * lambda * lambda * lambda);
}
/**
* Constructor. Parameter will be estimated from the data by MLE.
*/
public PoissonDistribution(int[] data) {
for (int i = 0; i < data.length; i++) {
if (data[i] < 0) {
throw new IllegalArgumentException("Samples contain negative values.");
}
}
lambda = Math.mean(data);
entropy = (Math.log(2 * Math.PI * Math.E) + Math.log(lambda)) / 2 - 1 / (12 * lambda) - 1 / (24 * lambda * lambda) - 19 / (360 * lambda * lambda * lambda);
}
/**
* Returns the rate parameter, the expected number of occurrences in a time unit.
*/
public double getLambda() {
return lambda;
}
@Override
public int npara() {
return 1;
}
@Override
public double mean() {
return lambda;
}
@Override
public double var() {
return lambda;
}
@Override
public double sd() {
return Math.sqrt(lambda);
}
@Override
public double entropy() {
return entropy;
}
@Override
public String toString() {
return String.format("Poisson Distribution(%.4f)", lambda);
}
@Override
public double p(int k) {
if (k < 0) {
return 0.0;
} else {
return Math.pow(lambda, k) * Math.exp(-lambda) / Math.factorial(k);
}
}
@Override
public double logp(int k) {
if (k < 0) {
return Double.NEGATIVE_INFINITY;
} else {
return k * Math.log(lambda) - lambda - Math.logFactorial(k);
}
}
@Override
public double cdf(double k) {
if (lambda == 0) {
if (k >= 0) {
return 1.0;
} else {
return 0.0;
}
}
if (k < 0) {
return 0.0;
} else {
return Gamma.regularizedUpperIncompleteGamma(Math.floor(k + 1), lambda);
}
}
@Override
public double quantile(double p) {
if (p < 0.0 || p > 1.0) {
throw new IllegalArgumentException("Invalid p: " + p);
}
if (p < Math.exp(-lambda)) {
return 0;
}
int n = (int) Math.max(Math.sqrt(lambda), 5.0);
int nl, nu, inc = 1;
if (p < cdf(n)) {
do {
n = Math.max(n - inc, 0);
inc *= 2;
} while (p < cdf(n) && n > 0);
nl = n;
nu = n + inc / 2;
} else {
do {
n += inc;
inc *= 2;
} while (p > cdf(n));
nu = n;
nl = n - inc / 2;
}
return quantile(p, nl, nu);
}
@Override
public DiscreteMixture.Component M(int[] x, double[] posteriori) {
double alpha = 0.0;
double mean = 0.0;
for (int i = 0; i < x.length; i++) {
alpha += posteriori[i];
mean += x[i] * posteriori[i];
}
mean /= alpha;
DiscreteMixture.Component c = new DiscreteMixture.Component();
c.priori = alpha;
c.distribution = new PoissonDistribution(mean);
return c;
}
/**
* This function generates a random variate with the poisson distribution.
*
* Uses down/up search from the mode by chop-down technique for λ < 20,
* and patchwork rejection method for λ ≥ 20.
*
* For λ < 1.E-6 numerical inaccuracy is avoided by direct calculation.
*/
@Override
public double rand() {
if (lambda > 2E9) {
throw new IllegalArgumentException("Too large lambda for random number generator.");
}
if (lambda == 0) {
return 0;
}
// For extremely small L we calculate the probabilities of x = 1
// and x = 2 (ignoring higher x). The reason for using this
// method is to prevent numerical inaccuracies in other methods.
if (lambda < 1.E-6) {
return tinyLambdaRand(lambda);
}
if (rng == null) {
if (lambda < 20) {
// down/up search from mode
// The computation time for this method grows with sqrt(L).
// Gives overflow for L > 60
rng = new ModeSearch();
} else {
// patchword rejection method
// The computation time for this method does not depend on L.
// Use where other methods would be slower.
rng = new Patchwork();
}
}
return rng.rand();
}
interface RandomNumberGenerator {
public int rand();
}
class ModeSearch implements RandomNumberGenerator {
/**
* value at x=0 or at mode
*/
private double f0Mode;
/**
* upper bound
*/
private int upperBound;
/**
* Initialize the Poisson random number generator.
*/
ModeSearch() {
int mode = (int) lambda;
// safety-bound
upperBound = (int) Math.floor(lambda + 0.5 + 7.0 * (Math.sqrt(lambda + lambda + 1.) + 1.5));
// probability of x=mode
f0Mode = Math.exp(mode * Math.log(lambda) - lambda - Math.logFactorial(mode));
}
/**
* This method is based on down/up search from the mode, using the
* chop-down technique (PMDU).
*
* Execution time grows with sqrt(L). Gives overflow for L > 60.
*/
@Override
public int rand() {
double r, c, d;
int x, mode;
mode = (int) lambda;
while (true) {
r = Math.random();
if ((r -= f0Mode) <= 0) {
return mode;
}
c = d = f0Mode;
// alternating down/up search from the mode
for (int i = 1; i <= mode; i++) {
// down
x = mode - i;
c *= x + 1;
r *= lambda;
d *= lambda;
if ((r -= c) <= 0) {
return x;
}
// up
x = mode + i;
d *= lambda;
r *= x;
c *= x;
if ((r -= d) <= 0) {
return x;
}
}
// continue upward search from 2*mode+1 to bound
for (x = mode + mode + 1; x <= upperBound; x++) {
d *= lambda;
r *= x;
if ((r -= d) <= 0) {
return x;
}
}
}
}
}
class Patchwork implements RandomNumberGenerator {
private int k1, k2, k4, k5;
private double dl, dr, r1, r2, r4, r5, ll, rr, l_my, c_pm, f1, f2, f4, f5, p1, p2, p3, p4, p5, p6;
/**
* Initialize the Poisson random number generator.
*/
Patchwork() {
int mode = (int) lambda;
// approximate deviation of reflection points k2, k4 from L - 1/2
double Ds = Math.sqrt(lambda + 0.25);
// mode, reflection points k2 and k4, and points k1 and k5, which
// delimit the centre region of h(x)
k2 = (int) Math.ceil(lambda - 0.5 - Ds);
k4 = (int) (lambda - 0.5 + Ds);
k1 = k2 + k2 - mode + 1;
k5 = k4 + k4 - mode;
// range width of the critical left and right centre region
dl = (double) (k2 - k1);
dr = (double) (k5 - k4);
// recurrence constants r(k) = p(k)/p(k-1) at k = k1, k2, k4+1, k5+1
r1 = lambda / (double) k1;
r2 = lambda / (double) k2;
r4 = lambda / (double) (k4 + 1);
r5 = lambda / (double) (k5 + 1);
// reciprocal values of the scale parameters of expon. tail envelopes
ll = Math.log(r1); // expon. tail left
rr = -Math.log(r5); // expon. tail right
// Poisson constants, necessary for computing function values f(k)
l_my = Math.log(lambda);
c_pm = mode * l_my - Math.logFactorial(mode);
// function values f(k) = p(k)/p(mode) at k = k2, k4, k1, k5
f2 = f(k2, l_my, c_pm);
f4 = f(k4, l_my, c_pm);
f1 = f(k1, l_my, c_pm);
f5 = f(k5, l_my, c_pm);
// area of the two centre and the two exponential tail regions
// area of the two immediate acceptance regions between k2, k4
p1 = f2 * (dl + 1.); // immed. left
p2 = f2 * dl + p1; // centre left
p3 = f4 * (dr + 1.) + p2; // immed. right
p4 = f4 * dr + p3; // centre right
p5 = f1 / ll + p4; // expon. tail left
p6 = f5 / rr + p5; // expon. tail right
}
/**
* This method is based on Patchwork Rejection method (PPRS):
* The area below the histogram function f(x) is rearranged in
* its body by two point reflections. Within a large center
* interval variates are sampled efficiently by rejection from
* uniform hats. Rectangular immediate acceptance regions speed
* up the generation. The remaining tails are covered by
* exponential functions.
*
* For detailed explanation, see:
* Stadlober, E & Zechner, H: "The Patchwork Rejection Technique for
* Sampling from Unimodal Distributions". ACM Transactions on Modeling
* and Computer Simulation, vol. 9, no. 1, 1999, p. 59-83.
*
* This method is valid for λ >= 10.
*
* The computation time hardly depends on λ, except that it matters
* a lot whether λ is within the range where the LnFac function is
* tabulated.
*/
@Override
public int rand() {
int Dk, X, Y;
double U, V, W;
for (;;) {
// generate uniform number U -- U(0, p6)
// case distinction corresponding to U
if ((U = Math.random() * p6) < p2) { // centre left
// immediate acceptance region R2 = [k2, mode) *[0, f2), X = k2, ... mode -1
if ((V = U - p1) < 0.0) {
return (k2 + (int) (U / f2));
}
// immediate acceptance region R1 = [k1, k2)*[0, f1), X = k1, ... k2-1
if ((W = V / dl) < f1) {
return (k1 + (int) (V / f1));
}
// computation of candidate X < k2, and its counterpart Y > k2
// either squeeze-acceptance of X or acceptance-rejection of Y
Dk = (int) (dl * Math.random()) + 1;
if (W <= f2 - Dk * (f2 - f2 / r2)) { // quick accept of
return (k2 - Dk); // X = k2 - Dk
}
if ((V = f2 + f2 - W) < 1.0) { // quick reject of Y
Y = k2 + Dk;
if (V <= f2 + Dk * (1.0 - f2) / (dl + 1.0)) { // quick accept of
return Y; // Y = k2 + Dk
}
if (V <= f(Y, l_my, c_pm)) {
return Y; // final accept of Y
}
}
X = k2 - Dk;
} else if (U < p4) { // centre right
// immediate acceptance region R3 = [mode, k4+1)*[0, f4), X = mode, ... k4
if ((V = U - p3) < 0.) {
return (k4 - (int) ((U - p2) / f4));
}
// immediate acceptance region R4 = [k4+1, k5+1)*[0, f5)
if ((W = V / dr) < f5) {
return (k5 - (int) (V / f5));
}
// computation of candidate X > k4, and its counterpart Y < k4
// either squeeze-acceptance of X or acceptance-rejection of Y
Dk = (int) (dr * Math.random()) + 1;
if (W <= f4 - Dk * (f4 - f4 * r4)) { // quick accept of
return (k4 + Dk); // X = k4 + Dk
}
if ((V = f4 + f4 - W) < 1.0) { // quick reject of Y
Y = k4 - Dk;
if (V <= f4 + Dk * (1.0 - f4) / dr) { // quick accept of
return Y; // Y = k4 - Dk
}
if (V <= f(Y, l_my, c_pm)) {
return Y; // final accept of Y
}
}
X = k4 + Dk;
} else {
W = Math.random();
if (U < p5) { // expon. tail left
Dk = (int) (1.0 - Math.log(W) / ll);
if ((X = k1 - Dk) < 0L) {
continue; // 0 <= X <= k1 - 1
}
W *= (U - p4) * ll; // W -- U(0, h(x))
if (W <= f1 - Dk * (f1 - f1 / r1)) {
return X; // quick accept of X
}
} else { // expon. tail right
Dk = (int) (1.0 - Math.log(W) / rr);
X = k5 + Dk; // X >= k5 + 1
W *= (U - p5) * rr; // W -- U(0, h(x))
if (W <= f5 - Dk * (f5 - f5 * r5)) {
return X; // quick accept of X
}
}
}
// acceptance-rejection test of candidate X from the original area
// test, whether W <= f(k), with W = U*h(x) and U -- U(0, 1)
// log f(X) = (X - mode)*log(L) - log X! + log mode!
if (Math.log(W) <= X * l_my - Math.logFactorial(X) - c_pm) {
return X;
}
}
}
/**
* used by Patchwork
*/
private double f(int k, double l_nu, double c_pm) {
return Math.exp(k * l_nu - Math.logFactorial(k) - c_pm);
}
}
/**
* For extremely low values of lambda, the method is a simple
* calculation of the probabilities of x = 1 and x = 2. Higher values
* are ignored.
*
* The reason for using this method is to avoid the numerical inaccuracies
* in other methods.
*/
static int tinyLambdaRand(double lambda) {
double d, r;
d = Math.sqrt(lambda);
if (Math.random() >= d) {
return 0;
}
r = Math.random() * d;
if (r > lambda * (1.0 - lambda)) {
return 0;
}
if (r > 0.5 * lambda * lambda * (1. - lambda)) {
return 1;
}
return 2;
}
}