All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.stat.distribution.BinomialDistribution Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2010-2020 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Smile.  If not, see .
 ******************************************************************************/

package smile.stat.distribution;

import smile.math.MathEx;
import smile.math.special.Beta;

import static java.lang.Math.*;
import static smile.math.MathEx.lfactorial;

/**
 * The binomial distribution is the discrete probability distribution of
 * the number of successes in a sequence of n independent yes/no experiments,
 * each of which yields success with probability p. Such a success/failure
 * experiment is also called a Bernoulli experiment or Bernoulli trial.
 * In fact, when n = 1, the binomial distribution is a Bernoulli
 * distribution. The probability of getting exactly k successes in n trials
 * is given by the probability mass function:
 * 

*

 *     Pr(K = k) = nCk pk (1-p)n-k
 * 
* where nCk is n choose k. *

* It is frequently used to model number of successes in a sample of size * n from a population of size N. Since the samples are not independent * (this is sampling without replacement), the resulting distribution * is a hypergeometric distribution, not a binomial one. However, for N much * larger than n, the binomial distribution is a good approximation, and * widely used. *

* Binomial distribution describes the number of successes for draws with * replacement. In contrast, the hypergeometric distribution describes the * number of successes for draws without replacement. *

* Although Binomial distribution belongs to exponential family, we don't * implement DiscreteExponentialFamily interface here since it is impossible * and meaningless to estimate a mixture of Binomial distributions. * * @see HyperGeometricDistribution * * @author Haifeng Li */ public class BinomialDistribution extends DiscreteDistribution { private static final long serialVersionUID = 2L; /** The probability of success. */ public final double p; /** The number of experiments. */ public final int n; /** The entropy. */ private final double entropy; /** The random number generator. */ private RandomNumberGenerator rng; /** * Constructor. * @param p the probability of success. * @param n the number of experiments. */ public BinomialDistribution(int n, double p) { if (p < 0 || p > 1) { throw new IllegalArgumentException("Invalid p: " + p); } if (n < 0) { throw new IllegalArgumentException("Invalid n: " + n); } this.n = n; this.p = p; entropy = log(2 * PI * E * n * p * (1 - p)) / 2; } @Override public int length() { return 2; } @Override public double mean() { return n * p; } @Override public double variance() { return n * p * (1 - p); } @Override public double sd() { return Math.sqrt(n * p * (1 - p)); } @Override public double entropy() { return entropy; } @Override public String toString() { return String.format("Binomial Distribution(%d, %.4f)", n, p); } @Override public double p(int k) { if (k < 0 || k > n) { return 0.0; } else { return Math.floor(0.5 + exp(lfactorial(n) - lfactorial(k) - lfactorial(n - k))) * Math.pow(p, k) * Math.pow(1.0 - p, n - k); } } @Override public double logp(int k) { if (k < 0 || k > n) { return Double.NEGATIVE_INFINITY; } else { return lfactorial(n) - lfactorial(k) - lfactorial(n - k) + k * log(p) + (n - k) * log(1.0 - p); } } @Override public double cdf(double k) { if (k < 0) { return 0.0; } else if (k >= n) { return 1.0; } else { return Beta.regularizedIncompleteBetaFunction(n - k, k + 1, 1 - p); } } @Override public double quantile(double p) { if (p < 0.0 || p > 1.0) { throw new IllegalArgumentException("Invalid p: " + p); } if (p == 0.0) { return 0; } if (p == 1.0) { return n; } // Starting guess near peak of density. // Expand interval until we bracket. int kl, ku, inc = 1; int k = Math.max(0, Math.min(n, (int) (n * p))); if (p < cdf(k)) { do { k = Math.max(k - inc, 0); inc *= 2; } while (p < cdf(k) && k > 0); kl = k; ku = k + inc / 2; } else { do { k = Math.min(k + inc, n + 1); inc *= 2; } while (p > cdf(k)); ku = k; kl = k - inc / 2; } return quantile(p, kl, ku); } /** * This function generates a random variate with the binomial distribution. * * Uses down/up search from the mode by chop-down technique for n*p < 55, * and patchwork rejection method for n*p ≥ 55. * * For n*p < 1.E-6 numerical inaccuracy is avoided by poisson approximation. */ @Override public double rand() { double np = n * p; // Poisson approximation for extremely low np if (np < 1.E-6) { return PoissonDistribution.tinyLambdaRand(np); } boolean inv = false; // invert if (p > 0.5) { // faster calculation by inversion inv = true; } if (np < 55) { // inversion method, using chop-down search from 0 if (p <= 0.5) { rng = new ModeSearch(p); } else { rng = new ModeSearch(1.0 - p); // faster calculation by inversion } } else { // ratio of uniforms method if (p <= 0.5) { rng = new Patchwork(p); } else { rng = new Patchwork(1.0 - p); // faster calculation by inversion } } int x = rng.rand(); if (inv) { x = n - x; // undo inversion } return x; } interface RandomNumberGenerator { public int rand(); } class Patchwork implements RandomNumberGenerator { private int mode; private int k1, k2, k4, k5; private double dl, dr, r1, r2, r4, r5, ll, lr, l_pq, c_pm, f1, f2, f4, f5, p1, p2, p3, p4, p5, p6; public Patchwork(double p) { double nu = (n + 1) * p; double q = 1.0 - p; // main parameters // approximate deviation of reflection points k2, k4 from nu - 1/2 double W = Math.sqrt(nu * q + 0.25); // mode, reflection points k2 and k4, and points k1 and k5, which // delimit the centre region of h(x) mode = (int) nu; k2 = (int) Math.ceil(nu - 0.5 - W); k4 = (int) (nu - 0.5 + W); k1 = k2 + k2 - mode + 1; k5 = k4 + k4 - mode; // range width of the critical left and right centre region dl = (double) (k2 - k1); dr = (double) (k5 - k4); // recurrence constants r(k) = p(k)/p(k-1) at k = k1, k2, k4+1, k5+1 nu = nu / q; p = p / q; r1 = nu / (double) k1 - p; // nu = (n+1)p / q r2 = nu / (double) k2 - p; // p = p / q r4 = nu / (double) (k4 + 1) - p; r5 = nu / (double) (k5 + 1) - p; // reciprocal values of the scale parameters of expon. tail envelopes ll = log(r1); // expon. tail left lr = -log(r5); // expon. tail right // binomial constants, necessary for computing function values f(k) l_pq = log(p); c_pm = mode * l_pq - lfactorial(mode) - lfactorial(n - mode); // function values f(k) = p(k)/p(mode) at k = k2, k4, k1, k5 f2 = f(k2, n, l_pq, c_pm); f4 = f(k4, n, l_pq, c_pm); f1 = f(k1, n, l_pq, c_pm); f5 = f(k5, n, l_pq, c_pm); // area of the two centre and the two exponential tail regions // area of the two immediate acceptance regions between k2, k4 p1 = f2 * (dl + 1.); // immed. left p2 = f2 * dl + p1; // centre left p3 = f4 * (dr + 1.) + p2; // immed. right p4 = f4 * dr + p3; // centre right p5 = f1 / ll + p4; // expon. tail left p6 = f5 / lr + p5; // expon. tail right } /** * Ppatchwork rejection method (BPRS). */ @Override public int rand() { int Dk, X, Y; double U, V, W; for (;;) { // generate uniform number U -- U(0, p6) // case distinction corresponding to U if ((U = MathEx.random() * p6) < p2) { // centre left // immediate acceptance region R2 = [k2, mode) *[0, f2), X = k2, ... mode -1 if ((V = U - p1) < 0.) { return (k2 + (int) (U / f2)); } // immediate acceptance region R1 = [k1, k2)*[0, f1), X = k1, ... k2-1 if ((W = V / dl) < f1) { return (k1 + (int) (V / f1)); } // computation of candidate X < k2, and its counterpart Y > k2 // either squeeze-acceptance of X or acceptance-rejection of Y Dk = (int) (dl * MathEx.random()) + 1; if (W <= f2 - Dk * (f2 - f2 / r2)) { // quick accept of return (k2 - Dk); } // X = k2 - Dk if ((V = f2 + f2 - W) < 1.) { // quick reject of Y Y = k2 + Dk; if (V <= f2 + Dk * (1. - f2) / (dl + 1.)) { // quick accept of return (Y); } // Y = k2 + Dk if (V <= f(Y, n, l_pq, c_pm)) { return (Y); } } // final accept of Y X = k2 - Dk; } else if (U < p4) { // centre right // immediate acceptance region R3 = [mode, k4+1)*[0, f4), X = mode, ... k4 if ((V = U - p3) < 0.) { return (k4 - (int) ((U - p2) / f4)); } // immediate acceptance region R4 = [k4+1, k5+1)*[0, f5) if ((W = V / dr) < f5) { return (k5 - (int) (V / f5)); } // computation of candidate X > k4, and its counterpart Y < k4 // either squeeze-acceptance of X or acceptance-rejection of Y Dk = (int) (dr * MathEx.random()) + 1; if (W <= f4 - Dk * (f4 - f4 * r4)) { // quick accept of return (k4 + Dk); } // X = k4 + Dk if ((V = f4 + f4 - W) < 1.) { // quick reject of Y Y = k4 - Dk; if (V <= f4 + Dk * (1. - f4) / dr) { // quick accept of return (Y); } // Y = k4 - Dk if (V <= f(Y, n, l_pq, c_pm)) { return (Y); // final accept of Y } } X = k4 + Dk; } else { W = MathEx.random(); if (U < p5) { // expon. tail left Dk = (int) (1. - log(W) / ll); if ((X = k1 - Dk) < 0) { continue; // 0 <= X <= k1 - 1 } W *= (U - p4) * ll; // W -- U(0, h(x)) if (W <= f1 - Dk * (f1 - f1 / r1)) { return X; // quick accept of X } } else { // expon. tail right Dk = (int) (1. - log(W) / lr); if ((X = k5 + Dk) > n) { continue; // k5 + 1 <= X <= n } W *= (U - p5) * lr; // W -- U(0, h(x)) if (W <= f5 - Dk * (f5 - f5 * r5)) { return X; // quick accept of X } } } // acceptance-rejection test of candidate X from the original area // test, whether W <= BinomialF(k), with W = U*h(x) and U -- U(0, 1) // log BinomialF(X) = (X - mode)*log(p/q) - log X!(n - X)! + log mode!(n - mode)! if (log(W) <= X * l_pq - lfactorial(X) - lfactorial(n - X) - c_pm) { return X; } } } // used by BinomialPatchwork private double f(int k, int n, double l_pq, double c_pm) { return exp(k * l_pq - lfactorial(k) - lfactorial(n - k) - c_pm); } } class ModeSearch implements RandomNumberGenerator { private int mode; // mode private int bound; // upper bound private double modeValue; // value at mode private double r1; public ModeSearch(double p) { double nu = (n + 1) * p; // safety bound guarantees at least 17 significant decimal digits bound = (int) (nu + 11.0 * (Math.sqrt(nu) + 1.0)); if (bound > n) { bound = n; } mode = (int) nu; if (mode == nu && p == 0.5) { mode--; // mode } r1 = p / (1.0 - p); modeValue = exp(lfactorial(n) - lfactorial(mode) - lfactorial(n - mode) + mode * log(p) + (n - mode) * log(1. - p)); } /** * Uses inversion method by down-up search starting at the mode (BMDU). *

* Assumes p < 0.5. Gives overflow for n*p > 60. *

* This method is fast when n*p is low. */ @Override public int rand() { int K, x; double U, c, d, divisor; while (true) { U = MathEx.random(); if ((U -= modeValue) <= 0.0) { return (mode); } c = d = modeValue; // down- and upward search from the mode for (K = 1; K <= mode; K++) { x = mode - K; // downward search from mode divisor = (n - x) * r1; c *= x + 1; U *= divisor; d *= divisor; if ((U -= c) <= 0.0) { return x; } x = mode + K; // upward search from mode divisor = x; d *= (n - x + 1) * r1; U *= divisor; c *= divisor; if ((U -= d) <= 0.0) { return x; } } // upward search from 2*mode + 1 to bound for (x = mode + mode + 1; x <= bound; x++) { d *= (n - x + 1) * r1; U *= x; if ((U -= d) <= 0.0) { return x; } } } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy