com.aliasi.stats.BinomialDistribution Maven / Gradle / Ivy
Show all versions of aliasi-lingpipe Show documentation
/*
* LingPipe v. 4.1.0
* Copyright (C) 2003-2011 Alias-i
*
* This program is licensed under the Alias-i Royalty Free License
* Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Alias-i
* Royalty Free License Version 1 for more details.
*
* You should have received a copy of the Alias-i Royalty Free License
* Version 1 along with this program; if not, visit
* http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
* Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
* +1 (718) 290-9170.
*/
package com.aliasi.stats;
/**
* A BinomialDistribution
is a discrete distribution over
* the number of successes given a fixed number of Bernoulli trials.
* A binomial distribution is constructed from a specified Bernoulli
* distribution which determines the success probability. The minimum
* outcome is 0
and the maximum outcome is the number of
* trials. This class also defines a constant method {@link
* #log2BinomialCoefficient(long,long)} for computing binomial
* coefficients.
*
* The method {@link #z(int)} returns the z-score statistic for a
* specified number of outcomes.
*
*
Computing P-Values
*
* As of LingPipe 3.2.0, the dependency on Jakarta Commons Math was
* removed. As a result, we removed the two methods that computed
* p-values. Here's their implementation in case you need the
* functionality (you may need to increas the text size):
*
*
* import org.apache.commons.math.MathException;
* import org.apache.commons.math.distribution.NormalDistribution;
* import org.apache.commons.math.distribution.NormalDistributionImpl;
*
* static final NormalDistribution Z_DISTRIBUTION
* = new NormalDistributionImpl();
*
* /**
* * Returns the two-sided p-value computed from the z-score for
* * this distribution for the specified number of successes.
* ...
* double pValue(int numSuccesses) throws MathException {
* return pValue(bernoulliDistribution().successProbability(),
* numSuccesses,
* numTrials());
* }
*
* /**
* * Returns the one-sided p-value computed from the z-score for
* * this distribution for the specified number of successes.
* ...
* double pValueLess(int numSuccesses) throws MathException {
* return pValueLess(bernoulliDistribution().successProbability(),
* numSuccesses,
* mNumTrials());
* }
*
* /**
* * Returns the two-sided p-value for the z-score statistic on the
* * specified number of successes out of the specified number of
* * trials for the specified success probability.
* ...
* static double pValue(double successProbability,
* int numSuccesses,
* int numTrials) throws MathException {
*
* double z = z(successProbability,numSuccesses,numTrials);
* return 2.0 * Z_DISTRIBUTION.cumulativeProbability(Math.min(-z,z));
* }
* /**
* * Returns the one-sided (lower) p-value for the z-score statistic
* * on the specified number of successes out of the specified
* * number of trials for the specified success probability.
* ...
* static double pValueLess(double successProbability,
* int numSuccesses,
* int numTrials) throws MathException {
* double z = z(successProbability,numSuccesses,numTrials);
* return 1.0 - Z_DISTRIBUTION.cumulativeProbability(z);
* }
*
* For more information, see:
*
* - Eric W. Weisstein.
* Binomial Distribution.
* From MathWorld--A Wolfram Web Resource.
*
- Eric W. Weisstein.
* Binomial Coefficient.
* From MathWorld--A Wolfram Web Resource.
*
- Eric W. Weisstein.
* z-Score.
* From MathWorld--A Wolfram Web Resource.
*
- Eric W. Weisstein.
* P-Value.
* From MathWorld--A Wolfram Web Resource.
*
- Eric W. Weisstein.
* Hypothesis Testing.
* From MathWorld--A Wolfram Web Resource.
*
*
* @author Bob Carpenter
* @version 3.2.0
* @since LingPipe2.0
*/
public class BinomialDistribution extends AbstractDiscreteDistribution {
private final BernoulliDistribution mBernoulliDistribution;
private final int mNumTrials;
/**
* Construct a binomial distribution that samples from the
* specified Bernoulli distribution the specified number of times.
* The resulting distribution is over the number of successes,
* with a range between zero and the number of trials.
*
* The Bernoulli distribution is stored and any change to it
* will affect the constructed binomial distribution.
*
* @param distribution Underlying Bernoulli distribution.
*/
public BinomialDistribution(BernoulliDistribution distribution,
int numTrials) {
if (numTrials < 0) {
String msg = "Number of trials must be non-negative."
+ " Found num trials=" + numTrials;
throw new IllegalArgumentException(msg);
}
mBernoulliDistribution = distribution;
mNumTrials = numTrials;
}
/**
* Returns the underlying Bernoulli (two outcome) distribution
* underlying this binomial distribution.
*
* @return The base distribution.
*/
public BernoulliDistribution bernoulliDistribution() {
return mBernoulliDistribution;
}
/**
* Returns zero, the minimum outcome for a binomial distribution.
*
* @return Zero, the minimum outcome for a binomial distribution.
*/
@Override
public long minOutcome() {
return 0l;
}
/**
* Returns the maximum non-zero probability outcome, which is the
* number of trials for this distribution.
*
* @return The maximum non-zero probability outcome.
*/
@Override
public long maxOutcome() {
return mNumTrials;
}
/**
* Returns the number of trials for this binomial distribution.
* This is the same as the result of {@link #maxOutcome()}.
*
* @return The number of trials.
*/
public long numTrials() {
return mNumTrials;
}
/**
* Returns the probability of the specified outcome. The
* probability is determined by the likelihood of the specified
* number of successes out of the number of trials for this
* distribution.
*
*
The probability for a specified number of outcomes is:
*
*
* P(numSuccesses)
*
* = binomialCoefficient(numTrials,numSuccesses)
*
* * P(success)n
*
* * (1 - P(success))numTrials - numSuccesses
*
*
* where numTrials
is the number of trials for this
* binomial distribution and P(success)
is the
* success probability of the Bernoulli distribution underlying
* this binomial distribution.
*
* @param outcome Number of successes.
* @return Probability of specified number of successes.
*/
@Override
public double probability(long outcome) {
return java.lang.Math.pow(2.0,log2Probability(outcome));
}
/**
* Returns the log (base 2) probability of the specified outcome.
* The probability is determined by the likelihood of the
* specified number of successes out of the number of trials for
* this distribution. See the documentation for the method {@link
* #probability(long)} for an exact definition.
*
* @param outcome Number of successes.
* @return Probability of specified number of successes.
*/
@Override
public double log2Probability(long outcome) {
if (outcome < 0 || outcome > maxOutcome())
return Double.NEGATIVE_INFINITY;
return log2BinomialCoefficient(mNumTrials,outcome)
+ ( ((double) outcome)
* mBernoulliDistribution.log2Probability(1l) )
+ ( ((double) (mNumTrials - outcome))
* mBernoulliDistribution.log2Probability(0l) );
}
/**
* Returns the z-score for the specified number of successes given
* this distribution's success probability and number of trials.
* Z-scores may take on any value from negative to positive
* infinity. A z-score is the number of standard deviations above
* or below the expected number of successes for this
* distribution. Thus the greater the absolute value of the
* z-score, the less likely the number of successes was drawn from
* this distribution. The lower a negative z-score, the more
* likely it was drawn from a distribution with a lower success
* probability and the higher a positive z-score, the more likely
* it was drawn from a distribution with a higher success
* probability.
*
* The formula for z-scores is provided in the documentation
* for the static method {@link #z(double,int,int)}.
*
* @param numSuccesses Number of successes in sample.
* @return Z score value.
* @throws IllegalArgumentException If the number of successes is less
* than 0 or more than the number of trials for this distribution.
*/
public double z(int numSuccesses) {
return z(mBernoulliDistribution.successProbability(),
numSuccesses,
mNumTrials);
}
/**
* Returns the variance of this binomial distribution. The
* variance of a binomial distribution is:
*
*
* variance = numTrials * P(success) * (1 - P(success))
*
*
* @return The variance of this binomial distribution.
*/
@Override
public double variance() {
double successProb = mBernoulliDistribution.successProbability();
return successProb * (1.0 - successProb) * (double) mNumTrials;
}
/**
* Returns the z score for the specified number of successes out
* of the specified number of trials given the specified success
* probability. The z-score is the number of standard deviations
* above or below the median number of outcomes the given number
* of successes lies given the success probability and number of
* trials.
*
* The z-score for binomial distributions is defined by:
*
*
* z = (numSuccesses - expectedSuccesses)
*
* / (numTrials * P(success) * (1-P(success)))1/2
*
*
* where
*
*
* expectedSuccesses = P(success) * numTrials
*
*
* Thus numerator is the difference between observed and expected
* values for the number of successes and the denominator is the
* standard deviation for the Bernoulli trial iterated over the
* specified number of trials.
*
* @param successProbability Probability of success.
* @param numSuccesses Number of successes.
* @param numTrials Number of trials.
* @throws IllegalArgumentException If the success probability is
* not between 0 and 1 or if the number of successes is less than
* zero or greater than the number of trials.
*/
public static double z(double successProbability,
int numSuccesses,
int numTrials) {
if (successProbability < 0.0
|| successProbability > 1.0
|| Double.isNaN(successProbability)) {
String msg = "Require probability between 0 and 1 for success."
+ " Found success probability=" + successProbability;
throw new IllegalArgumentException(msg);
}
if (numSuccesses < 0 || numSuccesses > numTrials) {
String msg = "Require 0 <= num successes <= num trials"
+ " Found num successes= " + numSuccesses
+ " num successes=" + numTrials;
throw new IllegalArgumentException(msg);
}
double numTrialsD = numTrials;
double numSuccessesD = numSuccesses;
double expectedSuccesses = successProbability * numTrialsD;
return (numSuccessesD - expectedSuccesses)
/ Math.sqrt(numTrialsD
* successProbability
* (1.0 - successProbability));
}
/**
* Returns the log (base 2) of the binomial coefficient of the
* specified arguments. The binomial coefficient is equal to the
* number of ways to choose a subset of size m
from a
* set of n
objects, which is pronounced "n choose
* m", and is given by:
*
*
* binomialCoefficient(n,m) = n! / ( m! * (n-m)!)
*
* log2 choose(n,m)
* = log2 n - log2 m
* - log2 (n-m)
*
*
* @return The log (base 2) of the binomial coefficient of the
* specified arguments.
*/
public static double log2BinomialCoefficient(long n, long m) {
if (n < m) {
String msg = "Require n > m for binomial coefficient."
+ " Found n= " + n
+ " m = " + m;
throw new IllegalArgumentException(msg);
}
return com.aliasi.util.Math.log2Factorial(n)
- com.aliasi.util.Math.log2Factorial(m)
- com.aliasi.util.Math.log2Factorial(n-m);
}
}