
htsjdk.variant.utils.BinomialCoefficientUtil Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of htsjdk Show documentation
Show all versions of htsjdk Show documentation
A Java API for high-throughput sequencing data (HTS) formats
package htsjdk.variant.utils;
import java.lang.ArithmeticException;
import java.lang.Math;
/**
* A modified version of the Apache Math implementation of binomial
* coefficient calculation
* Derived from code within the CombinatoricsUtils and FastMath classes
* within Commons Math3 (https://commons.apache.org/proper/commons-math/)
*
* Included here for use in Genotype Likelihoods calculation, instead
* of adding Commons Math3 as a dependency
*
* Commons Math3 is licensed using the Apache License 2.0
* Full text of this license can be found here:
* https://www.apache.org/licenses/LICENSE-2.0.txt
*
* This product includes software developed at
* The Apache Software Foundation (http://www.apache.org/).
*
* This product includes software developed for Orekit by
* CS Systèmes d'Information (http://www.c-s.fr/)
* Copyright 2010-2012 CS Systèmes d'Information
*
*/
public class BinomialCoefficientUtil {
/**
* Binomial Coefficient, "{@code n choose k}", the number of
* {@code k}-element subsets that can be selected from an
* {@code n}-element set.
*
* Preconditions:
*
* - {@code 0 <= k <= n } (otherwise
* {@code IllegalArgumentException} is thrown)
* - The result is small enough to fit into a {@code long}. The
* largest value of {@code n} for which all coefficients are
* {@code < Long.MAX_VALUE} is 66. If the computed value exceeds
* {@code Long.MAX_VALUE} an {@code ArithmeticException} is
* thrown.
*
*
* @param n the size of the set
* @param k the size of the subsets to be counted
* @return {@code n choose k}
* @throws ArithmeticException if {@code n < 0} or {@code k > n} or
* the result is too large to be represented by a long integer.
*/
public static long binomialCoefficient(final int n, final int k) throws ArithmeticException {
checkBinomial(n, k);
if ((n == k) || (k == 0)) {
return 1;
}
if ((k == 1) || (k == n - 1)) {
return n;
}
// Use symmetry for large k
if (k > n / 2) {
return binomialCoefficient(n, n - k);
}
// We use the formula
// (n choose k) = n! / (n-k)! / k!
// (n choose k) == ((n-k+1)*...*n) / (1*...*k)
// which could be written
// (n choose k) == (n-1 choose k-1) * n / k
long result = 1;
if (n <= 61) {
// For n <= 61, the naive implementation cannot overflow.
int i = n - k + 1;
for (int j = 1; j <= k; j++) {
result = result * i / j;
i++;
}
} else if (n <= 66) {
// For n > 61 but n <= 66, the result cannot overflow,
// but we must take care not to overflow intermediate values.
int i = n - k + 1;
for (int j = 1; j <= k; j++) {
// We know that (result * i) is divisible by j,
// but (result * i) may overflow, so we split j:
// Filter out the gcd, d, so j/d and i/d are integer.
// result is divisible by (j/d) because (j/d)
// is relative prime to (i/d) and is a divisor of
// result * (i/d).
final long d = gcd(i, j);
result = (result / (j / d)) * (i / d);
i++;
}
} else {
// For n > 66, a result overflow might occur, so we check
// the multiplication, taking care to not overflow
// unnecessary.
int i = n - k + 1;
for (int j = 1; j <= k; j++) {
final long d = gcd(i, j);
result = mulAndCheck(result / (j / d), i / d);
i++;
}
}
return result;
}
/**
* Check binomial preconditions.
*
* @param n Size of the set.
* @param k Size of the subsets to be counted.
* @throws IllegalArgumentException if {@code n < 0} or {@code k > n}.
*/
private static void checkBinomial(final int n, final int k) throws IllegalArgumentException{
if (n < k) {
throw new IllegalArgumentException("The first value (" + n + ") must not be exceeded by the second value (" + k + ") in a binomial coefficient");
}
if (n < 0) {
throw new IllegalArgumentException("The first value (" + n + ") in a binomial coefficient must not be negative.");
}
}
/**
* Computes the greatest common divisor of the absolute value of two
* numbers, using a modified version of the "binary gcd" method.
* See Knuth 4.5.2 algorithm B.
* The algorithm is due to Josef Stein (1961).
*
* Special cases:
*
* - The invocations
* {@code gcd(Integer.MIN_VALUE, Integer.MIN_VALUE)},
* {@code gcd(Integer.MIN_VALUE, 0)} and
* {@code gcd(0, Integer.MIN_VALUE)} throw an
* {@code ArithmeticException}, because the result would be 2^31, which
* is too large for an int value.
* - The result of {@code gcd(x, x)}, {@code gcd(0, x)} and
* {@code gcd(x, 0)} is the absolute value of {@code x}, except
* for the special cases above.
* - The invocation {@code gcd(0, 0)} is the only one which returns
* {@code 0}.
*
*
* @param p Number.
* @param q Number.
* @return the greatest common divisor (never negative).
* @throws ArithmeticException if the result cannot be represented as
* a non-negative {@code int} value.
* @since 1.1
*/
private static int gcd(int p, int q) throws ArithmeticException {
int a = p;
int b = q;
if (a == 0 ||
b == 0) {
if (a == Integer.MIN_VALUE ||
b == Integer.MIN_VALUE) {
throw new ArithmeticException("overflow: gcd(" + p + ", " + q + ") is 2^31");
}
return Math.abs(a + b);
}
long al = a;
long bl = b;
boolean useLong = false;
if (a < 0) {
if(Integer.MIN_VALUE == a) {
useLong = true;
} else {
a = -a;
}
al = -al;
}
if (b < 0) {
if (Integer.MIN_VALUE == b) {
useLong = true;
} else {
b = -b;
}
bl = -bl;
}
if (useLong) {
if(al == bl) {
throw new ArithmeticException("overflow: gcd(" + p + ", " + q + ") is 2^31");
}
long blbu = bl;
bl = al;
al = blbu % al;
if (al == 0) {
if (bl > Integer.MAX_VALUE) {
throw new ArithmeticException("overflow: gcd(" + p + ", " + q + ") is 2^31");
}
return (int) bl;
}
blbu = bl;
// Now "al" and "bl" fit in an "int".
b = (int) al;
a = (int) (blbu % al);
}
return gcdPositive(a, b);
}
/**
* Computes the greatest common divisor of two positive numbers
* (this precondition is not checked and the result is undefined
* if not fulfilled) using the "binary gcd" method which avoids division
* and modulo operations.
* See Knuth 4.5.2 algorithm B.
* The algorithm is due to Josef Stein (1961).
*
* Special cases:
*
* - The result of {@code gcd(x, x)}, {@code gcd(0, x)} and
* {@code gcd(x, 0)} is the value of {@code x}.
* - The invocation {@code gcd(0, 0)} is the only one which returns
* {@code 0}.
*
*
* @param a Positive number.
* @param b Positive number.
* @return the greatest common divisor.
*/
private static int gcdPositive(int a, int b) {
if (a == 0) {
return b;
}
else if (b == 0) {
return a;
}
// Make "a" and "b" odd, keeping track of common power of 2.
final int aTwos = Integer.numberOfTrailingZeros(a);
a >>= aTwos;
final int bTwos = Integer.numberOfTrailingZeros(b);
b >>= bTwos;
final int shift = Math.min(aTwos, bTwos);
// "a" and "b" are positive.
// If a > b then "gdc(a, b)" is equal to "gcd(a - b, b)".
// If a < b then "gcd(a, b)" is equal to "gcd(b - a, a)".
// Hence, in the successive iterations:
// "a" becomes the absolute difference of the current values,
// "b" becomes the minimum of the current values.
while (a != b) {
final int delta = a - b;
b = Math.min(a, b);
a = Math.abs(delta);
// Remove any power of 2 in "a" ("b" is guaranteed to be odd).
a >>= Integer.numberOfTrailingZeros(a);
}
// Recover the common power of 2.
return a << shift;
}
/**
* Multiply two long integers, checking for overflow.
*
* @param a Factor.
* @param b Factor.
* @return the product {@code a * b}.
* @throws ArithmeticException if the result can not be represented
* as a {@code long}.
* @since 1.2
*/
private static long mulAndCheck(long a, long b) throws ArithmeticException {
long ret;
if (a > b) {
// use symmetry to reduce boundary cases
ret = mulAndCheck(b, a);
} else {
if (a < 0) {
if (b < 0) {
// check for positive overflow with negative a, negative b
if (a >= Long.MAX_VALUE / b) {
ret = a * b;
} else {
throw new ArithmeticException();
}
} else if (b > 0) {
// check for negative overflow with negative a, positive b
if (Long.MIN_VALUE / b <= a) {
ret = a * b;
} else {
throw new ArithmeticException();
}
} else {
// assert b == 0
ret = 0;
}
} else if (a > 0) {
// assert a > 0
// assert b > 0
// check for positive overflow with positive a, positive b
if (a <= Long.MAX_VALUE / b) {
ret = a * b;
} else {
throw new ArithmeticException();
}
} else {
// assert a == 0
ret = 0;
}
}
return ret;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy