All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.statistics.inference.BinomialTest Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.statistics.inference;

import java.util.Objects;
import org.apache.commons.statistics.distribution.BinomialDistribution;

/**
 * Implements binomial test statistics.
 *
 * 

Performs an exact test for the statistical significance of deviations from a * theoretically expected distribution of observations into two categories. * * @see Binomial test (Wikipedia) * @since 1.1 */ public final class BinomialTest { /** Default instance. */ private static final BinomialTest DEFAULT = new BinomialTest(AlternativeHypothesis.TWO_SIDED); /** Alternative hypothesis. */ private final AlternativeHypothesis alternative; /** * @param alternative Alternative hypothesis. */ private BinomialTest(AlternativeHypothesis alternative) { this.alternative = alternative; } /** * Return an instance using the default options. * *

    *
  • {@link AlternativeHypothesis#TWO_SIDED} *
* * @return default instance */ public static BinomialTest withDefaults() { return DEFAULT; } /** * Return an instance with the configured alternative hypothesis. * * @param v Value. * @return an instance */ public BinomialTest with(AlternativeHypothesis v) { return new BinomialTest(Objects.requireNonNull(v)); } /** * Performs a binomial test about the probability of success \( \pi \). * *

The null hypothesis is \( H_0:\pi=\pi_0 \) where \( \pi_0 \) is between 0 and 1. * *

The probability of observing \( k \) successes from \( n \) trials with a given * probability of success \( p \) is: * *

\[ \Pr(X=k)=\binom{n}{k}p^k(1-p)^{n-k} \] * *

The test is defined by the {@link AlternativeHypothesis}. * *

To test \( \pi < \pi_0 \) (less than): * *

\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=0}^k\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \] * *

To test \( \pi > \pi_0 \) (greater than): * *

\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=k}^n\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \] * *

To test \( \pi \ne \pi_0 \) (two-sided) requires finding all \( i \) such that * \( \mathcal{I}=\{i:\Pr(X=i)\leq \Pr(X=k)\} \) and compute the sum: * *

\[ p = \sum_{i\in\mathcal{I}}\Pr(X=i)=\sum_{i\in\mathcal{I}}\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \] * *

The two-sided p-value represents the likelihood of getting a result at least as * extreme as the sample, given the provided {@code probability} of success on a * single trial. * *

The test statistic is equal to the estimated proportion \( \frac{k}{n} \). * * @param numberOfTrials Number of trials performed. * @param numberOfSuccesses Number of successes observed. * @param probability Assumed probability of a single trial under the null * hypothesis. * @return test result * @throws IllegalArgumentException if {@code numberOfTrials} or * {@code numberOfSuccesses} is negative; {@code probability} is not between 0 * and 1; or if {@code numberOfTrials < numberOfSuccesses} * @see #with(AlternativeHypothesis) */ public SignificanceResult test(int numberOfTrials, int numberOfSuccesses, double probability) { // Note: The distribution validates number of trials and probability. // Here we only have to validate the number of successes. Arguments.checkNonNegative(numberOfSuccesses); if (numberOfTrials < numberOfSuccesses) { throw new InferenceException( "must have n >= k for binomial coefficient (n, k), got n = %d, k = %d", numberOfSuccesses, numberOfTrials); } final BinomialDistribution distribution = BinomialDistribution.of(numberOfTrials, probability); final double p; if (alternative == AlternativeHypothesis.GREATER_THAN) { p = distribution.survivalProbability(numberOfSuccesses - 1); } else if (alternative == AlternativeHypothesis.LESS_THAN) { p = distribution.cumulativeProbability(numberOfSuccesses); } else { p = twoSidedBinomialTest(numberOfTrials, numberOfSuccesses, probability, distribution); } return new BaseSignificanceResult((double) numberOfSuccesses / numberOfTrials, p); } /** * Returns the observed significance level, or p-value, associated with a * two-sided binomial test about the probability of success \( \pi \). * * @param n Number of trials performed. * @param k Number of successes observed. * @param probability Assumed probability of a single trial under the null * hypothesis. * @param distribution Binomial distribution. * @return p-value */ private static double twoSidedBinomialTest(int n, int k, double probability, BinomialDistribution distribution) { // Find all i where Pr(X = i) <= Pr(X = k) and sum them. // Exploit the known unimodal distribution to increase the // search speed. Note the search depends only on magnitude differences. // The current BinomialDistribution is faster using log probability // as it omits a call to Math.exp. // Use the mode as the point of largest probability. // The lower or upper mode is important for the search below. final int m1 = (int) Math.ceil((n + 1.0) * probability) - 1; final int m2 = (int) Math.floor((n + 1.0) * probability); if (k < m1) { final double pk = distribution.logProbability(k); // Lower half = cdf(k) // Find upper half. As k < lower mode i should never // reach the lower mode based on the probability alone. // Bracket with the upper mode. final int i = Searches.searchDescending(m2, n, pk, distribution::logProbability); return distribution.cumulativeProbability(k) + distribution.survivalProbability(i - 1); } else if (k > m2) { final double pk = distribution.logProbability(k); // Upper half = sf(k - 1) // Find lower half. As k > upper mode i should never // reach the upper mode based on the probability alone. // Bracket with the lower mode. final int i = Searches.searchAscending(0, m1, pk, distribution::logProbability); return distribution.cumulativeProbability(i) + distribution.survivalProbability(k - 1); } // k == mode // Edge case where the sum of probabilities will be either // 1 or 1 - Pr(X = mode) where mode != k final double pk = distribution.probability(k); final double pm = distribution.probability(k == m1 ? m2 : m1); return pm > pk ? 1 - pm : 1; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy