All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.sandia.cognition.statistics.method.ChiSquareConfidence Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 * File:                ChiSquareConfidence.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright Nov 23, 2007, Sandia Corporation.  Under the terms of Contract
 * DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
 * or on behalf of the U.S. Government. Export of this program may require a
 * license from the United States Government. See CopyrightHistory.txt for
 * complete details.
 * 
 */

package gov.sandia.cognition.statistics.method;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.statistics.ProbabilityMassFunction;
import gov.sandia.cognition.statistics.distribution.ChiSquareDistribution;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * This is the chi-square goodness-of-fit test. This test allows us to compare
 * observations against expected results, where the observations and 
 * expectations are recorded for discrete groups/conditions/bins.  The null
 * hypothesis is that the observed values were drawn from the same distribution
 * as the expected values.
 * 

* The chi-square goodness-of-fit test is a discrete version of the more * general Kolmogorov-Smirnov test. If your data were drawn from a continuous * distribution, I would recommend the K-S test instead. * * @author Kevin R. Dixon * @since 2.0 */ @ConfidenceTestAssumptions( name="Chi-Squre test", alsoKnownAs="Pearson's Chi-Square test", description="The chi-square test determines if the given data were generated from the same discrete distributions.", assumptions={ "A large sample, typically above 30.", "Typically, each bin from the discrete distribution must have at least 5 samples.", "The underlying discrete distribution must obey the weak law of large numbers.", "The observations are assumed to be independent." }, nullHypothesis="The frequency of events in the two datasets is consistent.", dataPaired=true, dataSameSize=true, distribution=ChiSquareDistribution.CDF.class, reference=@PublicationReference( author="Wikipedia", title="Pearson's chi-square test", type=PublicationType.WebPage, year=2009, url="http://en.wikipedia.org/wiki/Pearson%27s_chi-square_test" ) ) public class ChiSquareConfidence extends AbstractCloneableSerializable implements NullHypothesisEvaluator> { /** * Default instance variable since the class has no members. */ public static final ChiSquareConfidence INSTANCE = new ChiSquareConfidence(); /** * Creates a new instance of ChiSquareConfidence */ public ChiSquareConfidence() { } /** * Computes the chi-square test between a collection of data and a * Probability Mass Function that may have create the observed data. * @param Domain type of the PMF. * @param data * Data observed from some discrete distribution. * @param pmf * Probability mass function that may have created the observed data. * @return * Chi-square test results. */ public static ChiSquareConfidence.Statistic evaluateNullHypothesis( Collection data, ProbabilityMassFunction pmf ) { // Add up all the counts from the sampled data. Collection domain = pmf.getDomain(); Map counts = new HashMap( domain.size() ); for( DomainType x : data ) { // Ensure that the data are drawn from a subset of the // domain of the PMF if( !domain.contains(x) ) { throw new IllegalArgumentException( "Observed data " + x + " is not in domain of PMF" ); } Double c = counts.get(x); if( c == null ) { c = 0.0; } c++; counts.put(x,c); } // Compute the expected counts, as well as the observed counts. final int numSamples = data.size(); ArrayList expected = new ArrayList( domain.size() ); ArrayList observed = new ArrayList( domain.size() ); for( DomainType d : domain ) { double p = pmf.evaluate(d); double expectedCount = p*numSamples; if( expectedCount <= 0.0 ) { expectedCount = Double.MIN_VALUE; } expected.add( expectedCount ); Double count = counts.get(d); if( count == null ) { count = 0.0; } observed.add( count ); } // Run a chi-square test on the observed versus expected counts. return INSTANCE.evaluateNullHypothesis(observed, expected); } @Override public ChiSquareConfidence.Statistic evaluateNullHypothesis( Collection data1, Collection data2) { if (data1.size() != data2.size()) { throw new IllegalArgumentException( "data1 must have the same size as data2!"); } double degreesOfFreedom = data1.size() - 1.0; double chiSquare = 0.0; Iterator i1 = data1.iterator(); Iterator i2 = data2.iterator(); for (int i = 0; i < data1.size(); i++) { double v1 = i1.next().doubleValue(); double v2 = i2.next().doubleValue(); if (v2 <= 0.0) { throw new IllegalArgumentException( "Value of bin " + i + " in data2 cannot be <= 0.0!"); } double temp = v1 - v2; chiSquare += temp * temp / v2; } return new ChiSquareConfidence.Statistic(chiSquare, degreesOfFreedom); } /** * Confidence Statistic for a chi-square test */ public static class Statistic extends AbstractConfidenceStatistic { /** * Chi-square value */ private double chiSquare; /** * Number of degrees of freedom in the test */ private double degreesOfFreedom; /** * Creates a new instance of chiSquare * @param chiSquare * Chi-square value * @param degreesOfFreedom * Number of degrees of freedom in the test */ public Statistic( double chiSquare, double degreesOfFreedom) { super(1.0 - ChiSquareDistribution.CDF.evaluate( chiSquare, degreesOfFreedom)); this.chiSquare = chiSquare; this.degreesOfFreedom = degreesOfFreedom; } /** * Gets for chiSquare * @return * Chi-square value */ public double getChiSquare() { return this.chiSquare; } /** * Getter for degreesOfFreedom * @return * Number of degrees of freedom */ public double getDegreesOfFreedom() { return this.degreesOfFreedom; } @Override public double getTestStatistic() { return this.getChiSquare(); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy