gov.sandia.cognition.statistics.method.ChiSquareConfidence Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cognitive-foundry Show documentation
Show all versions of cognitive-foundry Show documentation
A single jar with all the Cognitive Foundry components.
/*
* File: ChiSquareConfidence.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright Nov 23, 2007, Sandia Corporation. Under the terms of Contract
* DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
* or on behalf of the U.S. Government. Export of this program may require a
* license from the United States Government. See CopyrightHistory.txt for
* complete details.
*
*/
package gov.sandia.cognition.statistics.method;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.statistics.ProbabilityMassFunction;
import gov.sandia.cognition.statistics.distribution.ChiSquareDistribution;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
* This is the chi-square goodness-of-fit test. This test allows us to compare
* observations against expected results, where the observations and
* expectations are recorded for discrete groups/conditions/bins. The null
* hypothesis is that the observed values were drawn from the same distribution
* as the expected values.
*
* The chi-square goodness-of-fit test is a discrete version of the more
* general Kolmogorov-Smirnov test. If your data were drawn from a continuous
* distribution, I would recommend the K-S test instead.
*
* @author Kevin R. Dixon
* @since 2.0
*/
@ConfidenceTestAssumptions(
name="Chi-Squre test",
alsoKnownAs="Pearson's Chi-Square test",
description="The chi-square test determines if the given data were generated from the same discrete distributions.",
assumptions={
"A large sample, typically above 30.",
"Typically, each bin from the discrete distribution must have at least 5 samples.",
"The underlying discrete distribution must obey the weak law of large numbers.",
"The observations are assumed to be independent."
},
nullHypothesis="The frequency of events in the two datasets is consistent.",
dataPaired=true,
dataSameSize=true,
distribution=ChiSquareDistribution.CDF.class,
reference=@PublicationReference(
author="Wikipedia",
title="Pearson's chi-square test",
type=PublicationType.WebPage,
year=2009,
url="http://en.wikipedia.org/wiki/Pearson%27s_chi-square_test"
)
)
public class ChiSquareConfidence
extends AbstractCloneableSerializable
implements NullHypothesisEvaluator>
{
/**
* Default instance variable since the class has no members.
*/
public static final ChiSquareConfidence INSTANCE = new ChiSquareConfidence();
/**
* Creates a new instance of ChiSquareConfidence
*/
public ChiSquareConfidence()
{
}
/**
* Computes the chi-square test between a collection of data and a
* Probability Mass Function that may have create the observed data.
* @param Domain type of the PMF.
* @param data
* Data observed from some discrete distribution.
* @param pmf
* Probability mass function that may have created the observed data.
* @return
* Chi-square test results.
*/
public static ChiSquareConfidence.Statistic evaluateNullHypothesis(
Collection extends DomainType> data,
ProbabilityMassFunction pmf )
{
// Add up all the counts from the sampled data.
Collection extends DomainType> domain = pmf.getDomain();
Map counts =
new HashMap( domain.size() );
for( DomainType x : data )
{
// Ensure that the data are drawn from a subset of the
// domain of the PMF
if( !domain.contains(x) )
{
throw new IllegalArgumentException(
"Observed data " + x + " is not in domain of PMF" );
}
Double c = counts.get(x);
if( c == null )
{
c = 0.0;
}
c++;
counts.put(x,c);
}
// Compute the expected counts, as well as the observed counts.
final int numSamples = data.size();
ArrayList expected = new ArrayList( domain.size() );
ArrayList observed = new ArrayList( domain.size() );
for( DomainType d : domain )
{
double p = pmf.evaluate(d);
double expectedCount = p*numSamples;
if( expectedCount <= 0.0 )
{
expectedCount = Double.MIN_VALUE;
}
expected.add( expectedCount );
Double count = counts.get(d);
if( count == null )
{
count = 0.0;
}
observed.add( count );
}
// Run a chi-square test on the observed versus expected counts.
return INSTANCE.evaluateNullHypothesis(observed, expected);
}
@Override
public ChiSquareConfidence.Statistic evaluateNullHypothesis(
Collection extends Number> data1,
Collection extends Number> data2)
{
if (data1.size() != data2.size())
{
throw new IllegalArgumentException(
"data1 must have the same size as data2!");
}
double degreesOfFreedom = data1.size() - 1.0;
double chiSquare = 0.0;
Iterator extends Number> i1 = data1.iterator();
Iterator extends Number> i2 = data2.iterator();
for (int i = 0; i < data1.size(); i++)
{
double v1 = i1.next().doubleValue();
double v2 = i2.next().doubleValue();
if (v2 <= 0.0)
{
throw new IllegalArgumentException(
"Value of bin " + i + " in data2 cannot be <= 0.0!");
}
double temp = v1 - v2;
chiSquare += temp * temp / v2;
}
return new ChiSquareConfidence.Statistic(chiSquare, degreesOfFreedom);
}
/**
* Confidence Statistic for a chi-square test
*/
public static class Statistic
extends AbstractConfidenceStatistic
{
/**
* Chi-square value
*/
private double chiSquare;
/**
* Number of degrees of freedom in the test
*/
private double degreesOfFreedom;
/**
* Creates a new instance of chiSquare
* @param chiSquare
* Chi-square value
* @param degreesOfFreedom
* Number of degrees of freedom in the test
*/
public Statistic(
double chiSquare,
double degreesOfFreedom)
{
super(1.0 - ChiSquareDistribution.CDF.evaluate(
chiSquare, degreesOfFreedom));
this.chiSquare = chiSquare;
this.degreesOfFreedom = degreesOfFreedom;
}
/**
* Gets for chiSquare
* @return
* Chi-square value
*/
public double getChiSquare()
{
return this.chiSquare;
}
/**
* Getter for degreesOfFreedom
* @return
* Number of degrees of freedom
*/
public double getDegreesOfFreedom()
{
return this.degreesOfFreedom;
}
@Override
public double getTestStatistic()
{
return this.getChiSquare();
}
}
}