![JAR search and dependency download from the Maven repository](/logo.png)
gov.sandia.cognition.statistics.method.ShafferStaticCorrection Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cognitive-foundry Show documentation
Show all versions of cognitive-foundry Show documentation
A single jar with all the Cognitive Foundry components.
/*
* File: ShafferStaticCorrection.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright May 18, 2011, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.statistics.method;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.collection.CollectionUtil;
import gov.sandia.cognition.math.MathUtil;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.util.ArrayIndexSorter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
/**
* The Shaffer Static Correction uses logical relationships to tighten up the
* Bonferroni/Sidak corrections when performing pairwise multiple hypothesis
* comparisons. This is uniformly tighter bound than the Bonferroni/Sidak
* values and also uniformly tighter than the Holm correction. The original
* algorithm proposed by Shaffer appears to grow super exponentially as
* O(2^(N^2)), where N is the number of treatments. We have pre-computed
* various quantities and used caching to minimize the amount of repeated
* recursion and it appears that the runtime grows as O(N^4), where N is the
* number of treatments. Since there are N(N-1)/2 comparisons, this quantity
* is quadratic in the number of comparisons. This means the computation is
* reasonable for N=90 (4005 comparisons). However, the algorithm seems
* to slow down significantly above N=100 (4950 comparisons) or so.
* This implementation uses the slightly tighter Sidak correction,
* as opposed to the standard Bonferroni correction.
*
* For example, if you have three hypothesis you are testing,
* then there is no way that there can be 2 true hypotheses. (Because u1=u2 and
* u2=u3 and therefore u1=u3.) With this logic, Shaffer Static correction
* can greatly reduce the false-negative rate while not impacting the
* false-discovery rate.
* @author Kevin R. Dixon
* @since 3.3.0
*/
@PublicationReferences(
references={
@PublicationReference(
author="Juliet Popper Shaffer",
title="Modified Sequentially Rejective Multiple Test Procedures",
type=PublicationType.Journal,
publication="Journal of the American Statistical Association",
year=1986,
url="http://www.jstor.org/stable/2289016"
)
,
@PublicationReference(
author="Juliet Popper Shaffer",
title="Multiple Hypothesis Testing",
type=PublicationType.Journal,
publication="Annual Review of Psychology",
year=1995,
url="http://www.dm.uba.ar/materias/optativas/aspectos_estadisticos_de_microarreglos/2010/1/teoricas/Shaffer%201995%20Multiple%20hypothesis%20testing.pdf"
)
,
@PublicationReference(
author={
"Salvador Garcia",
"Francisco Herrera"
},
title="An Extension on \"Statistical Comparisons of Classifiers over Multiple Data Sets\" for all Pairwise Comparisons",
type=PublicationType.Journal,
publication="Journal of Machine Learning Research",
year=2008,
url="http://sci2s.ugr.es/publications/ficheros/2008-Garcia-JMLR.pdf"
)
}
)
public class ShafferStaticCorrection
extends AbstractPairwiseMultipleHypothesisComparison
{
/**
* Default constructor
*/
public ShafferStaticCorrection()
{
this( DEFAULT_PAIRWISE_TEST );
}
/**
* Creates a new instance of BonferroniCorrection
* @param pairwiseTest
* Confidence test used for pair-wise null-hypothesis tests.
*/
public ShafferStaticCorrection(
final NullHypothesisEvaluator> pairwiseTest)
{
super( pairwiseTest );
}
@Override
public ShafferStaticCorrection clone()
{
return (ShafferStaticCorrection) super.clone();
}
@Override
public ShafferStaticCorrection.Statistic evaluateNullHypotheses(
final Collection extends Collection extends Number>> data,
final double uncompensatedAlpha)
{
return new ShafferStaticCorrection.Statistic(
data, uncompensatedAlpha, this.getPairwiseTest() );
}
/**
* Test statistic from the Shaffer static multiple-comparison test
*/
public static class Statistic
extends AbstractPairwiseMultipleHypothesisComparison.Statistic
{
/**
* Matrix of adjusted alphas (p-value thresholds) for each comparison
*/
protected Matrix adjustedAlphas;
/**
* Creates a new instance of Statistic
* @param data
* Data from each treatment to consider
* @param uncompensatedAlpha
* Uncompensated alpha (p-value threshold) for the multiple comparison
* test
* @param pairwiseTest
* Confidence test used for pair-wise null-hypothesis tests.
*/
public Statistic(
final Collection extends Collection extends Number>> data,
final double uncompensatedAlpha,
final NullHypothesisEvaluator> pairwiseTest )
{
super( data, uncompensatedAlpha, pairwiseTest );
final int numComparisons =
this.treatmentCount * (this.treatmentCount-1) / 2;
this.computePairwiseTestResults(data, pairwiseTest );
ArrayList possibleTruths =
possibleTruthsSorted( this.treatmentCount );
double[] pvalues = new double[ numComparisons ];
int index = 0;
for( int i = 0; i < this.treatmentCount; i++ )
{
for( int j = i+1; j < this.treatmentCount; j++ )
{
pvalues[index] =
this.nullHypothesisProbabilities.getElement(i, j);
index++;
}
}
// Sort the p-values from smallest to largest
int[] sortedIndices =
ArrayIndexSorter.sortArrayAscending(pvalues);
double[] adjustedAlpha = new double[ numComparisons ];
int maxPossibleTruthIndex = possibleTruths.size()-1;
int hypothesesRemaining = numComparisons;
for( int i = 0; i < numComparisons; i++ )
{
int maxPossibleTrueHypotheses =
possibleTruths.get(maxPossibleTruthIndex);
// adjustedAlpha[sortedIndices[i]] = BonferroniCorrection.adjust(
// this.uncompensatedAlpha, maxPossibleTrueHypotheses );
adjustedAlpha[sortedIndices[i]] = SidakCorrection.adjust(
this.uncompensatedAlpha, maxPossibleTrueHypotheses );
if( pvalues[sortedIndices[i]] >= adjustedAlpha[sortedIndices[i]] )
{
// This is the critical value, there are no more remaining
// false hypotheses, so the remaining ones must be true
break;
}
else
{
hypothesesRemaining--;
if( hypothesesRemaining < maxPossibleTrueHypotheses )
{
maxPossibleTruthIndex--;
}
}
}
// Stuff the adjusted alphas back into the
Matrix alphaMatrix = MatrixFactory.getDefault().createMatrix(
this.treatmentCount,this.treatmentCount);
index = 0;
for( int i = 0; i < this.treatmentCount; i++ )
{
for( int j = i+1; j < this.treatmentCount; j++ )
{
alphaMatrix.setElement(i, j, adjustedAlpha[index]);
alphaMatrix.setElement(j, i, adjustedAlpha[index]);
index++;
}
}
this.adjustedAlphas = alphaMatrix;
}
/**
* Returns the sorted-ascending set of the possible set of true
* hypothesis given the number of treatments being considered.
* @param treatmentCount
* Number of treatments to consider
* @return
* Set of possible true hypotheses given the number of treatments.
*/
public static ArrayList possibleTruthsSorted(
int treatmentCount )
{
int[] jchoose2 = new int[ treatmentCount+1 ];
for( int j = 2; j <= treatmentCount; j++ )
{
jchoose2[j] = (int) (Math.round(Math.exp(MathUtil.logBinomialCoefficient(j, 2))));
}
ArrayList> recusionCaches =
new ArrayList>( treatmentCount );
for( int j = 0; j < treatmentCount; j++ )
{
recusionCaches.add( null );
}
LinkedHashSet set =
possibleTruthsSet( treatmentCount, jchoose2, recusionCaches );
ArrayList sortedSet = CollectionUtil.asArrayList(set);
Collections.sort(sortedSet);
return sortedSet;
}
/**
* Recursive algorithm to compute the possible set of true hypotheses
* @param treatmentCount
* Number of treatments to consider
* @param jchoose2
* Pre-computed cache of all "j choose 2" values to the max treatment
* @param recursionCaches
* Dynamic cache of recursion results for various treatmentCounts.
* If entry j is null, then we have not recursed to a
* treatmentCount of j yet.
* @return
* Unsorted set of all possible true hypothesis counts.
*/
private static LinkedHashSet possibleTruthsSet(
final int treatmentCount,
final int[] jchoose2,
ArrayList> recursionCaches )
{
LinkedHashSet set =
new LinkedHashSet( treatmentCount );
if( treatmentCount <= 1 )
{
set.add( 0 );
}
else if( treatmentCount == 2 )
{
set.add( 1 );
}
else
{
for( int j = 1; j <= treatmentCount; j++ )
{
// If we've already recursed using the give index (tcmj),
// then don't do it again... It leads to a super-polynomial
// explosion in the recursion. (Shaffer's original This caching keeps it
// polynomial.
final int tcmj = treatmentCount-j;
LinkedHashSet recursion =
recursionCaches.get( tcmj );
if( recursion == null )
{
recursion = possibleTruthsSet(
tcmj, jchoose2, recursionCaches );
recursionCaches.set( tcmj, recursion );
}
// Uncomment the line below to run Shaffer's original
// algorithm without caching... but it's superpolynomial,
// so I would suggest only doing it if you're insane.
//LinkedHashSet recursion = possibleTruthsSet( treatmentCount - j, jchoose2, recursionCaches );
for( Integer value : recursion )
{
int choose;
if( j < 2 )
{
choose = 0;
}
else
{
choose = jchoose2[j];
}
set.add( value + choose );
}
}
}
return set;
}
@Override
public double getAdjustedAlpha(
int i,
int j)
{
return this.adjustedAlphas.getElement(i, j);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy