gov.sandia.cognition.statistics.distribution.StudentizedRangeDistribution Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of cognitive-foundry Show documentation
A single jar with all the Cognitive Foundry components.
There is a newer version: 4.0.1
/*
 * File:                StudentizedRangeDistribution.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright May 10, 2011, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government.
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 * 
 */

package gov.sandia.cognition.statistics.distribution;

import gov.sandia.cognition.algorithm.ParallelUtil;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.math.UnivariateStatisticsUtil;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.math.matrix.VectorFactory;
import gov.sandia.cognition.statistics.AbstractClosedFormUnivariateDistribution;
import gov.sandia.cognition.statistics.ClosedFormCumulativeDistributionFunction;
import gov.sandia.cognition.statistics.InvertibleCumulativeDistributionFunction;
import gov.sandia.cognition.statistics.SmoothUnivariateDistribution;
import gov.sandia.cognition.util.ArgumentChecker;
import gov.sandia.cognition.util.ObjectUtil;
import gov.sandia.cognition.util.Pair;
import gov.sandia.cognition.util.Randomized;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Random;
import java.util.concurrent.Callable;

/**
 * Implementation of the Studentized Range distribution, which defines the
 * population correction factor when performing multiple comparisons.  In other
 * words, if you compute the p-value of 20 objects, there's a 65% chance of
 * finding at least one with a p-value of less than 0.05.  The Studentized
 * Range compensates for this chance when conducting multiple comparisons.
 * @author Kevin R. Dixon
 * @since 3.1
 */
@PublicationReferences(
    references={
        @PublicationReference(
            author="David M. Lane",
            title="Studentized Range Distribution",
            type=PublicationType.WebPage,
            year=2011,
            url="http://davidmlane.com/hyperstat/A47912.html"
        )
        ,
        @PublicationReference(
            author="Wikipedia",
            title="Studentized range",
            type=PublicationType.WebPage,
            year=2011,
            url="http://en.wikipedia.org/wiki/Studentized_range"
        )
    }
)
public class StudentizedRangeDistribution
    extends AbstractClosedFormUnivariateDistribution
    implements Randomized
{

    /**
     * Default treatment count, {@value}.
     */
    public static final int DEFAULT_TREATMENT_COUNT = 2;

    /**
     * Default degrees of freedom, {@value}.
     */
    public static final double DEFAULT_DEGREES_OF_FREEDOM = Double.POSITIVE_INFINITY;

    /**
     * Number of samples to draw for Monte Carlo estimates, {@value}.
     */
    public static final int DEFAULT_NUM_SAMPLES = 1000;

    /**
     * Number of comparisons made
     */
    protected int treatmentCount;

    /**
     * Number of subjects in each treatment minus one.
     */
    protected double degreesOfFreedom;

    /**
     * Random number generator for Monte Carlo simulations
     */
    protected Random random;

    /**
     * Default constructor
     */
    public StudentizedRangeDistribution()
    {
        this( DEFAULT_TREATMENT_COUNT, DEFAULT_DEGREES_OF_FREEDOM );
    }

    /**
     * Creates a new instance of StudentizedRangeDistribution
     * @param treatmentCount
     * Number of comparisons made
     * @param degreesOfFreedom
     * Number of subjects in each treatment minus one.
     */
    public StudentizedRangeDistribution(
        final int treatmentCount,
        final double degreesOfFreedom)
    {
        this.setTreatmentCount(treatmentCount);
        this.setDegreesOfFreedom(degreesOfFreedom);
        this.setRandom( new Random(2) );
    }

    /**
     * Copy constructor
     * @param other
     * StudentizedRangeDistribution to copy
     */
    public StudentizedRangeDistribution(
        final StudentizedRangeDistribution other )
    {
        this( other.getTreatmentCount(), other.getDegreesOfFreedom() );
    }

    @Override
    public StudentizedRangeDistribution clone()
    {
        StudentizedRangeDistribution clone =
            (StudentizedRangeDistribution) super.clone();
        clone.setRandom( ObjectUtil.cloneSmart( this.getRandom() ) );
        return clone;
    }

    @Override
    public void sampleInto(
        final Random random,
        final int sampleCount,
        final Collection output)
    {
        ArrayList tasks = new ArrayList(sampleCount);
        SmoothUnivariateDistribution t;
        if( this.getDegreesOfFreedom() < 30.0 )
        {
            t = new StudentTDistribution(this.getDegreesOfFreedom());
        }
        else
        {
            t = new UnivariateGaussian();
        }
        for( int n = 0; n < sampleCount; n++ )
        {
            tasks.add( new SampleRange(random, this.getTreatmentCount(), t) );
        }

        try
        {
            output.addAll(ParallelUtil.executeInSequence(tasks));
        }
        catch (Exception ex)
        {
            throw new RuntimeException( ex );
        }

    }

    @Override
    public StudentizedRangeDistribution.CDF getCDF()
    {
        return new StudentizedRangeDistribution.CDF( this );
    }

    @Override
    public Double getMean()
    {
        return this.getMeanAsDouble();
    }
    
    @Override
    public double getMeanAsDouble()
    {
        return UnivariateStatisticsUtil.computeMean(
            this.sample(this.getRandom(), DEFAULT_NUM_SAMPLES ) );
    }

    @Override
    public Vector convertToVector()
    {
        return VectorFactory.getDefault().copyValues(
            this.getTreatmentCount(), this.getDegreesOfFreedom() );
    }

    @Override
    public void convertFromVector(
        final Vector parameters)
    {
        parameters.assertDimensionalityEquals(2);
        this.setTreatmentCount( (int) parameters.getElement(0) );
        this.setDegreesOfFreedom( parameters.getElement(1) );
    }

    @Override
    public Double getMinSupport()
    {
        return 0.0;
    }

    @Override
    public Double getMaxSupport()
    {
        return Double.POSITIVE_INFINITY;
    }

    @Override
    public double getVariance()
    {
        return UnivariateStatisticsUtil.computeVariance(
            this.sample(this.getRandom(), 10*DEFAULT_NUM_SAMPLES ) );
    }

    /**
     * Getter for treatmentCount
     * @return
     * Number of comparisons made
     */
    public int getTreatmentCount()
    {
        return this.treatmentCount;
    }

    /**
     * Setter for treatmentCount
     * @param treatmentCount
     * Number of comparisons made
     */
    public void setTreatmentCount(
        final int treatmentCount)
    {
        ArgumentChecker.assertIsInRangeInclusive(
            "treatmentCount", treatmentCount, 2.0, Double.POSITIVE_INFINITY );
        this.treatmentCount = treatmentCount;
    }

    /**
     * Getter for degreesOfFreedom
     * @return
     * Number of subjects in each treatment minus one.
     */
    public double getDegreesOfFreedom()
    {
        return this.degreesOfFreedom;
    }

    /**
     * Setter for degreesOfFreedom
     * @param degreesOfFreedom 
     * Number of subjects in each treatment minus one.
     */
    public void setDegreesOfFreedom(
        final double degreesOfFreedom)
    {
        ArgumentChecker.assertIsPositive("degreesOfFreedom", degreesOfFreedom);
        this.degreesOfFreedom = degreesOfFreedom;
    }

    @Override
    public Random getRandom()
    {
        return this.random;
    }

    @Override
    public void setRandom(
        final Random random)
    {
        this.random = random;
    }

    /**
     * Computes the estimate of the Studentized Range for a single sample
     */
    protected static class SampleRange
        implements Callable
    {

        /**
         * Number of comparisons being made
         */
        int treatmentCount;

        /**
         * Random number generator
         */
        Random random;

        /**
         * Distribution from which we sample, Student-t or Gaussian
         */
        SmoothUnivariateDistribution t;

        /**
         * Creates a new instance of SampleRange
         * @param random
         * Random number generator
         * @param treatmentCount
         * Number of comparisons made
         * @param t
         * Distribution from which we sample, Student-t or Gaussian
         */
        public SampleRange(
            final Random random,
            final int treatmentCount,
            final SmoothUnivariateDistribution t )
        {
            this.random = random;
            this.treatmentCount = treatmentCount;
            this.t = t;
        }

        @Override
        public Double call()
            throws Exception
        {
            ArrayList means =
                this.t.sample( this.random, this.treatmentCount );
            Pair result =
                UnivariateStatisticsUtil.computeMinAndMax(means);
            double delta = result.getSecond() - result.getFirst();
            means = null;
            return delta;
        }

    }

    /**
     * CDF of the StudentizedRangeDistribution
     */
    public static class CDF
        extends StudentizedRangeDistribution
        implements ClosedFormCumulativeDistributionFunction,
        InvertibleCumulativeDistributionFunction
    {

        /**
         * Default constructor
         */
        public CDF()
        {
            super();
        }

        /**
         * Creates a new instance of StudentizedRangeDistribution
         * @param treatmentCount
         * Number of comparisons made
         * @param degreesOfFreedom
         * Number of subjects in each treatment minus one.
         */
        public CDF(
            final int treatmentCount,
            final double degreesOfFreedom)
        {
            super( treatmentCount, degreesOfFreedom );
        }

        /**
         * Copy constructor
         * @param other
         * StudentizedRangeDistribution to copy
         */
        public CDF(
            final StudentizedRangeDistribution other )
        {
            super( other );
        }


        @Override
        public Double evaluate(
            final Double input)
        {
            return APStat.prtrng(input, this.getDegreesOfFreedom(), this.getTreatmentCount() );
        }

        @Override
        public Double inverse(
            final double probability)
        {
            return APStat.qtrng(probability, this.getDegreesOfFreedom(), this.getTreatmentCount() );
        }

        @Override
        public StudentizedRangeDistribution.CDF getCDF()
        {
            return this;
        }

    }

    /**
     * This is a translation of Fortran code taken from
     * http://lib.stat.cmu.edu/apstat/, and the comments on the individual functions
     * in this class are taken directly from the original.
     */
    public static class APStat
    {
        /**
         * Algorithm AS66 Applied Statistics (1973) vol22 no.3.
         *
         * Evaluates the tail area of the standardised normal curve from x to
         * infinity if upper is .true. or from minus infinity to x if upper is
         * .false.
         *
         * @param x
         *            Location for which to compute tail area
         * @param upper
         *            True to find upper tail area, false to find lower tail area
         * @return Tail area for given x
         */
        public static double alnorm(double x, boolean upper) {
            if (x < 0) {
                x = -x;
                upper = !upper;
            }

            double y = 0.5 * x * x;
            double alnorm;

            if ((x > 7.0) && !(upper && x <= 18.66)) {
                alnorm = 0;
            } else if (x <= 1.28) {
                alnorm = 0.5
                        - x
                        * (0.398942280444 - 0.39990348504
                                * y
                                / (y + 5.75885480458 - 29.8213557807 / (y + 2.62433121679 + 48.6959930692 / (y + 5.92885724438))));
            } else {
                alnorm = 0.398942280385
                        * Math.exp(-y)
                        / (x - 3.8052e-8 + 1.00000615302 / (x + 3.98064794e-4 + 1.98615381364 / (x - 0.151679116635 + 5.29330324926 / (x + 4.8385912808 - 15.1508972451 / (x + 0.742380924027 + 30.789933034 / (x + 3.99019417011))))));
            }

            return upper ? alnorm : 1 - alnorm;
        }

        /**
         * ALGORITHM AS 111, APPL.STATIST., VOL.26, 118-121, 1977.
         *
         * PRODUCES NORMAL DEVIATE CORRESPONDING TO LOWER TAIL AREA = P.
         *
         * See also AS 241 which contains alternative routines accurate to about 7
         * and 16 decimal digits.
         *
         * @param p
         *            P-value for which to compute normal deviate
         * @return Normal deviate corresponding to lower tail area = p
         */
        public static double ppnd(double p) {
            double q = p - 0.5;

            // p < 0.08 or p > 0.92, set r = min(p,1-p)
            if (Math.abs(q) > 0.42) {
                double r = Math.min(p, 1 - p);

                if (r <= 0) {
                    throw new IllegalArgumentException("Invalid p value: " + p);
                }

                r = Math.sqrt(-Math.log(r));

                return (q < 0 ? -1 : 1)
                        * (((2.32121276858 * r + 4.85014127135) * r - 2.29796479134)
                                * r - 2.78718931138)
                        / ((1.63706781897 * r + 3.54388924762) * r + 1);
            }

            // 0.08 < p < 0.92
            else {
                double r = q * q;

                return q
                        * (((-25.44106049637 * r + 41.39119773534) * r - 18.61500062529)
                                * r + 2.50662823884)
                        / ((((3.13082909833 * r - 21.06224101826) * r + 23.08336743743)
                                * r - 8.47351093090)
                                * r + 1);
            }
        }

        /**
         * Algorithm AS 190 Appl Statist (1983) Vol.32, No.2. Incorporates
         * corrections from Appl. Statist. (1985) Vol.34 (1)
         *
         * Evaluates the probability from 0 to q for a studentized range having v
         * degrees of freedom and r samples.
         *
         * Uses subroutine ALNORM = algorithm AS66.
         *
         * Arrays vw and qw store transient values used in the quadrature summation.
         * Node spacing is controlled by step. pcutj and pcutk control truncation.
         * Minimum and maximum number of steps are controlled by jmin, jmax, kmin
         * and kmax. Accuracy can be increased by use of a finer grid - Increase
         * sizes of arrays vw and qw, and jmin, jmax, kmin, kmax and 1/step
         * proportionally.
         *
         * @param q
         *            Quantile for which to find p-value
         * @param v
         *            Degrees of freedom for distribution
         * @param r
         *            Number of samples for distribution
         * @return P-value for q for given distribution
         */
        public static double prtrng(double q, double v, double r) {
            final double pcutj = 0.00003;
            final double pcutk = 0.0001;
            final double step = 0.45;
            final double vmax = 120.0;
            final int jmin = 3;
            final int jmax = 15;
            final int kmin = 7;
            final int kmax = 15;

            // Check initial values

            double prtrng = 0;

            if (v < 1) {
                throw new IllegalArgumentException(
                        "Degrees of freedom must be >= 1.");
            }

            if (r < 2) {
                throw new IllegalArgumentException(
                        "Number of samples must be >= 2.");
            }

            if (q <= 0) {
                return prtrng;
            }

            // Computing constants, local midpoint, adjusting steps.

            double g = step * Math.pow(r, -0.2);
            double gmid = 0.5 * Math.log(r);
            double r1 = r - 1;
            double c = Math.log(r * g * 0.39894228);
            double h = 0;
            double v2 = 0;

            if (v <= vmax) {
                h = step * Math.pow(v, -0.5);
                v2 = v * 0.5;

                if (v == 1) {
                    c = 0.193064705;
                } else if (v == 2) {
                    c = 0.293525326;
                } else {
                    c = Math.sqrt(v2)
                            * 0.318309886
                            / (1 + ((-0.268132716e-2 / v2 + 0.347222222e-2) / v2 + 0.833333333e-1)
                                    / v2);
                }

                c = Math.log(c * r * g * h);
            }

            // Computing integral
            // Given a row k, the procedure starts at the midpoint and works
            // outward (index j) in calculating the probability at nodes
            // symmetric about the midpoint. The rows (index k) are also
            // processed outwards symmetrically about the midpoint. The
            // centre row is unpaired.

            double gstep = g;
            double[] qw = new double[30];
            double[] vw = new double[30];

            qw[0] = -1;
            qw[jmax] = -1;

            double pk1 = 1;
            double pk2 = 1;

            for (int k = 1; k <= kmax; ++k) {
                gstep -= g;

                do {
                    gstep = -gstep;

                    double gk = gmid + gstep;
                    double pk = 0;

                    if (pk2 > pcutk || k <= kmin) {
                        double w0 = c - gk * gk * 0.5;
                        double pz = alnorm(gk, true);
                        double x = alnorm(gk - q, true) - pz;

                        if (x > 0) {
                            pk = Math.exp(w0 + r1 * Math.log(x));
                        }

                        if (v <= vmax) {
                            int jump = -jmax;

                            do {
                                jump = jump + jmax;

                                for (int j = 1; j <= jmax; ++j) {
                                    int jj = j + jump;

                                    if (qw[jj - 1] <= 0) {
                                        double hj = h * j;

                                        if (j < jmax) {
                                            qw[jj] = -1;
                                        }

                                        double ehj = Math.exp(hj);

                                        qw[jj - 1] = q * ehj;
                                        vw[jj - 1] = v
                                                * (hj + 0.5 - ehj * ehj * 0.5);
                                    }

                                    double pj = 0;
                                    x = alnorm(gk - qw[jj - 1], true) - pz;

                                    if (x > 0) {
                                        pj = Math.exp(w0 + vw[jj - 1] + r1
                                                * Math.log(x));
                                    }

                                    pk += pj;

                                    if (pj > pcutj) {
                                        continue;
                                    }

                                    if (jj > jmin || k > kmin) {
                                        break;
                                    }
                                }

                                h = -h;
                            } while (h < 0);
                        }
                    }

                    prtrng = prtrng + pk;

                    if (k > kmin && pk <= pcutk && pk1 <= pcutk) {
                        return prtrng;
                    }

                    pk2 = pk1;
                    pk1 = pk;
                } while (gstep > 0);
            }

            return prtrng;
        }

        /**
         * Algorithm AS 190.1 Appl Statist (1983) Vol.32, No.2.
         *
         * Approximates the quantile p for a studentized range distribution having v
         * degrees of freedom and r samples for probability p, p.ge.0.90 .and.
         * p.le.0.99.
         *
         * Uses functions alnorm, ppnd, prtrng and qtrng0 - Algorithms AS 66, AS
         * 111, AS 190 and AS 190.2
         *
         * @param p
         *            P-value for which to find quantile
         * @param v
         *            Degrees of freedom for distribution
         * @param r
         *            Number of samples for distribution
         * @return Quantile at p for given distribution
         */
        public static double qtrng(double p, double v, double r) {
            final int jmax = 8;
            final double pcut = 0.001;
            final double eps = 1e-4;

            // Check input parameters

            if (v < 1) {
                throw new IllegalArgumentException(
                        "Degrees of freedom must be >= 1.");
            }

            if (r < 2) {
                throw new IllegalArgumentException(
                        "Number of samples must be >= 2.");
            }

            if (p < 0.9 || p > 0.99) {
                throw new IllegalArgumentException(
                        "P-value must be in range [0.9,0.99].");
            }

            // Obtain initial values

            double q1 = qtrng0(p, v, r);
            double p1 = prtrng(q1, v, r);
            double q2 = 0;
            double p2 = 0;
            double qtrng = q1;

            if (Math.abs(p1 - p) < pcut) {
                return qtrng;
            }

            if (p1 > p) {
                p1 = 1.75 * p - 0.75 * p1;
            }

            if (p1 < p) {
                p2 = p + (p - p1) * (1 - p) / (1 - p1) * 0.75;
            }

            if (p2 < 0.8) {
                p2 = 0.8;
            }

            if (p2 > 0.995) {
                p2 = 0.995;
            }

            q2 = qtrng0(p2, v, r);

            // Refine approximation

            double e1 = 0;
            double e2 = 0;
            double d = 0;

            for (int j = 2; j <= jmax; ++j) {
                p2 = prtrng(q2, v, r);
                e1 = p1 - p;
                e2 = p2 - p;
                qtrng = (q1 + q2) / 2;
                d = e2 - e1;

                if (Math.abs(d) > eps) {
                    qtrng = (e2 * q1 - e1 * q2) / d;
                }

                if (Math.abs(e1) >= Math.abs(e2)) {
                    q1 = q2;
                    p1 = p2;
                }

                if (Math.abs(p1 - p) < pcut * 5) {
                    return qtrng;
                }

                q2 = qtrng;
            }

            return qtrng;
        }

        /**
         * Algorithm AS 190.2 Appl Statist (1983) Vol.32, No.2.
         *
         * Calculates an initial quantile p for a studentized range distribution
         * having v degrees of freedom and r samples for probability p, p.gt.0.80
         * .and. p.lt.0.995.
         *
         * Uses function ppnd - Algorithm AS 111
         *
         * @param p
         *            P-value for which to find initial quantile
         * @param v
         *            Degrees of freedom for distribution
         * @param r
         *            Number of samples for distribution
         * @return Initial quantile at p for given distribution
         */
        public static double qtrng0(double p, double v, double r) {
            final double vmax = 120;

            double t = ppnd(0.5 + 0.5 * p);

            if (v < vmax) {
                t += (t * t * t + t) / v / 4;
            }

            double q = 0.8843 - 0.2368 * t;

            if (v < vmax) {
                q += -1.214 / v + 1.208 * t / v;
            }

            return t * (q * Math.log(r - 1) + 1.4142);
        }

    }

}