All downloads are free. Search and download functionality uses the official Maven repository.

edu.cmu.tetradapp.editor.NormalityTests Maven / Gradle / Ivy

The newest version!
///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below.       //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,       //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard        //
// Scheines, Joseph Ramsey, and Clark Glymour.                               //
//                                                                           //
// This program is free software; you can redistribute it and/or modify      //
// it under the terms of the GNU General Public License as published by      //
// the Free Software Foundation; either version 2 of the License, or         //
// (at your option) any later version.                                       //
//                                                                           //
// This program is distributed in the hope that it will be useful,           //
// but WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             //
// GNU General Public License for more details.                              //
//                                                                           //
// You should have received a copy of the GNU General Public License         //
// along with this program; if not, write to the Free Software               //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA //
///////////////////////////////////////////////////////////////////////////////

package edu.cmu.tetradapp.editor;

import cern.jet.random.Normal;
import cern.jet.random.engine.MersenneTwister;
import edu.cmu.tetrad.data.AndersonDarlingTest;
import edu.cmu.tetrad.data.ContinuousVariable;
import edu.cmu.tetrad.data.DataSet;
import edu.cmu.tetrad.data.Variable;
import edu.cmu.tetrad.util.NumberFormatUtil;
import org.apache.commons.math3.util.FastMath;

import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Contains some normality tests: a Kolmogorov-Smirnov test against a Normal distribution whose mean and standard
 * deviation are estimated from the column itself, and a formatted report that also includes the Anderson-Darling
 * test.
 *
 * <p>Missing values (NaN) in the tested column are left out of the Kolmogorov-Smirnov computation; the sample size
 * used for that test is the number of non-missing values.
 *
 * @author Michael Freenor
 */
class NormalityTests {

    /**
     * Number of significance levels supported by the K-S test (alpha = .20, .15, .10, .05, .01).
     */
    private static final int KS_LEVEL_COUNT = 5;

    /**
     * Largest sample size covered by the lookup table; above it the large-sample approximation is used.
     */
    private static final int KS_TABLE_MAX_N = 35;

    /**
     * Largest sample size that has its own table row; beyond it rows exist only for n = 25, 30 and 35.
     */
    private static final int KS_TABLE_DENSE_MAX_N = 20;

    /**
     * K-S critical values for small samples. The first index is {@code level - 1}; the second index is the row
     * returned by {@link #ksTableRow(int)} (n = 1..20, then 25, 30, 35).
     */
    private static final double[][] KS_TABLE = {
            // alpha = .20
            {.900, .684, .565, .494, .446, .410, .381, .358, .339, .322,
                    .307, .295, .284, .274, .266, .258, .250, .244, .237, .231,
                    .210, .190, .180},
            // alpha = .15
            {.925, .726, .597, .525, .474, .436, .405, .381, .360, .342,
                    .326, .313, .302, .292, .283, .274, .266, .259, .252, .246,
                    .220, .200, .190},
            // alpha = .10
            {.950, .776, .642, .564, .510, .470, .438, .411, .388, .368,
                    .352, .338, .325, .314, .304, .295, .286, .278, .272, .264,
                    .240, .220, .210},
            // alpha = .05
            {.975, .842, .708, .624, .565, .521, .486, .457, .432, .410,
                    .391, .375, .361, .349, .338, .328, .318, .309, .301, .294,
                    .270, .240, .230},
            // alpha = .01
            {.995, .929, .828, .733, .669, .618, .577, .543, .514, .490,
                    .468, .450, .433, .418, .404, .392, .381, .371, .363, .356,
                    .320, .290, .270}
    };

    /**
     * Numerators c of the large-sample approximation c / sqrt(n), indexed by {@code level - 1}.
     */
    private static final double[] KS_LARGE_SAMPLE_COEFFICIENTS = {1.07, 1.14, 1.22, 1.36, 1.63};

    /**
     * Constructs a readable table of normality test results: the Kolmogorov-Smirnov statistic with its critical
     * values and an ACCEPT/FAIL verdict per significance level, followed by the Anderson-Darling A^2, A^2* and p.
     *
     * @param dataSet  a {@link edu.cmu.tetrad.data.DataSet} object
     * @param variable a {@link edu.cmu.tetrad.data.ContinuousVariable} object
     * @return a {@link java.lang.String} object
     */
    public static String runNormalityTests(DataSet dataSet, ContinuousVariable variable) {
        NumberFormat nf = NumberFormatUtil.getInstance().getNumberFormat();

        String title = "Normality Tests for: " + variable.getName() + " (sample size:" + dataSet.getNumRows() + ")";
        StringBuilder result = new StringBuilder(title);
        result.append("\n");
        // Underline the title with one dash per title character.
        for (int i = 0; i < title.length(); i++) {
            result.append("-");
        }
        result.append("\n\nKolmogorov Smirnov:\n--------------------------------\n");

        double[] ksResults = NormalityTests.kolmogorovSmirnov(dataSet, variable);
        double statistic = ksResults[0];
        // Show the statistic rounded to seven decimal places.
        double ksStat = FastMath.round((statistic * 10000000.0)) / 10000000.0;
        result.append("K-S Statistic: ").append(ksStat).append("\n\n");
        result.append("Significance Levels:\t.20\t.15\t.10\t.05\t.01\nK-S Critical Values:");

        // The critical values live at indices 1..5; index 0 is the statistic itself.
        for (int level = 1; level <= KS_LEVEL_COUNT; level++) {
            result.append("\t").append(nf.format(ksResults[level]));
        }
        result.append("\n");

        result.append("Test Result:\t");
        for (int level = 1; level <= KS_LEVEL_COUNT; level++) {
            // Normality is accepted at a level when the statistic is strictly below that level's critical value.
            String verdict = statistic < ksResults[level] ? "ACCEPT" : "FAIL";
            result.append("\t").append(verdict);
        }

        result.append("\n\nH0 = ").append(variable).append(" is Normal.\n");
        result.append("(Normal if ACCEPT.)\n");

        result.append("\n\n");

        result.append("Anderson Darling Test:\n");
        result.append("---------------------\n");

        int column = dataSet.getVariables().indexOf(variable);
        double[] data = dataSet.getDoubleData().getColumn(column).toArray();
        AndersonDarlingTest andersonDarlingTest = new AndersonDarlingTest(data);
        result.append("A^2 = ").append(nf.format(andersonDarlingTest.getASquared())).append("\n");
        result.append("A^2* = ").append(nf.format(andersonDarlingTest.getASquaredStar())).append("\n");
        result.append("p = ").append(nf.format(andersonDarlingTest.getP())).append("\n");

        // NOTE(review): the H0 wording below ("is Non-normal") reads as inconsistent with the
        // "(Normal if p > alpha.)" line that follows it; the text is kept unchanged -- confirm intended wording.
        result.append("\nH0 = ").append(variable).append(" is Non-normal.");
        result.append("\n(Normal if p > alpha.)\n");

        return result.toString();
    }

    /**
     * Calculates the Kolmogorov-Smirnov statistics for a variable. The reference distribution is a Normal whose mean
     * and standard deviation are estimated from the non-missing values of the same column. Missing (NaN) values are
     * excluded, and the sample size used for the statistic and the critical values is the number of remaining
     * values.
     *
     * @param dataSet  relevant data set
     * @param variable continuous variable whose normality is in question
     * @return Kolmogorov-Smirnov statistics: index 0 is the D_n value, 1-5 are the critical values at alpha = .20,
     * .15, .10, .05, and .01 respectively.
     */
    public static double[] kolmogorovSmirnov(DataSet dataSet, ContinuousVariable variable) {
        int columnIndex = dataSet.getColumn(variable);
        double[] data = NormalityTests.sortedNonMissing(dataSet.getDoubleData().getColumn(columnIndex).toArray());
        // Effective sample size: rows with missing values do not take part in the test.
        int n = data.length;

        double[] ks = new double[KS_LEVEL_COUNT + 1];

        //get all critical values
        for (int level = 1; level <= KS_LEVEL_COUNT; level++) {
            ks[level] = NormalityTests.estimateKSCriticalValue(level, n);
        }

        Normal idealDistribution = NormalityTests.getNormal(dataSet, variable);

        double d = 0.0;
        for (int i = 0; i < n; i++) {
            double idealValue = idealDistribution.cdf(data[i]);
            // The empirical CDF jumps from i/n to (i+1)/n at data[i]; the largest gap to the ideal CDF can occur
            // on either side of that jump, so both are checked.
            double gapAbove = ((double) (i + 1) / n) - idealValue;
            double gapBelow = idealValue - ((double) i / n);
            // Plain comparisons (rather than max) so that a NaN CDF value leaves d untouched.
            if (gapAbove > d) {
                d = gapAbove;
            }
            if (gapBelow > d) {
                d = gapBelow;
            }
        }

        ks[0] = d;

        return ks;
    }

    /**
     * Returns the non-NaN entries of the given values, sorted in ascending order. The input array is not modified.
     *
     * @param values raw column values, possibly containing NaN for missing entries
     * @return a new sorted array holding only the non-NaN values
     */
    private static double[] sortedNonMissing(double[] values) {
        double[] kept = new double[values.length];
        int count = 0;

        for (double value : values) {
            if (!Double.isNaN(value)) {
                kept[count++] = value;
            }
        }

        double[] sorted = Arrays.copyOf(kept, count);
        Arrays.sort(sorted);
        return sorted;
    }

    /**
     * Calculates the K-S critical value. For n up to 35 the value is read from a table (sample sizes between table
     * rows use the nearest row below: 21-24 use n = 20, 26-29 use n = 25, 31-34 use n = 30); for larger n it is
     * approximated as c / sqrt(n) with a level-specific constant c.
     *
     * @param level the level at which you are rejecting or accepting the test.  Use one of the following values: 1 for
     *              alpha = .20, 2 for .15, 3 for .10, 4 for .05, 5 for .01
     * @param n     sample size
     * @return criticalValue the critical value for the given level; 0.0 if the level is not one of 1-5 or n is not
     * positive
     */
    private static double estimateKSCriticalValue(int level, int n) {
        if (level < 1 || level > KS_LEVEL_COUNT || n <= 0) {
            return 0.0;
        }

        //if n <= 35, lookup from table . . .
        if (n <= KS_TABLE_MAX_N) {
            return KS_TABLE[level - 1][NormalityTests.ksTableRow(n)];
        }

        //else, estimate
        return KS_LARGE_SAMPLE_COEFFICIENTS[level - 1] / FastMath.sqrt(n);
    }

    /**
     * Maps a sample size in 1..35 to its row in {@link #KS_TABLE}.
     *
     * @param n sample size, expected to be between 1 and 35 inclusive
     * @return row index: n - 1 for n up to 20, then one row per block of five (20-24, 25-29, 30-34, 35)
     */
    private static int ksTableRow(int n) {
        if (n <= KS_TABLE_DENSE_MAX_N) {
            return n - 1;
        }
        // Integer division buckets 21-24 with 20, 25-29 with 25, 30-34 with 30, and leaves 35 on its own row.
        return (KS_TABLE_DENSE_MAX_N - 1) + (n - KS_TABLE_DENSE_MAX_N) / 5;
    }

    /**
     * Generates an ideal Normal distribution for some variable, using the mean and standard deviation returned by
     * {@link #normalParams(DataSet, Variable)}.
     *
     * @return Ideal Normal distribution for a variable.
     */
    private static Normal getNormal(DataSet dataSet, Variable variable) {
        double[] paramsForNormal = NormalityTests.normalParams(dataSet, variable);
        double mean = paramsForNormal[0];
        double sd = paramsForNormal[1];

        return new Normal(mean, sd, new MersenneTwister());
    }

    /**
     * Given some variable, returns the mean and sample standard deviation (n - 1 denominator) of its non-missing
     * values in indices 0 and 1 respectively. Both are NaN when there are no non-missing values, and the standard
     * deviation is NaN when there is only one.
     *
     * @return [0] -> mean, [1] -> standard deviation
     */
    private static double[] normalParams(DataSet dataSet, Variable variable) {
        int columnIndex = dataSet.getColumn(variable);
        int rows = dataSet.getNumRows();

        //calculate the mean, skipping missing values
        double sum = 0.0;
        int count = 0;
        for (int i = 0; i < rows; i++) {
            double value = dataSet.getDouble(i, columnIndex);
            if (!Double.isNaN(value)) {
                sum += value;
                count++;
            }
        }

        double mean = sum / count;

        //calculate the standard deviation over the same non-missing values
        double sumOfSquares = 0.0;
        for (int i = 0; i < rows; i++) {
            double value = dataSet.getDouble(i, columnIndex);
            if (!Double.isNaN(value)) {
                double deviation = value - mean;
                sumOfSquares += deviation * deviation;
            }
        }

        double sd = FastMath.sqrt(sumOfSquares / (count - 1.0));

        return new double[]{mean, sd};
    }
}







© 2015 - 2025 Weber Informatics LLC | Privacy Policy