edu.cmu.tetrad.search.utils.DeltaTetradTest Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of tetrad-lib Show documentation
There is a newer version: 7.6.5
///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below.       //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,       //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard        //
// Scheines, Joseph Ramsey, and Clark Glymour.                               //
//                                                                           //
// This program is free software; you can redistribute it and/or modify      //
// it under the terms of the GNU General Public License as published by      //
// the Free Software Foundation; either version 2 of the License, or         //
// (at your option) any later version.                                       //
//                                                                           //
// This program is distributed in the hope that it will be useful,           //
// but WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             //
// GNU General Public License for more details.                              //
//                                                                           //
// You should have received a copy of the GNU General Public License         //
// along with this program; if not, write to the Free Software               //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA //
///////////////////////////////////////////////////////////////////////////////

package edu.cmu.tetrad.search.utils;

import edu.cmu.tetrad.data.*;
import edu.cmu.tetrad.graph.Node;
import edu.cmu.tetrad.util.Matrix;
import org.apache.commons.math3.distribution.ChiSquaredDistribution;

import java.util.*;

/**
 * Implements a test for simultaneously zero tetrads in Bollen, K. (1990). "Outlier screening and distribution-free test
 * for vanishing tetrads." Sociological Methods and Research 19, 80-92 and Bollen and Ting, Confirmatory Tetrad
 * Analysis.
 *
 * @author josephramsey
 */
public class DeltaTetradTest {
    private final int N;
    private final ICovarianceMatrix cov;
    private final List variables;
    private final Map variablesHash;
    private DataSet dataSet;
    private double[][] data;
    private int df;
    private double chisq;

    // As input we require a data set and a list of non-redundant Tetrads.

    // Need a method to remove Tetrads from the input list until what's left is
    // non-redundant. Or at least to check for non-redundancy. Maybe simply
    // checking to see whether a matrix exception is thrown is sufficient.
    // Peter suggests looking at Modern Factor Analysis by Harmon, at triplets.

    /**
     * Constructs a test using a given data set. If a data set is provided (that is, a tabular data set), fourth moment
     * statistics can be calculated (p. 160); otherwise, it must be assumed that the data are multivariate Gaussian.
     *
     * @param dataSet The dataset to use.
     */
    public DeltaTetradTest(DataSet dataSet) {
        if (dataSet == null) {
            throw new NullPointerException();
        }

        if (!dataSet.isContinuous()) {
            throw new IllegalArgumentException();
        }

        this.cov = new CovarianceMatrix(dataSet);

        List data1 = new ArrayList<>();
        data1.add(dataSet);
        List data2 = DataTransforms.center(data1);

        this.dataSet = data2.get(0);

        this.data = this.dataSet.getDoubleData().transpose().toArray();
        this.N = dataSet.getNumRows();
        this.variables = dataSet.getVariables();

        this.variablesHash = new HashMap<>();

        for (int i = 0; i < this.variables.size(); i++) {
            this.variablesHash.put(this.variables.get(i), i);
        }
    }

    /**
     * Constructs a test using the given covariance matrix. Fourth moment statistics are not caculated; it is assumed
     * that the data are distributed as multivariate Gaussian.
     *
     * @param cov The covaraince matrix to use.
     */
    public DeltaTetradTest(ICovarianceMatrix cov) {
        if (cov == null) {
            throw new NullPointerException();
        }

        this.cov = cov;
        this.N = cov.getSampleSize();
        this.variables = cov.getVariables();

        this.variablesHash = new HashMap<>();

        for (int i = 0; i < this.variables.size(); i++) {
            this.variablesHash.put(this.variables.get(i), i);
        }
    }

    /**
     * Takes a list of tetrads for the given data set and returns the chi square value for the test. We assume that the
     * tetrads are non-redundant; if not, a matrix exception will be thrown.
     * 
     * Calculates the T statistic (Bollen and Ting, p. 161). This is significant if tests as significant using the Chi
     * Square distribution with degrees of freedom equal to the number of nonredundant tetrads tested.
     *
     * @param tetrads The tetrads for which a chi-square is needed.
     */
    public double calcChiSquare(Tetrad... tetrads) {
        this.df = tetrads.length;

        // Need a list of symbolic covariances--i.e. covariances that appear in tetrads.
        Set boldSigmaSet = new LinkedHashSet<>();

        for (Tetrad tetrad : tetrads) {
            boldSigmaSet.add(new Sigma(tetrad.getI(), tetrad.getK()));
            boldSigmaSet.add(new Sigma(tetrad.getI(), tetrad.getL()));
            boldSigmaSet.add(new Sigma(tetrad.getJ(), tetrad.getK()));
            boldSigmaSet.add(new Sigma(tetrad.getJ(), tetrad.getL()));
        }

        List boldSigma = new ArrayList<>(boldSigmaSet);

        // Need a matrix of variances and covariances of sample covariances.
        Matrix sigma_ss = new Matrix(boldSigma.size(), boldSigma.size());

        for (int i = 0; i < boldSigma.size(); i++) {
            for (int j = 0; j < boldSigma.size(); j++) {
                Sigma sigmaef = boldSigma.get(i);
                Sigma sigmagh = boldSigma.get(j);

                Node e = sigmaef.getA();
                Node f = sigmaef.getB();
                Node g = sigmagh.getA();
                Node h = sigmagh.getB();

                if (this.cov != null && this.cov instanceof CorrelationMatrix) {

//                Assumes multinormality. Using formula 23. (Not implementing formula 22 because that case
//                does not come up.)
                    double rr = 0.5 * (sxy(e, f) * sxy(g, h))
                            * (sxy(e, g) * sxy(e, g) + sxy(e, h) * sxy(e, h) + sxy(f, g) * sxy(f, g) + sxy(f, h) * sxy(f, h))
                            + sxy(e, g) * sxy(f, h) + sxy(e, h) * sxy(f, g)
                            - sxy(e, f) * (sxy(f, g) * sxy(f, h) + sxy(e, g) * sxy(e, h))
                            - sxy(g, h) * (sxy(f, g) * sxy(e, g) + sxy(f, h) * sxy(e, h));

                    sigma_ss.set(i, j, rr);
                } else if (this.cov != null && this.dataSet == null) {

                    // Assumes multinormality--see p. 160.
                    double _ss = sxy(e, g) * sxy(f, h) - sxy(e, h) * sxy(f, g);   // + or -? Different advise. + in the code.
                    sigma_ss.set(i, j, _ss);
                } else {
                    double _ss = sxyzw(e, f, g, h) - sxy(e, f) * sxy(g, h);
                    sigma_ss.set(i, j, _ss);
                }
            }
        }

        // Need a matrix of of population estimates of partial derivatives of tetrads
        // with respect to covariances in boldSigma.w
        Matrix del = new Matrix(boldSigma.size(), tetrads.length);

        for (int i = 0; i < boldSigma.size(); i++) {
            for (int j = 0; j < tetrads.length; j++) {
                Sigma sigma = boldSigma.get(i);
                Tetrad tetrad = tetrads[j];

                Node e = tetrad.getI();
                Node f = tetrad.getJ();
                Node g = tetrad.getK();
                Node h = tetrad.getL();

                double derivative = getDerivative(e, f, g, h, sigma.getA(), sigma.getB());
                del.set(i, j, derivative);
            }
        }

        // Need a vector of population estimates of the tetrads.
        Matrix t = new Matrix(tetrads.length, 1);

        for (int i = 0; i < tetrads.length; i++) {
            Tetrad tetrad = tetrads[i];

            Node e = tetrad.getI();
            Node f = tetrad.getJ();
            Node g = tetrad.getK();
            Node h = tetrad.getL();

            double d1 = sxy(e, f);
            double d2 = sxy(g, h);
            double d3 = sxy(e, g);
            double d4 = sxy(f, h);

            double value = d1 * d2 - d3 * d4;
            t.set(i, 0, value);
        }

        // Now multiply to get Sigma_tt
        Matrix w1 = del.transpose().times(sigma_ss);
        Matrix sigma_tt = w1.times(del);

        // And now invert and multiply to get T.
        Matrix v0 = sigma_tt.inverse();
        Matrix v1 = t.transpose().times(v0);
        Matrix v2 = v1.times(t);
        double chisq = this.N * v2.get(0, 0);

        this.chisq = chisq;

        return chisq;
    }

    /**
     * @return the p value for the most recent test.
     */
    public double getPValue() {
        double cdf = new ChiSquaredDistribution(this.df).cumulativeProbability(this.chisq);
        return 1.0 - cdf;
    }

    /**
     * Returns a p-value for the given list of tetrad.
     *
     * @param tetrads The tetrad for which a p-vaue is needed.
     * @return The p-value.
     */
    public double getPValue(Tetrad... tetrads) {
        calcChiSquare(tetrads);
        return getPValue();
    }

    private double sxyzw(Node e, Node f, Node g, Node h) {
        if (this.dataSet == null) {
            throw new IllegalArgumentException("To calculate sxyzw, tabular data is needed.");
        }

        int x = this.variablesHash.get(e);
        int y = this.variablesHash.get(f);
        int z = this.variablesHash.get(g);
        int w = this.variablesHash.get(h);

        return getForthMoment(x, y, z, w);
    }

    private double getForthMoment(int x, int y, int z, int w) {
        return sxyzw(x, y, z, w);
    }

    /**
     * If using a covariance matrix or a correlation matrix, just returns the lookups. Otherwise calculates the
     * covariance.
     */
    private double sxy(Node _node1, Node _node2) {
        int i = this.variablesHash.get(_node1);
        int j = this.variablesHash.get(_node2);

        if (this.cov != null) {
            return this.cov.getValue(i, j);
        } else {
            double[] arr1 = this.data[i];
            double[] arr2 = this.data[j];
            return sxy(arr1, arr2, arr1.length);
        }
    }

    private double getDerivative(Node node1, Node node2, Node node3, Node node4, Node a, Node b) {
        if (node1 == a && node2 == b) {
            return sxy(node3, node4);
        }

        if (node1 == b && node2 == a) {
            return sxy(node3, node4);
        }

        if (node3 == a && node4 == b) {
            return sxy(node1, node2);
        }

        if (node3 == b && node4 == a) {
            return sxy(node1, node2);
        }

        if (node1 == a && node3 == b) {
            return -sxy(node2, node4);
        }

        if (node1 == b && node3 == a) {
            return -sxy(node2, node4);
        }

        if (node2 == a && node4 == b) {
            return -sxy(node1, node3);
        }

        if (node2 == b && node4 == a) {
            return -sxy(node1, node3);
        }

        return 0.0;
    }

    private double sxyzw(int x, int y, int z, int w) {
        double sxyzw = 0.0;

        double[] _x = this.data[x];
        double[] _y = this.data[y];
        double[] _z = this.data[z];
        double[] _w = this.data[w];

        int N = _x.length;

        for (int j = 0; j < N; j++) {
            sxyzw += _x[j] * _y[j] * _z[j] * _w[j];
        }

        return (1.0 / N) * sxyzw;
    }

    private double sxy(double[] array1, double[] array2, int N) {
        int i;
        double sum = 0.0;

        for (i = 0; i < N; i++) {
            sum += array1[i] * array2[i];
        }

        return (1.0 / N) * sum;
    }

    private static class Sigma {
        private final Node a;
        private final Node b;

        public Sigma(Node a, Node b) {
            this.a = a;
            this.b = b;
        }

        public Node getA() {
            return this.a;
        }

        public Node getB() {
            return this.b;
        }

        public boolean equals(Object o) {
            if (!(o instanceof Sigma)) {
                throw new IllegalArgumentException();
            }

            Sigma _o = (Sigma) o;
            return (_o.getA().equals(getA()) && _o.getB().equals(getB())) || (_o.getB().equals(getA()) && _o.getA().equals(getB()));
        }

        public int hashCode() {
            return this.a.hashCode() + this.b.hashCode();
        }

        public String toString() {
            return "Sigma(" + getA() + ", " + getB() + ")";
        }
    }
}