All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.cmu.tetrad.search.utils.DeltaTetradTest Maven / Gradle / Ivy

There is a newer version: 7.6.5
Show newest version
///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below.       //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,       //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard        //
// Scheines, Joseph Ramsey, and Clark Glymour.                               //
//                                                                           //
// This program is free software; you can redistribute it and/or modify      //
// it under the terms of the GNU General Public License as published by      //
// the Free Software Foundation; either version 2 of the License, or         //
// (at your option) any later version.                                       //
//                                                                           //
// This program is distributed in the hope that it will be useful,           //
// but WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             //
// GNU General Public License for more details.                              //
//                                                                           //
// You should have received a copy of the GNU General Public License         //
// along with this program; if not, write to the Free Software               //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA //
///////////////////////////////////////////////////////////////////////////////

package edu.cmu.tetrad.search.utils;

import edu.cmu.tetrad.data.*;
import edu.cmu.tetrad.graph.Node;
import edu.cmu.tetrad.util.Matrix;
import org.apache.commons.math3.distribution.ChiSquaredDistribution;

import java.util.*;

/**
 * Implements a test for simultaneously zero tetrads in Bollen, K. (1990). "Outlier screening and distribution-free test
 * for vanishing tetrads." Sociological Methods and Research 19, 80-92 and Bollen and Ting, Confirmatory Tetrad
 * Analysis.
 *
 * @author josephramsey
 */
public class DeltaTetradTest {
    private final int N;
    private final ICovarianceMatrix cov;
    private final List variables;
    private final Map variablesHash;
    private DataSet dataSet;
    private double[][] data;
    private int df;
    private double chisq;

    // As input we require a data set and a list of non-redundant Tetrads.

    // Need a method to remove Tetrads from the input list until what's left is
    // non-redundant. Or at least to check for non-redundancy. Maybe simply
    // checking to see whether a matrix exception is thrown is sufficient.
    // Peter suggests looking at Modern Factor Analysis by Harmon, at triplets.

    /**
     * Constructs a test using a given data set. If a data set is provided (that is, a tabular data set), fourth moment
     * statistics can be calculated (p. 160); otherwise, it must be assumed that the data are multivariate Gaussian.
     *
     * @param dataSet The dataset to use.
     */
    public DeltaTetradTest(DataSet dataSet) {
        if (dataSet == null) {
            throw new NullPointerException();
        }

        if (!dataSet.isContinuous()) {
            throw new IllegalArgumentException();
        }

        this.cov = new CovarianceMatrix(dataSet);

        List data1 = new ArrayList<>();
        data1.add(dataSet);
        List data2 = DataTransforms.center(data1);

        this.dataSet = data2.get(0);

        this.data = this.dataSet.getDoubleData().transpose().toArray();
        this.N = dataSet.getNumRows();
        this.variables = dataSet.getVariables();

        this.variablesHash = new HashMap<>();

        for (int i = 0; i < this.variables.size(); i++) {
            this.variablesHash.put(this.variables.get(i), i);
        }
    }

    /**
     * Constructs a test using the given covariance matrix. Fourth moment statistics are not caculated; it is assumed
     * that the data are distributed as multivariate Gaussian.
     *
     * @param cov The covaraince matrix to use.
     */
    public DeltaTetradTest(ICovarianceMatrix cov) {
        if (cov == null) {
            throw new NullPointerException();
        }

        this.cov = cov;
        this.N = cov.getSampleSize();
        this.variables = cov.getVariables();

        this.variablesHash = new HashMap<>();

        for (int i = 0; i < this.variables.size(); i++) {
            this.variablesHash.put(this.variables.get(i), i);
        }
    }

    /**
     * Takes a list of tetrads for the given data set and returns the chi square value for the test. We assume that the
     * tetrads are non-redundant; if not, a matrix exception will be thrown.
     * 

* Calculates the T statistic (Bollen and Ting, p. 161). This is significant if tests as significant using the Chi * Square distribution with degrees of freedom equal to the number of nonredundant tetrads tested. * * @param tetrads The tetrads for which a chi-square is needed. */ public double calcChiSquare(Tetrad... tetrads) { this.df = tetrads.length; // Need a list of symbolic covariances--i.e. covariances that appear in tetrads. Set boldSigmaSet = new LinkedHashSet<>(); for (Tetrad tetrad : tetrads) { boldSigmaSet.add(new Sigma(tetrad.getI(), tetrad.getK())); boldSigmaSet.add(new Sigma(tetrad.getI(), tetrad.getL())); boldSigmaSet.add(new Sigma(tetrad.getJ(), tetrad.getK())); boldSigmaSet.add(new Sigma(tetrad.getJ(), tetrad.getL())); } List boldSigma = new ArrayList<>(boldSigmaSet); // Need a matrix of variances and covariances of sample covariances. Matrix sigma_ss = new Matrix(boldSigma.size(), boldSigma.size()); for (int i = 0; i < boldSigma.size(); i++) { for (int j = 0; j < boldSigma.size(); j++) { Sigma sigmaef = boldSigma.get(i); Sigma sigmagh = boldSigma.get(j); Node e = sigmaef.getA(); Node f = sigmaef.getB(); Node g = sigmagh.getA(); Node h = sigmagh.getB(); if (this.cov != null && this.cov instanceof CorrelationMatrix) { // Assumes multinormality. Using formula 23. (Not implementing formula 22 because that case // does not come up.) double rr = 0.5 * (sxy(e, f) * sxy(g, h)) * (sxy(e, g) * sxy(e, g) + sxy(e, h) * sxy(e, h) + sxy(f, g) * sxy(f, g) + sxy(f, h) * sxy(f, h)) + sxy(e, g) * sxy(f, h) + sxy(e, h) * sxy(f, g) - sxy(e, f) * (sxy(f, g) * sxy(f, h) + sxy(e, g) * sxy(e, h)) - sxy(g, h) * (sxy(f, g) * sxy(e, g) + sxy(f, h) * sxy(e, h)); sigma_ss.set(i, j, rr); } else if (this.cov != null && this.dataSet == null) { // Assumes multinormality--see p. 160. double _ss = sxy(e, g) * sxy(f, h) - sxy(e, h) * sxy(f, g); // + or -? Different advise. + in the code. sigma_ss.set(i, j, _ss); } else { double _ss = sxyzw(e, f, g, h) - sxy(e, f) * sxy(g, h); sigma_ss.set(i, j, _ss); } } } // Need a matrix of of population estimates of partial derivatives of tetrads // with respect to covariances in boldSigma.w Matrix del = new Matrix(boldSigma.size(), tetrads.length); for (int i = 0; i < boldSigma.size(); i++) { for (int j = 0; j < tetrads.length; j++) { Sigma sigma = boldSigma.get(i); Tetrad tetrad = tetrads[j]; Node e = tetrad.getI(); Node f = tetrad.getJ(); Node g = tetrad.getK(); Node h = tetrad.getL(); double derivative = getDerivative(e, f, g, h, sigma.getA(), sigma.getB()); del.set(i, j, derivative); } } // Need a vector of population estimates of the tetrads. Matrix t = new Matrix(tetrads.length, 1); for (int i = 0; i < tetrads.length; i++) { Tetrad tetrad = tetrads[i]; Node e = tetrad.getI(); Node f = tetrad.getJ(); Node g = tetrad.getK(); Node h = tetrad.getL(); double d1 = sxy(e, f); double d2 = sxy(g, h); double d3 = sxy(e, g); double d4 = sxy(f, h); double value = d1 * d2 - d3 * d4; t.set(i, 0, value); } // Now multiply to get Sigma_tt Matrix w1 = del.transpose().times(sigma_ss); Matrix sigma_tt = w1.times(del); // And now invert and multiply to get T. Matrix v0 = sigma_tt.inverse(); Matrix v1 = t.transpose().times(v0); Matrix v2 = v1.times(t); double chisq = this.N * v2.get(0, 0); this.chisq = chisq; return chisq; } /** * @return the p value for the most recent test. */ public double getPValue() { double cdf = new ChiSquaredDistribution(this.df).cumulativeProbability(this.chisq); return 1.0 - cdf; } /** * Returns a p-value for the given list of tetrad. * * @param tetrads The tetrad for which a p-vaue is needed. * @return The p-value. */ public double getPValue(Tetrad... tetrads) { calcChiSquare(tetrads); return getPValue(); } private double sxyzw(Node e, Node f, Node g, Node h) { if (this.dataSet == null) { throw new IllegalArgumentException("To calculate sxyzw, tabular data is needed."); } int x = this.variablesHash.get(e); int y = this.variablesHash.get(f); int z = this.variablesHash.get(g); int w = this.variablesHash.get(h); return getForthMoment(x, y, z, w); } private double getForthMoment(int x, int y, int z, int w) { return sxyzw(x, y, z, w); } /** * If using a covariance matrix or a correlation matrix, just returns the lookups. Otherwise calculates the * covariance. */ private double sxy(Node _node1, Node _node2) { int i = this.variablesHash.get(_node1); int j = this.variablesHash.get(_node2); if (this.cov != null) { return this.cov.getValue(i, j); } else { double[] arr1 = this.data[i]; double[] arr2 = this.data[j]; return sxy(arr1, arr2, arr1.length); } } private double getDerivative(Node node1, Node node2, Node node3, Node node4, Node a, Node b) { if (node1 == a && node2 == b) { return sxy(node3, node4); } if (node1 == b && node2 == a) { return sxy(node3, node4); } if (node3 == a && node4 == b) { return sxy(node1, node2); } if (node3 == b && node4 == a) { return sxy(node1, node2); } if (node1 == a && node3 == b) { return -sxy(node2, node4); } if (node1 == b && node3 == a) { return -sxy(node2, node4); } if (node2 == a && node4 == b) { return -sxy(node1, node3); } if (node2 == b && node4 == a) { return -sxy(node1, node3); } return 0.0; } private double sxyzw(int x, int y, int z, int w) { double sxyzw = 0.0; double[] _x = this.data[x]; double[] _y = this.data[y]; double[] _z = this.data[z]; double[] _w = this.data[w]; int N = _x.length; for (int j = 0; j < N; j++) { sxyzw += _x[j] * _y[j] * _z[j] * _w[j]; } return (1.0 / N) * sxyzw; } private double sxy(double[] array1, double[] array2, int N) { int i; double sum = 0.0; for (i = 0; i < N; i++) { sum += array1[i] * array2[i]; } return (1.0 / N) * sum; } private static class Sigma { private final Node a; private final Node b; public Sigma(Node a, Node b) { this.a = a; this.b = b; } public Node getA() { return this.a; } public Node getB() { return this.b; } public boolean equals(Object o) { if (!(o instanceof Sigma)) { throw new IllegalArgumentException(); } Sigma _o = (Sigma) o; return (_o.getA().equals(getA()) && _o.getB().equals(getB())) || (_o.getB().equals(getA()) && _o.getA().equals(getB())); } public int hashCode() { return this.a.hashCode() + this.b.hashCode(); } public String toString() { return "Sigma(" + getA() + ", " + getB() + ")"; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy