///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below. //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard //
// Scheines, Joseph Ramsey, and Clark Glymour. //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation; either version 2 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program; if not, write to the Free Software //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //
///////////////////////////////////////////////////////////////////////////////
package edu.cmu.tetrad.search;
import edu.cmu.tetrad.data.DataSet;
import edu.cmu.tetrad.data.DataTransforms;
import edu.cmu.tetrad.data.Knowledge;
import edu.cmu.tetrad.graph.*;
import edu.cmu.tetrad.regression.RegressionDataset;
import edu.cmu.tetrad.regression.RegressionResult;
import edu.cmu.tetrad.search.score.Score;
import edu.cmu.tetrad.search.utils.GraphSearchUtils;
import edu.cmu.tetrad.util.*;
import org.apache.commons.math3.linear.SingularMatrixException;
import org.apache.commons.math3.util.FastMath;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static edu.cmu.tetrad.util.StatUtils.*;
import static org.apache.commons.math3.util.FastMath.*;
/**
* Implements the FASK (Fast Adjacency Skewness) algorithm, which makes decisions for adjacency and orientation using a
* combination of conditional independence testing, judgments of nonlinear adjacency, and pairwise orientation due to
* non-Gaussianity. The reference is this:
*
* Sanchez-Romero, R., Ramsey, J. D., Zhang, K., Glymour, M. R., Huang, B., and Glymour, C. (2019). Estimating
* feedforward and feedback effective connections from fMRI time series: Assessments of statistical methods. Network
* Neuroscience, 3(2), 274-306.
*
* Some adjustments have been made relative to that version, and some additional pairwise options have been added from
* this reference:
*
* Hyvärinen, A., and Smith, S. M. (2013). Pairwise likelihood ratios for estimation of non-Gaussian structural equation
* models. Journal of Machine Learning Research, 14(Jan), 111-152.
*
* This method (like the Hyvarinen and Smith methods) assumes that the data are generated by a linear, non-Gaussian
* causal process and attempts to recover the causal graph for that process. It does not attempt to recover the
* parametrization of this graph; for that, a separate estimation algorithm would be needed, such as linear regression
* of each node onto its parents. A further assumption is made, that there are no latent common causes of the measured
* variables. This is not a constraint on the pairwise orientation methods, since they orient with respect only to the
* two variables at the endpoints of an edge and so are happy with all other variables being considered latent with
* respect to that single edge. However, if the built-in adjacency search is used (FAS-Stable), the existence of
* latents will throw this method off.
*
* As was shown in the Hyvarinen and Smith paper above, FASK works quite well even if the graph contains feedback loops
* in most configurations, including 2-cycles. 2-cycles can be detected fairly well if the FASK left-right rule is
* selected and the 2-cycle threshold set to about 0.1--more will be detected (or hallucinated) if the threshold is set
* higher. As shown in the Sanchez-Romero reference above, 2-cycle detection of the FASK algorithm using this rule is
* quite good.
*
* Some edges may be undiscoverable by FAS-Stable; to recover more of these edges, a test related to the FASK left-right
* rule is used, and there is a threshold for this test. A good default for this threshold (the "skew edge threshold")
* is 0.3. For more of these edges, set this threshold to a lower number.
*
* It is assumed that the data are arranged so each variable forms a column and that there are no missing values. The
* data matrix is assumed to be rectangular. To this end, the Tetrad DataSet class is used, which enforces this.
*
* Note that orienting a DAG for a linear, non-Gaussian model using the Hyvarinen and Smith pairwise rules is
* alternatively known in the literature as Pairwise LiNGAM--see Hyvärinen, A., and Smith, S. M. (2013). Pairwise
* likelihood ratios for estimation of non-Gaussian structural equation models. Journal of Machine Learning Research,
* 14(Jan), 111-152. We include some of these methods here for comparison.
*
* Parameters:
*
* faskAdjacencyMethod: 1 # this runs FAS-Stable (the one used in the paper). See Algorithm 2.
*
* depth: -1 # controls the size of the conditioning sets in the independence tests; setting this to a small integer
* may reduce the running time, but can also result in false positives. -1 means that "all" possible sizes are checked.
*
* test: sem-bic-test # test for FAS adjacency
*
* score: sem-bic-score
*
* semBicRule: 1 # sets the Chickering Rule, used in the original FASK
*
* penaltyDiscount: 2 # if using sem-bic as the independence test (as in the paper). In the paper this is referred to
* as c. Check steps 1 and 10 in Algorithm 2, FAS-Stable.
*
* skewEdgeThreshold: 0.3 # See the description of the FASK algorithm, and step 11 in Algorithm 1, FASK. Threshold for
* adding edges that may not have been inferred because a positive/negative cycle resulted in a near-zero observed
* relation.
*
* faskLeftRightRule: 1 # this runs FASK v1, the original FASK rule from the paper
*
* faskDelta: -0.3 # See steps 1 and 11 in Algorithm 4 (this is the value set in the paper)
*
* twoCycleScreeningThreshold: 0 # not used in the original paper implementation; added afterwards. You can set it to
* 0.3, for example, to use it as a filter before running Algorithm 3, 2-cycle detection, which may take some time to run.
*
* orientationAlpha: 0.1 # referred to in the paper as TwoCycle Alpha or just alpha; the lower it is, the lower the
* chance of inferring a 2-cycle. Check steps 17 to 28 in Algorithm 3: 2-Cycle Detection Rule.
*
* structurePrior: 0 # prior on the number of parents. Not used in the paper implementation.
*
* So a command-line run would look like this:
*
* java -jar -Xmx10G causal-cmd-1.4.1-jar-with-dependencies.jar --delimiter tab --data-type continuous --dataset
* concat_BOLDfslfilter_60_FullMacaque.txt --prefix Fask_Test_MacaqueFull --algorithm fask --faskAdjacencyMethod 1
* --depth -1 --test sem-bic-test --score sem-bic-score --semBicRule 1 --penaltyDiscount 2 --skewEdgeThreshold 0.3
* --faskLeftRightRule 1 --faskDelta -0.3 --twoCycleScreeningThreshold 0 --orientationAlpha 0.1 --structurePrior 0
*
* This class is configured to respect knowledge of forbidden and required edges, including knowledge of temporal
* tiers.
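*
* A rough programmatic sketch of the same configuration (the construction of the score and the independence test is
* only indicated here; any Score and IndependenceTest defined over the dataset's variables may be substituted):
*
* Score score = ...;             // e.g., a SEM BIC score over the dataset
* IndependenceTest test = ...;   // e.g., a Fisher Z test over the dataset
* FaskOrig fask = new FaskOrig(dataSet, score, test);
* fask.setAdjacencyMethod(FaskOrig.AdjacencyMethod.FAS_STABLE);
* fask.setLeftRight(FaskOrig.LeftRight.FASK1);
* fask.setSkewEdgeThreshold(0.3);
* fask.setDelta(-0.3);
* fask.setOrientationAlpha(0.1);
* Graph graph = fask.search();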
*
* This is the code as it was before the cleanup on 2024-5-16.
*
* @author josephramsey
* @author rubensanchez
* @version $Id: $Id
* @see Knowledge
* @see Lofs
*/
public final class FaskOrig implements IGraphSearch {
/**
* The independence test to be used for the FAS adjacency search.
*/
private final IndependenceTest test;
/**
* The score to be used for the score-based adjacency searches (BOSS, GRASP, FGES).
*/
private final Score score;
/**
* The dataset being analyzed.
*/
private final DataSet dataSet;
/**
* Used for calculating coefficient values.
*/
private final RegressionDataset regressionDataset;
/**
* Represents the data as a double[][] array, with D[i] being the data for variable i.
*/
private double[][] D;
/**
* An initial graph to constrain the adjacency step.
*/
private Graph externalGraph;
/**
* Elapsed time of the search, in milliseconds.
*/
private long elapsed;
/**
* For the Fast Adjacency Search, the maximum number of variables in a conditioning set.
*/
private int depth = -1;
/**
* Knowledge the search will obey, of forbidden and required edges.
*/
private Knowledge knowledge = new Knowledge();
/**
* A threshold for including extra adjacencies due to skewness. Default is 0.3. For more edges, lower this
* threshold.
*/
private double skewEdgeThreshold;
/**
* A threshold for making 2-cycles. Note that the 2-cycle rule will only work with the FASK left-right rule. The
* default is 0 (no 2-cycles); a good value for finding a decent set of 2-cycles is 0.1.
*/
private double twoCycleScreeningCutoff;
/**
* At the end of the procedure, two cycles marked in the graph (for having small LR differences) are then tested
* statistically to see if they are two-cycles, using this cutoff. To adjust this cutoff, set the two-cycle alpha to
* a number in [0, 1]. The default alpha is 0.01.
*/
private double orientationCutoff;
/**
* The corresponding alpha.
*/
private double orientationAlpha;
/**
* Bias for orienting with negative coefficients.
*/
private double delta;
/**
* Whether X and Y should be adjusted for skewness. (Otherwise, they are assumed to have positive skewness.)
*/
private boolean empirical = true;
/**
* By default, GRASP will be used for adjacencies, though this can be set.
*/
private AdjacencyMethod adjacencyMethod = AdjacencyMethod.GRASP;
/**
* The left-right rule to use; the default is RSKEW.
*/
private LeftRight leftRight = LeftRight.RSKEW;
/**
* The graph resulting from search.
*/
private Graph graph;
/**
* Represents the seed used to initialize the random number generator in the search algorithm. The seed determines
* the sequence of random numbers generated during the search. By setting a specific seed, you can reproduce the
* same sequence of random numbers for every run of the algorithm.
*/
private long seed = -1;
/**
* Determines if verbose mode is enabled or disabled.
*/
private boolean verbose = false;
/**
* Constructor.
*
* @param dataSet A continuous dataset over variables V.
* @param score   A score over variables V. (Used for the score-based adjacency searches.)
* @param test    An independence test over variables V. (Used for FAS.)
*/
public FaskOrig(DataSet dataSet, Score score, IndependenceTest test) {
if (dataSet == null) {
throw new NullPointerException("Data set not provided.");
}
if (!dataSet.isContinuous()) {
throw new IllegalArgumentException("For FASK, the dataset must be entirely continuous");
}
this.dataSet = dataSet;
this.test = test;
this.score = score;
this.regressionDataset = new RegressionDataset(dataSet);
this.orientationCutoff = getZForAlpha(0.01);
this.orientationAlpha = 0.01;
}
/**
* Calculates the left-right judgment for two arrays of double values. This is for version 2.
*
* @param x The data for the first variable.
* @param y The data for the second variable.
* @param empirical Whether to use an empirical judgment.
* @param delta The delta value for the judgment.
* @return The left-right judgment, which is negative if x&lt;-y, positive if x-&gt;y, and 0 if indeterminate.
*/
public static double faskLeftRightV2(double[] x, double[] y, boolean empirical, double delta) {
double sx = skewness(x);
double sy = skewness(y);
double r = correlation(x, y);
double lr = FaskOrig.correxp(x, y, x) - FaskOrig.correxp(x, y, y);
if (empirical) {
lr *= signum(sx) * signum(sy);
}
if (r < delta) {
lr *= -1;
}
return lr;
}
/**
* Calculates the left-right ratio using the Fask method version 1.
*
* @param x the array of values for variable x
* @param y the array of values for variable y
* @param empirical if true, applies empirical correction to the correlation coefficient
* @param delta the threshold value for determining the sign of the left-right ratio
* @return the left-right ratio
*/
public static double faskLeftRightV1(double[] x, double[] y, boolean empirical, double delta) {
double left = FaskOrig.cu(x, y, x) / (sqrt(FaskOrig.cu(x, x, x) * FaskOrig.cu(y, y, x)));
double right = FaskOrig.cu(x, y, y) / (sqrt(FaskOrig.cu(x, x, y) * FaskOrig.cu(y, y, y)));
double lr = left - right;
double r = correlation(x, y);
double sx = skewness(x);
double sy = skewness(y);
if (empirical) {
r *= signum(sx) * signum(sy);
}
lr *= signum(r);
if (r < delta) lr *= -1;
return lr;
}
/**
* Calculates a left-right judgment using the robust skewness between two arrays of double values.
*
* @param x The data for the first variable.
* @param y The data for the second variable.
* @param empirical Whether to use an empirical correction to the skewness.
* @return The left-right judgment based on robust skew.
*/
public static double robustSkew(double[] x, double[] y, boolean empirical) {
if (empirical) {
x = correctSkewness(x, skewness(x));
y = correctSkewness(y, skewness(y));
}
double[] lr = new double[x.length];
for (int i = 0; i < x.length; i++) {
lr[i] = g(x[i]) * y[i] - x[i] * g(y[i]);
}
return correlation(x, y) * mean(lr);
}
/**
* Calculates a left-right judgment using the skewness of two arrays of double values.
*
* @param x the first array of double values
* @param y the second array of double values
* @param empirical flag to indicate whether to apply empirical correction for skewness
* @return the left-right judgment based on skew
*/
public static double skew(double[] x, double[] y, boolean empirical) {
if (empirical) {
x = correctSkewness(x, skewness(x));
y = correctSkewness(y, skewness(y));
}
double[] lr = new double[x.length];
for (int i = 0; i < x.length; i++) {
lr[i] = x[i] * x[i] * y[i] - x[i] * y[i] * y[i];
}
return correlation(x, y) * mean(lr);
}
/**
* Calculates the logarithm of the hyperbolic cosine of the maximum of x and 0.
*
* @param x The input value.
* @return The result of the calculation.
*/
public static double g(double x) {
return log(cosh(FastMath.max(x, 0)));
}
/**
* Corrects the skewness of the given data using the provided skewness value.
*
* @param data The array of data to be corrected.
* @param sk The skewness value to be used for correction.
* @return The corrected data array.
*/
private static double[] correctSkewness(double[] data, double sk) {
double[] data2 = new double[data.length];
for (int i = 0; i < data.length; i++) data2[i] = data[i] * signum(sk);
return data2;
}
/**
* Calculates the conditional mean of the product of corresponding elements from two arrays based on a given
* condition.
*
* @param x an array of doubles to be multiplied
* @param y an array of doubles to be multiplied
* @param condition an array of doubles representing the condition for multiplication
* @return the conditional mean of the product of corresponding elements from x and y based on the given condition
*/
private static double cu(double[] x, double[] y, double[] condition) {
double exy = 0.0;
int n = 0;
for (int k = 0; k < x.length; k++) {
if (condition[k] > 0) {
exy += x[k] * y[k];
n++;
}
}
return exy / n;
}
/**
* Returns E(XY | Z > 0); Z is typically either X or Y.
*
* @param x an array of double values
* @param y an array of double values
* @param z an array of double values
* @return the expected value of the product of elements in x and y, considering only the elements in z that are
* greater than zero
*/
private static double E(double[] x, double[] y, double[] z) {
double exy = 0.0;
int n = 0;
for (int k = 0; k < x.length; k++) {
if (z[k] > 0) {
exy += x[k] * y[k];
n++;
}
}
return exy / n;
}
/**
* Returns E(XY | Z > 0) / sqrt(E(XX | Z > 0) * E(YY | Z > 0)). Z is typically either X or Y.
*
* @param x an array of double values representing the first set of data
* @param y an array of double values representing the second set of data
* @param z an array of double values representing the third set of data
* @return the conditional correlation E(XY | Z > 0) / sqrt(E(XX | Z > 0) * E(YY | Z > 0))
*/
private static double correxp(double[] x, double[] y, double[] z) {
return FaskOrig.E(x, y, z) / sqrt(FaskOrig.E(x, x, z) * FaskOrig.E(y, y, z));
}
/**
* Runs the search on the concatenated data, returning a graph, possibly cyclic, possibly with two-cycles. Runs the
* fast adjacency search (FAS, Spirtes et al., 2000) followed by a modification of the robust skew rule (Pairwise
* Likelihood Ratios for Estimation of Non-Gaussian Structural Equation Models, Hyvarinen and Smith), together with
* some heuristics for orienting two-cycles.
*
* @return the graph. Some edges may be undirected (though there shouldn't be many in most cases) and some adjacencies
* may be two-cycles.
*/
public Graph search() {
long start = MillisecondTimes.timeMillis();
NumberFormat nf = new DecimalFormat("0.000");
DataSet dataSet = DataTransforms.standardizeData(this.dataSet);
List<Node> variables = dataSet.getVariables();
double[][] lrs = getLrScores(); // Sets D.
for (int i = 0; i < variables.size(); i++) {
System.out.println("Skewness of " + variables.get(i) + " = " + skewness(this.D[i]));
}
TetradLogger.getInstance().log("FASK v. 2.0");
TetradLogger.getInstance().log("");
TetradLogger.getInstance().log("# variables = " + dataSet.getNumColumns());
TetradLogger.getInstance().log("N = " + dataSet.getNumRows());
TetradLogger.getInstance().log("Skewness edge threshold = " + this.skewEdgeThreshold);
TetradLogger.getInstance().log("Orientation Alpha = " + this.orientationAlpha);
TetradLogger.getInstance().log("2-cycle threshold = " + this.twoCycleScreeningCutoff);
TetradLogger.getInstance().log("");
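// Step 1: Find an initial adjacency graph using the configured adjacency method.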
Graph G;
if (this.adjacencyMethod == AdjacencyMethod.BOSS) {
PermutationSearch fas = new PermutationSearch(new Boss(this.score));
fas.setSeed(seed);
fas.setKnowledge(this.knowledge);
G = fas.search();
} else if (this.adjacencyMethod == AdjacencyMethod.GRASP) {
Grasp fas = new Grasp(this.score);
fas.setSeed(seed);
fas.setDepth(5);
fas.setNonSingularDepth(1);
fas.setUncoveredDepth(1);
fas.setNumStarts(5);
fas.setAllowInternalRandomness(true);
fas.setUseDataOrder(false);
fas.setKnowledge(this.knowledge);
fas.bestOrder(dataSet.getVariables());
G = fas.getGraph(true);
} else if (this.adjacencyMethod == AdjacencyMethod.FAS_STABLE) {
Fas fas = new Fas(this.test);
fas.setStable(true);
fas.setVerbose(false);
fas.setKnowledge(this.knowledge);
G = fas.search();
} else if (this.adjacencyMethod == AdjacencyMethod.FGES) {
Fges fas = new Fges(this.score);
fas.setVerbose(false);
fas.setKnowledge(this.knowledge);
G = fas.search();
} else if (this.adjacencyMethod == AdjacencyMethod.EXTERNAL_GRAPH) {
if (this.externalGraph == null) throw new IllegalStateException("An external graph was not supplied.");
Graph g1 = new EdgeListGraph(this.externalGraph.getNodes());
for (Edge edge : this.externalGraph.getEdges()) {
Node x = edge.getNode1();
Node y = edge.getNode2();
if (!g1.isAdjacentTo(x, y)) g1.addUndirectedEdge(x, y);
}
g1 = GraphUtils.replaceNodes(g1, dataSet.getVariables());
G = g1;
} else if (this.adjacencyMethod == AdjacencyMethod.NONE) {
G = new EdgeListGraph(variables);
} else {
throw new IllegalStateException("That method was not configured: " + this.adjacencyMethod);
}
G = GraphUtils.replaceNodes(G, dataSet.getVariables());
TetradLogger.getInstance().log("");
GraphSearchUtils.pcOrientbk(this.knowledge, G, G.getNodes(), verbose);
Graph graph = new EdgeListGraph(G.getNodes());
TetradLogger.getInstance().log("X\tY\tMethod\tLR\tEdge");
int V = variables.size();
List<NodePair> possibleTwoCycles = new ArrayList<>();
for (int i = 0; i < V; i++) {
for (int j = i + 1; j < V; j++) {
Node X = variables.get(i);
Node Y = variables.get(j);
// Centered
double[] x = this.D[i];
double[] y = this.D[j];
double cx = FaskOrig.correxp(x, y, x);
double cy = FaskOrig.correxp(x, y, y);
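// Keep the pair if the adjacency search found an edge, or if the difference between the conditional
// correlations corr(X, Y | X > 0) and corr(X, Y | Y > 0) exceeds the skew edge threshold.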
if (G.isAdjacentTo(X, Y) || (abs(cx - cy) > this.skewEdgeThreshold)) {
double lr = lrs[i][j];// leftRight(x, y);
if (edgeForbiddenByKnowledge(X, Y) && edgeForbiddenByKnowledge(Y, X)) {
TetradLogger.getInstance().log(X + "\t" + Y + "\tknowledge_forbidden"
+ "\t" + nf.format(lr)
+ "\t" + X + "<->" + Y
);
continue;
}
if (knowledgeOrients(X, Y)) {
TetradLogger.getInstance().log(X + "\t" + Y + "\tknowledge"
+ "\t" + nf.format(lr)
+ "\t" + X + "-->" + Y
);
graph.addDirectedEdge(X, Y);
} else if (knowledgeOrients(Y, X)) {
TetradLogger.getInstance().log(X + "\t" + Y + "\tknowledge"
+ "\t" + nf.format(lr)
+ "\t" + X + "<--" + Y
);
graph.addDirectedEdge(Y, X);
} else {
if (passesTwoCycleScreening(x, y)) {
if (this.twoCycleScreeningCutoff != 0) {
TetradLogger.getInstance().log(X + "\t" + Y + "\t2-cycle Prescreen"
+ "\t" + nf.format(lr)
+ "\t" + X + "...TC?..." + Y
);
}
possibleTwoCycles.add(new NodePair(X, Y));
}
if (lr > 0) {
TetradLogger.getInstance().log(X + "\t" + Y + "\tleft-right"
+ "\t" + nf.format(lr)
+ "\t" + X + "-->" + Y
);
graph.addDirectedEdge(X, Y);
} else if (lr < 0) {
TetradLogger.getInstance().log(Y + "\t" + X + "\tleft-right"
+ "\t" + nf.format(lr)
+ "\t" + Y + "-->" + X
);
graph.addDirectedEdge(Y, X);
}
}
}
}
}
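// Resolve candidate 2-cycles: with orientationAlpha == 0, every prescreened pair is accepted as a 2-cycle;
// with orientationAlpha > 0, each pair must also pass the statistical 2-cycle test before both edges are added.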
if (this.orientationAlpha == 0) {
for (NodePair edge : possibleTwoCycles) {
Node X = edge.getFirst();
Node Y = edge.getSecond();
graph.removeEdges(X, Y);
graph.addDirectedEdge(X, Y);
graph.addDirectedEdge(Y, X);
logTwoCycle(nf, variables, this.D, X, Y, "2-cycle Pre-screen");
}
} else if (this.orientationAlpha > 0) {
for (NodePair edge : possibleTwoCycles) {
Node X = edge.getFirst();
Node Y = edge.getSecond();
int i = variables.indexOf(X);
int j = variables.indexOf(Y);
if (twoCycleTest(i, j, this.D, graph, variables)) {
graph.removeEdges(X, Y);
graph.addDirectedEdge(X, Y);
graph.addDirectedEdge(Y, X);
logTwoCycle(nf, variables, this.D, X, Y, "2-cycle");
}
}
}
long stop = MillisecondTimes.timeMillis();
this.elapsed = stop - start;
this.graph = graph;
return graph;
}
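/**
* Prescreens a pair of variables for a possible 2-cycle by checking whether the absolute left-right score (FASK v1
* or v2) falls below the 2-cycle screening cutoff. If the cutoff is 0, every pair passes the screen.
*/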
private boolean passesTwoCycleScreening(double[] x, double[] y) {
if (this.twoCycleScreeningCutoff == 0) return true;
if (this.leftRight == LeftRight.FASK1) {
return abs(faskLeftRightV1(x, y, empirical, delta)) < this.twoCycleScreeningCutoff;
} else {
return abs(faskLeftRightV2(x, y, empirical, delta)) < this.twoCycleScreeningCutoff;
}
}
/**
* Returns the coefficient matrix for the search. If the search has not yet run, runs it, then estimates
* coefficients of each node given its parents using linear regression and forms the B matrix of coefficients from
* these estimates. B[i][j] != 0 means i->j with that coefficient.
*
* @return This matrix as a double[][] array.
*/
public double[][] getB() {
if (this.graph == null) search();
List<Node> nodes = this.dataSet.getVariables();
double[][] B = new double[nodes.size()][nodes.size()];
for (int j = 0; j < nodes.size(); j++) {
Node y = nodes.get(j);
List<Node> pary = new ArrayList<>(this.graph.getParents(y));
RegressionResult result = this.regressionDataset.regress(y, pary);
double[] coef = result.getCoef();
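// coef[0] is the regression intercept; the parents' coefficients start at index 1 (hence i + 1 below).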
for (int i = 0; i < pary.size(); i++) {
B[nodes.indexOf(pary.get(i))][j] = coef[i + 1];
}
}
return B;
}
/**
* Returns a matrix of left-right scores for the search. If lr = getLrScores(), then lr[i][j] is the left-right
* score leftRight(data[i], data[j]).
*
* @return This matrix as a double[][] array.
*/
public double[][] getLrScores() {
List<Node> variables = this.dataSet.getVariables();
double[][] D = DataTransforms.standardizeData(this.dataSet).getDoubleData().transpose().toArray();
double[][] lr = new double[variables.size()][variables.size()];
for (int i = 0; i < variables.size(); i++) {
for (int j = 0; j < variables.size(); j++) {
lr[i][j] = leftRight(D[i], D[j]);
}
}
this.D = D;
return lr;
}
/**
* Getter for the field depth.
*
* @return The depth of search for the Fast Adjacency Search (FAS).
*/
public int getDepth() {
return this.depth;
}
/**
* Setter for the field depth.
*
* @param depth The depth of search for the Fast Adjacency Search (FAS). The default is -1 (unlimited). Making this
* too high may result in statistical errors.
*/
public void setDepth(int depth) {
this.depth = depth;
}
/**
* getElapsedTime.
*
* @return The elapsed time in milliseconds.
*/
public long getElapsedTime() {
return this.elapsed;
}
/**
* Getter for the field knowledge.
*
* @return the current knowledge.
*/
public Knowledge getKnowledge() {
return this.knowledge;
}
/**
* Setter for the field knowledge.
*
* @param knowledge Knowledge of forbidden and required edges.
*/
public void setKnowledge(Knowledge knowledge) {
this.knowledge = new Knowledge(knowledge);
}
/**
* Sets the external graph to use. This graph will be used as a set of adjacencies to be included in the graph if
* the "external graph" option is selected. It doesn't matter what the orientations of the graph are; the graph
* will be reoriented using the selected left-right rule.
*
* @param externalGraph This graph.
*/
public void setExternalGraph(Graph externalGraph) {
this.externalGraph = externalGraph;
}
/**
* Sets the skew-edge threshold.
*
* @param skewEdgeThreshold This threshold.
*/
public void setSkewEdgeThreshold(double skewEdgeThreshold) {
this.skewEdgeThreshold = skewEdgeThreshold;
}
/**
* Sets the cutoff for two-cycle screening.
*
* @param twoCycleScreeningCutoff This cutoff.
*/
public void setTwoCycleScreeningCutoff(double twoCycleScreeningCutoff) {
if (twoCycleScreeningCutoff < 0)
throw new IllegalStateException("Two cycle screening threshold must be >= 0");
this.twoCycleScreeningCutoff = twoCycleScreeningCutoff;
}
/**
* Sets the orientation alpha.
*
* @param orientationAlpha This alpha.
*/
public void setOrientationAlpha(double orientationAlpha) {
if (orientationAlpha < 0 || orientationAlpha > 1)
throw new IllegalArgumentException("Two cycle testing alpha should be in [0, 1].");
this.orientationCutoff = getZForAlpha(orientationAlpha);
this.orientationAlpha = orientationAlpha;
}
/**
* Sets the left-right rule to use.
*
* @param leftRight This rule.
* @see LeftRight
*/
public void setLeftRight(LeftRight leftRight) {
this.leftRight = leftRight;
}
/**
* Sets the adjacency method used.
*
* @param adjacencyMethod This method.
* @see AdjacencyMethod
*/
public void setAdjacencyMethod(AdjacencyMethod adjacencyMethod) {
this.adjacencyMethod = adjacencyMethod;
}
/**
* Sets the delta to use.
*
* @param delta This delta.
*/
public void setDelta(double delta) {
this.delta = delta;
}
/**
* Sets whether the empirical option is selected.
*
* @param empirical True, if so.
*/
public void setEmpirical(boolean empirical) {
this.empirical = empirical;
}
/**
* A left/right judgment for double[] arrays (data) as input.
*
* @param x The data for the first variable.
* @param y The data for the second variable.
* @return The left-right judgment, which is negative if X<-Y, positive if X->Y, and 0 if indeterminate.
*/
public double leftRight(double[] x, double[] y) {
if (this.leftRight == LeftRight.FASK1) {
return faskLeftRightV1(x, y, empirical, delta);
} else if (this.leftRight == LeftRight.FASK2) {
return faskLeftRightV2(x, y, empirical, delta);
} else if (this.leftRight == LeftRight.RSKEW) {
return robustSkew(x, y, empirical);
} else if (this.leftRight == LeftRight.SKEW) {
return skew(x, y, empirical);
} else if (this.leftRight == LeftRight.TANH) {
return tanh(x, y, empirical);
}
throw new IllegalStateException("Left right rule not configured: " + this.leftRight);
}
/**
* Calculates a left-right judgment using the hyperbolic tangent of each element in the given arrays and performs a
* computation combining these results.
*
* @param x an array of doubles
* @param y an array of doubles
* @param empirical flag indicating whether empirical correction should be applied to the input arrays
* @return the final result of the computation
*/
private double tanh(double[] x, double[] y, boolean empirical) {
if (empirical) {
x = correctSkewness(x, skewness(x));
y = correctSkewness(y, skewness(y));
}
double[] lr = new double[x.length];
for (int i = 0; i < x.length; i++) {
lr[i] = x[i] * FastMath.tanh(y[i]) - FastMath.tanh(x[i]) * y[i];
}
return correlation(x, y) * mean(lr);
}
/**
* Determines if the knowledge orients the nodes X and Y.
*
* @param X The first node.
* @param Y The second node.
* @return true if knowledge forbids the edge Y->X or requires the edge X->Y; false otherwise.
*/
private boolean knowledgeOrients(Node X, Node Y) {
return this.knowledge.isForbidden(Y.getName(), X.getName()) || this.knowledge.isRequired(X.getName(), Y.getName());
}
/**
* Checks if an edge between two nodes is forbidden based on the knowledge.
*
* @param X the first node
* @param Y the second node
* @return true if the edge is forbidden in both directions, false otherwise
*/
private boolean edgeForbiddenByKnowledge(Node X, Node Y) {
return this.knowledge.isForbidden(Y.getName(), X.getName()) && this.knowledge.isForbidden(X.getName(), Y.getName());
}
/**
* Tests for the presence of a two-cycle in a graph.
*
* @param i The index of the first node in V.
* @param j The index of the second node in V.
* @param D The data, with D[i] the (standardized) data for variable i.
* @param G0 The original graph.
* @param V The list of nodes.
* @return True if a two-cycle is found, false otherwise.
*/
private boolean twoCycleTest(int i, int j, double[][] D, Graph G0, List<Node> V) {
Node X = V.get(i);
Node Y = V.get(j);
double[] x = D[i];
double[] y = D[j];
Set<Node> adjSet = new HashSet<>(G0.getAdjacentNodes(X));
adjSet.addAll(G0.getAdjacentNodes(Y));
List<Node> adj = new ArrayList<>(adjSet);
adj.remove(X);
adj.remove(Y);
SublistGenerator gen = new SublistGenerator(adj.size(), FastMath.min(this.depth, adj.size()));
int[] choice;
while ((choice = gen.next()) != null) {
List<Node> _adj = GraphUtils.asList(choice, adj);
double[][] _Z = new double[_adj.size()][];
for (int f = 0; f < _adj.size(); f++) {
Node _z = _adj.get(f);
int column = this.dataSet.getColumn(_z);
_Z[f] = D[column];
}
double pc;
double pc1;
double pc2;
try {
pc = partialCorrelation(x, y, _Z, x, Double.NEGATIVE_INFINITY);
pc1 = partialCorrelation(x, y, _Z, x, 0);
pc2 = partialCorrelation(x, y, _Z, y, 0);
} catch (SingularMatrixException e) {
TetradLogger.getInstance().log("Singularity X = " + X + " Y = " + Y + " adj = " + adj);
continue;
}
int nc = getRows(x, x, 0, Double.NEGATIVE_INFINITY).size();
int nc1 = getRows(x, x, 0, +1).size();
int nc2 = getRows(y, y, 0, +1).size();
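// Fisher z-transform the partial correlation computed over all rows and the versions restricted to rows with
// X > 0 and with Y > 0; significant differences between them are taken as evidence of a 2-cycle.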
double z = 0.5 * (log(1.0 + pc) - log(1.0 - pc));
double z1 = 0.5 * (log(1.0 + pc1) - log(1.0 - pc1));
double z2 = 0.5 * (log(1.0 + pc2) - log(1.0 - pc2));
double zv1 = (z - z1) / sqrt((1.0 / ((double) nc - 3) + 1.0 / ((double) nc1 - 3)));
double zv2 = (z - z2) / sqrt((1.0 / ((double) nc - 3) + 1.0 / ((double) nc2 - 3)));
boolean rejected1 = abs(zv1) > this.orientationCutoff;
boolean rejected2 = abs(zv2) > this.orientationCutoff;
boolean possibleTwoCycle = false;
if (zv1 < 0 && zv2 > 0 && rejected1) {
possibleTwoCycle = true;
} else if (zv1 > 0 && zv2 < 0 && rejected2) {
possibleTwoCycle = true;
} else if (rejected1 && rejected2) {
possibleTwoCycle = true;
}
if (!possibleTwoCycle) {
return false;
}
}
return true;
}
/**
* Calculates the zero-difference test for two variables. The zero-difference test compares the correlation of the
* two variables conditional on X > 0 with the correlation conditional on Y > 0 and checks whether the difference
* is statistically significant.
*
* @param i the index of the first variable in the data array
* @param j the index of the second variable in the data array
* @param D the data array where each row represents a variable and each column represents an observation
* @return true if the difference is not statistically significant (|z| does not exceed the two-cycle screening cutoff), false otherwise
* @throws RuntimeException if a singularity is encountered when computing partial correlation
*/
private boolean zeroDiff(int i, int j, double[][] D) {
double[] x = D[i];
double[] y = D[j];
double pc1;
double pc2;
try {
pc1 = partialCorrelation(x, y, new double[0][], x, 0);
pc2 = partialCorrelation(x, y, new double[0][], y, 0);
} catch (SingularMatrixException e) {
List<Node> nodes = dataSet.getVariables();
throw new RuntimeException("Singularity encountered (conditioning on X > 0, Y > 0) for variables "
+ nodes.get(i) + ", " + nodes.get(j));
}
int nc1 = getRows(x, x, 0, +1).size();
int nc2 = getRows(y, y, 0, +1).size();
double z1 = 0.5 * (log(1.0 + pc1) - log(1.0 - pc1));
double z2 = 0.5 * (log(1.0 + pc2) - log(1.0 - pc2));
double zv = (z1 - z2) / sqrt((1.0 / ((double) nc1 - 3) + 1.0 / ((double) nc2 - 3)));
return abs(zv) <= this.twoCycleScreeningCutoff;
}
/**
* Calculates the partial correlation coefficient between two variables while controlling for other variables.
*
* @param x the first variable
* @param y the second variable
* @param z the matrix containing the control variables
* @param condition the variable whose values determine which rows are included in the calculation
* @param threshold the threshold on the condition variable; only rows where the condition exceeds it are used
* @return the partial correlation coefficient
* @throws SingularMatrixException if the covariance matrix is singular and cannot be inverted
*/
private double partialCorrelation(double[] x, double[] y, double[][] z, double[] condition, double threshold) throws SingularMatrixException {
double[][] cv = covMatrix(x, y, z, condition, threshold, 1);
Matrix m = new Matrix(cv).transpose();
return StatUtils.partialCorrelation(m);
}
/**
* Logs the two-cycle information.
*
* @param nf The number format used to format the result.
* @param variables The list of nodes representing variables.
* @param d The data, with d[i] the data for variable i.
* @param X The first variable node.
* @param Y The second variable node.
* @param type The type of two-cycle.
*/
private void logTwoCycle(NumberFormat nf, List variables, double[][] d, Node X, Node Y, String type) {
int i = variables.indexOf(X);
int j = variables.indexOf(Y);
double[] x = d[i];
double[] y = d[j];
double lr = leftRight(x, y);
TetradLogger.getInstance().log(X + "\t" + Y + "\t" + type
+ "\t" + nf.format(lr)
+ "\t" + X + "<=>" + Y
);
}
/**
* Sets the seed for generating random numbers.
*
* @param seed the seed value to set
*/
public void setSeed(long seed) {
this.seed = seed;
}
/**
* Sets the verbose mode.
*
* @param verbose the flag indicating whether to enable verbose mode or not
*/
public void setVerbose(boolean verbose) {
this.verbose = verbose;
}
/**
* Enumerates the left-right rules available for FASK. Options include the FASK left-right rule and three
* left-right rules from the Hyvarinen and Smith pairwise orientation paper: Robust Skew, Skew, and Tanh. In that
* paper, "empirical" versions were given in which the variables are multiplied through by the signs of the
* skewnesses; we follow this advice here (with good results). These others are provided for those who prefer them.
*/
public enum LeftRight {
/**
* The original FASK left-right rule.
*/
FASK1,
/**
* The modified FASK left-right rule.
*/
FASK2,
/**
* The robust skew rule from the Hyvarinen and Smith paper.
*/
RSKEW,
/**
* The skew rule from the Hyvarinen and Smith paper.
*/
SKEW,
/**
* The tanh rule from the Hyvarinen and Smith paper.
*/
TANH
}
/**
* Enumerates the alternatives to use for finding the initial adjacencies for FASK.
*/
public enum AdjacencyMethod {
/**
* Fast Adjacency Search (FAS) with the stable option.
*/
FAS_STABLE,
/**
* FGES with the supplied score.
*/
FGES,
/**
* A permutation search with the BOSS algorithm.
*/
BOSS,
/**
* A permutation search with the GRASP algorithm.
*/
GRASP,
/**
* Use an external graph.
*/
EXTERNAL_GRAPH,
/**
* No initial adjacencies.
*/
NONE
}
}