///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below. //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard //
// Scheines, Joseph Ramsey, and Clark Glymour. //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation; either version 2 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program; if not, write to the Free Software //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //
///////////////////////////////////////////////////////////////////////////////
package edu.cmu.tetrad.search;
import edu.cmu.tetrad.data.DataSet;
import edu.cmu.tetrad.data.DataTransforms;
import edu.cmu.tetrad.data.Knowledge;
import edu.cmu.tetrad.graph.*;
import edu.cmu.tetrad.regression.RegressionDataset;
import edu.cmu.tetrad.regression.RegressionResult;
import edu.cmu.tetrad.search.score.Score;
import edu.cmu.tetrad.search.utils.GraphSearchUtils;
import edu.cmu.tetrad.util.*;
import org.apache.commons.math3.linear.SingularMatrixException;
import org.apache.commons.math3.util.FastMath;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static edu.cmu.tetrad.util.StatUtils.*;
import static org.apache.commons.math3.util.FastMath.*;
/**
* Implements the FASK (Fast Adjacency Skewness) algorithm, which makes decisions for adjacency and orientation using a
* combination of conditional independence testing, judgments of nonlinear adjacency, and pairwise orientation due to
* non-Gaussianity. The reference is this:
*
* Sanchez-Romero, R., Ramsey, J. D., Zhang, K., Glymour, M. R., Huang, B., and Glymour, C. (2019). Estimating
* feedforward and feedback effective connections from fMRI time series: Assessments of statistical methods. Network
* Neuroscience, 3(2), 274-306.
*
* Some adjustments have been made relative to that version, and some additional pairwise options have been added from
* this reference:
*
* Hyvärinen, A., and Smith, S. M. (2013). Pairwise likelihood ratios for estimation of non-Gaussian structural equation
* models. Journal of Machine Learning Research, 14(Jan), 111-152.
*
* This method (like the Hyvarinen and Smith methods) assumes that the data are generated by a linear, non-Gaussian
* causal process and attempts to recover the causal graph for that process. It does not attempt to recover the
* parametrization of this graph; for that, a separate estimation algorithm would be needed, such as linear regression
* of each node onto its parents. A further assumption is made, that there are no latent common causes of the measured
* variables. This is not a constraint on the pairwise orientation methods, since they orient with respect only to the
* two variables at the endpoints of an edge and so are happy with all other variables being considered latent with
* respect to that single edge. However, if the built-in adjacency search is used (FAS-Stable), the existence of
* latents will throw this method off.
*
* As was shown in the Hyvarinen and Smith paper above, FASK works quite well even if the graph contains feedback loops
* in most configurations, including 2-cycles. 2-cycles can be detected fairly well if the FASK left-right rule is
* selected and the 2-cycle threshold set to about 0.1--more will be detected (or hallucinated) if the threshold is set
* higher. As shown in the Sanchez-Romero reference above, 2-cycle detection of the FASK algorithm using this rule is
* quite good.
*
* Some edges may be undiscoverable by FAS-Stable; to recover more of these edges, a test related to the FASK left-right
* rule is used, and there is a threshold for this test. A good default for this threshold (the "skew edge threshold")
* is 0.3. For more of these edges, set this threshold to a lower number.
*
* It is assumed that the data are arranged so each variable forms a column and that there are no missing values. The
* data matrix is assumed to be rectangular. To this end, the Tetrad DataSet class is used, which enforces this.
*
* Note that orienting a DAG for a linear, non-Gaussian model using the Hyvarinen and Smith pairwise rules is
* alternatively known in the literature as Pairwise LiNGAM--see Hyvärinen, A., and Smith, S. M. (2013). Pairwise
* likelihood ratios for estimation of non-Gaussian structural equation models. Journal of Machine Learning Research,
* 14(Jan), 111-152. We include some of these methods here for comparison.
*
* Parameters:
*
* faskAdjacencyMethod: 1 # this runs FAS-Stable (the one used in the paper). See Algorithm 2.
*
* depth: -1 # controls the size of the conditioning sets in the independence tests; setting this to a small integer
* may reduce the running time, but can also result in false positives. -1 means that "all" possible sizes are checked.
*
* test: sem-bic-test # test for FAS adjacency
*
* score: sem-bic-score
*
* semBicRule: 1 # sets the Chickering Rule, used in the original FASK
*
* penaltyDiscount: 2 # if using sem-bic as the independence test (as in the paper). In the paper this is referred to
* as c. Check steps 1 and 10 in Algorithm 2, FAS-Stable.
*
* skewEdgeThreshold: 0.3 # See the description of the FASK algorithm, and step 11 in Algorithm 1, FASK. Threshold for
* adding edges that may not have been inferred because a positive/negative cycle resulted in a near-zero observed
* relation.
*
* faskLeftRightRule: 1 # this runs FASK v1, the original FASK rule from the paper
*
* faskDelta: -0.3 # See steps 1 and 11 in Algorithm 4 (this is the value set in the paper)
*
* twoCycleScreeningThreshold: 0 # not used in the original paper implementation; added afterwards. You can set it to
* 0.3, for example, to use it as a filter before running Algorithm 3, 2-cycle detection, which may take some time to run.
*
* orientationAlpha: 0.1 # referred to in the paper as TwoCycle Alpha or just alpha; the lower it is, the lower the
* chance of inferring a 2-cycle. Check steps 17 to 28 in Algorithm 3: 2-Cycle Detection Rule.
*
* structurePrior: 0 # prior on the number of parents. Not used in the paper implementation.
*
* So a command-line run would look like this:
*
* java -jar -Xmx10G causal-cmd-1.4.1-jar-with-dependencies.jar --delimiter tab --data-type continuous --dataset
* concat_BOLDfslfilter_60_FullMacaque.txt --prefix Fask_Test_MacaqueFull --algorithm fask --faskAdjacencyMethod 1
* --depth -1 --test sem-bic-test --score sem-bic-score --semBicRule 1 --penaltyDiscount 2 --skewEdgeThreshold 0.3
* --faskLeftRightRule 1 --faskDelta -0.3 --twoCycleScreeningThreshold 0 --orientationAlpha 0.1 --structurePrior 0
*
* This class is configured to respect knowledge of forbidden and required edges, including knowledge of temporal
* tiers.
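*
* A rough programmatic sketch of the same configuration (the construction of the score and the independence test is
* only indicated here; any Score and IndependenceTest defined over the dataset's variables may be substituted):
*
* Score score = ...;             // e.g., a SEM BIC score over the dataset
* IndependenceTest test = ...;   // e.g., a Fisher Z test over the dataset
* FaskOrig fask = new FaskOrig(dataSet, score, test);
* fask.setAdjacencyMethod(FaskOrig.AdjacencyMethod.FAS_STABLE);
* fask.setLeftRight(FaskOrig.LeftRight.FASK1);
* fask.setSkewEdgeThreshold(0.3);
* fask.setDelta(-0.3);
* fask.setOrientationAlpha(0.1);
* Graph graph = fask.search();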
*
* This is the code as it was before the cleanup on 2024-5-16.
*
* @author josephramsey
* @author rubensanchez
* @version $Id: $Id
* @see Knowledge
* @see Lofs
*/
public final class FaskOrig implements IGraphSearch {
/**
* The independence test to be used for the FAS adjacency search.
*/
private final IndependenceTest test;
/**
* The score to be used for the score-based adjacency searches (BOSS, GRASP, FGES).
*/
private final Score score;
/**
* The dataset being analyzed.
*/
private final DataSet dataSet;
/**
* Used for calculating coefficient values.
*/
private final RegressionDataset regressionDataset;
/**
* Represents the data as a double[][] array, with D[i] being the data for variable i.
*/
private double[][] D;
/**
* An initial graph to constrain the adjacency step.
*/
private Graph externalGraph;
/**
* Elapsed time of the search, in milliseconds.
*/
private long elapsed;
/**
* For the Fast Adjacency Search, the maximum number of variables in a conditioning set.
*/
private int depth = -1;
/**
* Knowledge the search will obey, of forbidden and required edges.
*/
private Knowledge knowledge = new Knowledge();
/**
* A threshold for including extra adjacencies due to skewness. Default is 0.3. For more edges, lower this
* threshold.
*/
private double skewEdgeThreshold;
/**
* A threshold for making 2-cycles. Note that the 2-cycle rule will only work with the FASK left-right rule. The
* default is 0 (no 2-cycles); a good value for finding a decent set of 2-cycles is 0.1.
*/
private double twoCycleScreeningCutoff;
/**
* At the end of the procedure, two cycles marked in the graph (for having small LR differences) are then tested
* statistically to see if they are two-cycles, using this cutoff. To adjust this cutoff, set the two-cycle alpha to
* a number in [0, 1]. The default alpha is 0.01.
*/
private double orientationCutoff;
/**
* The corresponding alpha.
*/
private double orientationAlpha;
/**
* Bias for orienting with negative coefficients.
*/
private double delta;
/**
* Whether X and Y should be adjusted for skewness. (Otherwise, they are assumed to have positive skewness.)
*/
private boolean empirical = true;
/**
* By default, GRASP will be used for adjacencies, though this can be set.
*/
private AdjacencyMethod adjacencyMethod = AdjacencyMethod.GRASP;
/**
* The left-right rule to use; the default is RSKEW.
*/
private LeftRight leftRight = LeftRight.RSKEW;
/**
* The graph resulting from search.
*/
private Graph graph;
/**
* Represents the seed used to initialize the random number generator in the search algorithm. The seed determines
* the sequence of random numbers generated during the search. By setting a specific seed, you can reproduce the
* same sequence of random numbers for every run of the algorithm.
*/
private long seed = -1;
/**
* Determines if verbose mode is enabled or disabled.
*/
private boolean verbose = false;
/**
* Constructor.
*
* @param dataSet A continuous dataset over variables V.
* @param score   A score over variables V. (Used for the score-based adjacency searches.)
* @param test    An independence test over variables V. (Used for FAS.)
*/
public FaskOrig(DataSet dataSet, Score score, IndependenceTest test) {
if (dataSet == null) {
throw new NullPointerException("Data set not provided.");
}
if (!dataSet.isContinuous()) {
throw new IllegalArgumentException("For FASK, the dataset must be entirely continuous");
}
this.dataSet = dataSet;
this.test = test;
this.score = score;
this.regressionDataset = new RegressionDataset(dataSet);
this.orientationCutoff = getZForAlpha(0.01);
this.orientationAlpha = 0.01;
}
/**
* Calculates the left-right judgment for two arrays of double values. This is for version 2.
*
* @param x The data for the first variable.
* @param y The data for the second variable.
* @param empirical Whether to use an empirical judgment.
* @param delta The delta value for the judgment.
* @return The left-right judgment, which is negative if x&lt;-y, positive if x-&gt;y, and 0 if indeterminate.
*/
public static double faskLeftRightV2(double[] x, double[] y, boolean empirical, double delta) {
double sx = skewness(x);
double sy = skewness(y);
double r = correlation(x, y);
double lr = FaskOrig.correxp(x, y, x) - FaskOrig.correxp(x, y, y);
if (empirical) {
lr *= signum(sx) * signum(sy);
}
if (r < delta) {
lr *= -1;
}
return lr;
}
/**
* Calculates the left-right ratio using the Fask method version 1.
*
* @param x the array of values for variable x
* @param y the array of values for variable y
* @param empirical if true, applies empirical correction to the correlation coefficient
* @param delta the threshold value for determining the sign of the left-right ratio
* @return the left-right ratio
*/
public static double faskLeftRightV1(double[] x, double[] y, boolean empirical, double delta) {
double left = FaskOrig.cu(x, y, x) / (sqrt(FaskOrig.cu(x, x, x) * FaskOrig.cu(y, y, x)));
double right = FaskOrig.cu(x, y, y) / (sqrt(FaskOrig.cu(x, x, y) * FaskOrig.cu(y, y, y)));
double lr = left - right;
double r = correlation(x, y);
double sx = skewness(x);
double sy = skewness(y);
if (empirical) {
r *= signum(sx) * signum(sy);
}
lr *= signum(r);
if (r < delta) lr *= -1;
return lr;
}
/**
* Calculates a left-right judgment using the robust skewness between two arrays of double values.
*
* @param x The data for the first variable.
* @param y The data for the second variable.
* @param empirical Whether to use an empirical correction to the skewness.
* @return The left-right judgment based on robust skew.
*/
public static double robustSkew(double[] x, double[] y, boolean empirical) {
if (empirical) {
x = correctSkewness(x, skewness(x));
y = correctSkewness(y, skewness(y));
}
double[] lr = new double[x.length];
for (int i = 0; i < x.length; i++) {
lr[i] = g(x[i]) * y[i] - x[i] * g(y[i]);
}
return correlation(x, y) * mean(lr);
}
/**
* Calculates a left-right judgment using the skewness of two arrays of double values.
*
* @param x the first array of double values
* @param y the second array of double values
* @param empirical flag to indicate whether to apply empirical correction for skewness
* @return the left-right judgment based on skew
*/
public static double skew(double[] x, double[] y, boolean empirical) {
if (empirical) {
x = correctSkewness(x, skewness(x));
y = correctSkewness(y, skewness(y));
}
double[] lr = new double[x.length];
for (int i = 0; i < x.length; i++) {
lr[i] = x[i] * x[i] * y[i] - x[i] * y[i] * y[i];
}
return correlation(x, y) * mean(lr);
}
/**
* Calculates the logarithm of the hyperbolic cosine of the maximum of x and 0.
*
* @param x The input value.
* @return The result of the calculation.
*/
public static double g(double x) {
return log(cosh(FastMath.max(x, 0)));
}
/**
* Corrects the skewness of the given data using the provided skewness value.
*
* @param data The array of data to be corrected.
* @param sk The skewness value to be used for correction.
* @return The corrected data array.
*/
private static double[] correctSkewness(double[] data, double sk) {
double[] data2 = new double[data.length];
for (int i = 0; i < data.length; i++) data2[i] = data[i] * signum(sk);
return data2;
}
/**
* Calculates the conditional mean of the product of corresponding elements from two arrays based on a given
* condition.
*
* @param x an array of doubles to be multiplied
* @param y an array of doubles to be multiplied
* @param condition an array of doubles representing the condition for multiplication
* @return the conditional mean of the product of corresponding elements from x and y based on the given condition
*/
private static double cu(double[] x, double[] y, double[] condition) {
double exy = 0.0;
int n = 0;
for (int k = 0; k < x.length; k++) {
if (condition[k] > 0) {
exy += x[k] * y[k];
n++;
}
}
return exy / n;
}
/**
* Returns E(XY | Z > 0); Z is typically either X or Y.
*
* @param x an array of double values
* @param y an array of double values
* @param z an array of double values
* @return the expected value of the product of elements in x and y, considering only the elements in z that are
* greater than zero
*/
private static double E(double[] x, double[] y, double[] z) {
double exy = 0.0;
int n = 0;
for (int k = 0; k < x.length; k++) {
if (z[k] > 0) {
exy += x[k] * y[k];
n++;
}
}
return exy / n;
}
/**
* Returns E(XY | Z > 0) / sqrt(E(XX | Z > 0) * E(YY | Z > 0)). Z is typically either X or Y.
*
* @param x an array of double values representing the first set of data
* @param y an array of double values representing the second set of data
* @param z an array of double values representing the third set of data
* @return the conditional correlation E(XY | Z > 0) / sqrt(E(XX | Z > 0) * E(YY | Z > 0))
*/
private static double correxp(double[] x, double[] y, double[] z) {
return FaskOrig.E(x, y, z) / sqrt(FaskOrig.E(x, x, z) * FaskOrig.E(y, y, z));
}
/**
* Runs the search on the concatenated data, returning a graph, possibly cyclic, possibly with two-cycles. Runs the
* fast adjacency search (FAS, Spirtes et al., 2000) followed by a modification of the robust skew rule (Pairwise
* Likelihood Ratios for Estimation of Non-Gaussian Structural Equation Models, Hyvarinen and Smith), together with
* some heuristics for orienting two-cycles.
*
* @return the graph. Some edges may be undirected (though there shouldn't be many in most cases) and some adjacencies
* may be two-cycles.
*/
public Graph search() {
long start = MillisecondTimes.timeMillis();
NumberFormat nf = new DecimalFormat("0.000");
DataSet dataSet = DataTransforms.standardizeData(this.dataSet);
List<Node> variables = dataSet.getVariables();
double[][] lrs = getLrScores(); // Sets D.
for (int i = 0; i < variables.size(); i++) {
System.out.println("Skewness of " + variables.get(i) + " = " + skewness(this.D[i]));
}
TetradLogger.getInstance().log("FASK v. 2.0");
TetradLogger.getInstance().log("");
TetradLogger.getInstance().log("# variables = " + dataSet.getNumColumns());
TetradLogger.getInstance().log("N = " + dataSet.getNumRows());
TetradLogger.getInstance().log("Skewness edge threshold = " + this.skewEdgeThreshold);
TetradLogger.getInstance().log("Orientation Alpha = " + this.orientationAlpha);
TetradLogger.getInstance().log("2-cycle threshold = " + this.twoCycleScreeningCutoff);
TetradLogger.getInstance().log("");
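// Step 1: Find an initial adjacency graph using the configured adjacency method.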
Graph G;
if (this.adjacencyMethod == AdjacencyMethod.BOSS) {
PermutationSearch fas = new PermutationSearch(new Boss(this.score));
fas.setSeed(seed);
fas.setKnowledge(this.knowledge);
G = fas.search();
} else if (this.adjacencyMethod == AdjacencyMethod.GRASP) {
Grasp fas = new Grasp(this.score);
fas.setSeed(seed);
fas.setDepth(5);
fas.setNonSingularDepth(1);
fas.setUncoveredDepth(1);
fas.setNumStarts(5);
fas.setAllowInternalRandomness(true);
fas.setUseDataOrder(false);
fas.setKnowledge(this.knowledge);
fas.bestOrder(dataSet.getVariables());
G = fas.getGraph(true);
} else if (this.adjacencyMethod == AdjacencyMethod.FAS_STABLE) {
Fas fas = new Fas(this.test);
fas.setStable(true);
fas.setVerbose(false);
fas.setKnowledge(this.knowledge);
G = fas.search();
} else if (this.adjacencyMethod == AdjacencyMethod.FGES) {
Fges fas = new Fges(this.score);
fas.setVerbose(false);
fas.setKnowledge(this.knowledge);
G = fas.search();
} else if (this.adjacencyMethod == AdjacencyMethod.EXTERNAL_GRAPH) {
if (this.externalGraph == null) throw new IllegalStateException("An external graph was not supplied.");
Graph g1 = new EdgeListGraph(this.externalGraph.getNodes());
for (Edge edge : this.externalGraph.getEdges()) {
Node x = edge.getNode1();
Node y = edge.getNode2();
if (!g1.isAdjacentTo(x, y)) g1.addUndirectedEdge(x, y);
}
g1 = GraphUtils.replaceNodes(g1, dataSet.getVariables());
G = g1;
} else if (this.adjacencyMethod == AdjacencyMethod.NONE) {
G = new EdgeListGraph(variables);
} else {
throw new IllegalStateException("That method was not configured: " + this.adjacencyMethod);
}
G = GraphUtils.replaceNodes(G, dataSet.getVariables());
TetradLogger.getInstance().log("");
GraphSearchUtils.pcOrientbk(this.knowledge, G, G.getNodes(), verbose);
Graph graph = new EdgeListGraph(G.getNodes());
TetradLogger.getInstance().log("X\tY\tMethod\tLR\tEdge");
int V = variables.size();
List<NodePair> possibleTwoCycles = new ArrayList<>();
for (int i = 0; i < V; i++) {
for (int j = i + 1; j < V; j++) {
Node X = variables.get(i);
Node Y = variables.get(j);
// Centered
double[] x = this.D[i];
double[] y = this.D[j];
double cx = FaskOrig.correxp(x, y, x);
double cy = FaskOrig.correxp(x, y, y);
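// Keep the pair if the adjacency search found an edge, or if the difference between the conditional
// correlations corr(X, Y | X > 0) and corr(X, Y | Y > 0) exceeds the skew edge threshold.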
if (G.isAdjacentTo(X, Y) || (abs(cx - cy) > this.skewEdgeThreshold)) {
double lr = lrs[i][j];// leftRight(x, y);
if (edgeForbiddenByKnowledge(X, Y) && edgeForbiddenByKnowledge(Y, X)) {
TetradLogger.getInstance().log(X + "\t" + Y + "\tknowledge_forbidden"
+ "\t" + nf.format(lr)
+ "\t" + X + "<->" + Y
);
continue;
}
if (knowledgeOrients(X, Y)) {
TetradLogger.getInstance().log(X + "\t" + Y + "\tknowledge"
+ "\t" + nf.format(lr)
+ "\t" + X + "-->" + Y
);
graph.addDirectedEdge(X, Y);
} else if (knowledgeOrients(Y, X)) {
TetradLogger.getInstance().log(X + "\t" + Y + "\tknowledge"
+ "\t" + nf.format(lr)
+ "\t" + X + "<--" + Y
);
graph.addDirectedEdge(Y, X);
} else {
if (passesTwoCycleScreening(x, y)) {
if (this.twoCycleScreeningCutoff != 0) {
TetradLogger.getInstance().log(X + "\t" + Y + "\t2-cycle Prescreen"
+ "\t" + nf.format(lr)
+ "\t" + X + "...TC?..." + Y
);
}
possibleTwoCycles.add(new NodePair(X, Y));
}
if (lr > 0) {
TetradLogger.getInstance().log(X + "\t" + Y + "\tleft-right"
+ "\t" + nf.format(lr)
+ "\t" + X + "-->" + Y
);
graph.addDirectedEdge(X, Y);
} else if (lr < 0) {
TetradLogger.getInstance().log(Y + "\t" + X + "\tleft-right"
+ "\t" + nf.format(lr)
+ "\t" + Y + "-->" + X
);
graph.addDirectedEdge(Y, X);
}
}
}
}
}
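// Resolve candidate 2-cycles: with orientationAlpha == 0, every prescreened pair is accepted as a 2-cycle;
// with orientationAlpha > 0, each pair must also pass the statistical 2-cycle test before both edges are added.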
if (this.orientationAlpha == 0) {
for (NodePair edge : possibleTwoCycles) {
Node X = edge.getFirst();
Node Y = edge.getSecond();
graph.removeEdges(X, Y);
graph.addDirectedEdge(X, Y);
graph.addDirectedEdge(Y, X);
logTwoCycle(nf, variables, this.D, X, Y, "2-cycle Pre-screen");
}
} else if (this.orientationAlpha > 0) {
for (NodePair edge : possibleTwoCycles) {
Node X = edge.getFirst();
Node Y = edge.getSecond();
int i = variables.indexOf(X);
int j = variables.indexOf(Y);
if (twoCycleTest(i, j, this.D, graph, variables)) {
graph.removeEdges(X, Y);
graph.addDirectedEdge(X, Y);
graph.addDirectedEdge(Y, X);
logTwoCycle(nf, variables, this.D, X, Y, "2-cycle");
}
}
}
long stop = MillisecondTimes.timeMillis();
this.elapsed = stop - start;
this.graph = graph;
return graph;
}
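/**
* Prescreens a pair of variables for a possible 2-cycle by checking whether the absolute left-right score (FASK v1
* or v2) falls below the 2-cycle screening cutoff. If the cutoff is 0, every pair passes the screen.
*/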
private boolean passesTwoCycleScreening(double[] x, double[] y) {
if (this.twoCycleScreeningCutoff == 0) return true;
if (this.leftRight == LeftRight.FASK1) {
return abs(faskLeftRightV1(x, y, empirical, delta)) < this.twoCycleScreeningCutoff;
} else {
return abs(faskLeftRightV2(x, y, empirical, delta)) < this.twoCycleScreeningCutoff;
}
}
/**
* Returns the coefficient matrix for the search. If the search has not yet run, runs it, then estimates
* coefficients of each node given its parents using linear regression and forms the B matrix of coefficients from
* these estimates. B[i][j] != 0 means i->j with that coefficient.
*
* @return This matrix as a double[][] array.
*/
public double[][] getB() {
if (this.graph == null) search();
List<Node> nodes = this.dataSet.getVariables();
double[][] B = new double[nodes.size()][nodes.size()];
for (int j = 0; j < nodes.size(); j++) {
Node y = nodes.get(j);
List<Node> pary = new ArrayList<>(this.graph.getParents(y));
RegressionResult result = this.regressionDataset.regress(y, pary);
double[] coef = result.getCoef();
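// coef[0] is the regression intercept; the parents' coefficients start at index 1 (hence i + 1 below).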
for (int i = 0; i < pary.size(); i++) {
B[nodes.indexOf(pary.get(i))][j] = coef[i + 1];
}
}
return B;
}
/**
* Returns a matrix of left-right scores for the search. If lr = getLrScores(), then lr[i][j] is the left-right
* score leftRight(data[i], data[j]).
*
* @return This matrix as a double[][] array.
*/
public double[][] getLrScores() {
List<Node> variables = this.dataSet.getVariables();
double[][] D = DataTransforms.standardizeData(this.dataSet).getDoubleData().transpose().toArray();
double[][] lr = new double[variables.size()][variables.size()];
for (int i = 0; i < variables.size(); i++) {
for (int j = 0; j < variables.size(); j++) {
lr[i][j] = leftRight(D[i], D[j]);
}
}
this.D = D;
return lr;
}
/**
* Getter for the field depth.
*
* @return The depth of search for the Fast Adjacency Search (FAS).
*/
public int getDepth() {
return this.depth;
}
/**
* Setter for the field depth.
*
* @param depth The depth of search for the Fast Adjacency Search (FAS). The default is -1 (unlimited). Making this
* too high may result in statistical errors.
*/
public void setDepth(int depth) {
this.depth = depth;
}
/**
* getElapsedTime.
*
* @return The elapsed time in milliseconds.
*/
public long getElapsedTime() {
return this.elapsed;
}
/**
* Getter for the field knowledge.
*
* @return the current knowledge.
*/
public Knowledge getKnowledge() {
return this.knowledge;
}
/**
* Setter for the field knowledge.
*
* @param knowledge Knowledge of forbidden and required edges.
*/
public void setKnowledge(Knowledge knowledge) {
this.knowledge = new Knowledge(knowledge);
}
/**
* Sets the external graph to use. This graph will be used as a set of adjacencies to be included in the graph if
* the "external graph" option is selected. It doesn't matter what the orientations of the graph are; the graph
* will be reoriented using the selected left-right rule.
*
* @param externalGraph This graph.
*/
public void setExternalGraph(Graph externalGraph) {
this.externalGraph = externalGraph;
}
/**
* Sets the skew-edge threshold.
*
* @param skewEdgeThreshold This threshold.
*/
public void setSkewEdgeThreshold(double skewEdgeThreshold) {
this.skewEdgeThreshold = skewEdgeThreshold;
}
/**
* Sets the cutoff for two-cycle screening.
*
* @param twoCycleScreeningCutoff This cutoff.
*/
public void setTwoCycleScreeningCutoff(double twoCycleScreeningCutoff) {
if (twoCycleScreeningCutoff < 0)
throw new IllegalStateException("Two cycle screening threshold must be >= 0");
this.twoCycleScreeningCutoff = twoCycleScreeningCutoff;
}
/**
* Sets the orientation alpha.
*
* @param orientationAlpha This alpha.
*/
public void setOrientationAlpha(double orientationAlpha) {
if (orientationAlpha < 0 || orientationAlpha > 1)
throw new IllegalArgumentException("Two cycle testing alpha should be in [0, 1].");
this.orientationCutoff = getZForAlpha(orientationAlpha);
this.orientationAlpha = orientationAlpha;
}
/**
* Sets the left-right rule to use.
*
* @param leftRight This rule.
* @see LeftRight
*/
public void setLeftRight(LeftRight leftRight) {
this.leftRight = leftRight;
}
/**
* Sets the adjacency method used.
*
* @param adjacencyMethod This method.
* @see AdjacencyMethod
*/
public void setAdjacencyMethod(AdjacencyMethod adjacencyMethod) {
this.adjacencyMethod = adjacencyMethod;
}
/**
* Sets the delta to use.
*
* @param delta This delta.
*/
public void setDelta(double delta) {
this.delta = delta;
}
/**
* Sets whether the empirical option is selected.
*
* @param empirical True, if so.
*/
public void setEmpirical(boolean empirical) {
this.empirical = empirical;
}
/**
* A left/right judgment for double[] arrays (data) as input.
*
* @param x The data for the first variable.
* @param y The data for the second variable.
* @return The left-right judgment, which is negative if X<-Y, positive if X->Y, and 0 if indeterminate.
*/
public double leftRight(double[] x, double[] y) {
if (this.leftRight == LeftRight.FASK1) {
return faskLeftRightV1(x, y, empirical, delta);
} else if (this.leftRight == LeftRight.FASK2) {
return faskLeftRightV2(x, y, empirical, delta);
} else if (this.leftRight == LeftRight.RSKEW) {
return robustSkew(x, y, empirical);
} else if (this.leftRight == LeftRight.SKEW) {
return skew(x, y, empirical);
} else if (this.leftRight == LeftRight.TANH) {
return tanh(x, y, empirical);
}
throw new IllegalStateException("Left right rule not configured: " + this.leftRight);
}
/**
* Calculates a left-right judgment using the hyperbolic tangent of each element in the given arrays and performs a
* computation combining these results.
*
* @param x an array of doubles
* @param y an array of doubles
* @param empirical flag indicating whether empirical correction should be applied to the input arrays
* @return the final result of the computation
*/
private double tanh(double[] x, double[] y, boolean empirical) {
if (empirical) {
x = correctSkewness(x, skewness(x));
y = correctSkewness(y, skewness(y));
}
double[] lr = new double[x.length];
for (int i = 0; i < x.length; i++) {
lr[i] = x[i] * FastMath.tanh(y[i]) - FastMath.tanh(x[i]) * y[i];
}
return correlation(x, y) * mean(lr);
}
/**
* Determines if the knowledge orients the nodes X and Y.
*
* @param X The first node.
* @param Y The second node.
* @return true if knowledge forbids the edge Y->X or requires the edge X->Y; false otherwise.
*/
private boolean knowledgeOrients(Node X, Node Y) {
return this.knowledge.isForbidden(Y.getName(), X.getName()) || this.knowledge.isRequired(X.getName(), Y.getName());
}
/**
* Checks if an edge between two nodes is forbidden based on the knowledge.
*
* @param X the first node
* @param Y the second node
* @return true if the edge is forbidden in both directions, false otherwise
*/
private boolean edgeForbiddenByKnowledge(Node X, Node Y) {
return this.knowledge.isForbidden(Y.getName(), X.getName()) && this.knowledge.isForbidden(X.getName(), Y.getName());
}
/**
* Tests for the presence of a two-cycle in a graph.
*
* @param i The index of the first node in V.
* @param j The index of the second node in V.
* @param D The data, with D[i] the (standardized) data for variable i.
* @param G0 The original graph.
* @param V The list of nodes.
* @return True if a two-cycle is found, false otherwise.
*/
private boolean twoCycleTest(int i, int j, double[][] D, Graph G0, List<Node> V) {
Node X = V.get(i);
Node Y = V.get(j);
double[] x = D[i];
double[] y = D[j];
Set<Node> adjSet = new HashSet<>(G0.getAdjacentNodes(X));
adjSet.addAll(G0.getAdjacentNodes(Y));
List<Node> adj = new ArrayList<>(adjSet);
adj.remove(X);
adj.remove(Y);
SublistGenerator gen = new SublistGenerator(adj.size(), FastMath.min(this.depth, adj.size()));
int[] choice;
while ((choice = gen.next()) != null) {
List<Node> _adj = GraphUtils.asList(choice, adj);
double[][] _Z = new double[_adj.size()][];
for (int f = 0; f < _adj.size(); f++) {
Node _z = _adj.get(f);
int column = this.dataSet.getColumn(_z);
_Z[f] = D[column];
}
double pc;
double pc1;
double pc2;
try {
pc = partialCorrelation(x, y, _Z, x, Double.NEGATIVE_INFINITY);
pc1 = partialCorrelation(x, y, _Z, x, 0);
pc2 = partialCorrelation(x, y, _Z, y, 0);
} catch (SingularMatrixException e) {
TetradLogger.getInstance().log("Singularity X = " + X + " Y = " + Y + " adj = " + adj);
continue;
}
int nc = getRows(x, x, 0, Double.NEGATIVE_INFINITY).size();
int nc1 = getRows(x, x, 0, +1).size();
int nc2 = getRows(y, y, 0, +1).size();
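// Fisher z-transform the partial correlation computed over all rows and the versions restricted to rows with
// X > 0 and with Y > 0; significant differences between them are taken as evidence of a 2-cycle.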
double z = 0.5 * (log(1.0 + pc) - log(1.0 - pc));
double z1 = 0.5 * (log(1.0 + pc1) - log(1.0 - pc1));
double z2 = 0.5 * (log(1.0 + pc2) - log(1.0 - pc2));
double zv1 = (z - z1) / sqrt((1.0 / ((double) nc - 3) + 1.0 / ((double) nc1 - 3)));
double zv2 = (z - z2) / sqrt((1.0 / ((double) nc - 3) + 1.0 / ((double) nc2 - 3)));
boolean rejected1 = abs(zv1) > this.orientationCutoff;
boolean rejected2 = abs(zv2) > this.orientationCutoff;
boolean possibleTwoCycle = false;
if (zv1 < 0 && zv2 > 0 && rejected1) {
possibleTwoCycle = true;
} else if (zv1 > 0 && zv2 < 0 && rejected2) {
possibleTwoCycle = true;
} else if (rejected1 && rejected2) {
possibleTwoCycle = true;
}
if (!possibleTwoCycle) {
return false;
}
}
return true;
}
/**
* Calculates the zero-difference test for two variables. The zero-difference test compares the correlation of the
* two variables conditional on X > 0 with the correlation conditional on Y > 0 and checks whether the difference
* is statistically significant.
*
* @param i the index of the first variable in the data array
* @param j the index of the second variable in the data array
* @param D the data array where each row represents a variable and each column represents an observation
* @return true if the difference is not statistically significant (|z| does not exceed the two-cycle screening cutoff), false otherwise
* @throws RuntimeException if a singularity is encountered when computing partial correlation
*/
private boolean zeroDiff(int i, int j, double[][] D) {
double[] x = D[i];
double[] y = D[j];
double pc1;
double pc2;
try {
pc1 = partialCorrelation(x, y, new double[0][], x, 0);
pc2 = partialCorrelation(x, y, new double[0][], y, 0);
} catch (SingularMatrixException e) {
List<Node> nodes = dataSet.getVariables();
throw new RuntimeException("Singularity encountered (conditioning on X > 0, Y > 0) for variables "
+ nodes.get(i) + ", " + nodes.get(j));
}
int nc1 = getRows(x, x, 0, +1).size();
int nc2 = getRows(y, y, 0, +1).size();
double z1 = 0.5 * (log(1.0 + pc1) - log(1.0 - pc1));
double z2 = 0.5 * (log(1.0 + pc2) - log(1.0 - pc2));
double zv = (z1 - z2) / sqrt((1.0 / ((double) nc1 - 3) + 1.0 / ((double) nc2 - 3)));
return abs(zv) <= this.twoCycleScreeningCutoff;
}
/**
* Calculates the partial correlation coefficient between two variables while controlling for other variables.
*
* @param x the first variable
* @param y the second variable
* @param z the matrix containing the control variables
* @param condition the variable whose values determine which rows are included in the calculation
* @param threshold the threshold on the condition variable; only rows where the condition exceeds it are used
* @return the partial correlation coefficient
* @throws SingularMatrixException if the covariance matrix is singular and cannot be inverted
*/
private double partialCorrelation(double[] x, double[] y, double[][] z, double[] condition, double threshold) throws SingularMatrixException {
double[][] cv = covMatrix(x, y, z, condition, threshold, 1);
Matrix m = new Matrix(cv).transpose();
return StatUtils.partialCorrelation(m);
}
/**
* Logs the two-cycle information.
*
* @param nf The number format used to format the result.
* @param variables The list of nodes representing variables.
* @param d The data, with d[i] the data for variable i.
* @param X The first variable node.
* @param Y The second variable node.
* @param type The type of two-cycle.
*/
private void logTwoCycle(NumberFormat nf, List variables, double[][] d, Node X, Node Y, String type) {
int i = variables.indexOf(X);
int j = variables.indexOf(Y);
double[] x = d[i];
double[] y = d[j];
double lr = leftRight(x, y);
TetradLogger.getInstance().log(X + "\t" + Y + "\t" + type
+ "\t" + nf.format(lr)
+ "\t" + X + "<=>" + Y
);
}
/**
* Sets the seed for generating random numbers.
*
* @param seed the seed value to set
*/
public void setSeed(long seed) {
this.seed = seed;
}
/**
* Sets the verbose mode.
*
* @param verbose the flag indicating whether to enable verbose mode or not
*/
public void setVerbose(boolean verbose) {
this.verbose = verbose;
}
/**
* Enumerates the left-right rules available for FASK. Options include the FASK left-right rule and three
* left-right rules from the Hyvarinen and Smith pairwise orientation paper: Robust Skew, Skew, and Tanh. In that
* paper, "empirical" versions were given in which the variables are multiplied through by the signs of the
* skewnesses; we follow this advice here (with good results). These others are provided for those who prefer them.
*/
public enum LeftRight {
/**
* The original FASK left-right rule.
*/
FASK1,
/**
* The modified FASK left-right rule.
*/
FASK2,
/**
* The robust skew rule from the Hyvarinen and Smith paper.
*/
RSKEW,
/**
* The skew rule from the Hyvarinen and Smith paper.
*/
SKEW,
/**
* The tanh rule from the Hyvarinen and Smith paper.
*/
TANH
}
/**
* Enumerates the alternatives to use for finding the initial adjacencies for FASK.
*/
public enum AdjacencyMethod {
/**
* Fast Adjacency Search (FAS) with the stable option.
*/
FAS_STABLE,
/**
* FGES with the supplied score.
*/
FGES,
/**
* A permutation search with the BOSS algorithm.
*/
BOSS,
/**
* A permutation search with the GRASP algorithm.
*/
GRASP,
/**
* Use an external graph.
*/
EXTERNAL_GRAPH,
/**
* No initial adjacencies.
*/
NONE
}
}