edu.cmu.tetrad.sem.Tsls Maven / Gradle / Ivy
The newest version!
///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below. //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard //
// Scheines, Joseph Ramsey, and Clark Glymour. //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation; either version 2 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program; if not, write to the Free Software //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //
///////////////////////////////////////////////////////////////////////////////
package edu.cmu.tetrad.sem;
import edu.cmu.tetrad.data.DataSet;
import edu.cmu.tetrad.graph.Node;
import edu.cmu.tetrad.graph.NodeType;
import edu.cmu.tetrad.graph.SemGraph;
import edu.cmu.tetrad.util.MatrixUtils;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
/**
* Implementation of 2SLS, adapted to deal with estimation of covariance matrices. See "An alternative two stage least
* squares (2SLS) estimator for latent variable equations" by Kenneth Bollen (Psychometrika - vol. 61, no1., 109-121,
* March 1996). IMPORTANT: this algorithm assumes that the SemPm is a pure measurement model! With at least TWO
* measurements per latent. Large parts of this code (that is, everything but estimateCoeffs and estimateCovars) were
* reused from SEMEstimateAdapter.
*
* @author Ricardo Silva
*/
class Tsls {
private SemPm spm;
private SemIm semIm;
private List fixedLoadings;
private DataSet dataSet;
private String nodeName;
/**
* We require a DataSet (with continuous dataSet) and a SemPm with matching variables.
*
* @param spm a {@link edu.cmu.tetrad.sem.SemPm} object
* @param dataSet a {@link edu.cmu.tetrad.data.DataSet} object
* @param nm a {@link java.lang.String} object
*/
public Tsls(SemPm spm, DataSet dataSet, String nm) {
initialization(spm, dataSet, nm);
}
/*
* fixedLoadings is a list of string. It should contain the names of the
* nodes such that their loadings are fixed to 1.
*/
/**
* Constructor for Tsls.
*
* @param spm a {@link edu.cmu.tetrad.sem.SemPm} object
* @param dataSet a {@link edu.cmu.tetrad.data.DataSet} object
* @param nm a {@link java.lang.String} object
* @param fixedLoadings a {@link java.util.List} object
*/
public Tsls(SemPm spm, DataSet dataSet, String nm,
List fixedLoadings) {
initialization(spm, dataSet, nm);
this.fixedLoadings = fixedLoadings;
}
private void initialization(SemPm spm, DataSet dataSet,
String nm) {
this.dataSet = dataSet;
this.spm = spm;
if (nm != null) {
this.nodeName = nm;
}
this.semIm = null;
}
/**
* estimate.
*
* @return a {@link edu.cmu.tetrad.sem.SemIm} object
*/
public SemIm estimate() {
this.semIm = new SemIm(this.spm);
estimateCoeffs(this.semIm);
return this.semIm;
}
/**
* getEstimatedSem.
*
* @return a {@link edu.cmu.tetrad.sem.SemIm} object
*/
public SemIm getEstimatedSem() {
return this.semIm;
}
private void setFixedNodes(SemGraph semGraph, List mx1, List my1) {
if (this.fixedLoadings == null) {
for (Node nodeA : semGraph.getNodes()) {
if (nodeA.getNodeType() == NodeType.ERROR) {
continue;
}
if (nodeA.getNodeType() == NodeType.LATENT) {
// We will choose the measurement node with fixed edge by the
// lexicographical order.
Iterator children = semGraph.getChildren(nodeA).iterator();
Node nodeB = null;
while (children.hasNext()) {
Node child = children.next();
if ((child.getNodeType() == NodeType.MEASURED) && (
(nodeB == null) || (child.getName().compareTo(
nodeB.getName()) < 0))) {
nodeB = child;
}
}
if (semGraph.getParents(nodeA).size() == 0) {
mx1.add(nodeB);
} else {
my1.add(nodeB);
}
}
}
} else {
Iterator it = semGraph.getNodes().iterator();
latentIteration:
while (it.hasNext()) {
Node nodeA = it.next();
if (nodeA.getNodeType() == NodeType.ERROR) {
continue;
}
if (nodeA.getNodeType() == NodeType.LATENT) {
for (String fixedLoading : this.fixedLoadings) {
Node indicator = semGraph.getNode(fixedLoading);
for (Node parent : semGraph.getParents(indicator)) {
if (parent == nodeA) {
if (semGraph.getParents(parent).size() == 0) {
System.out.println("Fixing mx1 = " +
indicator.getName());
mx1.add(indicator);
} else {
System.out.println("Fixing my1 = " +
indicator.getName());
my1.add(indicator);
}
continue latentIteration;
}
}
}
}
}
}
}
private void estimateCoeffs(SemIm semIm) {
//System.out.print("\n****************\nCalling 2SLS... ");
SemGraph semGraph = semIm.getSemPm().getGraph();
// Get list of fixed measurements that will be kept fixed, and the
// respective latent variables that are their parents.
// "X" variables are exogenous, while "Y" variables are endogenous.
List ly = new LinkedList<>();
List lx = new LinkedList<>();
List my1 = new LinkedList<>();
List mx1 = new LinkedList<>();
List observed = new LinkedList<>();
for (Node nodeA : semGraph.getNodes()) {
if (nodeA.getNodeType() == NodeType.ERROR) {
continue;
}
if (nodeA.getNodeType() == NodeType.LATENT) {
if (semGraph.getParents(nodeA).size() == 0) {
lx.add(nodeA);
} else {
ly.add(nodeA);
}
} else {
observed.add(nodeA);
}
}
setFixedNodes(semGraph, mx1, my1);
//------------------------------------------------------------------
// Estimate freeParameters for the latent/latent edges
for (Node current : ly) {
if (this.nodeName != null && !this.nodeName.equals(current.getName())) {
continue;
}
// Build Z, the matrix containing the data for the fixed measurements
// associated with the parents of the getModel (endogenous) latent node
List endo_parents_m = new LinkedList<>();
List exo_parents_m = new LinkedList<>();
List endo_parents = new LinkedList<>();
List exo_parents = new LinkedList<>();
Iterator it_p = semGraph.getParents(current).iterator();
while (it_p.hasNext()) {
Node node = it_p.next();
if (node.getNodeType() == NodeType.ERROR) {
continue;
}
if (lx.contains(node)) {
int position = lx.indexOf(node);
exo_parents_m.add(mx1.get(position));
exo_parents.add(node);
} else {
int position = ly.indexOf(node);
endo_parents_m.add(my1.get(position));
endo_parents.add(node);
}
}
Object[] endp_a_m = endo_parents_m.toArray();
Object[] exop_a_m = exo_parents_m.toArray();
Object[] endp_a = endo_parents.toArray();
Object[] exop_a = exo_parents.toArray();
int n = this.dataSet.getNumRows(), c = endp_a_m.length + exop_a_m.length;
if (c == 0) {
continue;
}
double[][] Z = new double[n][c];
for (int i = 0; i < endp_a_m.length; i++) {
Node node = (Node) endp_a_m[i];
String name = node.getName();
Node variable = this.dataSet.getVariable(name);
int colIndex = this.dataSet.getVariables().indexOf(variable);
for (int j = 0; j < n; j++) {
Z[j][i] = this.dataSet.getDouble(j, colIndex);
}
}
for (int i = 0; i < exop_a_m.length; i++) {
Node node = (Node) exop_a_m[i];
String name = node.getName();
Node variable = this.dataSet.getVariable(name);
int colIndex = this.dataSet.getVariables().indexOf(variable);
for (int j = 0; j < n; j++) {
Z[j][endp_a_m.length + i] = this.dataSet.getDouble(j, colIndex);
}
}
// Build V, the matrix containing the data for the nonfixed measurements
// associated with the parents of the getModel (endogenous) latent node
endo_parents_m = new LinkedList<>();
exo_parents_m = new LinkedList<>();
it_p = semGraph.getParents(current).iterator();
while (it_p.hasNext()) {
Node node = it_p.next();
if (node.getNodeType() == NodeType.ERROR) {
continue;
}
List other_measures = new LinkedList<>();
for (Node next : semGraph.getChildren(node)) {
if (next.getNodeType() == NodeType.MEASURED) {
other_measures.add(next);
}
}
if (lx.contains(node)) {
int position = lx.indexOf(node);
other_measures.remove(mx1.get(position));
exo_parents_m.addAll(other_measures);
} else {
int position = ly.indexOf(node);
other_measures.remove(my1.get(position));
endo_parents_m.addAll(other_measures);
}
}
endp_a_m = endo_parents_m.toArray();
exop_a_m = exo_parents_m.toArray();
n = this.dataSet.getNumRows();
c = endp_a_m.length + exop_a_m.length;
double[][] V = new double[n][c];
if (c == 0) {
continue;
}
for (int i = 0; i < endp_a_m.length; i++) {
Node node = ((Node) endp_a_m[i]);
String name = node.getName();
Node variable = this.dataSet.getVariable(name);
int colIndex = this.dataSet.getVariables().indexOf(variable);
for (int j = 0; j < n; j++) {
// V[j][i] = column_data[j];
V[j][i] = this.dataSet.getDouble(j, colIndex);
}
}
for (int i = 0; i < exop_a_m.length; i++) {
Node node = (Node) exop_a_m[i];
String name = node.getName();
Node variable = this.dataSet.getVariable(name);
int colIndex = this.dataSet.getVariables().indexOf(variable);
for (int j = 0; j < n; j++) {
// V[j][endp_a_m.length + i] = column_data[j];
V[j][endp_a_m.length + i] = this.dataSet.getDouble(j, colIndex);
}
}
double[] yi = new double[n];
if (lx.contains(current)) {
int position = lx.indexOf(current);
Node node = mx1.get(position);
String name = node.getName();
Node variable = this.dataSet.getVariable(name);
int colIndex = this.dataSet.getVariables().indexOf(variable);
for (int i = 0; i < n; i++) {
yi[i] = this.dataSet.getDouble(i, colIndex);
}
} else {
int position = ly.indexOf(current);
Node node = my1.get(position);
String name = node.getName();
Node variable = this.dataSet.getVariable(name);
int colIndex = this.dataSet.getVariables().indexOf(variable);
// System.arraycopy(dataSet.getColumnObject(variable).getRawData(), 0, yi, 0, n);
for (int i = 0; i < n; i++) {
yi[i] = this.dataSet.getDouble(i, colIndex);
}
}
// Build Z_hat
double[][] Z_hat = MatrixUtils.product(V, MatrixUtils.product(
MatrixUtils.inverse(
MatrixUtils.product(MatrixUtils.transpose(V), V)),
MatrixUtils.product(MatrixUtils.transpose(V), Z)));
double[] a_hat = MatrixUtils.product(MatrixUtils.inverse(
MatrixUtils.product(MatrixUtils.transpose(Z_hat), Z_hat)),
MatrixUtils.product(MatrixUtils.transpose(Z_hat), yi));
//Set the edge for the fixed measurement
int position = ly.indexOf(current);
semIm.setParamValue(current, my1.get(position), 1.);
// Set the edge for the latents
for (int i = 0; i < endp_a.length; i++) {
semIm.setParamValue((Node) endp_a[i], current, a_hat[i]);
}
for (int i = 0; i < exop_a.length; i++) {
semIm.setParamValue((Node) exop_a[i], current,
a_hat[endp_a.length + i]);
}
if (this.nodeName != null && this.nodeName.equals(current.getName())) {
computeAsymptLatentCovar(yi, a_hat, Z, Z_hat,
this.dataSet.getNumRows());
break;
}
}
//------------------------------------------------------------------
// Estimate freeParameters of the measurement model
// Set the edges of the fixed measurements of exogenous
for (Node current : lx) {
int position = lx.indexOf(current);
semIm.setParamValue(current, mx1.get(position), 1.);
}
for (Node current : observed) {
if (this.nodeName != null && !this.nodeName.equals(current.getName())) {
continue;
}
if (mx1.contains(current) || my1.contains(current)) {
continue;
}
// First, get the parent of this observed
Node current_latent = null;
for (Node node : semGraph.getParents(current)) {
if (node.getNodeType() == NodeType.ERROR) {
continue;
}
current_latent = node;
}
Iterator children =
semGraph.getChildren(current_latent).iterator();
List other_measures = new LinkedList<>();
Node fixed_measurement;
while (children.hasNext()) {
Node next = children.next();
if ((next.getNodeType() == NodeType.MEASURED) &&
next != current) {
other_measures.add(next);
}
}
if (lx.contains(current_latent)) {
int position = lx.indexOf(current_latent);
other_measures.remove(mx1.get(position));
fixed_measurement = mx1.get(position);
} else {
int position = ly.indexOf(current_latent);
other_measures.remove(my1.get(position));
fixed_measurement = my1.get(position);
}
// Regress other_measures over the fixed measurement x1 (y1) correspondent
// to the measurement variable that is being evaluated
int n, c = other_measures.size();
if (c == 0) {
continue;
}
// Build C, the column matrix containing the data for the fixed
// measurement associated with the only latent parent of the getModel
// observed node (as assumed by the structure of our measurement model).
Node variable = this.dataSet.getVariable(fixed_measurement.getName());
int colIndex = this.dataSet.getVariables().indexOf(variable);
double[] C = new double[this.dataSet.getNumRows()];
for (int i = 0; i < this.dataSet.getNumRows(); i++) {
C[i] = this.dataSet.getDouble(colIndex, i);
}
// Build V, the matrix containing the data for the other measurements
// associated with the parents of the (latent) parent of getModel
// observed node. The only difference with respect to the estimation
// of the within-latent coefficients is that here we only include
// the other measurements attached to the parent of the getModel node,
// assuming that the error term of the getModel node is independent
// of the error term of the others and that each measurement is
// taken with respect to only one latent.
n = this.dataSet.getNumRows();
double[][] V = new double[n][c];
for (int i = 0; i < c; i++) {
Node variable2 = this.dataSet.getVariable(
(other_measures.get(i)).getName());
int var2index = this.dataSet.getVariables().indexOf(variable2);
for (int j = 0; j < n; j++) {
// V[j][i] = column_data[j];
V[j][i] = this.dataSet.getDouble(j, var2index);
}
}
double[] yi = new double[n];
Node variable3 = this.dataSet.getVariable((current).getName());
int var3Index = this.dataSet.getVariables().indexOf(variable3);
for (int i = 0; i < n; i++) {
yi[i] = this.dataSet.getDouble(i, var3Index);
}
double[] C_hat = MatrixUtils.product(V, MatrixUtils.product(
MatrixUtils.inverse(
MatrixUtils.product(MatrixUtils.transpose(V), V)),
MatrixUtils.product(MatrixUtils.transpose(V), C)));
double A_hat = MatrixUtils.innerProduct(MatrixUtils.scalarProduct(
1. / MatrixUtils.innerProduct(C_hat, C_hat), C_hat), yi);
// Set the edge for the getModel measurement
semIm.setParamValue(current_latent, current, A_hat);
}
}
private void computeAsymptLatentCovar(double[] y, double[] A_hat,
double[][] Z, double[][] Z_hat, double n) {
double[] yza = MatrixUtils.subtract(y, MatrixUtils.product(Z, A_hat));
double sigma_ui = MatrixUtils.innerProduct(yza, yza) / n;
double[][] asymptLCovar = MatrixUtils.inverse(
MatrixUtils.product(MatrixUtils.transpose(Z_hat), Z_hat));
for (double[] anAsymptLCovar : asymptLCovar) {
for (int j = 0; j < asymptLCovar.length; j++) {
anAsymptLCovar[j] *= sigma_ui;
}
}
}
/*
* Get variance for the edge from source to nodeName
**/
}