// edu.cmu.tetrad.bayes.BayesIm -- Maven / Gradle / Ivy
///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below. //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard //
// Scheines, Joseph Ramsey, and Clark Glymour. //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation; either version 2 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program; if not, write to the Free Software //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //
///////////////////////////////////////////////////////////////////////////////
package edu.cmu.tetrad.bayes;
import edu.cmu.tetrad.data.DataSet;
import edu.cmu.tetrad.data.Simulator;
import edu.cmu.tetrad.data.VariableSource;
import edu.cmu.tetrad.graph.Graph;
import edu.cmu.tetrad.graph.Node;
import edu.cmu.tetrad.util.Im;
import java.util.List;
/**
 * Interface implemented by Bayes instantiated models. For purposes of clarification, we distinguish a Bayes
 * parametric model from a Bayes instantiated model. The former provides enough information for us to know what the
 * parameters of the Bayes net are, given that we know the graph of the Bayes net--i.e., it tells us how many
 * categories each variable has and what the names of those categories are. It does not, however, tell us what the
 * value of each parameter is; information about the value of each parameter in the Bayes net is provided in the Bayes
 * instantiated model. This information is organized, variable by variable, in conditional probability tables. For
 * each variable, a table is stored representing enough information to recover the conditional probability of each
 * value of each variable given each combination of values of the parents of the variable in the graph. The rows of
 * the table are the combinations of parent values of the variable, and the columns of the table are the values of the
 * variable itself. Most of the methods in this interface are designed mainly to allow these values to be set and
 * retrieved. A few methods are dedicated to bookkeeping chores, like clearing tables or initializing them randomly.
 * One special method (simulateData) is dedicated to the task of generating randomly simulated data sets consistent
 * with the conditional probabilities implied by the information stored in the conditional probability tables of the
 * Bayes net. See implementations for details.
 *
 * @author josephramsey
 * @version $Id: $Id
 * @see edu.cmu.tetrad.graph.Dag
 * @see BayesPm
 */
public interface BayesIm extends VariableSource, Im, Simulator {

    /**
     * Returns the underlying Bayes PM.
     *
     * @return the underlying Bayes PM.
     */
    BayesPm getBayesPm();

    /**
     * Returns the underlying DAG.
     *
     * @return the underlying DAG.
     */
    Graph getDag();

    /**
     * Returns the number of nodes in the model.
     *
     * @return the number of nodes in the model.
     */
    int getNumNodes();

    /**
     * Returns the node at the given index.
     *
     * @param nodeIndex the index of the node.
     * @return the node corresponding to the given node index.
     */
    Node getNode(int nodeIndex);

    /**
     * Returns the node with the given name.
     *
     * @param name the name of the node.
     * @return the node with the given name in the associated graph.
     */
    Node getNode(String name);

    /**
     * Returns the index of the given node.
     *
     * @param node the given node.
     * @return the index for that node, or -1 if the node is not in the BayesIm.
     */
    int getNodeIndex(Node node);

    /**
     * Returns the list of variables.
     *
     * @return the list of variables for this Bayes net.
     */
    List<Node> getVariables();

    /**
     * Returns the list of variable names.
     *
     * @return the list of variable names for this Bayes net.
     */
    List<String> getVariableNames();

    /**
     * Returns the list of measured variables.
     *
     * @return the list of measured variable nodes.
     */
    List<Node> getMeasuredNodes();

    /**
     * Returns the number of columns.
     *
     * @param nodeIndex the index of the node.
     * @return the number of columns in the table of the given node N with index 'nodeIndex'--that is, the number of
     * possible values that N can take on. That is, if P(N=v0 | P1=v1, P2=v2, ... Pn=vn) is a conditional probability
     * stored in 'probs', then the number of columns in the table for N is #vals(N).
     * @see #getNumRows
     */
    int getNumColumns(int nodeIndex);

    /**
     * Returns the number of rows.
     *
     * @param nodeIndex the index of the node.
     * @return the number of rows in the table of the given node, which would be the total number of possible
     * combinations of parent values for a given node. That is, if P(N=v0 | P1=v1, P2=v2, ... Pn=vn) is a conditional
     * probability stored in 'probs', then the number of rows in the table for N is #vals(P1) x #vals(P2) x ... x
     * #vals(Pn).
     * @see #getRowIndex
     * @see #getNumColumns
     */
    int getNumRows(int nodeIndex);

    /**
     * Returns the number of parents for the given node.
     *
     * @param nodeIndex the index of the node.
     * @return the number of parents of the given node.
     */
    int getNumParents(int nodeIndex);

    /**
     * Returns the i-th parent of the given node.
     *
     * @param nodeIndex   the index of the node.
     * @param parentIndex the index of the parent, i.e. its position among the parents of the node.
     * @return the given parent of the given node.
     * @see #getParents
     */
    int getParent(int nodeIndex, int parentIndex);

    /**
     * Returns the dimension of the given parent for the given node.
     *
     * @param nodeIndex   the index of the node.
     * @param parentIndex the index of the parent.
     * @return the dimension of the given parent for the given node.
     * @see #getParentDims
     */
    int getParentDim(int nodeIndex, int parentIndex);

    /**
     * Returns the dimensions of the parents of the given node.
     *
     * @param nodeIndex the index of the node.
     * @return (a defensive copy of) the array representing the dimensionality of each parent of a node, that is, the
     * number of values which that parent can take on. The order of entries in this array is the same as the order of
     * entries of nodes returned by getParents() for that node.
     * @see #getParents
     */
    int[] getParentDims(int nodeIndex);

    /**
     * Returns the parents of the given node.
     *
     * @param nodeIndex the index of the node.
     * @return (a defensive copy of) the array containing all of the parents of a given node in the order in which
     * they are stored internally.
     * @see #getParentDims
     */
    int[] getParents(int nodeIndex);

    /**
     * Returns the parent values for the given row of the given node's table.
     *
     * @param nodeIndex the index of the node.
     * @param rowIndex  the index of the row in question.
     * @return an array containing the combination of parent values for a given node and given row in the probability
     * table for that node. To get the combination of parent values from the row number, the row number is
     * represented using a variable-base place value system, where the bases for each place value are the dimensions
     * of the parents in the order in which they are given by getParentDims(). For instance, if the row number (base
     * 10) is 103 and the parent dimension array is [3 5 7], we calculate the first value as 103 / 7 = 14 with a
     * remainder of 5. We then divide 14 / 5 = 2 with a remainder of 4. We then divide 2 / 3 = 0 with a remainder of
     * 2. The variable place value representation is [2 4 5], which is the combination of parent values. This is the
     * inverse function of getRowIndex().
     * @see #getNodeIndex
     * @see #getRowIndex
     */
    int[] getParentValues(int nodeIndex, int rowIndex);

    /**
     * Returns a single parent value for the given node and row.
     * <p>
     * NOTE(review): the previous documentation described the result as "the value in the probability table", which
     * does not match the {@code int} return type; probabilities are retrieved with {@link #getProbability}. This
     * method presumably returns one entry of the array given by {@link #getParentValues}, selected by
     * {@code colIndex}--confirm against the implementations.
     *
     * @param nodeIndex the index of the node.
     * @param rowIndex  the index of the row in question.
     * @param colIndex  the index of the column in question (presumably the position of the parent within the row's
     *                  combination of parent values).
     * @return the parent value for the given node at the given row and column.
     * @see #getParentValues
     */
    int getParentValue(int nodeIndex, int rowIndex, int colIndex);

    /**
     * Returns the probability for the given cell in the given CPT.
     *
     * @param nodeIndex the index of the node in question.
     * @param rowIndex  the row in the table for this node which represents the combination of parent values in
     *                  question.
     * @param colIndex  the column in the table for this node which represents the value of the node in question.
     * @return the probability for the given node at the given row and column in the table for that node. To get the
     * node index, use getNodeIndex(). To get the row index, use getRowIndex(). To get the column index, use
     * getCategoryIndex() from the underlying BayesPm(). The value returned will represent a conditional probability
     * of the form P(N=v0 | P1=v1, P2=v2, ... , Pn=vn), where N is the node referenced by nodeIndex, v0 is the value
     * referenced by colIndex, and the combination of parent values indicated is the combination indicated by
     * rowIndex.
     * @see #getNodeIndex
     * @see #getRowIndex
     */
    double getProbability(int nodeIndex, int rowIndex, int colIndex);

    /**
     * Returns a row index.
     * <p>
     * Note: If the node has n parents, the length of 'values' must be {@code >=} n. Only the first n values are
     * used.
     *
     * @param nodeIndex the index of the node in question.
     * @param values    the combination of parent values in question.
     * @return the row in the table at which the given combination of parent values is represented for the given
     * node. The row is calculated as a variable-base place-value number. For instance, if the array of parent
     * dimensions is [3, 5, 7] and the parent value combination is [2, 4, 5], then the row number is (7 * (5 * (3 * 0
     * + 2) + 4)) + 5 = 103. This is the inverse function to getParentValues().
     * @see #getParentValues
     */
    int getRowIndex(int nodeIndex, int[] values);

    /**
     * Normalizes all rows in the tables associated with each node in turn.
     */
    void normalizeAll();

    /**
     * Normalizes all rows in the table associated with a given node.
     *
     * @param nodeIndex the index of the node in question.
     */
    void normalizeNode(int nodeIndex);

    /**
     * Normalizes the given row.
     *
     * @param nodeIndex the index of the node in question.
     * @param rowIndex  the index of the row in question.
     */
    void normalizeRow(int nodeIndex, int rowIndex);

    /**
     * Sets the probability for the given node at a given row and column in the table for that node. To get the node
     * index, use getNodeIndex(). To get the row index, use getRowIndex(). To get the column index, use
     * getCategoryIndex() from the underlying BayesPm(). The value set represents a conditional probability of the
     * form P(N=v0 | P1=v1, P2=v2, ... , Pn=vn), where N is the node referenced by nodeIndex, v0 is the value
     * referenced by colIndex, and the combination of parent values indicated is the combination indicated by
     * rowIndex.
     *
     * @param nodeIndex the index of the node in question.
     * @param rowIndex  the row in the table for this node which represents the combination of parent values in
     *                  question.
     * @param colIndex  the column in the table for this node which represents the value of the node in question.
     * @param value     the desired probability to be set.
     * @see #getProbability
     */
    void setProbability(int nodeIndex, int rowIndex, int colIndex, double value);

    /**
     * Sets the probabilities of the CPT for the given node from a matrix. Each matrix row corresponds to a row
     * index, i.e. the row in the table for this node which represents a combination of parent values. Each matrix
     * column corresponds to a column index, i.e. the column in the table for this node which represents a value of
     * the node.
     *
     * @param nodeIndex  the index of the node in question.
     * @param probMatrix a matrix containing probabilities of a node along with its parents
     */
    void setProbability(int nodeIndex, double[][] probMatrix);

    /**
     * Returns the index of the given node in the given BayesIm.
     *
     * @param nodeIndex    the index of the node in this BayesIm.
     * @param otherBayesIm the BayesIm in which the node is to be found.
     * @return the index of the node with the given name in the specified BayesIm.
     */
    int getCorrespondingNodeIndex(int nodeIndex, BayesIm otherBayesIm);

    /**
     * Clears the given row of the table for the given node.
     * <p>
     * NOTE(review): the previous documentation was a copy of {@link #randomizeRow}. A cleared row presumably holds
     * Double.NaN values, making it incomplete in the sense of {@link #isIncomplete(int, int)}--confirm against the
     * implementations.
     *
     * @param nodeIndex the node for the table that this row belongs to.
     * @param rowIndex  the index of the row.
     */
    void clearRow(int nodeIndex, int rowIndex);

    /**
     * Assigns random probability values to the child values of this row that add to 1.
     *
     * @param nodeIndex the node for the table that this row belongs to.
     * @param rowIndex  the index of the row.
     */
    void randomizeRow(int nodeIndex, int rowIndex);

    /**
     * Randomizes any row in the table for the given node index that has a Double.NaN value in it.
     *
     * @param nodeIndex the node for the table whose incomplete rows are to be randomized.
     */
    void randomizeIncompleteRows(int nodeIndex);

    /**
     * Randomizes every row in the table for the given node index.
     *
     * @param nodeIndex the node for the table to be randomized.
     */
    void randomizeTable(int nodeIndex);

    /**
     * Clears every row in the table for the given node index.
     * <p>
     * NOTE(review): the previous documentation was a copy of {@link #randomizeTable}. See {@link #clearRow} for the
     * presumed meaning of "clear"--confirm against the implementations.
     *
     * @param nodeIndex the node for the table to be cleared.
     */
    void clearTable(int nodeIndex);

    /**
     * Returns true iff the given row in the given node has a Double.NaN value in it.
     *
     * @param nodeIndex the node for the table whose incomplete rows are to be checked.
     * @param rowIndex  the index of the row in question.
     * @return true iff one of the values in the given row is Double.NaN.
     */
    boolean isIncomplete(int nodeIndex, int rowIndex);

    /**
     * Returns true iff the given node has a Double.NaN value in it.
     *
     * @param nodeIndex the node for the table whose incomplete rows are to be checked.
     * @return true iff any value in the table for the given node is Double.NaN.
     */
    boolean isIncomplete(int nodeIndex);

    /**
     * Simulates a data set with the specified number of rows.
     *
     * @param sampleSize      the number of rows to simulate.
     * @param latentDataSaved if true, latent variables are saved in the data set.
     * @return the simulated data set.
     */
    DataSet simulateData(int sampleSize, boolean latentDataSaved);

    /**
     * Simulates data based on the provided data set and saves the latent data if specified.
     *
     * @param dataSet         the data set to simulate data for
     * @param latentDataSaved a boolean value indicating whether the latent data should be saved or not
     * @return the simulated data set
     */
    DataSet simulateData(DataSet dataSet, boolean latentDataSaved);

    /**
     * Returns true iff this Bayes net is equal to the given Bayes net. The sense of equality may vary depending on
     * the type of Bayes net.
     *
     * @param o the Bayes net to be compared to this Bayes net.
     * @return true iff this Bayes net is equal to the given Bayes net. The sense of equality may vary depending on
     * the type of Bayes net.
     */
    boolean equals(Object o);

    /**
     * Returns a string representation for this Bayes net.
     *
     * @return a string representation for this Bayes net.
     */
    String toString();

    /**
     * Retrieves the CptMapType for this instance. The default implementation returns
     * {@link MlBayesIm.CptMapType#PROB_MAP}; implementations may override it.
     *
     * @return the CptMapType for this instance
     */
    default MlBayesIm.CptMapType getCptMapType() {
        return MlBayesIm.CptMapType.PROB_MAP;
    }
}