edu.cmu.tetrad.search.work_in_progress.MnlrLikelihood
///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below.       //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,       //
// 2007, 2008, 2009, 2010, 2014, 2015 by Peter Spirtes, Richard Scheines,    //
// Joseph Ramsey, and Clark Glymour.                                         //
//                                                                           //
// This program is free software; you can redistribute it and/or modify      //
// it under the terms of the GNU General Public License as published by      //
// the Free Software Foundation; either version 2 of the License, or         //
// (at your option) any later version.                                       //
//                                                                           //
// This program is distributed in the hope that it will be useful,           //
// but WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             //
// GNU General Public License for more details.                              //
//                                                                           //
// You should have received a copy of the GNU General Public License         //
// along with this program; if not, write to the Free Software               //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA //
///////////////////////////////////////////////////////////////////////////////

package edu.cmu.tetrad.search.work_in_progress;

import de.bwaldvogel.liblinear.*;
import edu.cmu.tetrad.data.ContinuousVariable;
import edu.cmu.tetrad.data.DataSet;
import edu.cmu.tetrad.data.DiscreteVariable;
import edu.cmu.tetrad.graph.Node;
import edu.cmu.tetrad.search.utils.AdLeafTree;
import edu.cmu.tetrad.util.Matrix;
import edu.cmu.tetrad.util.Vector;
import org.apache.commons.math3.util.FastMath;

import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;


/**
 * Calculates the Mixed Variables Polynomial likelihood.
 * <p>
 * As for all scores in Tetrad, higher scores mean more dependence, and negative scores indicate independence.
 *
 * @author Bryan Andrews
 */
public class MnlrLikelihood {

    private final DataSet dataSet;

    // The variables of the data set.
    private final List<Node> variables;

    // Indices of variables.
    private final Map<Node, Integer> nodesHash;

    // Continuous data only.
    private final double[][] continuousData;

    // Discrete data only.
    private final int[][] discreteData;

    // Partitions.
    private final AdLeafTree adTree;

    // Fixed degree.
    private final int fDegree;

    // Structure prior.
    private final double structurePrior;

    private final PrintStream original = System.out;

    private final PrintStream nullout = new PrintStream(new OutputStream() {
        public void write(int b) {
            // DO NOTHING
        }
    });

    /**
     * Constructor.
     *
     * @param dataSet        The dataset to analyze.
     * @param structurePrior The structure prior.
     * @param fDegree        The f degree.
     */
    public MnlrLikelihood(DataSet dataSet, double structurePrior, int fDegree) {
        if (dataSet == null) {
            throw new NullPointerException();
        }

        this.dataSet = dataSet;
        this.variables = dataSet.getVariables();
        this.structurePrior = structurePrior;
        this.fDegree = fDegree;

        this.continuousData = new double[dataSet.getNumColumns()][];
        this.discreteData = new int[dataSet.getNumColumns()][];

        for (int j = 0; j < dataSet.getNumColumns(); j++) {
            Node v = dataSet.getVariable(j);

            if (v instanceof ContinuousVariable) {
                double[] col = new double[dataSet.getNumRows()];

                for (int i = 0; i < dataSet.getNumRows(); i++) {
                    col[i] = dataSet.getDouble(i, j);
                }

                this.continuousData[j] = col;
            } else if (v instanceof DiscreteVariable) {
                int[] col = new int[dataSet.getNumRows()];

                for (int i = 0; i < dataSet.getNumRows(); i++) {
                    col[i] = dataSet.getInt(i, j);
                }

                this.discreteData[j] = col;
            }
        }

        this.nodesHash = new HashMap<>();

        for (int j = 0; j < dataSet.getNumColumns(); j++) {
            Node v = dataSet.getVariable(j);
            this.nodesHash.put(v, j);
        }

        this.adTree = new AdLeafTree(dataSet);
    }

    /**
     * Returns the likelihood of a child given its parents.
     *
     * @param child_index The index of the child.
     * @param parents     The indices of the parents.
     * @return The likelihood.
     */
    public double getLik(int child_index, int[] parents) {

        double lik = 0;
        Node c = this.variables.get(child_index);
        List<ContinuousVariable> continuous_parents = new ArrayList<>();
        List<DiscreteVariable> discrete_parents = new ArrayList<>();
        for (int p : parents) {
            Node parent = this.variables.get(p);
            if (parent instanceof ContinuousVariable) {
                continuous_parents.add((ContinuousVariable) parent);
            } else {
                discrete_parents.add((DiscreteVariable) parent);
            }
        }

        int p = continuous_parents.size();

        List<List<Integer>> cells = this.adTree.getCellLeaves(discrete_parents);
        //List<List<Integer>> cells = partition(discrete_parents);

        int[] continuousCols = new int[p];
        for (int j = 0; j < p; j++) continuousCols[j] = this.nodesHash.get(continuous_parents.get(j));

        for (List<Integer> cell : cells) {
            int r = cell.size();

            if (r > 1) {

                // Per-cell mean and standard deviation of each continuous parent,
                // used to standardize the polynomial basis below.
                double[] mean = new double[p];
                double[] var = new double[p];
                for (int i = 0; i < p; i++) {
                    for (Integer integer : cell) {
                        mean[i] += this.continuousData[continuousCols[i]][integer];
                        var[i] += FastMath.pow(this.continuousData[continuousCols[i]][integer], 2);
                    }
                    mean[i] /= r;
                    var[i] /= r;
                    var[i] -= FastMath.pow(mean[i], 2);
                    var[i] = FastMath.sqrt(var[i]);

                    if (Double.isNaN(var[i])) {
                        System.out.println(var[i]);
                    }
                }

                int degree = this.fDegree;
                if (this.fDegree < 1) {
                    degree = (int) FastMath.floor(FastMath.log(r));
                }

                // Design matrix: one standardized polynomial term per continuous parent
                // and degree, plus an intercept column.
                Matrix subset = new Matrix(r, p * degree + 1);
                for (int i = 0; i < r; i++) {
                    subset.set(i, p * degree, 1);
                    for (int j = 0; j < p; j++) {
                        for (int d = 0; d < degree; d++) {
                            subset.set(i, p * d + j, FastMath.pow((this.continuousData[continuousCols[j]][cell.get(i)] - mean[j]) / var[j], d + 1));
                        }
                    }
                }

                if (c instanceof ContinuousVariable) {
                    Vector target = new Vector(r);
                    for (int i = 0; i < r; i++) {
                        target.set(i, this.continuousData[child_index][cell.get(i)]);
                    }
                    lik += multipleRegression(target, subset);
                } else {
                    Matrix target = new Matrix(r, ((DiscreteVariable) c).getNumCategories());
                    for (int i = 0; i < r; i++) {
                        for (int j = 0; j < ((DiscreteVariable) c).getNumCategories(); j++) {
                            target.set(i, j, -1);
                        }
                        target.set(i, this.discreteData[child_index][cell.get(i)], 1);
                    }
                    lik += MultinomialLogisticRegression(target, subset);
                }
            }
        }

        return lik;
    }

    /**
     * Returns the degrees of freedom of a child given its parents.
     *
     * @param child_index The index of the child.
     * @param parents     The indices of the parents.
     * @return The degrees of freedom.
     */
    public double getDoF(int child_index, int[] parents) {

        double dof = 0;
        Node c = this.variables.get(child_index);
        List<ContinuousVariable> continuous_parents = new ArrayList<>();
        List<DiscreteVariable> discrete_parents = new ArrayList<>();
        for (int p : parents) {
            Node parent = this.variables.get(p);
            if (parent instanceof ContinuousVariable) {
                continuous_parents.add((ContinuousVariable) parent);
            } else {
                discrete_parents.add((DiscreteVariable) parent);
            }
        }

        List<List<Integer>> cells = this.adTree.getCellLeaves(discrete_parents);

        for (List<Integer> cell : cells) {
            int r = cell.size();
            if (r > 0) {

                int degree = this.fDegree;
                if (this.fDegree < 1) {
                    degree = (int) FastMath.floor(FastMath.log(r));
                }
                if (c instanceof ContinuousVariable) {
                    dof += degree * continuous_parents.size() + 1;
                } else {
                    dof += ((degree * continuous_parents.size()) + 1) * (((DiscreteVariable) c).getNumCategories() - 1);
                }
            }
        }

        return dof;
    }

    /**
     * Returns the structure prior for k parents.
     *
     * @param k The number of parents.
     * @return The structure prior.
     */
    public double getStructurePrior(int k) {
        if (this.structurePrior < 0) {
            return getEBICprior();
        }

        double n = this.dataSet.getNumColumns() - 1;
        double p = this.structurePrior / n;

        if (this.structurePrior == 0) {
            return 0;
        }
        return k * FastMath.log(p) + (n - k) * FastMath.log(1 - p);
    }

    /**
     * Returns the EBIC prior, using gamma = -structurePrior.
     */
    public double getEBICprior() {
        double n = this.dataSet.getNumColumns();
        double gamma = -this.structurePrior;
        return gamma * FastMath.log(n);
    }

    // Gaussian log-likelihood of Y regressed on the columns of X (ordinary least squares);
    // falls back to an intercept-only model if X'X cannot be inverted.
    private double multipleRegression(Vector Y, Matrix X) {

        int n = X.getNumRows();
        Vector r;

        try {
            Matrix Xt = X.transpose();
            Matrix XtX = Xt.times(X);
            r = X.times(XtX.inverse().times(Xt.times(Y))).minus(Y);
        } catch (Exception e) {
            Vector ones = new Vector(n);
            for (int i = 0; i < n; i++) ones.set(i, 1);
            r = ones.scalarMult(ones.dotProduct(Y) / (double) n).minus(Y);
        }

        double sigma2 = r.dotProduct(r) / n;

        if (sigma2 <= 0) {
            Vector ones = new Vector(n);
            for (int i = 0; i < n; i++) ones.set(i, 1);
            r = ones.scalarMult(ones.dotProduct(Y) / (double) FastMath.max(n, 2)).minus(Y);
            sigma2 = r.dotProduct(r) / n;
        }

        double lik = -(n / 2.) * (FastMath.log(2 * FastMath.PI) + FastMath.log(sigma2) + 1);

        if (Double.isInfinite(lik) || Double.isNaN(lik)) {
            System.out.println(lik);
        }

        return lik;
    }

    // Log-likelihood for a discrete child: fits one binary liblinear logistic regression
    // per category of the child and renormalizes the predicted probabilities.
    private double MultinomialLogisticRegression(Matrix targets, Matrix subset) {

        Problem problem = new Problem();
        problem.l = targets.getNumRows(); // number of training examples
        problem.n = subset.getNumColumns(); // number of features
        problem.x = new FeatureNode[problem.l][problem.n]; // feature nodes
        problem.bias = 0;
        for (int i = 0; i < problem.l; i++) {
            for (int j = 0; j < problem.n; j++) {
                problem.x[i][j] = new FeatureNode(j + 1, subset.get(i, j));
            }
        }

        final SolverType solver = SolverType.L2R_LR; // -s 0
        final double C = 1.0;    // cost of constraints violation
        final double eps = 1e-4; // stopping criteria

        Parameter parameter = new Parameter(solver, C, eps);
        ArrayList<Model> models = new ArrayList<>();
        double lik = 0;
        double num;
        double den;

        // Train one model per category, silencing liblinear's console output while training.
        for (int i = 0; i < targets.getNumColumns(); i++) {
            System.setOut(this.nullout);
            problem.y = targets.getColumn(i).toArray(); // target values
            models.add(i, Linear.train(problem, parameter));
            System.setOut(this.original);
        }

        for (int j = 0; j < problem.l; j++) {
            num = 0;
            den = 0;
            for (int i = 0; i < targets.getNumColumns(); i++) {
                double[] p = new double[models.get(i).getNrClass()];
                Linear.predictProbability(models.get(i), problem.x[j], p);
                if (targets.get(j, i) == 1) {
                    num = p[0];
                    den += p[0];
                } else if (p.length > 1) {
                    den += p[0];
                }
            }
            lik += FastMath.log(num / den);
        }

        return lik;
    }

}
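
A minimal usage sketch, not part of the Tetrad sources: it assumes a mixed continuous/discrete DataSet named data has already been constructed or loaded elsewhere, and the penalized-likelihood combination at the end is only an illustration, not the scoring rule used elsewhere in Tetrad. The MnlrLikelihood constructor and the getLik, getDoF, and getStructurePrior calls are taken from the class above.

package edu.cmu.tetrad.search.work_in_progress;

import edu.cmu.tetrad.data.DataSet;

public class MnlrLikelihoodExample {

    // Illustrative BIC-style local score for one child/parents pair.
    // This helper is hypothetical; the data set is assumed to be loaded elsewhere.
    public static double localScore(DataSet data, int child, int[] parents) {
        // structurePrior = 1; fDegree < 1 lets the class choose the degree from each cell's size.
        MnlrLikelihood likelihood = new MnlrLikelihood(data, 1, -1);

        double lik = likelihood.getLik(child, parents);               // log-likelihood term
        double dof = likelihood.getDoF(child, parents);               // degrees of freedom
        double prior = likelihood.getStructurePrior(parents.length);  // structure prior for k parents

        // Penalized-likelihood combination, shown for illustration only.
        return lik - 0.5 * dof * Math.log(data.getNumRows()) + prior;
    }
}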



