///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below. //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, //
// 2007, 2008, 2009, 2010, 2014, 2015 by Peter Spirtes, Richard Scheines, //
// Joseph Ramsey, and Clark Glymour. //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation; either version 2 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with this program; if not, write to the Free Software //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //
///////////////////////////////////////////////////////////////////////////////
package edu.cmu.tetrad.search.work_in_progress;
import de.bwaldvogel.liblinear.*;
import edu.cmu.tetrad.data.ContinuousVariable;
import edu.cmu.tetrad.data.DataSet;
import edu.cmu.tetrad.data.DiscreteVariable;
import edu.cmu.tetrad.graph.Node;
import edu.cmu.tetrad.search.utils.AdLeafTree;
import edu.cmu.tetrad.util.Matrix;
import edu.cmu.tetrad.util.Vector;
import org.apache.commons.math3.util.FastMath;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Calculates the Mixed Variables Polynomial likelihood.
* <p>
* As for all scores in Tetrad, higher scores mean more dependence, and negative
* scores indicate independence.
*
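* <p>
* A minimal usage sketch, assuming {@code dataSet} is an existing mixed
* continuous/discrete {@code DataSet} (variables are addressed by column
* index):
* <pre>{@code
* MnlrLikelihood likelihood = new MnlrLikelihood(dataSet, 1.0, -1);
* int child = 0;          // column index of the child variable
* int[] parents = {1, 2}; // column indices of its parents
* double lik = likelihood.getLik(child, parents);
* double dof = likelihood.getDoF(child, parents);
* }</pre>
*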
* @author Bryan Andrews
*/
public class MnlrLikelihood {
private final DataSet dataSet;
// The variables of the data set.
private final List<Node> variables;
// Indices of variables.
private final Map<Node, Integer> nodesHash;
// Continuous data only.
private final double[][] continuousData;
// Discrete data only.
private final int[][] discreteData;
// Partitions of the rows by discrete parent configuration.
private final AdLeafTree adTree;
// Fixed polynomial degree; values < 1 select the degree automatically.
private final int fDegree;
// Structure prior.
private final double structurePrior;
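// Streams used to silence liblinear's console output during training.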
private final PrintStream original = System.out;
private final PrintStream nullout = new PrintStream(new OutputStream() {
@Override
public void write(int b) {
// Do nothing; discard all output.
}
});
/**
* Constructor.
*
* @param dataSet The dataset to analyze.
* @param structurePrior The structure prior.
* @param fDegree The fixed polynomial degree; values less than 1 select the degree automatically.
*/
public MnlrLikelihood(DataSet dataSet, double structurePrior, int fDegree) {
if (dataSet == null) {
throw new NullPointerException();
}
this.dataSet = dataSet;
this.variables = dataSet.getVariables();
this.structurePrior = structurePrior;
this.fDegree = fDegree;
this.continuousData = new double[dataSet.getNumColumns()][];
this.discreteData = new int[dataSet.getNumColumns()][];
for (int j = 0; j < dataSet.getNumColumns(); j++) {
Node v = dataSet.getVariable(j);
if (v instanceof ContinuousVariable) {
double[] col = new double[dataSet.getNumRows()];
for (int i = 0; i < dataSet.getNumRows(); i++) {
col[i] = dataSet.getDouble(i, j);
}
this.continuousData[j] = col;
} else if (v instanceof DiscreteVariable) {
int[] col = new int[dataSet.getNumRows()];
for (int i = 0; i < dataSet.getNumRows(); i++) {
col[i] = dataSet.getInt(i, j);
}
this.discreteData[j] = col;
}
}
this.nodesHash = new HashMap<>();
for (int j = 0; j < dataSet.getNumColumns(); j++) {
Node v = dataSet.getVariable(j);
this.nodesHash.put(v, j);
}
this.adTree = new AdLeafTree(dataSet);
}
/**
* Returns the likelihood of a child given its parents.
*
* @param child_index The index of the child.
* @param parents The indices of the parents.
* @return The likelihood.
*/
public double getLik(int child_index, int[] parents) {
double lik = 0;
Node c = this.variables.get(child_index);
List<ContinuousVariable> continuous_parents = new ArrayList<>();
List<DiscreteVariable> discrete_parents = new ArrayList<>();
for (int p : parents) {
Node parent = this.variables.get(p);
if (parent instanceof ContinuousVariable) {
continuous_parents.add((ContinuousVariable) parent);
} else {
discrete_parents.add((DiscreteVariable) parent);
}
}
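// Rows are stratified by the joint configuration of the discrete parents;
// the likelihood is computed within each stratum (cell) and summed.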
int p = continuous_parents.size();
List<List<Integer>> cells = this.adTree.getCellLeaves(discrete_parents);
int[] continuousCols = new int[p];
for (int j = 0; j < p; j++) continuousCols[j] = this.nodesHash.get(continuous_parents.get(j));
for (List<Integer> cell : cells) {
int r = cell.size();
if (r > 1) {
double[] mean = new double[p];
double[] var = new double[p];
for (int i = 0; i < p; i++) {
for (Integer integer : cell) {
mean[i] += this.continuousData[continuousCols[i]][integer];
var[i] += FastMath.pow(this.continuousData[continuousCols[i]][integer], 2);
}
mean[i] /= r;
var[i] /= r;
var[i] -= FastMath.pow(mean[i], 2);
var[i] = FastMath.sqrt(var[i]);
// A NaN standard deviation signals a degenerate cell; left as a
// diagnostic print in this work-in-progress code.
if (Double.isNaN(var[i])) {
System.out.println(var[i]);
}
}
int degree = this.fDegree;
if (this.fDegree < 1) {
degree = (int) FastMath.floor(FastMath.log(r));
}
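// Design matrix: one row per sample in the cell. For each continuous
// parent j, columns hold the standardized value z = (x - mean) / sd raised
// to the powers 1..degree; the final column is a constant 1 (intercept).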
Matrix subset = new Matrix(r, p * degree + 1);
for (int i = 0; i < r; i++) {
subset.set(i, p * degree, 1);
for (int j = 0; j < p; j++) {
for (int d = 0; d < degree; d++) {
subset.set(i, p * d + j, FastMath.pow((this.continuousData[continuousCols[j]][cell.get(i)] - mean[j]) / var[j], d + 1));
}
}
}
if (c instanceof ContinuousVariable) {
Vector target = new Vector(r);
for (int i = 0; i < r; i++) {
target.set(i, this.continuousData[child_index][cell.get(i)]);
}
lik += multipleRegression(target, subset);
} else {
Matrix target = new Matrix(r, ((DiscreteVariable) c).getNumCategories());
for (int i = 0; i < r; i++) {
for (int j = 0; j < ((DiscreteVariable) c).getNumCategories(); j++) {
target.set(i, j, -1);
}
target.set(i, this.discreteData[child_index][cell.get(i)], 1);
}
lik += multinomialLogisticRegression(target, subset);
}
}
}
return lik;
}
/**
* Returns the degrees of freedom of a child given its parents.
*
* @param child_index The index of the child.
* @param parents The indices of the parents.
* @return The degrees of freedom.
*/
public double getDoF(int child_index, int[] parents) {
double dof = 0;
Node c = this.variables.get(child_index);
List<ContinuousVariable> continuous_parents = new ArrayList<>();
List<DiscreteVariable> discrete_parents = new ArrayList<>();
for (int p : parents) {
Node parent = this.variables.get(p);
if (parent instanceof ContinuousVariable) {
continuous_parents.add((ContinuousVariable) parent);
} else {
discrete_parents.add((DiscreteVariable) parent);
}
}
List<List<Integer>> cells = this.adTree.getCellLeaves(discrete_parents);
for (List<Integer> cell : cells) {
int r = cell.size();
if (r > 0) {
int degree = this.fDegree;
if (this.fDegree < 1) {
degree = (int) FastMath.floor(FastMath.log(r));
}
if (c instanceof ContinuousVariable) {
dof += degree * continuous_parents.size() + 1;
} else {
dof += ((degree * continuous_parents.size()) + 1) * (((DiscreteVariable) c).getNumCategories() - 1);
}
}
}
return dof;
}
/**
* Returns the structure prior for k parents. With a positive structurePrior,
* this is a binomial prior in which each of the n = numColumns - 1 candidate
* parents is included with probability p = structurePrior / n, giving
* k * log(p) + (n - k) * log(1 - p); a negative structurePrior selects the
* EBIC prior instead.
*
* @param k The number of parents.
* @return The log structure prior.
*/
public double getStructurePrior(int k) {
if (this.structurePrior < 0) {
return getEBICprior();
}
if (this.structurePrior == 0) {
return 0;
}
double n = this.dataSet.getNumColumns() - 1;
double p = this.structurePrior / n;
return k * FastMath.log(p) + (n - k) * FastMath.log(1 - p);
}
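/**
* Returns the EBIC prior term, gamma * log(n), where gamma = -structurePrior
* and n is the number of variables; used when the structure prior is negative.
*
* @return The EBIC prior.
*/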
public double getEBICprior() {
double n = this.dataSet.getNumColumns();
double gamma = -this.structurePrior;
return gamma * FastMath.log(n);
}
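// Gaussian log-likelihood of the least-squares fit of Y on X: with residual
// variance sigma^2 = r'r / n, the maximized log-likelihood is
// -(n / 2) * (log(2 * pi) + log(sigma^2) + 1). If X'X is singular, the fit
// falls back to the intercept-only model.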
private double multipleRegression(Vector Y, Matrix X) {
int n = X.getNumRows();
Vector r;
try {
Matrix Xt = X.transpose();
Matrix XtX = Xt.times(X);
r = X.times(XtX.inverse().times(Xt.times(Y))).minus(Y);
} catch (Exception e) {
Vector ones = new Vector(n);
for (int i = 0; i < n; i++) ones.set(i, 1);
r = ones.scalarMult(ones.dotProduct(Y) / (double) n).minus(Y);
}
double sigma2 = r.dotProduct(r) / n;
if (sigma2 <= 0) {
// Non-positive residual variance (e.g., a perfect fit); fall back to the
// intercept-only residuals so the log-likelihood stays finite.
Vector ones = new Vector(n);
for (int i = 0; i < n; i++) ones.set(i, 1);
r = ones.scalarMult(ones.dotProduct(Y) / (double) FastMath.max(n, 2)).minus(Y);
sigma2 = r.dotProduct(r) / n;
}
double lik = -(n / 2.) * (FastMath.log(2 * FastMath.PI) + FastMath.log(sigma2) + 1);
// A non-finite likelihood signals a degenerate fit; left as a diagnostic
// print in this work-in-progress code.
if (Double.isInfinite(lik) || Double.isNaN(lik)) {
System.out.println(lik);
}
return lik;
}
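// Multinomial logistic likelihood via liblinear: one L2-regularized binary
// logistic model is trained per category (one-vs-rest), and each row's
// predicted probability for its observed category is normalized by the
// total probability mass the models assign before taking logs.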
private double multinomialLogisticRegression(Matrix targets, Matrix subset) {
Problem problem = new Problem();
problem.l = targets.getNumRows(); // number of training examples
problem.n = subset.getNumColumns(); // number of features
problem.x = new FeatureNode[problem.l][problem.n]; // feature nodes
problem.bias = 0;
for (int i = 0; i < problem.l; i++) {
for (int j = 0; j < problem.n; j++) {
problem.x[i][j] = new FeatureNode(j + 1, subset.get(i, j));
}
}
final SolverType solver = SolverType.L2R_LR; // -s 0
final double C = 1.0; // cost of constraints violation
final double eps = 1e-4; // stopping criteria
Parameter parameter = new Parameter(solver, C, eps);
ArrayList<Model> models = new ArrayList<>();
double lik = 0;
double num;
double den;
for (int i = 0; i < targets.getNumColumns(); i++) {
// Temporarily redirect stdout to suppress liblinear's training output.
System.setOut(this.nullout);
problem.y = targets.getColumn(i).toArray(); // target values
models.add(i, Linear.train(problem, parameter));
System.setOut(this.original);
}
for (int j = 0; j < problem.l; j++) {
num = 0;
den = 0;
for (int i = 0; i < targets.getNumColumns(); i++) {
double[] p = new double[models.get(i).getNrClass()];
Linear.predictProbability(models.get(i), problem.x[j], p);
if (targets.get(j, i) == 1) {
num = p[0];
den += p[0];
} else if (p.length > 1) {
den += p[0];
}
}
lik += FastMath.log(num / den);
}
return lik;
}
}