All downloads are free. Search and download functionality uses the official Maven repository.

edu.cmu.tetradapp.editor.QQPlot Maven / Gradle / Ivy

The newest version!
///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below.       //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,       //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard        //
// Scheines, Joseph Ramsey, and Clark Glymour.                               //
//                                                                           //
// This program is free software; you can redistribute it and/or modify      //
// it under the terms of the GNU General Public License as published by      //
// the Free Software Foundation; either version 2 of the License, or         //
// (at your option) any later version.                                       //
//                                                                           //
// This program is distributed in the hope that it will be useful,           //
// but WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             //
// GNU General Public License for more details.                              //
//                                                                           //
// You should have received a copy of the GNU General Public License         //
// along with this program; if not, write to the Free Software               //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA //
///////////////////////////////////////////////////////////////////////////////

package edu.cmu.tetradapp.editor;

import cern.jet.random.engine.MersenneTwister;
import edu.cmu.tetrad.data.ContinuousVariable;
import edu.cmu.tetrad.data.DataSet;
import edu.cmu.tetrad.graph.Node;
import org.apache.commons.math3.util.FastMath;

import javax.swing.*;

/**
 * Immutable object that wraps a dataset and gives a q-q plot.
 *
 * @author Michael Freenor
 */
class QQPlot {


    /**
     * The complete data set
     */
    private final DataSet dataSet;

    /**
     * The variable that we are showing a q-q plot for.
     */
    private ContinuousVariable selectedVariable;

    /**
     * The variable that we store the comparison variable in
     */
    private double[] comparisonVariable;

    /**
     * The min value in the comparison distribution
     */

    private double minComparison;

    /**
     * The max value in the comparison distribution
     */

    private double maxComparison;

    /**
     * The min value in the sample
     */
    private double minData;


    /**
     * The max value in the sample
     */
    private double maxData;

    /**
     * Constructs the histogram given the dataset to wrap and the node that should be viewed.
     *
     * @param dataSet      a {@link edu.cmu.tetrad.data.DataSet} object
     * @param selectedNode a {@link edu.cmu.tetrad.graph.Node} object
     */
    public QQPlot(DataSet dataSet, Node selectedNode) {

        if (dataSet == null) {
            throw new NullPointerException("the given dataset must not be null");
        }
        if (dataSet.getNumColumns() == 0) {
            throw new IllegalArgumentException("The given dataset should not be empty");
        }

        for (int i = 0; i < dataSet.getNumColumns(); i++) {
            if (dataSet.getVariable(i) instanceof ContinuousVariable) {
                break;
            }
            if (i == dataSet.getNumColumns() - 1) {
                JOptionPane.showMessageDialog(new JFrame(), "You must have at least one continuous variable to construct a q-q plot!");
                throw new IllegalArgumentException("You must have at least one continuous variable to construct a q-q plot!");
            }
        }

        this.dataSet = dataSet.copy();
        if (selectedNode == null && dataSet.getNumColumns() != 0) {
            int[] selected = dataSet.getSelectedIndices();
            assert selected != null;
        }

        try {
            this.selectedVariable = (ContinuousVariable) selectedNode;
        } catch (Exception e) {
            JOptionPane.showMessageDialog(new JFrame(), "You cannot construct a q-q plot for a discrete variable!");
            throw new IllegalArgumentException("Only attempt to construct a q-q plot on a continuous variable!");
        }

        buildQQPlotData(this.selectedVariable);

    }

    //==================================== Public Methods ====================================//

    /**
     * @param quantile  Desired quantile you wish to find
     * @param low       The minimum of your dataset
     * @param high      The maximum of your dataset
     * @param n         Your normal distribution you wish to search among
     * @param precision The desired precision of your search (in quantiles)
     * @param count     Feed this zero -- ensures the stack doesn't fill up
     * @param searchCap Desired maximum number of searches -- too high and the stack might overflow!
     * @return an estimation of the point in a Normal distribution at a specific quantile.
     */
    private static double findQuantile(double quantile, double low, double high, cern.jet.random.Normal n, double precision, int count, int searchCap) {
        //System.out.println("Low: " + low + "High: " + high);
        double mid = low + ((high - low) / 2.);
        //System.out.println("Mid: " + mid);
        double cdfResult = n.cdf(mid);
        //System.out.println("CDF: " + cdfResult + " Abs value of difference: " + FastMath.abs(cdfResult - quantile) + " Count: " + count);
        if (
                FastMath.abs(cdfResult - quantile) < precision || count > searchCap) {
            //System.out.println("Found result: " + mid);
            return mid;
        } else {
            if (cdfResult > quantile) {
                //System.out.println("Searching lesser");
                return QQPlot.findQuantile(quantile, low, mid - precision, n, precision, count + 1, searchCap);
            } else {
                //System.out.println("Searching greater");
                return QQPlot.findQuantile(quantile, mid + precision, high, n, precision, count + 1, searchCap);
            }
        }
    }

    /**
     * 

getMaxSample.

* * @return the max sample value. */ public double getMaxSample() { return this.maxData; } /** *

getMinSample.

* * @return the min sample value. */ public double getMinSample() { return this.minData; } /** *

getMinIdeal.

* * @return the min comparison value. */ public double getMinIdeal() { return this.minComparison; } /** *

Getter for the field selectedVariable.

* * @return the node that has been selected. */ public Node getSelectedVariable() { return this.selectedVariable; } /** *

Setter for the field selectedVariable.

* * @param c a {@link edu.cmu.tetrad.data.ContinuousVariable} object */ public void setSelectedVariable(ContinuousVariable c) { this.selectedVariable = c; } /** *

Getter for the field comparisonVariable.

* * @return an array of objects */ public double[] getComparisonVariable() { return this.comparisonVariable; } //============================ Private Methods =======================// /** *

Getter for the field dataSet.

* * @return a {@link edu.cmu.tetrad.data.DataSet} object */ public DataSet getDataSet() { return this.dataSet; } /** * Calculates the ideal quantiles values for the provided dataset. * * @param n Normal distribution generated from the dataset. * @param data Dataset that n is generated from, and whose normality is in question. */ private void calculateComparisonSet(cern.jet.random.Normal n, DataSet data) { this.comparisonVariable = new double[data.getNumRows()]; for (int i = 0; i < data.getNumRows(); i++) { double valueAtQuantile = QQPlot.findQuantile((i + 1) / (data.getNumRows() + 1.0), this.minData, this.maxData, n, .0001, 0, 50); this.comparisonVariable[i] = valueAtQuantile; if (valueAtQuantile < this.minComparison) { this.minComparison = valueAtQuantile; } if (valueAtQuantile > this.maxComparison) { this.maxComparison = valueAtQuantile; } } } /** * Builds the q-q data if required, otherwise does nothing */ private void buildQQPlotData(Node selectedNode) { int columnIndex = this.dataSet.getColumn(selectedNode); double mean = 0.0; double sd = 0.0; this.minData = 10000000000000.0; this.maxData = 0.0; this.minComparison = 1000000000000.0; this.maxComparison = 0.0; //the only case in which this should be -1 is if there's a continuous variable, but it's incomplete if (columnIndex == -1) { for (int i = 0; i < this.dataSet.getNumColumns(); i++) { //set selected variable if there is none if (this.dataSet.getVariable(i) instanceof ContinuousVariable) { this.selectedVariable = (ContinuousVariable) this.dataSet.getVariable(i); columnIndex = i; break; } } if (columnIndex == -1) { JOptionPane.showMessageDialog(new JFrame(), "You need at least one complete continuous variable for a q-q plot!"); throw new IllegalArgumentException("You need at least one complete continuous variable for a q-q plot!"); } } for (int i = 0; i < this.dataSet.getNumRows(); i++) { double value = this.dataSet.getDouble(i, columnIndex); if (Double.isNaN(value) || value == Double.NEGATIVE_INFINITY || 
value == Double.POSITIVE_INFINITY) { continue; } mean += value; if (value < this.minData) this.minData = value; if (value > this.maxData) this.maxData = value; } //sort the dataset for (int i = 0; i < this.dataSet.getNumRows(); i++) { for (int k = i; k < this.dataSet.getNumRows(); k++) { double value1 = this.dataSet.getDouble(i, columnIndex); double value2 = this.dataSet.getDouble(k, columnIndex); if (Double.isNaN(value1) || value1 == Double.NEGATIVE_INFINITY || value1 == Double.POSITIVE_INFINITY) { continue; } if (Double.isNaN(value2) || value2 == Double.NEGATIVE_INFINITY || value2 == Double.POSITIVE_INFINITY) { continue; } if (value1 > value2) { double temp = this.dataSet.getDouble(i, columnIndex); this.dataSet.setDouble(i, columnIndex, value2); this.dataSet.setDouble(k, columnIndex, temp); } } } if (mean == 0.0) mean = 1.0; else mean /= this.dataSet.getNumRows(); for (int i = 0; i < this.dataSet.getNumRows(); i++) { double value1 = this.dataSet.getDouble(i, columnIndex); double value2 = this.dataSet.getDouble(i, columnIndex); if (Double.isNaN(value1) || value1 == Double.NEGATIVE_INFINITY || value1 == Double.POSITIVE_INFINITY) { continue; } if (Double.isNaN(value2) || value2 == Double.NEGATIVE_INFINITY || value2 == Double.POSITIVE_INFINITY) { continue; } sd += (value1 - mean) * (value2 - mean); } if (sd == 0.0) { sd = 1.0; } else { sd /= this.dataSet.getNumRows() - 1.0; sd = FastMath.sqrt(sd); } //System.out.println("Mean: " + mean + " SD: " + sd + " Min: " + this.minData + " Max: " + this.maxData); cern.jet.random.Normal comparison = new cern.jet.random.Normal(mean, sd, new MersenneTwister()); calculateComparisonSet(comparison, this.dataSet); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy