All downloads are free. The search and download functionality uses the official Maven repository.

edu.cmu.tetradapp.editor.DescriptiveStats Maven / Gradle / Ivy

The newest version!
///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below.       //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,       //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard        //
// Scheines, Joseph Ramsey, and Clark Glymour.                               //
//                                                                           //
// This program is free software; you can redistribute it and/or modify      //
// it under the terms of the GNU General Public License as published by      //
// the Free Software Foundation; either version 2 of the License, or         //
// (at your option) any later version.                                       //
//                                                                           //
// This program is distributed in the hope that it will be useful,           //
// but WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             //
// GNU General Public License for more details.                              //
//                                                                           //
// You should have received a copy of the GNU General Public License         //
// along with this program; if not, write to the Free Software               //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA //
///////////////////////////////////////////////////////////////////////////////

package edu.cmu.tetradapp.editor;

import edu.cmu.tetrad.data.*;
import edu.cmu.tetrad.graph.Node;
import edu.cmu.tetrad.util.NumberFormatUtil;
import edu.cmu.tetrad.util.StatUtils;
import edu.cmu.tetrad.util.TextTable;
import org.apache.commons.math3.util.FastMath;

import java.text.NumberFormat;
import java.util.Arrays;
import java.util.List;

/**
 * Contains some descriptive stats.
 *
 * @author Michael Freenor
 */
class DescriptiveStats {

    /**
     * Constructs a readable table of normality test results
     *
     * @param dataSet               a {@link edu.cmu.tetrad.data.DataSet} object
     * @param variable              a {@link edu.cmu.tetrad.graph.Node} object
     * @param precomputeCovariances a boolean
     * @return a {@link java.lang.String} object
     */
    public static String generateDescriptiveStats(DataSet dataSet, Node variable,
                                                  boolean precomputeCovariances) {
        NumberFormat nf = NumberFormatUtil.getInstance().getNumberFormat();

        int col = dataSet.getColumn(variable);

        double[] data = new double[dataSet.getNumRows()];
        boolean continuous = false;

        if (variable instanceof ContinuousVariable) {
            continuous = true;

            for (int i = 0; i < dataSet.getNumRows(); i++) {
                data[i] = dataSet.getDouble(i, col);
            }
        } else {
            try {
                for (int i = 0; i < dataSet.getNumRows(); i++) {
                    DiscreteVariable var = (DiscreteVariable) variable;
                    String category = var.getCategory(dataSet.getInt(i, col));
                    int value = Integer.parseInt(category);
                    data[i] = value;
                }
            } catch (NumberFormatException e) {
                return "Not a numerical discrete column.";
            }
        }

        int numVars = dataSet.getNumRows();

        StringBuilder b = new StringBuilder();

        b.append("Descriptive Statistics for: ").append(variable.getName()).append("\n\n");

        double[] normalValues = DescriptiveStats.normalParams(data);
        TextTable table;

        int numRows = continuous ? 13 : 9;
        table = new TextTable(numVars, numRows);

        int rowindex = 0;

        table.setToken(rowindex, 0, "Sample Size:");
        table.setToken(rowindex++, 1, String.valueOf(dataSet.getNumRows()));

        table.setToken(rowindex, 0, "Mean:");
        table.setToken(rowindex++, 1, nf.format(normalValues[0]));

        table.setToken(rowindex, 0, "Standard Deviation:");
        table.setToken(rowindex++, 1, nf.format(normalValues[1]));

        table.setToken(rowindex, 0, "Variance:");
        table.setToken(rowindex++, 1, nf.format(normalValues[2]));

        table.setToken(rowindex, 0, "Skewness:");
        table.setToken(rowindex++, 1, nf.format(StatUtils.skewness(data)));

        table.setToken(rowindex, 0, "Kurtosis:");
        table.setToken(rowindex++, 1, nf.format(StatUtils.kurtosis(data)));

        if (continuous) {
            double[] median = DescriptiveStats.median(data);

            table.setToken(rowindex, 0, "SE Mean:");
            table.setToken(rowindex++, 1, nf.format(DescriptiveStats.standardErrorMean(normalValues[1], dataSet.getNumRows())));

            table.setToken(rowindex, 0, "Median:");
            table.setToken(rowindex++, 1, nf.format(median[0]));

            table.setToken(rowindex, 0, "Minimum:");
            table.setToken(rowindex++, 1, nf.format(median[1]));

            table.setToken(rowindex, 0, "Maximum:");
            table.setToken(rowindex++, 1, nf.format(median[2]));
        }

        table.setToken(rowindex, 0, "Constant Columns:");
        List constantColumns = DataTransforms.getConstantColumns(dataSet);
        table.setToken(rowindex++, 1, constantColumns.isEmpty() ? "None" : constantColumns.toString());

        table.setToken(rowindex, 0, "Example Nonsingular (2 - 3 vars):");

//        CovarianceMatrix covarianceMatrix = new CovarianceMatrix(dataSet);
        ICovarianceMatrix covarianceMatrix = SimpleDataLoader.getCovarianceMatrix(dataSet, precomputeCovariances);
        List exampleNonsingular = DataUtils.getExampleNonsingular(covarianceMatrix, 3);
        table.setToken(rowindex, 1, exampleNonsingular == null ? "None" : exampleNonsingular.toString());

        b.append(table);

        return b.toString();
    }

    /*
        Returns the median in index 0, but also returns the min and max in 1 and 2 respectively.
     */

    /**
     * 

median.

* * @param data an array of objects * @return an array of objects */ public static double[] median(double[] data) { Arrays.sort(data); double[] result = new double[3]; result[1] = data[0]; result[2] = data[data.length - 1]; if (data.length % 2 == 1) //dataset is odd, finding middle value is easy { result[0] = data[data.length / 2]; } else { //average the two middle values double firstValue = data[data.length / 2]; double secondValue = data[data.length / 2 - 1]; result[0] = (firstValue + secondValue) / 2; } return result; } /** *

standardErrorMean.

* * @param stdDev a double * @param sampleSize a double * @return a double */ public static double standardErrorMean(double stdDev, double sampleSize) { return stdDev / (FastMath.sqrt(sampleSize)); } /** * Given some variable, returns the mean, standard deviation, and variance. * * @param data an array of objects * @return [0] -> mean, [1] -> standard deviation, [2] -> variance */ public static double[] normalParams(double[] data) { double mean = 0.0; double sd = 0.0; //calculate the mean for (double datum : data) { mean += datum; } mean /= data.length; //calculate the standard deviation for (double datum : data) { sd += (datum - mean) * (datum - mean); } sd /= data.length - 1.0; double[] result = new double[3]; result[2] = sd; //this is still the variance at this point sd = FastMath.sqrt(sd); result[0] = mean; result[1] = sd; return result; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy