All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jfree.data.statistics.Regression Maven / Gradle / Ivy

/* ===========================================================
 * JFreeChart : a free chart library for the Java(tm) platform
 * ===========================================================
 *
 * (C) Copyright 2000-2009, by Object Refinery Limited and Contributors.
 *
 * Project Info:  http://www.jfree.org/jfreechart/index.html
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
 * USA.
 *
 * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
 * in the United States and other countries.]
 *
 * ---------------
 * Regression.java
 * ---------------
 * (C) Copyright 2002-2009, by Object Refinery Limited.
 *
 * Original Author:  David Gilbert (for Object Refinery Limited);
 * Contributor(s):   Peter Kolb (patch 2795746);
 *
 * Changes
 * -------
 * 30-Sep-2002 : Version 1 (DG);
 * 18-Aug-2003 : Added 'abstract' (DG);
 * 15-Jul-2004 : Switched getX() with getXValue() and getY() with
 *               getYValue() (DG);
 * 29-May-2009 : Added support for polynomial regression, see patch 2795746
 *               by Peter Kolb (DG);
 *
 */

package org.jfree.data.statistics;

import org.jfree.data.xy.XYDataset;

/**
 * A utility class for fitting regression curves to data.
 */
public abstract class Regression {

    /**
     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
     * the data using ordinary least squares regression.  The result is
     * returned as a double[], where result[0] --> a, and result[1] --> b.
     *
     * @param data  the data.
     *
     * @return The parameters.
     */
    public static double[] getOLSRegression(double[][] data) {

        int n = data.length;
        if (n < 2) {
            throw new IllegalArgumentException("Not enough data.");
        }

        double sumX = 0;
        double sumY = 0;
        double sumXX = 0;
        double sumXY = 0;
        for (int i = 0; i < n; i++) {
            double x = data[i][0];
            double y = data[i][1];
            sumX += x;
            sumY += y;
            double xx = x * x;
            sumXX += xx;
            double xy = x * y;
            sumXY += xy;
        }
        double sxx = sumXX - (sumX * sumX) / n;
        double sxy = sumXY - (sumX * sumY) / n;
        double xbar = sumX / n;
        double ybar = sumY / n;

        double[] result = new double[2];
        result[1] = sxy / sxx;
        result[0] = ybar - result[1] * xbar;

        return result;

    }

    /**
     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
     * the data using ordinary least squares regression. The result is returned
     * as a double[], where result[0] --> a, and result[1] --> b.
     *
     * @param data  the data.
     * @param series  the series (zero-based index).
     *
     * @return The parameters.
     */
    public static double[] getOLSRegression(XYDataset data, int series) {

        int n = data.getItemCount(series);
        if (n < 2) {
            throw new IllegalArgumentException("Not enough data.");
        }

        double sumX = 0;
        double sumY = 0;
        double sumXX = 0;
        double sumXY = 0;
        for (int i = 0; i < n; i++) {
            double x = data.getXValue(series, i);
            double y = data.getYValue(series, i);
            sumX += x;
            sumY += y;
            double xx = x * x;
            sumXX += xx;
            double xy = x * y;
            sumXY += xy;
        }
        double sxx = sumXX - (sumX * sumX) / n;
        double sxy = sumXY - (sumX * sumY) / n;
        double xbar = sumX / n;
        double ybar = sumY / n;

        double[] result = new double[2];
        result[1] = sxy / sxx;
        result[0] = ybar - result[1] * xbar;

        return result;

    }

    /**
     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
     * the data using a power regression equation.  The result is returned as
     * an array, where double[0] --> a, and double[1] --> b.
     *
     * @param data  the data.
     *
     * @return The parameters.
     */
    public static double[] getPowerRegression(double[][] data) {

        int n = data.length;
        if (n < 2) {
            throw new IllegalArgumentException("Not enough data.");
        }

        double sumX = 0;
        double sumY = 0;
        double sumXX = 0;
        double sumXY = 0;
        for (int i = 0; i < n; i++) {
            double x = Math.log(data[i][0]);
            double y = Math.log(data[i][1]);
            sumX += x;
            sumY += y;
            double xx = x * x;
            sumXX += xx;
            double xy = x * y;
            sumXY += xy;
        }
        double sxx = sumXX - (sumX * sumX) / n;
        double sxy = sumXY - (sumX * sumY) / n;
        double xbar = sumX / n;
        double ybar = sumY / n;

        double[] result = new double[2];
        result[1] = sxy / sxx;
        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);

        return result;

    }

    /**
     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
     * the data using a power regression equation.  The result is returned as
     * an array, where double[0] --> a, and double[1] --> b.
     *
     * @param data  the data.
     * @param series  the series to fit the regression line against.
     *
     * @return The parameters.
     */
    public static double[] getPowerRegression(XYDataset data, int series) {

        int n = data.getItemCount(series);
        if (n < 2) {
            throw new IllegalArgumentException("Not enough data.");
        }

        double sumX = 0;
        double sumY = 0;
        double sumXX = 0;
        double sumXY = 0;
        for (int i = 0; i < n; i++) {
            double x = Math.log(data.getXValue(series, i));
            double y = Math.log(data.getYValue(series, i));
            sumX += x;
            sumY += y;
            double xx = x * x;
            sumXX += xx;
            double xy = x * y;
            sumXY += xy;
        }
        double sxx = sumXX - (sumX * sumX) / n;
        double sxy = sumXY - (sumX * sumY) / n;
        double xbar = sumX / n;
        double ybar = sumY / n;

        double[] result = new double[2];
        result[1] = sxy / sxx;
        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);

        return result;

    }

    /**
     * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 
     * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n,
     * fitted to the data using a polynomial regression equation.
     * The result is returned as an array with a length of n + 2,
     * where double[0] --> a0, double[1] --> a1, .., double[n] --> an.
     * and double[n + 1] is the correlation coefficient R2
     * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german
     * edition), pp. 243ff and 327ff.
     *
     * @param dataset  the dataset (null not permitted).
     * @param series  the series to fit the regression line against (the series
     *         must have at least order + 1 non-NaN items).
     * @param order  the order of the function (> 0).
     *
     * @return The parameters.
     *
     * @since 1.0.14
     */
    public static double[] getPolynomialRegression(XYDataset dataset, int series, int order) {
        if (dataset == null) {
            throw new IllegalArgumentException("Null 'dataset' argument.");
        }
        int itemCount = dataset.getItemCount(series);
        if (itemCount < order + 1) {
            throw new IllegalArgumentException("Not enough data.");
        }
        int validItems = 0;
        double[][] data = new double[2][itemCount];
        for(int item = 0; item < itemCount; item++){
            double x = dataset.getXValue(series, item);
            double y = dataset.getYValue(series, item);
            if (!Double.isNaN(x) && !Double.isNaN(y)){
                data[0][validItems] = x;
                data[1][validItems] = y;
                validItems++;
            }
        }
        if (validItems < order + 1) {
            throw new IllegalArgumentException("Not enough data.");
        }
        int equations = order + 1;
        int coefficients = order + 2;
        double[] result = new double[equations + 1];
        double[][] matrix = new double[equations][coefficients];
        double sumX = 0.0;
        double sumY = 0.0;

        for(int item = 0; item < validItems; item++){
            sumX += data[0][item];
            sumY += data[1][item];
            for(int eq = 0; eq < equations; eq++){
                for(int coe = 0; coe < coefficients - 1; coe++){
                    matrix[eq][coe] += Math.pow(data[0][item],eq + coe);
                }
                matrix[eq][coefficients - 1] += data[1][item]
                        * Math.pow(data[0][item],eq);
            }
        }
        double[][] subMatrix = calculateSubMatrix(matrix);
        for (int eq = 1; eq < equations; eq++) {
            matrix[eq][0] = 0;
            for (int coe = 1; coe < coefficients; coe++) {
                matrix[eq][coe] = subMatrix[eq - 1][coe - 1];
            }
        }
        for (int eq = equations - 1; eq > -1; eq--) {
            double value = matrix[eq][coefficients - 1];
            for (int coe = eq; coe < coefficients -1; coe++) {
                value -= matrix[eq][coe] * result[coe];
            }
            result[eq] = value / matrix[eq][eq];
        }
        double meanY = sumY / validItems;
        double yObsSquare = 0.0;
        double yRegSquare = 0.0;
        for (int item = 0; item < validItems; item++) {
            double yCalc = 0;
            for (int eq = 0; eq < equations; eq++) {
                yCalc += result[eq] * Math.pow(data[0][item],eq);
            }
            yRegSquare += Math.pow(yCalc - meanY, 2);
            yObsSquare += Math.pow(data[1][item] - meanY, 2);
        }
        double rSquare = yRegSquare / yObsSquare;
        result[equations] = rSquare;
        return result;
    }

    /**
     * Returns a matrix with the following features: (1) the number of rows
     * and columns is 1 less than that of the original matrix; (2)the matrix
     * is triangular, i.e. all elements a (row, column) with column > row are
     * zero.  This method is used for calculating a polynomial regression.
     * 
     * @param matrix  the start matrix.
     *
     * @return The new matrix.
     */
    private static double[][] calculateSubMatrix(double[][] matrix){
        int equations = matrix.length;
        int coefficients = matrix[0].length;
        double[][] result = new double[equations - 1][coefficients - 1];
        for (int eq = 1; eq < equations; eq++) {
            double factor = matrix[0][0] / matrix[eq][0];
            for (int coe = 1; coe < coefficients; coe++) {
                result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe]
                        * factor;
            }
        }
        if (equations == 1) {
            return result;
        }
        // check for zero pivot element
        if (result[0][0] == 0) {
            boolean found = false;
            for (int i = 0; i < result.length; i ++) {
                if (result[i][0] != 0) {
                    found = true;
                    double[] temp = result[0];
                    for (int j = 0; j < result[i].length; j++) {
                        result[0][j] = result[i][j];
                    }
                    for (int j = 0; j < temp.length; j++) {
                        result[i][j] = temp[j];
                    }
                    break;
                }
            }
            if (!found) {
                System.out.println("Equation has no solution!");
                return new double[equations - 1][coefficients - 1];
            }
        }
        double[][] subMatrix = calculateSubMatrix(result);
        for (int eq = 1; eq < equations -  1; eq++) {
            result[eq][0] = 0;
            for (int coe = 1; coe < coefficients - 1; coe++) {
                result[eq][coe] = subMatrix[eq - 1][coe - 1];
            }
        }
        return result;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy