All downloads are free. The search and download functionalities use the official Maven repository.

smile.timeseries.AR Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile. If not, see <https://www.gnu.org/licenses/>.
 */

package smile.timeseries;

import java.io.Serializable;
import java.util.Arrays;
import smile.math.MathEx;
import smile.math.matrix.Matrix;
import smile.math.special.Beta;
import smile.stat.Hypothesis;

/**
 * Autoregressive model. The autoregressive model specifies that the output
 * variable depends linearly on its own previous values and on a stochastic
 * term; thus the model is in the form of a stochastic difference equation.
 * Together with the moving-average (MA) model, it is a special case and
 * key component of the more general autoregressive&ndash;moving-average (ARMA)
 * and autoregressive integrated moving average (ARIMA) models of time
 * series, which have a more complicated stochastic structure; it is also
 * a special case of the vector autoregressive model (VAR), which consists
 * of a system of more than one interlocking stochastic difference equation
 * in more than one evolving random variable.
 * <p>
 * Contrary to the moving-average (MA) model, the autoregressive model
 * is not always stationary as it may contain a unit root.
 * <p>
 * The notation AR(p) indicates an autoregressive model of order p.
 * For an AR(p) model to be weakly stationary, the inverses of the roots
 * of the characteristic polynomial must be less than 1 in modulus.
 * <p>
 * Two general approaches are available for determining the order p.
 * The first approach is to use the partial autocorrelation function,
 * and the second approach is to use some information criteria.
 * <p>
 * Autoregression is a good start point for more complicated models.
 * They often fit quite well (don't need the MA terms). And the fitting
 * process is fast (MLEs require some iterations). In applications,
 * easily fitting autoregressions is important for obtaining initial
 * values of parameters and in getting estimates of the error process.
 * Least squares is a popular choice, as is the Yule-Walker procedure.
 * Unlike the Yule-Walker procedure, least squares can produce a
 * non-stationary fitted model. Both the Yule-Walker and least squares
 * estimators are non-iterative and consistent, so they can be used as
 * starting values for iterative methods like MLE.
 *
 * @author Haifeng Li
 */
public class AR implements Serializable {
    private static final long serialVersionUID = 2L;

    /** The fitting method. */
    public enum Method {
        /** Yule-Walker method. */
        YuleWalker {
            @Override
            public String toString() {
                return "Yule-Walker";
            }
        },
        /** Ordinary least squares. */
        OLS,
    }

    /**
     * The time series.
     */
    private final double[] x;
    /**
     * The mean of time series.
     */
    private final double mean;
    /**
     * The fitting method.
     */
    private final Method method;
    /**
     * The order.
     */
    private final int p;
    /**
     * The intercept.
     */
    private final double b;
    /**
     * The linear weights of AR.
     */
    private final double[] ar;
    /**
     * The coefficients, their standard errors, t-scores, and p-values.
     * Only computed by {@link #ols(double[], int, boolean)} with
     * {@code stderr = true}; otherwise null.
     */
    private double[][] ttest;
    /**
     * The fitted values.
     */
    private final double[] fittedValues;
    /**
     * The residuals, that is response minus fitted values.
     */
    private final double[] residuals;
    /**
     * Residual sum of squares.
     */
    private double RSS;
    /**
     * Estimated variance.
     */
    private double variance;
    /**
     * The degree-of-freedom of residual variance.
     */
    private final int df;
    /**
     * R2. R2 is a statistic that will give some information about the
     * goodness of fit of a model. In regression, the R2 coefficient of
     * determination is a statistical measure of how well the regression
     * line approximates the real data points. An R2 of 1.0 indicates that
     * the regression line perfectly fits the data.
     * <p>
     * In the case of ordinary least-squares regression, R2 increases as
     * we increase the number of variables in the model (R2 will not
     * decrease). This illustrates a drawback to one possible use of R2,
     * where one might try to include more variables in the model until
     * "there is no more improvement". This leads to the alternative
     * approach of looking at the adjusted R2.
     */
    private final double R2;
    /**
     * Adjusted R2. The adjusted R2 has almost same explanation as R2
     * but it penalizes the statistic as extra variables are included
     * in the model.
     */
    private final double adjustedR2;

    /**
     * Constructor. Computes the fitted values, residuals, RSS, residual
     * variance, R2 and adjusted R2 from the given coefficients.
     *
     * @param x the time series
     * @param ar the estimated weight parameters of AR(p).
     * @param b the intercept.
     * @param method the fitting method.
     */
    public AR(double[] x, double[] ar, double b, Method method) {
        this.x = x;
        this.p = ar.length;
        this.ar = ar;
        this.b = b;
        this.method = method;
        this.mean = MathEx.mean(x);

        // The response vector: x[p], ..., x[n-1]. The first p observations
        // have no full history and cannot be fitted.
        double[] y = y(x, p);
        double ybar = MathEx.mean(y);

        int n = y.length;
        double TSS = 0.0;
        RSS = 0.0;
        fittedValues = new double[n];
        residuals = new double[n];
        for (int i = 0; i < n; i++) {
            // One-step-ahead in-sample prediction of x[p+i] from the
            // p preceding observations.
            double yi = forecast(x, p+i);
            fittedValues[i] = yi;
            double residual = y[i] - yi;
            residuals[i] = residual;
            RSS += MathEx.pow2(residual);
            TSS += MathEx.pow2(y[i] - ybar);
        }

        df = x.length - p;
        variance = RSS / df;
        R2 = 1.0 - RSS / TSS;
        adjustedR2 = 1.0 - ((1 - R2) * (n-1) / (n-p));
    }

    /**
     * Returns the least squares design matrix. Row i holds the p lagged
     * values x[i+p-1], ..., x[i] (most recent lag first) and a trailing
     * constant 1.0 column for the intercept.
     */
    private static Matrix X(double[] x, int p) {
        int n = x.length - p;
        Matrix X = new Matrix(n, p+1);
        for (int j = 0; j < p; j++) {
            for (int i = 0; i < n; i++) {
                X.set(i, j, x[i+p-j-1]);
            }
        }

        // Intercept column.
        for (int i = 0; i < n; i++) {
            X.set(i, p, 1.0);
        }

        return X;
    }

    /** Returns the right-hand-side of least squares, i.e. x[p..n-1]. */
    private static double[] y(double[] x, int p) {
        return Arrays.copyOfRange(x, p, x.length);
    }

    /**
     * Returns the time series.
     * @return the time series.
     */
    public double[] x() {
        return x;
    }

    /**
     * Returns the mean of time series.
     * @return the mean of time series.
     */
    public double mean() {
        return mean;
    }

    /**
     * Returns the order of AR.
     * @return the order of AR.
     */
    public int p() {
        return p;
    }

    /**
     * Returns the t-test of the coefficients (without intercept).
     * The first column is the coefficients, the second column is the standard
     * error of coefficients, the third column is the t-score of the hypothesis
     * test if the coefficient is zero, the fourth column is the p-values of
     * test. There is one row per AR coefficient; the intercept is not
     * included. May be null unless the model was fit with
     * {@link #ols(double[], int, boolean)} and {@code stderr = true}.
     *
     * @return the t-test of the coefficients.
     */
    public double[][] ttest() {
        return ttest;
    }

    /**
     * Returns the linear coefficients of AR (without intercept).
     * @return the linear coefficients.
     */
    public double[] ar() {
        return ar;
    }

    /**
     * Returns the intercept.
     * @return the intercept.
     */
    public double intercept() {
        return b;
    }

    /**
     * Returns the residuals, that is response minus fitted values.
     * @return the residuals.
     */
    public double[] residuals() {
        return residuals;
    }

    /**
     * Returns the fitted values.
     * @return the fitted values.
     */
    public double[] fittedValues() {
        return fittedValues;
    }

    /**
     * Returns the residual sum of squares.
     * @return the residual sum of squares.
     */
    public double RSS() {
        return RSS;
    }

    /**
     * Returns the residual variance.
     * @return the residual variance.
     */
    public double variance() {
        return variance;
    }

    /**
     * Returns the degree-of-freedom of residual standard error.
     * @return the degree-of-freedom of residual standard error.
     */
    public int df() {
        return df;
    }

    /**
     * Returns R2 statistic. In regression, the R2 coefficient of
     * determination is a statistical measure of how well the regression
     * line approximates the real data points. An R2 of 1.0 indicates
     * that the regression line perfectly fits the data.
     * <p>
     * In the case of ordinary least-squares regression, R2 increases as
     * we increase the number of variables in the model (R2 will not
     * decrease). This illustrates a drawback to one possible use of R2,
     * where one might try to include more variables in the model until
     * "there is no more improvement". This leads to the alternative
     * approach of looking at the adjusted R2.
     *
     * @return R2 statistic.
     */
    public double R2() {
        return R2;
    }

    /**
     * Returns adjusted R2 statistic. The adjusted R2 has almost same
     * explanation as R2 but it penalizes the statistic as extra
     * variables are included in the model.
     *
     * @return Adjusted R2 statistic.
     */
    public double adjustedR2() {
        return adjustedR2;
    }

    /**
     * Fits an autoregressive model with Yule-Walker procedure.
     *
     * @param x the time series.
     * @param p the order.
     * @return the model.
     */
    public static AR fit(double[] x, int p) {
        if (p <= 0 || p >= x.length) {
            throw new IllegalArgumentException("Invalid order p = " + p);
        }

        double mean = MathEx.mean(x);

        // Sample autocorrelations at lags 1..p: the right-hand side
        // of the Yule-Walker equations.
        double[] y = new double[p];
        for (int i = 0; i < p; i++) {
            y[i] = TimeSeries.acf(x, i+1);
        }

        // The Toeplitz autocorrelation matrix R with first row/column
        // [1, acf(1), ..., acf(p-1)].
        double[] r = new double[p];
        r[0] = 1.0;
        System.arraycopy(y, 0, r, 1, p - 1);

        // Solve R * ar = y via Cholesky (R is symmetric positive definite
        // for a valid autocorrelation sequence).
        Matrix toeplitz = Matrix.toeplitz(r);
        Matrix.Cholesky cholesky = toeplitz.cholesky();
        double[] ar = cholesky.solve(y);
        // Intercept that reproduces the series mean: mu = mean * (1 - sum(ar)).
        double mu = mean * (1.0 - MathEx.sum(ar));

        AR model = new AR(x, ar, mu, Method.YuleWalker);

        // Yule-Walker noise-variance estimate overrides the residual-based
        // value set by the constructor.
        double aracf = 0.0;
        for (int i = 0; i < p; i++) {
            aracf += ar[i] * y[i];
        }
        model.variance = MathEx.var(x) * (x.length - 1) / (x.length - p - 1) * (1.0 - aracf);

        return model;
    }

    /**
     * Fits an autoregressive model with least squares method.
     *
     * @param x the time series.
     * @param p the order.
     * @return the model.
     */
    public static AR ols(double[] x, int p) {
        return ols(x, p, true);
    }

    /**
     * Fits an autoregressive model with least squares method.
     *
     * @param x the time series.
     * @param p the order.
     * @param stderr the flag if estimate the standard errors of parameters.
     * @return the model.
     */
    public static AR ols(double[] x, int p, boolean stderr) {
        if (p <= 0 || p >= x.length) {
            throw new IllegalArgumentException("Invalid order p = " + p);
        }

        Matrix X = X(x, p);
        double[] y = y(x, p);

        // Solve the least squares problem via SVD; the solution vector holds
        // the p AR weights followed by the intercept in the last slot.
        Matrix.SVD svd = X.svd(true, !stderr);
        double[] ar = svd.solve(y);
        AR model = new AR(x, Arrays.copyOf(ar, p), ar[p], Method.OLS);

        if (stderr) {
            // Standard errors come from the diagonal of (X'X)^-1 scaled by
            // the residual standard error.
            Matrix.Cholesky cholesky = X.ata().cholesky(true);
            Matrix inv = cholesky.inverse();

            int df = model.df;
            double error = Math.sqrt(model.variance);
            // One row per AR coefficient; the intercept is not tested.
            double[][] ttest = new double[p][4];
            model.ttest = ttest;
            for (int i = 0; i < p; i++) {
                ttest[i][0] = ar[i];
                double se = error * Math.sqrt(inv.get(i, i));
                ttest[i][1] = se;
                double t = ar[i] / se;
                ttest[i][2] = t;
                // Two-sided p-value of the t statistic, expressed through the
                // regularized incomplete beta function I(df/2, 1/2; df/(df+t^2)).
                ttest[i][3] = Beta.regularizedIncompleteBetaFunction(0.5 * df, 0.5, df / (df + t * t));
            }
        }

        return model;
    }

    /**
     * Predicts/forecasts x[offset] as the intercept plus the weighted
     * sum of the p preceding values x[offset-1], ..., x[offset-p].
     *
     * @param x the time series.
     * @param offset the offset of time series to forecast.
     * @return the predicted value of x[offset].
     */
    private double forecast(double[] x, int offset) {
        double y = b;
        for (int i = 0; i < p; i++) {
            y += ar[i] * x[offset - i - 1];
        }

        return y;
    }

    /**
     * Returns 1-step ahead forecast.
     * @return 1-step ahead forecast.
     */
    public double forecast() {
        return forecast(x, x.length);
    }

    /**
     * Returns l-step ahead forecast. Each step feeds earlier forecasts
     * back in as inputs for the later steps.
     *
     * @param l the number of steps.
     * @return l-step ahead forecast.
     */
    public double[] forecast(int l) {
        // Working buffer: the last p observations followed by l forecast slots.
        double[] x = new double[p + l];
        System.arraycopy(this.x, this.x.length - p, x, 0, p);
        for (int i = 0; i < l; i++) {
            x[p + i] = forecast(x, p+i);
        }

        return Arrays.copyOfRange(x, p, x.length);
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append(String.format("AR(%d) by %s:\n", p, method));

        double[] r = residuals.clone();
        builder.append("\nResiduals:\n");
        // NOTE(review): the column-header spacing below may have been collapsed
        // by the HTML extraction of this source; verify alignment against the
        // upstream file.
        builder.append(" Min 1Q Median 3Q Max\n");
        builder.append(String.format("%10.4f %10.4f %10.4f %10.4f %10.4f%n",
                MathEx.min(r), MathEx.q1(r), MathEx.median(r), MathEx.q3(r), MathEx.max(r)));

        builder.append("\nCoefficients:\n");
        if (ttest != null) {
            builder.append(" Estimate Std. Error t value Pr(>|t|)\n");
            if (b != 0.0) {
                builder.append(String.format("Intercept %10.4f%n", b));
            }

            for (int i = 0; i < p; i++) {
                builder.append(String.format("x[-%d]\t %10.4f %10.4f %10.4f %10.4f %s%n", i+1,
                        ttest[i][0], ttest[i][1], ttest[i][2], ttest[i][3],
                        Hypothesis.significance(ttest[i][3])));
            }

            builder.append("---------------------------------------------------------------------\n");
            builder.append("Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n");
        } else {
            if (b != 0.0) {
                builder.append(String.format("Intercept %10.4f%n", b));
            }

            for (int i = 0; i < p; i++) {
                builder.append(String.format("x[-%d]\t %10.4f%n", i+1, ar[i]));
            }
        }

        builder.append(String.format("%nResidual variance: %.4f on %5d degrees of freedom%n", variance, df));
        builder.append(String.format("Multiple R-squared: %.4f, Adjusted R-squared: %.4f%n", R2, adjustedR2));

        return builder.toString();
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy