org.apache.commons.math3.stat.regression.MillerUpdatingRegression Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math3.stat.regression;
import java.util.Arrays;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.Precision;
import org.apache.commons.math3.util.MathArrays;
/**
* This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
*
* The algorithm is described in:
* Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
* Author(s): Alan J. Miller
* Source: Journal of the Royal Statistical Society.
* Series C (Applied Statistics), Vol. 41, No. 2
* (1992), pp. 458-478
* Published by: Blackwell Publishing for the Royal Statistical Society
* Stable URL: http://www.jstor.org/stable/2347583
*
* This method for multiple regression forms the solution to the OLS problem
* by updating the QR decomposition as described by Gentleman.
*
* @since 3.0
*/
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
/** number of variables in regression */
private final int nvars;
/** diagonals of cross products matrix */
private final double[] d;
/** the elements of the R`Y */
private final double[] rhs;
/** the off diagonal portion of the R matrix */
private final double[] r;
/** the tolerance for each of the variables */
private final double[] tol;
/** residual sum of squares for all nested regressions */
private final double[] rss;
/** order of the regressors */
private final int[] vorder;
/** scratch space for tolerance calc */
private final double[] work_tolset;
/** number of observations entered */
private long nobs = 0;
/** sum of squared errors of largest regression */
private double sserr = 0.0;
/** has rss been called? */
private boolean rss_set = false;
/** has the tolerance setting method been called */
private boolean tol_set = false;
/** flags for variables with linear dependency problems */
private final boolean[] lindep;
/** singular x values */
private final double[] x_sing;
/** workspace for singularity method */
private final double[] work_sing;
/** summation of Y variable */
private double sumy = 0.0;
/** summation of squared Y values */
private double sumsqy = 0.0;
/** boolean flag whether a regression constant is added */
private boolean hasIntercept;
/** zero tolerance */
private final double epsilon;
/**
* Set the default constructor to private access
* to prevent inadvertent instantiation
*/
@SuppressWarnings("unused")
private MillerUpdatingRegression() {
this(-1, false, Double.NaN);
}
/**
* This is the augmented constructor for the MillerUpdatingRegression class.
*
* @param numberOfVariables number of regressors to expect, not including constant
* @param includeConstant include a constant automatically
* @param errorTolerance zero tolerance, how machine zero is determined
* @throws ModelSpecificationException if {@code numberOfVariables is less than 1}
*/
public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
throws ModelSpecificationException {
if (numberOfVariables < 1) {
throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
}
if (includeConstant) {
this.nvars = numberOfVariables + 1;
} else {
this.nvars = numberOfVariables;
}
this.hasIntercept = includeConstant;
this.nobs = 0;
this.d = new double[this.nvars];
this.rhs = new double[this.nvars];
this.r = new double[this.nvars * (this.nvars - 1) / 2];
this.tol = new double[this.nvars];
this.rss = new double[this.nvars];
this.vorder = new int[this.nvars];
this.x_sing = new double[this.nvars];
this.work_sing = new double[this.nvars];
this.work_tolset = new double[this.nvars];
this.lindep = new boolean[this.nvars];
for (int i = 0; i < this.nvars; i++) {
vorder[i] = i;
}
if (errorTolerance > 0) {
this.epsilon = errorTolerance;
} else {
this.epsilon = -errorTolerance;
}
}
/**
* Primary constructor for the MillerUpdatingRegression.
*
* @param numberOfVariables maximum number of potential regressors
* @param includeConstant include a constant automatically
* @throws ModelSpecificationException if {@code numberOfVariables is less than 1}
*/
public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
throws ModelSpecificationException {
this(numberOfVariables, includeConstant, Precision.EPSILON);
}
/**
* A getter method which determines whether a constant is included.
* @return true regression has an intercept, false no intercept
*/
public boolean hasIntercept() {
return this.hasIntercept;
}
/**
* Gets the number of observations added to the regression model.
* @return number of observations
*/
public long getN() {
return this.nobs;
}
/**
* Adds an observation to the regression model.
* @param x the array with regressor values
* @param y the value of dependent variable given these regressors
* @exception ModelSpecificationException if the length of {@code x} does not equal
* the number of independent variables in the model
*/
public void addObservation(final double[] x, final double y)
throws ModelSpecificationException {
if ((!this.hasIntercept && x.length != nvars) ||
(this.hasIntercept && x.length + 1 != nvars)) {
throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
x.length, nvars);
}
if (!this.hasIntercept) {
include(MathArrays.copyOf(x, x.length), 1.0, y);
} else {
final double[] tmp = new double[x.length + 1];
System.arraycopy(x, 0, tmp, 1, x.length);
tmp[0] = 1.0;
include(tmp, 1.0, y);
}
++nobs;
}
/**
* Adds multiple observations to the model.
* @param x observations on the regressors
* @param y observations on the regressand
* @throws ModelSpecificationException if {@code x} is not rectangular, does not match
* the length of {@code y} or does not contain sufficient data to estimate the model
*/
public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
if ((x == null) || (y == null) || (x.length != y.length)) {
throw new ModelSpecificationException(
LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
(x == null) ? 0 : x.length,
(y == null) ? 0 : y.length);
}
if (x.length == 0) { // Must be no y data either
throw new ModelSpecificationException(
LocalizedFormats.NO_DATA);
}
if (x[0].length + 1 > x.length) {
throw new ModelSpecificationException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
x.length, x[0].length);
}
for (int i = 0; i < x.length; i++) {
addObservation(x[i], y[i]);
}
}
/**
* The include method is where the QR decomposition occurs. This statement forms all
* intermediate data which will be used for all derivative measures.
* According to the miller paper, note that in the original implementation the x vector
* is overwritten. In this implementation, the include method is passed a copy of the
* original data vector so that there is no contamination of the data. Additionally,
* this method differs slightly from Gentleman's method, in that the assumption is
* of dense design matrices, there is some advantage in using the original gentleman algorithm
* on sparse matrices.
*
* @param x observations on the regressors
* @param wi weight of the this observation (-1,1)
* @param yi observation on the regressand
*/
private void include(final double[] x, final double wi, final double yi) {
int nextr = 0;
double w = wi;
double y = yi;
double xi;
double di;
double wxi;
double dpi;
double xk;
double _w;
this.rss_set = false;
sumy = smartAdd(yi, sumy);
sumsqy = smartAdd(sumsqy, yi * yi);
for (int i = 0; i < x.length; i++) {
if (w == 0.0) {
return;
}
xi = x[i];
if (xi == 0.0) {
nextr += nvars - i - 1;
continue;
}
di = d[i];
wxi = w * xi;
_w = w;
if (di != 0.0) {
dpi = smartAdd(di, wxi * xi);
final double tmp = wxi * xi / di;
if (FastMath.abs(tmp) > Precision.EPSILON) {
w = (di * w) / dpi;
}
} else {
dpi = wxi * xi;
w = 0.0;
}
d[i] = dpi;
for (int k = i + 1; k < nvars; k++) {
xk = x[k];
x[k] = smartAdd(xk, -xi * r[nextr]);
if (di != 0.0) {
r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
} else {
r[nextr] = xk / xi;
}
++nextr;
}
xk = y;
y = smartAdd(xk, -xi * rhs[i]);
if (di != 0.0) {
rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
} else {
rhs[i] = xk / xi;
}
}
sserr = smartAdd(sserr, w * y * y);
}
/**
* Adds to number a and b such that the contamination due to
* numerical smallness of one addend does not corrupt the sum.
* @param a - an addend
* @param b - an addend
* @return the sum of the a and b
*/
private double smartAdd(double a, double b) {
final double _a = FastMath.abs(a);
final double _b = FastMath.abs(b);
if (_a > _b) {
final double eps = _a * Precision.EPSILON;
if (_b > eps) {
return a + b;
}
return a;
} else {
final double eps = _b * Precision.EPSILON;
if (_a > eps) {
return a + b;
}
return b;
}
}
/**
* As the name suggests, clear wipes the internals and reorders everything in the
* canonical order.
*/
public void clear() {
Arrays.fill(this.d, 0.0);
Arrays.fill(this.rhs, 0.0);
Arrays.fill(this.r, 0.0);
Arrays.fill(this.tol, 0.0);
Arrays.fill(this.rss, 0.0);
Arrays.fill(this.work_tolset, 0.0);
Arrays.fill(this.work_sing, 0.0);
Arrays.fill(this.x_sing, 0.0);
Arrays.fill(this.lindep, false);
for (int i = 0; i < nvars; i++) {
this.vorder[i] = i;
}
this.nobs = 0;
this.sserr = 0.0;
this.sumy = 0.0;
this.sumsqy = 0.0;
this.rss_set = false;
this.tol_set = false;
}
/**
* This sets up tolerances for singularity testing.
*/
private void tolset() {
int pos;
double total;
final double eps = this.epsilon;
for (int i = 0; i < nvars; i++) {
this.work_tolset[i] = FastMath.sqrt(d[i]);
}
tol[0] = eps * this.work_tolset[0];
for (int col = 1; col < nvars; col++) {
pos = col - 1;
total = work_tolset[col];
for (int row = 0; row < col; row++) {
total += FastMath.abs(r[pos]) * work_tolset[row];
pos += nvars - row - 2;
}
tol[col] = eps * total;
}
tol_set = true;
}
/**
* The regcf method conducts the linear regression and extracts the
* parameter vector. Notice that the algorithm can do subset regression
* with no alteration.
*
* @param nreq how many of the regressors to include (either in canonical
* order, or in the current reordered state)
* @return an array with the estimated slope coefficients
* @throws ModelSpecificationException if {@code nreq} is less than 1
* or greater than the number of independent variables
*/
private double[] regcf(int nreq) throws ModelSpecificationException {
int nextr;
if (nreq < 1) {
throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
}
if (nreq > this.nvars) {
throw new ModelSpecificationException(
LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
}
if (!this.tol_set) {
tolset();
}
final double[] ret = new double[nreq];
boolean rankProblem = false;
for (int i = nreq - 1; i > -1; i--) {
if (FastMath.sqrt(d[i]) < tol[i]) {
ret[i] = 0.0;
d[i] = 0.0;
rankProblem = true;
} else {
ret[i] = rhs[i];
nextr = i * (nvars + nvars - i - 1) / 2;
for (int j = i + 1; j < nreq; j++) {
ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
++nextr;
}
}
}
if (rankProblem) {
for (int i = 0; i < nreq; i++) {
if (this.lindep[i]) {
ret[i] = Double.NaN;
}
}
}
return ret;
}
/**
* The method which checks for singularities and then eliminates the offending
* columns.
*/
private void singcheck() {
int pos;
for (int i = 0; i < nvars; i++) {
work_sing[i] = FastMath.sqrt(d[i]);
}
for (int col = 0; col < nvars; col++) {
// Set elements within R to zero if they are less than tol(col) in
// absolute value after being scaled by the square root of their row
// multiplier
final double temp = tol[col];
pos = col - 1;
for (int row = 0; row < col - 1; row++) {
if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
r[pos] = 0.0;
}
pos += nvars - row - 2;
}
// If diagonal element is near zero, set it to zero, set appropriate
// element of LINDEP, and use INCLUD to augment the projections in
// the lower rows of the orthogonalization.
lindep[col] = false;
if (work_sing[col] < temp) {
lindep[col] = true;
if (col < nvars - 1) {
Arrays.fill(x_sing, 0.0);
int _pi = col * (nvars + nvars - col - 1) / 2;
for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
x_sing[_xi] = r[_pi];
r[_pi] = 0.0;
}
final double y = rhs[col];
final double weight = d[col];
d[col] = 0.0;
rhs[col] = 0.0;
this.include(x_sing, weight, y);
} else {
sserr += d[col] * rhs[col] * rhs[col];
}
}
}
}
/**
* Calculates the sum of squared errors for the full regression
* and all subsets in the following manner:
* rss[] ={
* ResidualSumOfSquares_allNvars,
* ResidualSumOfSquares_FirstNvars-1,
* ResidualSumOfSquares_FirstNvars-2,
* ..., ResidualSumOfSquares_FirstVariable}
*/
private void ss() {
double total = sserr;
rss[nvars - 1] = sserr;
for (int i = nvars - 1; i > 0; i--) {
total += d[i] * rhs[i] * rhs[i];
rss[i - 1] = total;
}
rss_set = true;
}
/**
* Calculates the cov matrix assuming only the first nreq variables are
* included in the calculation. The returned array contains a symmetric
* matrix stored in lower triangular form. The matrix will have
* ( nreq + 1 ) * nreq / 2 elements. For illustration
* cov =
* {
* cov_00,
* cov_10, cov_11,
* cov_20, cov_21, cov22,
* ...
* }
*
* @param nreq how many of the regressors to include (either in canonical
* order, or in the current reordered state)
* @return an array with the variance covariance of the included
* regressors in lower triangular form
*/
private double[] cov(int nreq) {
if (this.nobs <= nreq) {
return null;
}
double rnk = 0.0;
for (int i = 0; i < nreq; i++) {
if (!this.lindep[i]) {
rnk += 1.0;
}
}
final double var = rss[nreq - 1] / (nobs - rnk);
final double[] rinv = new double[nreq * (nreq - 1) / 2];
inverse(rinv, nreq);
final double[] covmat = new double[nreq * (nreq + 1) / 2];
Arrays.fill(covmat, Double.NaN);
int pos2;
int pos1;
int start = 0;
double total = 0;
for (int row = 0; row < nreq; row++) {
pos2 = start;
if (!this.lindep[row]) {
for (int col = row; col < nreq; col++) {
if (!this.lindep[col]) {
pos1 = start + col - row;
if (row == col) {
total = 1.0 / d[col];
} else {
total = rinv[pos1 - 1] / d[col];
}
for (int k = col + 1; k < nreq; k++) {
if (!this.lindep[k]) {
total += rinv[pos1] * rinv[pos2] / d[k];
}
++pos1;
++pos2;
}
covmat[ (col + 1) * col / 2 + row] = total * var;
} else {
pos2 += nreq - col - 1;
}
}
}
start += nreq - row - 1;
}
return covmat;
}
/**
* This internal method calculates the inverse of the upper-triangular portion
* of the R matrix.
* @param rinv the storage for the inverse of r
* @param nreq how many of the regressors to include (either in canonical
* order, or in the current reordered state)
*/
private void inverse(double[] rinv, int nreq) {
int pos = nreq * (nreq - 1) / 2 - 1;
int pos1 = -1;
int pos2 = -1;
double total = 0.0;
Arrays.fill(rinv, Double.NaN);
for (int row = nreq - 1; row > 0; --row) {
if (!this.lindep[row]) {
final int start = (row - 1) * (nvars + nvars - row) / 2;
for (int col = nreq; col > row; --col) {
pos1 = start;
pos2 = pos;
total = 0.0;
for (int k = row; k < col - 1; k++) {
pos2 += nreq - k - 1;
if (!this.lindep[k]) {
total += -r[pos1] * rinv[pos2];
}
++pos1;
}
rinv[pos] = total - r[pos1];
--pos;
}
} else {
pos -= nreq - row;
}
}
}
/**
* In the original algorithm only the partial correlations of the regressors
* is returned to the user. In this implementation, we have
* corr =
* {
* corrxx - lower triangular
* corrxy - bottom row of the matrix
* }
* Replaces subroutines PCORR and COR of:
* ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
*
* Calculate partial correlations after the variables in rows
* 1, 2, ..., IN have been forced into the regression.
* If IN = 1, and the first row of R represents a constant in the
* model, then the usual simple correlations are returned.
*
* If IN = 0, the value returned in array CORMAT for the correlation
* of variables Xi & Xj is:
* sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )
*
* On return, array CORMAT contains the upper triangle of the matrix of
* partial correlations stored by rows, excluding the 1's on the diagonal.
* e.g. if IN = 2, the consecutive elements returned are:
* (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
* Array YCORR stores the partial correlations with the Y-variable
* starting with YCORR(IN+1) = partial correlation with the variable in
* position (IN+1).
*
* @param in how many of the regressors to include (either in canonical
* order, or in the current reordered state)
* @return an array with the partial correlations of the remainder of
* regressors with each other and the regressand, in lower triangular form
*/
public double[] getPartialCorrelations(int in) {
final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
int pos;
int pos1;
int pos2;
final int rms_off = -in;
final int wrk_off = -(in + 1);
final double[] rms = new double[nvars - in];
final double[] work = new double[nvars - in - 1];
double sumxx;
double sumxy;
double sumyy;
final int offXX = (nvars - in) * (nvars - in - 1) / 2;
if (in < -1 || in >= nvars) {
return null;
}
final int nvm = nvars - 1;
final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
if (d[in] > 0.0) {
rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
}
for (int col = in + 1; col < nvars; col++) {
pos = base_pos + col - 1 - in;
sumxx = d[col];
for (int row = in; row < col; row++) {
sumxx += d[row] * r[pos] * r[pos];
pos += nvars - row - 2;
}
if (sumxx > 0.0) {
rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
} else {
rms[col + rms_off] = 0.0;
}
}
sumyy = sserr;
for (int row = in; row < nvars; row++) {
sumyy += d[row] * rhs[row] * rhs[row];
}
if (sumyy > 0.0) {
sumyy = 1.0 / FastMath.sqrt(sumyy);
}
pos = 0;
for (int col1 = in; col1 < nvars; col1++) {
sumxy = 0.0;
Arrays.fill(work, 0.0);
pos1 = base_pos + col1 - in - 1;
for (int row = in; row < col1; row++) {
pos2 = pos1 + 1;
for (int col2 = col1 + 1; col2 < nvars; col2++) {
work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
pos2++;
}
sumxy += d[row] * r[pos1] * rhs[row];
pos1 += nvars - row - 2;
}
pos2 = pos1 + 1;
for (int col2 = col1 + 1; col2 < nvars; col2++) {
work[col2 + wrk_off] += d[col1] * r[pos2];
++pos2;
output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
++pos;
}
sumxy += d[col1] * rhs[col1];
output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
}
return output;
}
/**
* ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
* Move variable from position FROM to position TO in an
* orthogonal reduction produced by AS75.1.
*
* @param from initial position
* @param to destination
*/
private void vmove(int from, int to) {
double d1;
double d2;
double X;
double d1new;
double d2new;
double cbar;
double sbar;
double Y;
int first;
int inc;
int m1;
int m2;
int mp1;
int pos;
boolean bSkipTo40 = false;
if (from == to) {
return;
}
if (!this.rss_set) {
ss();
}
int count = 0;
if (from < to) {
first = from;
inc = 1;
count = to - from;
} else {
first = from - 1;
inc = -1;
count = from - to;
}
int m = first;
int idx = 0;
while (idx < count) {
m1 = m * (nvars + nvars - m - 1) / 2;
m2 = m1 + nvars - m - 1;
mp1 = m + 1;
d1 = d[m];
d2 = d[mp1];
// Special cases.
if (d1 > this.epsilon || d2 > this.epsilon) {
X = r[m1];
if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
X = 0.0;
}
if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
d[m] = d2;
d[mp1] = d1;
r[m1] = 0.0;
for (int col = m + 2; col < nvars; col++) {
++m1;
X = r[m1];
r[m1] = r[m2];
r[m2] = X;
++m2;
}
X = rhs[m];
rhs[m] = rhs[mp1];
rhs[mp1] = X;
bSkipTo40 = true;
//break;
} else if (d2 < this.epsilon) {
d[m] = d1 * X * X;
r[m1] = 1.0 / X;
for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
r[_i] /= X;
}
rhs[m] /= X;
bSkipTo40 = true;
//break;
}
if (!bSkipTo40) {
d1new = d2 + d1 * X * X;
cbar = d2 / d1new;
sbar = X * d1 / d1new;
d2new = d1 * cbar;
d[m] = d1new;
d[mp1] = d2new;
r[m1] = sbar;
for (int col = m + 2; col < nvars; col++) {
++m1;
Y = r[m1];
r[m1] = cbar * r[m2] + sbar * Y;
r[m2] = Y - X * r[m2];
++m2;
}
Y = rhs[m];
rhs[m] = cbar * rhs[mp1] + sbar * Y;
rhs[mp1] = Y - X * rhs[mp1];
}
}
if (m > 0) {
pos = m;
for (int row = 0; row < m; row++) {
X = r[pos];
r[pos] = r[pos - 1];
r[pos - 1] = X;
pos += nvars - row - 2;
}
}
// Adjust variable order (VORDER), the tolerances (TOL) and
// the vector of residual sums of squares (RSS).
m1 = vorder[m];
vorder[m] = vorder[mp1];
vorder[mp1] = m1;
X = tol[m];
tol[m] = tol[mp1];
tol[mp1] = X;
rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
m += inc;
++idx;
}
}
/**
* ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
*
* Re-order the variables in an orthogonal reduction produced by
* AS75.1 so that the N variables in LIST start at position POS1,
* though will not necessarily be in the same order as in LIST.
* Any variables in VORDER before position POS1 are not moved.
* Auxiliary routine called: VMOVE.
*
* This internal method reorders the regressors.
*
* @param list the regressors to move
* @param pos1 where the list will be placed
* @return -1 error, 0 everything ok
*/
private int reorderRegressors(int[] list, int pos1) {
int next;
int i;
int l;
if (list.length < 1 || list.length > nvars + 1 - pos1) {
return -1;
}
next = pos1;
i = pos1;
while (i < nvars) {
l = vorder[i];
for (int j = 0; j < list.length; j++) {
if (l == list[j] && i > next) {
this.vmove(i, next);
++next;
if (next >= list.length + pos1) {
return 0;
} else {
break;
}
}
}
++i;
}
return 0;
}
/**
* Gets the diagonal of the Hat matrix also known as the leverage matrix.
*
* @param row_data returns the diagonal of the hat matrix for this observation
* @return the diagonal element of the hatmatrix
*/
public double getDiagonalOfHatMatrix(double[] row_data) {
double[] wk = new double[this.nvars];
int pos;
double total;
if (row_data.length > nvars) {
return Double.NaN;
}
double[] xrow;
if (this.hasIntercept) {
xrow = new double[row_data.length + 1];
xrow[0] = 1.0;
System.arraycopy(row_data, 0, xrow, 1, row_data.length);
} else {
xrow = row_data;
}
double hii = 0.0;
for (int col = 0; col < xrow.length; col++) {
if (FastMath.sqrt(d[col]) < tol[col]) {
wk[col] = 0.0;
} else {
pos = col - 1;
total = xrow[col];
for (int row = 0; row < col; row++) {
total = smartAdd(total, -wk[row] * r[pos]);
pos += nvars - row - 2;
}
wk[col] = total;
hii = smartAdd(hii, (total * total) / d[col]);
}
}
return hii;
}
/**
* Gets the order of the regressors, useful if some type of reordering
* has been called. Calling regress with int[]{} args will trigger
* a reordering.
*
* @return int[] with the current order of the regressors
*/
public int[] getOrderOfRegressors(){
return MathArrays.copyOf(vorder);
}
/**
* Conducts a regression on the data in the model, using all regressors.
*
* @return RegressionResults the structure holding all regression results
* @exception ModelSpecificationException - thrown if number of observations is
* less than the number of variables
*/
public RegressionResults regress() throws ModelSpecificationException {
return regress(this.nvars);
}
/**
* Conducts a regression on the data in the model, using a subset of regressors.
*
* @param numberOfRegressors many of the regressors to include (either in canonical
* order, or in the current reordered state)
* @return RegressionResults the structure holding all regression results
* @exception ModelSpecificationException - thrown if number of observations is
* less than the number of variables or number of regressors requested
* is greater than the regressors in the model
*/
public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
if (this.nobs <= numberOfRegressors) {
throw new ModelSpecificationException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
this.nobs, numberOfRegressors);
}
if( numberOfRegressors > this.nvars ){
throw new ModelSpecificationException(
LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
}
tolset();
singcheck();
double[] beta = this.regcf(numberOfRegressors);
ss();
double[] cov = this.cov(numberOfRegressors);
int rnk = 0;
for (int i = 0; i < this.lindep.length; i++) {
if (!this.lindep[i]) {
++rnk;
}
}
boolean needsReorder = false;
for (int i = 0; i < numberOfRegressors; i++) {
if (this.vorder[i] != i) {
needsReorder = true;
break;
}
}
if (!needsReorder) {
return new RegressionResults(
beta, new double[][]{cov}, true, this.nobs, rnk,
this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
} else {
double[] betaNew = new double[beta.length];
double[] covNew = new double[cov.length];
int[] newIndices = new int[beta.length];
for (int i = 0; i < nvars; i++) {
for (int j = 0; j < numberOfRegressors; j++) {
if (this.vorder[j] == i) {
betaNew[i] = beta[ j];
newIndices[i] = j;
}
}
}
int idx1 = 0;
int idx2;
int _i;
int _j;
for (int i = 0; i < beta.length; i++) {
_i = newIndices[i];
for (int j = 0; j <= i; j++, idx1++) {
_j = newIndices[j];
if (_i > _j) {
idx2 = _i * (_i + 1) / 2 + _j;
} else {
idx2 = _j * (_j + 1) / 2 + _i;
}
covNew[idx1] = cov[idx2];
}
}
return new RegressionResults(
betaNew, new double[][]{covNew}, true, this.nobs, rnk,
this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
}
}
/**
* Conducts a regression on the data in the model, using regressors in array
* Calling this method will change the internal order of the regressors
* and care is required in interpreting the hatmatrix.
*
* @param variablesToInclude array of variables to include in regression
* @return RegressionResults the structure holding all regression results
* @exception ModelSpecificationException - thrown if number of observations is
* less than the number of variables, the number of regressors requested
* is greater than the regressors in the model or a regressor index in
* regressor array does not exist
*/
public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
if (variablesToInclude.length > this.nvars) {
throw new ModelSpecificationException(
LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
}
if (this.nobs <= this.nvars) {
throw new ModelSpecificationException(
LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
this.nobs, this.nvars);
}
Arrays.sort(variablesToInclude);
int iExclude = 0;
for (int i = 0; i < variablesToInclude.length; i++) {
if (i >= this.nvars) {
throw new ModelSpecificationException(
LocalizedFormats.INDEX_LARGER_THAN_MAX, i, this.nvars);
}
if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
variablesToInclude[i] = -1;
++iExclude;
}
}
int[] series;
if (iExclude > 0) {
int j = 0;
series = new int[variablesToInclude.length - iExclude];
for (int i = 0; i < variablesToInclude.length; i++) {
if (variablesToInclude[i] > -1) {
series[j] = variablesToInclude[i];
++j;
}
}
} else {
series = variablesToInclude;
}
reorderRegressors(series, 0);
tolset();
singcheck();
double[] beta = this.regcf(series.length);
ss();
double[] cov = this.cov(series.length);
int rnk = 0;
for (int i = 0; i < this.lindep.length; i++) {
if (!this.lindep[i]) {
++rnk;
}
}
boolean needsReorder = false;
for (int i = 0; i < this.nvars; i++) {
if (this.vorder[i] != series[i]) {
needsReorder = true;
break;
}
}
if (!needsReorder) {
return new RegressionResults(
beta, new double[][]{cov}, true, this.nobs, rnk,
this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
} else {
double[] betaNew = new double[beta.length];
int[] newIndices = new int[beta.length];
for (int i = 0; i < series.length; i++) {
for (int j = 0; j < this.vorder.length; j++) {
if (this.vorder[j] == series[i]) {
betaNew[i] = beta[ j];
newIndices[i] = j;
}
}
}
double[] covNew = new double[cov.length];
int idx1 = 0;
int idx2;
int _i;
int _j;
for (int i = 0; i < beta.length; i++) {
_i = newIndices[i];
for (int j = 0; j <= i; j++, idx1++) {
_j = newIndices[j];
if (_i > _j) {
idx2 = _i * (_i + 1) / 2 + _j;
} else {
idx2 = _j * (_j + 1) / 2 + _i;
}
covNew[idx1] = cov[idx2];
}
}
return new RegressionResults(
betaNew, new double[][]{covNew}, true, this.nobs, rnk,
this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
}
}
}