
package hex.glm;
import hex.DataInfo;
import hex.DataInfo.Row;
import hex.DataInfo.Rows;
import hex.FrameTask2;
import hex.glm.GLMModel.GLMParameters;
import hex.glm.GLMModel.GLMParameters.Link;
import hex.gram.Gram;
import hex.glm.GLMModel.GLMParameters.Family;
import jsr166y.CountedCompleter;
import water.H2O.H2OCountedCompleter;
import water.*;
import water.fvec.*;
import water.util.ArrayUtils;
import java.util.Arrays;
/**
* All GLM related distributed tasks:
*
* YMUTask           - computes response means on the dataset actually used (i.e. excluding rows ignored because of NAs and/or rows held out by cross-validation)
* GLMGradientTask   - computes the gradient at a given beta; used by L-BFGS and for the KKT condition check
* GLMLineSearchTask - computes residual deviance(s) at given beta(s); used by line search (both L-BFGS and IRLSM)
* GLMIterationTask  - used by IRLSM to compute the weighted Gram matrix t(X) W X and the vector t(X) W z
*
* @author tomasnykodym
*/
public abstract class GLMTask {
static class YMUTask extends MRTask {
double _yMin = Double.POSITIVE_INFINITY, _yMax = Double.NEGATIVE_INFINITY;
long _nobs;
double _wsum;
final int _responseId;
final int _weightId;
final int _offsetId;
final int _nums; // number of numeric columns
final int _numOff;
final boolean _comupteWeightedSigma;
double [] _xsum; // weighted sum of x
double [] _xxsum; // weighted sum of x^2
double [] _yMu;
final int _nClasses;
public YMUTask(DataInfo dinfo, int nclasses, boolean computeWeightedSigma, H2OCountedCompleter cmp){
super(cmp);
_nums = dinfo._nums;
_numOff = dinfo._cats;
_responseId = dinfo.responseChunkId();
_weightId = dinfo._weights?dinfo.weightChunkId():-1;
_offsetId = dinfo._offset?dinfo.offsetChunkId():-1;
_nClasses = nclasses;
_comupteWeightedSigma = computeWeightedSigma;
}
@Override public void setupLocal(){}
// public double _wY; // (Weighted) sum of the response
// public double _wYY; // (Weighted) sum of the squared response
//
// public double weightedSigma() {
//// double sampleCorrection = _count/(_count-1); //sample variance -> depends on the number of ACTUAL ROWS (not the weighted count)
// double sampleCorrection = 1; //this will make the result (and R^2) invariant to globally scaling the weights
// return _count <= 1 ? 0 : Math.sqrt(sampleCorrection*(_wYY/_wcount - (_wY*_wY)/(_wcount*_wcount)));
// }
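// map: mark rows with NAs (and nonzero weight) as bad and zero-out their weights (or, when the weights are the
// default all-ones chunk, replace it with a bitset chunk); accumulate the weighted response sum (or per-class
// counts for multinomial), the min/max of the weighted response, the observation count and weight sum, and,
// optionally, the weighted sums of x and x^2 for the numeric columns.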
@Override public void map(Chunk [] chunks) {
_yMu = new double[_nClasses > 2?_nClasses:1];
boolean [] good = MemoryManager.mallocZ(chunks[0]._len);
Arrays.fill(good,true);
Chunk weight = chunks[_weightId];
for(int i = 0; i < chunks.length; ++i) {
for (int r = chunks[i].nextNZ(-1); r < chunks[i]._len; r = chunks[i].nextNZ(r)) {
if (weight.atd(r) != 0 && chunks[i].isNA(r))
good[r] = false;
}
}
Chunk response = chunks[_responseId];
if(_comupteWeightedSigma) {
_xsum = MemoryManager.malloc8d(_nums);
_xxsum = MemoryManager.malloc8d(_nums);
}
for(int r = 0; r < response._len; ++r) {
double w = weight.atd(r);
if(!good[r] || w == 0) continue;
if(_comupteWeightedSigma) {
for(int i = 0; i < _nums; ++i) {
double d = chunks[i+_numOff].atd(r);
_xsum[i] += w*d;
_xxsum[i] += w*d*d;
}
}
_wsum += w;
double d = w*response.atd(r);
assert !Double.isNaN(d);
if(_nClasses > 2)
_yMu[(int)d] += 1;
else
_yMu[0] += d;
if(d < _yMin)
_yMin = d;
if(d > _yMax)
_yMax = d;
_nobs++;
}
boolean all_good = true;
for(boolean b:good)all_good &= b;
if(!all_good) {
if (weight instanceof C0DChunk && weight.atd(0) == 1) // shortcut for h2o-made binary weights
DKV.put(weight.vec().chunkKey(chunks[0].cidx()), new CBSChunk(good));
else {
for(int i = 0; i < good.length; ++i) // already got weights, need to set the zeros
if(!good[i]) weight.set(i,0);
}
}
}
@Override public void postGlobal() {
ArrayUtils.mult(_yMu,1.0/_wsum);
Futures fs = new Futures();
// _fVec.postWrite(fs); // we just overwrote the vec
fs.blockForPending();
}
@Override public void reduce(YMUTask ymt) {
if(_nobs > 0 && ymt._nobs > 0) {
_wsum += ymt._wsum;
ArrayUtils.add(_yMu,ymt._yMu);
_nobs += ymt._nobs;
if(_yMin > ymt._yMin)
_yMin = ymt._yMin;
if(_yMax < ymt._yMax)
_yMax = ymt._yMax;
if(_comupteWeightedSigma) {
ArrayUtils.add(_xsum, ymt._xsum);
ArrayUtils.add(_xxsum, ymt._xxsum);
}
} else if (_nobs == 0) {
_wsum = ymt._wsum;
_yMu = ymt._yMu;
_nobs = ymt._nobs;
_yMin = ymt._yMin;
_yMax = ymt._yMax;
_xsum = ymt._xsum;
_xxsum = ymt._xxsum;
}
}
}
static class GLMLineSearchTask extends MRTask {
final DataInfo _dinfo;
final double [] _beta;
final double [][] _betaMultinomial;
final int _c;
final double [] _direction;
final double _step;
final double _initStep;
final int _nSteps;
final GLMParameters _params;
boolean _useFasterMetrics = false;
public GLMLineSearchTask(DataInfo dinfo, GLMParameters params, double [] beta, double [] direction, double initStep, double step, int nsteps, CountedCompleter cc) {
super ((H2OCountedCompleter)cc);
_dinfo = dinfo;
_beta = beta;
_betaMultinomial = null;
_direction = direction;
_step = step;
_nSteps = nsteps;
_params = params;
_initStep = initStep;
_c = -1;
}
long _nobs;
double [] _likelihoods; // result
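// Evaluates the likelihood at _nSteps points along the search direction: the j-th candidate is
// _beta + _initStep * _step^j * _direction, j = 0.._nSteps-1. The linear predictor eta is accumulated
// column-by-column (categoricals first, then non-zero numerics, plus a shared per-step offset correcting for
// standardized zeros), so sparse columns are only touched at their non-zeros.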
@Override
public void map(Chunk [] chks) {
Chunk responseChunk = chks[_dinfo.responseChunkId()];
boolean[] skip = MemoryManager.mallocZ(chks[0]._len);
double [][] eta = new double[responseChunk._len][_nSteps];
if(_dinfo._offset) {
Chunk offsetChunk = chks[_dinfo.offsetChunkId()];
for (int r = 0; r < eta.length; ++r)
Arrays.fill(eta[r], offsetChunk.atd(r));
}
Chunk weightsChunk = _dinfo._weights?chks[_dinfo.weightChunkId()]:new C0DChunk(1,chks[0]._len);
double [] beta = _beta;
double [] pk = _direction;
// intercept
for (int r = 0; r < eta.length; ++r) {
double b = beta[beta.length - 1];
double t = pk[beta.length - 1] * _initStep;
for (int j = 0; j < _nSteps; ++j, t *= _step) {
eta[r][j] += b + t;
}
}
// categoricals
for(int i = 0; i < _dinfo._cats; ++i) {
Chunk c = chks[i];
for(int r = 0; r < c._len; ++r) { // categoricals can not be sparse
if(skip[r] || c.isNA(r)) {
skip[r] = true;
continue;
}
int off = _dinfo.getCategoricalId(i,(int)c.at8(r)); // get pos in beta vector.
if(off != -1) {
double t = pk[off] * _initStep;
double b = beta[off];
for (int j = 0; j < _nSteps; ++j, t *= _step)
eta[r][j] += b + t;
}
}
}
// compute default eta offset for 0s
final int numStart = _dinfo.numStart();
double [] off = new double[_nSteps];
if(_dinfo._normMul != null && _dinfo._normSub != null) {
for (int i = 0; i < _dinfo._nums; ++i) {
double b = beta[numStart+i];
double s = pk[numStart+i] * _initStep;
double d = _dinfo._normSub[i] * _dinfo._normMul[i];
for (int j = 0; j < _nSteps; ++j, s *= _step)
off[j] -= (b + s) * d;
}
}
// non-zero numbers
for (int i = 0; i < _dinfo._nums; ++i) {
Chunk c = chks[i + _dinfo._cats];
for (int r = c.nextNZ(-1); r < c._len; r = c.nextNZ(r)) {
if(skip[r] || c.isNA(r)) {
skip[r] = true;
continue;
}
double d = c.atd(r);
if(d == 0) continue;
if (_dinfo._normMul != null)
d *= _dinfo._normMul[i];
double b = beta[numStart+i];
double s = pk[numStart+i] * _initStep;
for (int j = 0; j < _nSteps; ++j, s *= _step)
eta[r][j] += (b + s) * d;
}
}
_likelihoods = MemoryManager.malloc8d(_nSteps);
for (int r = 0; r < chks[0]._len; ++r) {
double w = weightsChunk.atd(r);
if(w == 0 || responseChunk.isNA(r))
continue;
_nobs++;
double y = responseChunk.atd(r);
double yy = -1 + 2*y;
for (int i = 0; i < _nSteps; ++i) {
double e = eta[r][i] + off[i];
if (_params._family == Family.binomial && _useFasterMetrics) {
_likelihoods[i] += w*Math.log(1 + Math.exp(-yy * e));
} else {
double mu = _params.linkInv(e);
_likelihoods[i] += w*_params.likelihood(y,mu);
}
}
}
}
@Override public void reduce(GLMLineSearchTask glt){
ArrayUtils.add(_likelihoods,glt._likelihoods);
_nobs += glt._nobs;
}
}
static class GLMMultinomialLineSearchTask extends MRTask {
private final DataInfo _dinfo;
public final double _initialStep;
public final double _stepDec;
public final int _nSteps;
private double [][] _betaBase;
private double [][] _direction2D;
private double [] _direction1D;
final int _c;
public long _nobs;
// output
double [] _likelihoods;
static double[][] reshapeAry(double[] ary, int n) {
int d = ary.length/n;
if(d*n != ary.length)
throw new IllegalArgumentException("Array length is not a multiple of n");
double [][] res = new double[d][n];
int off = 0;
for(int i = 0; i < d; ++i)
for(int j = 0; j < n; ++j)
res[i][j] = ary[off++];
return res;
}
GLMMultinomialLineSearchTask(H2OCountedCompleter cc, DataInfo dinfo, double [] beta, double [] direction, double initialStep, double stepDec, int nSteps) {
this(cc,dinfo,reshapeAry(beta,dinfo.fullN()+1),reshapeAry(direction,dinfo.fullN()+1),initialStep, stepDec, nSteps);
}
GLMMultinomialLineSearchTask(H2OCountedCompleter cc, DataInfo dinfo, double [][] beta, double [] direction, int c, double initialStep, double stepDec, int nSteps) {
super(cc);
_dinfo = dinfo;
_betaBase = beta;
_direction1D = direction;
_direction2D = null;
_c = c;
_initialStep = initialStep;
_stepDec = stepDec;
_nSteps = nSteps;
}
GLMMultinomialLineSearchTask(H2OCountedCompleter cc, DataInfo dinfo, double [][] beta, double [][] direction, double initialStep, double stepDec, int nSteps) {
super(cc);
_dinfo = dinfo;
_betaBase = beta;
_direction2D = direction;
_direction1D = null;
_initialStep = initialStep;
_stepDec = stepDec;
_nSteps = nSteps;
_c = -1;
}
public void map(Chunk [] chks) {
double t = _initialStep;
_nobs = 0;
double [][] beta = _betaBase.clone();
for(int i = 0; i < beta.length; ++i)
beta[i] = beta[i].clone();
Rows rows = _dinfo.rows(chks);
double [] etas = new double[_betaBase.length];
double [] etaOffsets = new double [etas.length];
double [] exps = new double[_betaBase.length+1];
double [] likelihoods = new double[_nSteps];
for(int i = 0; i < _nSteps; i++) {
for(int j = 0; j < _betaBase.length; ++j) {
double [] base = _betaBase[j];
double [] b = beta[j];
if(_direction2D != null) {
double [] d = _direction2D[j];
for(int k = 0; k < base.length; ++k)
b[k] = base[k] + t*d[k];
} else if(j == _c){
for(int k = 0; k < base.length; ++k)
b[k] = base[k] + t*_direction1D[k];
}
if(rows._sparse)
etaOffsets[j] = GLM.sparseOffset(b,_dinfo);
}
for(int j = 0; j < rows._nrows; ++j) {
Row row = rows.row(j);
if(i == 0)++_nobs;
double logSumExp = computeMultinomialEtas(row,beta,etas,etaOffsets,exps);
likelihoods[i] -= row.weight * (etas[(int)row.response(0)] - logSumExp);
}
t *= _stepDec;
}
_likelihoods = likelihoods;
_betaBase = null;
_direction1D = null;
_direction2D = null;
}
public void reduce(GLMMultinomialLineSearchTask glmt) {
ArrayUtils.add(_likelihoods,glmt._likelihoods);
_nobs += glmt._nobs;
}
}
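// Numerically stable softmax over the per-class linear predictors: subtracts the largest eta (floored at 0)
// before exponentiation and returns log(sum(exp(eta_c - max))) + max, i.e. the log-sum-exp term of the
// multinomial log-likelihood. On return, exps[1..K] hold the class probabilities and exps[0] the index of the
// most probable class.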
static double computeMultinomialEtas(Row row, double [][]beta, final double [] etas, double [] etaOffsets, double [] exps) {
double maxRow = 0;
for (int c = 0; c < beta.length; ++c) {
double e = etaOffsets[c] + row.innerProduct(beta[c]);
if (e > maxRow) maxRow = e;
etas[c] = e;
}
double sumExp = 0;
for(int c = 0; c < beta.length; ++c) {
double x = Math.exp(etas[c] - maxRow);
sumExp += x;
exps[c+1] = x;
}
double reg = 1.0/(sumExp);
for(int c = 0; c < etas.length; ++c)
exps[c+1] *= reg;
exps[0] = 0;
exps[0] = ArrayUtils.maxIndex(exps)-1;
return Math.log(sumExp) + maxRow;
}
static class GLMMultinomialGradientTask extends MRTask {
final double [][] _beta;
final DataInfo _dinfo;
final double _currentLambda;
final double _reg;
final double [] _ymu;
double [] _gradient;
long _nobs;
double _wsum;
double _likelihood;
GLMValidation _val;
final boolean _validate;
public GLMMultinomialGradientTask(DataInfo dinfo, double lambda, double [] ymu, double[][] beta, double reg,boolean validate, H2OCountedCompleter cmp) {
super(cmp);
_dinfo = dinfo;
_currentLambda = lambda;
_reg = reg;
_validate = validate;
_beta = beta;
_ymu = ymu;
}
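// Per-row contribution to the multinomial gradient: with p_c the softmax probability of class c,
// d(negative log-likelihood)/d(eta_c) = weight * (p_c - 1{y == c}); this value multiplies each active predictor
// (and the intercept) within class c's block of length P = fullN()+1 in the flat _gradient array.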
private final void processRow(Row row, double [][] beta, final double [] etas, double [] etaOffsets, final double [] exps) {
int y = (int)row.response(0);
assert y == row.response(0);
double logSumExp = computeMultinomialEtas(row, beta, etas, etaOffsets, exps);
int P = _dinfo.fullN()+1;
_likelihood -= row.weight * (etas[(int)row.response(0)] - logSumExp);
_val.add(y,exps, row.weight,row.offset);
int numOff = _dinfo.numStart();
for(int c = 0; c < beta.length; ++c) {
double val = row.weight * (exps[c+1] - (y == c?1:0));
for (int j = 0; j < row.nBins; ++j)
_gradient[c*P + row.binIds[j]] += val;
for (int j = 0; j < row.nNums; ++j)
_gradient[c*P + (row.numIds == null ? j + numOff : row.numIds[j])] += row.numVals[j] * val;
_gradient[(c+1) * P - 1] += val;
}
}
public void map(Chunk [] chks) {
int rank = 0;
for(int i = 0; i < _beta.length; ++i)
for(int j = 0; j < _beta[i].length; ++j)
if(_beta[i][j] != 0)
++rank;
_gradient = new double[_beta.length*_beta[0].length];
_val = new GLMValidation(_dinfo._adaptedFrame.lastVec().domain(), _ymu, new GLMParameters(Family.multinomial),rank,0,_validate, true);
final int P = _beta[0].length;
double [] etas = new double[_beta.length];
double [] exps = new double[_beta.length+1];
double [] etaOffsets = new double[_beta.length];
Rows rows = _dinfo.rows(chks);
if(rows._sparse)
for(int i = 0; i < _beta.length; ++i)
etaOffsets[i] = GLM.sparseOffset(_beta[i],_dinfo);
for(int r = 0; r < rows._nrows; ++r) {
final Row row = rows.row(r);
if(row.bad || row.weight == 0) continue;
_wsum += row.weight;
_nobs++;
processRow(row, _beta, etas, etaOffsets, exps);
}
if (rows._sparse && _dinfo._normSub != null) { // adjust for centering
int off = _dinfo.numStart();
for(int c = 0; c < _beta.length; ++c) {
double val = _gradient[(c+1)*P-1];
for (int i = 0; i < _dinfo._nums; ++i)
_gradient[c * P + off + i] -= val * _dinfo._normSub[i] * _dinfo._normMul[i];
}
}
}
@Override
public void reduce(GLMMultinomialGradientTask gmgt){
ArrayUtils.add(_gradient,gmgt._gradient);
_nobs += gmgt._nobs;
_wsum += gmgt._wsum;
_likelihood += gmgt._likelihood;
_val.reduce(gmgt._val);
}
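// postGlobal: scale the accumulated gradient by _reg and add the gradient of the L2 (ridge) penalty,
// _currentLambda * beta, skipping the per-class intercepts.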
@Override public void postGlobal(){
ArrayUtils.mult(_gradient, _reg);
int P = _beta[0].length;
for(int c = 0; c < _beta.length; ++c)
for(int j = 0; j < P-1; ++j)
_gradient[c*P+j] += _currentLambda * _beta[c][j];
}
}
static class GLMGradientTask extends MRTask {
final GLMParameters _params;
GLMValidation _val;
double _currentLambda;
final double [] _beta;
final protected DataInfo _dinfo;
final double _reg;
public double [] _gradient;
public double _likelihood;
protected transient boolean [] _skip;
boolean _validate;
long _nobs;
double _wsum;
double _ymu;
final boolean _intercept;
public GLMGradientTask(DataInfo dinfo, GLMParameters params, double lambda, double[] beta, double reg,boolean intercept){this(dinfo,params, lambda, beta,reg, intercept, null);}
public GLMGradientTask(DataInfo dinfo, GLMParameters params, double lambda, double[] beta, double reg, boolean intercept, H2OCountedCompleter cc){
super(cc);
_dinfo = dinfo;
_params = params;
_beta = beta;
_reg = reg;
_currentLambda = lambda;
_intercept = intercept;
}
public GLMGradientTask setValidate(double ymu, boolean validate) {
_ymu = ymu;
_validate = validate;
return this;
}
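// Row-wise gradient: for each row compute eta = x*beta + offset and mu = linkInv(eta), then the scalar
// gval = weight * (mu - y) / (variance(mu) * linkDeriv(mu)); the gradient is the sum of gval * x over rows
// (categorical indicator columns, numeric columns, and the intercept).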
protected void goByRows(Chunk [] chks, boolean [] skp){
Row row = _dinfo.newDenseRow();
double [] g = _gradient;
double [] b = _beta;
for(int rid = 0; rid < chks[0]._len; ++rid) {
if(skp[rid]) continue;
row = _dinfo.extractDenseRow(chks, rid, row);
if(row.weight == 0 || row.bad) continue;
_nobs++;
_wsum += row.weight;
double eta = row.innerProduct(b) + row.offset;
double mu = _params.linkInv(eta);
_val.add(row.response(0), mu, row.weight, row.offset);
_likelihood += row.weight*_params.likelihood(row.response(0), mu);
double var = _params.variance(mu);
if(var < 1e-6) var = 1e-6; // to avoid numerical problems with 0 variance
double gval =row.weight * (mu-row.response(0)) / (var * _params.linkDeriv(mu));
// categoricals
for(int i = 0; i < row.nBins; ++i)
g[row.binIds[i]] += gval;
int off = _dinfo.numStart();
// numbers
for(int j = 0; j < _dinfo._nums; ++j)
g[j + off] += row.numVals[j] * gval;
// intercept
if(_dinfo._intercept)
g[g.length-1] += gval;
}
}
@Override
public void postGlobal(){
ArrayUtils.mult(_gradient,_reg);
for(int j = 0; j < _beta.length - (_dinfo._intercept?1:0); ++j)
_gradient[j] += _currentLambda * _beta[j];
}
// compute linear estimate by summing contributions for all columns
// (looping by column in the outer loop to have good access pattern and to exploit sparsity)
protected final double [] computeEtaByCols(Chunk [] chks, boolean [] skip) {
double [] eta = MemoryManager.malloc8d(chks[0]._len);
if(_dinfo._intercept)
Arrays.fill(eta,_beta[_beta.length-1]);
if(_dinfo._offset) {
for (int i = 0; i < eta.length; ++i) {
if(!skip[i]) {
eta[i] += chks[_dinfo.offsetChunkId()].atd(i);
if (Double.isNaN(eta[i]))
skip[i] = true;
}
}
}
double [] b = _beta;
// do categoricals first
for(int i = 0; i < _dinfo._cats; ++i) {
Chunk c = chks[i];
for(int r = 0; r < c._len; ++r) { // categoricals can not be sparse
if(skip[r] || c.isNA(r)) {
skip[r] = true;
continue;
}
int off = _dinfo.getCategoricalId(i,(int)c.at8(r));
if(off != -1)
eta[r] += b[off];
}
}
final int numStart = _dinfo.numStart();
// compute default eta offset for 0s
if(_dinfo._normMul != null && _dinfo._normSub != null) {
double off = 0;
for (int i = 0; i < _dinfo._nums; ++i)
off -= b[numStart + i] * _dinfo._normSub[i] * _dinfo._normMul[i];
for(int r = 0; r < chks[0]._len; ++r)
eta[r] += off;
}
// now numerics
for (int i = 0; i < _dinfo._nums; ++i) {
Chunk c = chks[i + _dinfo._cats];
for (int r = c.nextNZ(-1); r < c._len; r = c.nextNZ(r)) {
if(skip[r] || c.isNA(r)) {
skip[r] = true;
continue;
}
double d = c.atd(r);
if (_dinfo._normMul != null)
d *= _dinfo._normMul[i];
eta[r] += b[numStart + i] * d;
}
}
return eta;
}
protected void goByCols(Chunk [] chks, boolean [] skp){
int numStart = _dinfo.numStart();
double [] eta = computeEtaByCols(chks, skp);
double [] b = _beta;
double [] g = _gradient;
Chunk offsetChunk = _dinfo._offset?chks[_dinfo.offsetChunkId()]:new C0DChunk(0,chks[0]._len);
Chunk weightChunk = _dinfo._weights ?chks[_dinfo.weightChunkId()]:new C0DChunk(1,chks[0]._len);
Chunk responseChunk = chks[_dinfo.responseChunkId()];
double eta_sum = 0;
// compute the predicted mean and variance and ginfo for each row
for(int r = 0; r < chks[0]._len; ++r){
if(skp[r] || responseChunk.isNA(r))
continue;
double w = weightChunk.atd(r);
if(w == 0 || Double.isNaN(w))
continue;
_nobs++;
_wsum += w;
assert w > 0;
double y = responseChunk.atd(r);
double mu = _params.linkInv(eta[r]);
_val.add(y, mu, w, offsetChunk.atd(r));
_likelihood += w * _params.likelihood(y, mu);
double var = _params.variance(mu);
if(var < 1e-6) var = 1e-6; // to avoid numerical problems with 0 variance
eta[r] = w * (mu-y) / (var * _params.linkDeriv(mu));
eta_sum += eta[r];
}
// finally go over the columns again and compute ginfo for each column
// first handle eta offset and intercept
if(_dinfo._intercept)
g[g.length-1] = eta_sum;
if(_dinfo._normMul != null && _dinfo._normSub != null)
for(int i = 0; i < _dinfo._nums; ++i)
g[numStart + i] = -_dinfo._normSub[i]*_dinfo._normMul[i]*eta_sum;
// categoricals
for(int i = 0; i < _dinfo._cats; ++i) {
Chunk c = chks[i];
for(int r = 0; r < c._len; ++r) { // categoricals can not be sparse
if(skp[r]) continue;
int off = _dinfo.getCategoricalId(i,(int)chks[i].at8(r));
if(off != -1)
g[off] += eta[r];
}
}
// numerics
for (int i = 0; i < _dinfo._nums; ++i) {
Chunk c = chks[i + _dinfo._cats];
for (int r = c.nextNZ(-1); r < c._len; r = c.nextNZ(r)) {
if(skp[r] || c.isNA(r))
continue;
double d = c.atd(r);
if (_dinfo._normMul != null)
d = d*_dinfo._normMul[i];
g[numStart + i] += eta[r] * d;
}
}
_skip = skp;
}
private boolean mostlySparse(Chunk [] chks){
int cnt = 0;
for(Chunk chk:chks)
if(chk.isSparse())
++cnt;
return cnt >= chks.length >> 1;
}
private boolean _forceRows;
private boolean _forceCols;
public GLMGradientTask forceColAccess() {
_forceCols = true;
_forceRows = false;
return this;
}
public GLMGradientTask forceRowAccess() {
_forceCols = false;
_forceRows = true;
return this;
}
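// The gradient can be computed row-wise (dense rows) or column-wise (better access pattern, exploits sparsity).
// Unless the caller forces one mode, column access is used for wide data (>= 100 chunks) or mostly sparse chunks.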
public void map(Chunk [] chks){
int rank = 0;
for(int i = 0; i < _beta.length; ++i)
if(_beta[i] != 0)
++rank;
_gradient = MemoryManager.malloc8d(_beta.length);
String [] domain = _dinfo._adaptedFrame.lastVec().domain();
if(domain == null && _params._family == Family.binomial)
domain = new String[]{"0","1"}; // special hard-coded case for binomial on binary col
_val = new GLMValidation(domain, new double[]{_ymu}, _params,rank,0,_validate,_intercept);
boolean [] skp = MemoryManager.mallocZ(chks[0]._len);
if(_forceCols || (!_forceRows && (chks.length >= 100 || mostlySparse(chks))))
goByCols(chks, skp);
else
goByRows(chks, skp);
// note: the _reg scaling and the L2 penalty are applied in postGlobal()
}
public void reduce(GLMGradientTask grt) {
_likelihood += grt._likelihood;
_nobs += grt._nobs;
_wsum += grt._wsum;
_val.reduce(grt._val);
ArrayUtils.add(_gradient, grt._gradient);
}
}
/**
* Task with simplified gradient (ginfo) computation for logistic regression (and least squares).
* The response is recoded to {-1,+1} so the likelihood and gradient reduce to a single logistic term per row.
*/
public static class LBFGS_LogisticGradientTask extends GLMGradientTask {
public LBFGS_LogisticGradientTask(DataInfo dinfo, GLMParameters params, double lambda, double[] beta, double reg, boolean intercept) {
super(dinfo, params, lambda, beta, reg, intercept);
}
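// Logistic special case: with the response recoded to y in {-1,+1}, the per-row negative log-likelihood is
// weight * log(1 + exp(-y*eta)) and its derivative w.r.t. eta is -weight * y * (1 - 1/(1 + exp(-y*eta))),
// so no separate variance/link-derivative evaluation is needed.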
@Override protected void goByRows(Chunk [] chks, boolean [] skp){
Row row = _dinfo.newDenseRow();
double [] g = _gradient;
double [] b = _beta;
for(int rid = 0; rid < chks[0]._len; ++rid) {
if(skp[rid])continue;
row = _dinfo.extractDenseRow(chks, rid, row);
if(row.bad || row.weight == 0) continue;
double y = -1 + 2*row.response(0);
++_nobs;
double eta = row.innerProduct(b) + row.offset;
double gval;
double d = 1 + Math.exp(-y * eta);
_likelihood += row.weight*Math.log(d);
gval = row.weight*-y*(1-1.0/d);
// categoricals
for(int i = 0; i < row.nBins; ++i)
g[row.binIds[i]] += gval;
int off = _dinfo.numStart();
// numbers
for(int j = 0; j < _dinfo._nums; ++j)
g[j + off] += row.numVals[j] * gval;
// intercept
if(_dinfo._intercept)
g[g.length-1] += gval;
}
}
@Override protected void goByCols(Chunk [] chks, boolean [] skp){
int numStart = _dinfo.numStart();
double [] eta = computeEtaByCols(chks,skp);
double [] g = _gradient;
Chunk offsetChunk = _dinfo._offset ? chks[_dinfo.offsetChunkId()] : null;
Chunk responseChunk = chks[_dinfo.responseChunkId()]; // use DataInfo's chunk ids (as in GLMGradientTask.goByCols) so the response is not confused with the offset column
Chunk weightsChunk = _dinfo._weights?chks[_dinfo.weightChunkId()]:new C0DChunk(1,chks[0]._len);
double eta_sum = 0;
// compute the predicted mean and variance and ginfo for each row
for(int r = 0; r < chks[0]._len; ++r){
double w = weightsChunk.atd(r);
if(skp[r] || responseChunk.isNA(r) || w == 0)
continue;
++_nobs;
double off = (_dinfo._offset?offsetChunk.atd(r):0);
double e = eta[r] + off;
switch(_params._family) {
case gaussian:
double diff = e - responseChunk.atd(r);
_likelihood += w*diff*diff;
eta[r] = diff;
break;
case binomial:
double y = -1 + 2*responseChunk.atd(r);
double d = 1 + Math.exp(-y * e);
_likelihood += w*Math.log(d);
eta[r] = w * -y * (1 - 1.0 / d);
break;
default:
throw H2O.unimpl();
}
eta_sum += eta[r];
}
// finally go over the columns again and compute ginfo for each column
// first handle eta offset and intercept
if(_dinfo._intercept)
g[g.length-1] = eta_sum;
if(_dinfo._normMul != null && _dinfo._normSub != null)
for(int i = 0; i < _dinfo._nums; ++i)
g[numStart + i] = -_dinfo._normSub[i]*_dinfo._normMul[i]*eta_sum;
// categoricals
for(int i = 0; i < _dinfo._cats; ++i) {
Chunk c = chks[i];
for(int r = 0; r < c._len; ++r) { // categoricals can not be sparse
if(skp[r]) continue;
int off = _dinfo.getCategoricalId(i,(int)chks[i].at8(r));
if(off != -1)
g[off] += eta[r];
}
}
// numerics
for (int i = 0; i < _dinfo._nums; ++i) {
Chunk c = chks[i + _dinfo._cats]; //not expanded
for (int r = c.nextNZ(-1); r < c._len; r = c.nextNZ(r)) {
if(skp[r] || c.isNA(r))
continue;
double d = c.atd(r);
if (_dinfo._normMul != null)
d = d*_dinfo._normMul[i];
g[numStart + i] += eta[r] * d;
}
}
_skip = skp;
}
}
// public static class GLMCoordinateDescentTask extends MRTask {
// final double [] _betaUpdate;
// final double [] _beta;
// final double _xOldSub;
// final double _xOldMul;
// final double _xNewSub;
// final double _xNewMul;
//
// double [] _xy;
//
// public GLMCoordinateDescentTask(double [] betaUpdate, double [] beta, double xOldSub, double xOldMul, double xNewSub, double xNewMul) {
// _betaUpdate = betaUpdate;
// _beta = beta;
// _xOldSub = xOldSub;
// _xOldMul = xOldMul;
// _xNewSub = xNewSub;
// _xNewMul = xNewMul;
// }
//
// public void map(Chunk [] chks) {
// Chunk xOld = chks[0];
// Chunk xNew = chks[1];
// if(xNew.vec().isCategorical()){
// _xy = MemoryManager.malloc8d(xNew.vec().domain().length);
// } else
// _xy = new double[1];
// Chunk eta = chks[2];
// Chunk weights = chks[3];
// Chunk res = chks[4];
// for(int i = 0; i < eta._len; ++i) {
// double w = weights.atd(i);
// double e = eta.atd(i);
// if(_betaUpdate != null) {
// if (xOld.vec().isCategorical()) {
// int cid = (int) xOld.at8(i);
// e = +_betaUpdate[cid];
// } else
// e += _betaUpdate[0] * (xOld.atd(i) - _xOldSub) * _xOldMul;
// eta.set(i, e);
// }
// int cid = 0;
// double x = w;
// if(xNew.vec().isCategorical()) {
// cid = (int) xNew.at8(i);
// e -= _beta[cid];
// } else {
// x = (xNew.atd(i) - _xNewSub) * _xNewMul;
// e -= _beta[0] * x;
// x *= w;
// }
// _xy[cid] += x * (res.atd(i) - e);
// }
// }
// @Override public void reduce(GLMCoordinateDescentTask t) {
// ArrayUtils.add(_xy, t._xy);
// }
// }
// /**
// * Compute initial solution for multinomial problem (Simple weighted LR with all weights = 1/4)
// */
// public static final class GLMMultinomialInitTsk extends MRTask {
// double [] _mu;
// DataInfo _dinfo;
// Gram _gram;
// double [][] _xy;
//
// @Override public void map(Chunk [] chks) {
// Rows rows = _dinfo.rows(chks);
// _gram = new Gram(_dinfo);
// _xy = new double[_mu.length][_dinfo.fullN()+1];
// int numStart = _dinfo.numStart();
// double [] ds = new double[_mu.length];
// for(int i = 0; i < ds.length; ++i)
// ds[i] = 1.0/(_mu[i] * (1-_mu[i]));
// for(int i = 0; i < rows._nrows; ++i) {
// Row r = rows.row(i);
// double y = r.response(0);
// _gram.addRow(r,.25);
// for(int c = 0; c < _mu.length; ++c) {
// double iY = y == c?1:0;
// double z = (y-_mu[c]) * ds[i];
// for(int j = 0; j < r.nBins; ++j)
// _xy[c][r.binIds[j]] += z;
// for(int j = 0; j < r.nNums; ++j){
// int id = r.numIds == null?(j + numStart):r.numIds[j];
// double val = r.numVals[j];
// _xy[c][id] += z*val;
// }
// }
// }
// }
// @Override public void reduce(){
//
// }
// }
/**
* One iteration of GLM: computes the weighted Gram matrix t(X) W X and the vector t(X) W z used by the IRLSM solver.
*
* @author tomasnykodym
*/
public static class GLMIterationTask extends FrameTask2 {
final GLMParameters _params;
final double [][]_beta_multinomial;
final double []_beta;
final int _c;
protected Gram _gram; // wx%*%x
double [] _xy; // wx^t%*%z,
GLMValidation _val; // validation of previous model
final double [] _ymu;
long _nobs;
final boolean _validate;
int [] _ti;
public double _likelihood;
final double _lambda;
double wsum, wsumu;
final boolean _intercept;
public GLMIterationTask(Key jobKey, DataInfo dinfo, double lambda, GLMModel.GLMParameters glm, boolean validate,
double [][] betaMultinomial, double [] beta, int c, double [] ymu, boolean intercept, H2OCountedCompleter cmp) {
super(cmp,dinfo,jobKey);
_params = glm;
_beta = beta;
_beta_multinomial = betaMultinomial.clone();
_beta_multinomial[c] = beta;
_c = c;
_ymu = ymu;
_validate = validate;
_lambda = lambda;
_intercept = intercept;
}
public GLMIterationTask(Key jobKey, DataInfo dinfo, double lambda, GLMModel.GLMParameters glm, boolean validate,
double [] beta, double ymu,boolean intercept, H2OCountedCompleter cmp) {
super(cmp,dinfo,jobKey);
_params = glm;
_beta = beta;
_beta_multinomial = null;
_c = -1;
_ymu = new double[]{ymu};
_validate = validate;
_lambda = lambda;
_intercept = intercept;
}
@Override public boolean handlesSparseData(){return true;}
transient private double [] _etas;
transient private double [] _sparseOffsets;
transient private double _sparseOffset;
@Override
public void chunkInit() {
// initialize
_gram = new Gram(_dinfo.fullN(), _dinfo.largestCat(), _dinfo._nums, _dinfo._cats,true);
if(_params._family == Family.multinomial)
_etas = new double[_beta_multinomial.length];
// public GLMValidation(Key dataKey, double ymu, GLMParameters glm, int rank, float [] thresholds){
if(_validate) {
int rank = 0;
if(_beta != null) for(double d:_beta) if(d != 0)++rank;
String [] domain = _dinfo._adaptedFrame.lastVec().domain();
if(domain == null && _params._family == Family.binomial)
domain = new String[]{"0","1"}; // special hard-coded case for binomial on binary col
_val = new GLMValidation(domain, _ymu, _params, rank, .5, true,_intercept); // todo pass correct threshold
}
_xy = MemoryManager.malloc8d(_dinfo.fullN()+1); // + 1 is for intercept
if(_params._family == Family.binomial && _validate){
_ti = new int[2];
}
if(_params._family == Family.multinomial) {
_sparseOffsets = new double[_beta_multinomial.length];
if(_sparse)
for (int i = 0; i < _beta_multinomial.length; ++i)
_sparseOffsets[i] = GLM.sparseOffset(_beta_multinomial[i],_dinfo);
} else if(_sparse)
_sparseOffset = GLM.sparseOffset(_beta,_dinfo);
}
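// processRow: one IRLSM (iteratively re-weighted least squares) accumulation step. For the current beta it
// computes the working weight w = weight / (variance(mu) * linkDeriv(mu)^2) and the working response
// z = eta + (y - mu) * linkDeriv(mu), then adds w*x*x' into the Gram matrix and w*z*x into _xy.
// Gaussian/identity is short-circuited (w = weight, z = y - offset), and for multinomial the row is treated as a
// weighted binomial problem for class _c with d = mu*(1-mu).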
@Override
protected void processRow(Row r) { // called for every row in the chunk
if(r.bad || r.weight == 0) return;
++_nobs;
double y = r.response(0);
assert ((_params._family != Family.gamma) || y > 0) : "illegal response column, y must be > 0 for family=Gamma.";
assert ((_params._family != Family.binomial) || (0 <= y && y <= 1)) : "illegal response column, y must be <0,1> for family=Binomial. got " + y;
final double w;
final double eta;
double mu;
final double var;
final double wz;
final int numStart = _dinfo.numStart();
double d = 1;
if( _params._family == Family.gaussian && _params._link == Link.identity){
w = r.weight;
wz = w*(y - r.offset);
mu = 0;
eta = mu;
} else {
if(_params._family == Family.multinomial) {
y = (y == _c)?1:0;
double maxrow = 0;
for(int i = 0; i < _beta_multinomial.length; ++i) {
_etas[i] = r.innerProduct(_beta_multinomial[i]) + _sparseOffsets[i];
if(_etas[i] > maxrow /*|| -_etas[i] > maxrow*/)
maxrow = _etas[i];
}
eta = _etas[_c];
double etaExp = Math.exp(_etas[_c]-maxrow);
double sumExp = 0;
for(int i = 0; i < _beta_multinomial.length; ++i)
sumExp += Math.exp(_etas[i]-maxrow);
mu = etaExp / sumExp;
if(mu < 1e-16)
mu = 1e-16;//
double logSumExp = Math.log(sumExp) + maxrow;
_likelihood -= r.weight * (_etas[(int)r.response(0)] - logSumExp);
d = mu*(1-mu);
wz = r.weight * (eta * d + (y-mu));
w = r.weight * d;
} else {
eta = r.innerProduct(_beta) + _sparseOffset;
mu = _params.linkInv(eta + r.offset);
_likelihood += r.weight*_params.likelihood(y,mu);
var = Math.max(1e-6, _params.variance(mu)); // avoid numerical problems with 0 variance
d = _params.linkDeriv(mu);
double z = eta + (y-mu)*d;
w = r.weight/(var*d*d);
wz = w*z;
if(_validate)
_val.add(y, mu, r.weight, r.offset);
}
}
assert w >= 0|| Double.isNaN(w) : "invalid weight " + w; // allow NaNs - can occur if line-search is needed!
wsum+=w;
wsumu+=r.weight; // just add the user observation weight for the scaling.
for(int i = 0; i < r.nBins; ++i) {
_xy[r.binIds[i]] += wz;
}
for(int i = 0; i < r.nNums; ++i){
int id = r.numIds == null?(i + numStart):r.numIds[i];
double val = r.numVals[i];
_xy[id] += wz*val;
}
if(_dinfo._intercept)
_xy[_xy.length-1] += wz;
_gram.addRow(r, w);
}
@Override
public void reduce(GLMIterationTask git){
ArrayUtils.add(_xy, git._xy);
_gram.add(git._gram);
_nobs += git._nobs;
wsum += git.wsum;
wsumu += git.wsumu;
if (_validate) _val.reduce(git._val);
_likelihood += git._likelihood;
super.reduce(git);
}
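// postGlobal: when the data was processed in sparse form the numeric columns were not centered per row, so the
// Gram matrix and _xy are corrected here using sum w*(x_i - m_i)*(x_j - m_j) = sum w*x_i*x_j - m_i*sum w*x_j
// - m_j*sum w*x_i + W*m_i*m_j, where m_k = normSub[k]*normMul[k], W is the weighted row count, and the
// per-column weighted sums live in the intercept row of the Gram matrix.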
@Override protected void postGlobal(){
if(_sparse && _dinfo._normSub != null) { // need to adjust gram for missing centering!
int ns = _dinfo.numStart();
int interceptIdx = _xy.length-1;
double [] interceptRow = _gram._xx[interceptIdx-_gram._diagN];
double nobs = interceptRow[interceptRow.length-1]; // weighted _nobs
for(int i = ns; i < _dinfo.fullN(); ++i) {
double iMean = _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
for (int j = 0; j < ns; ++j)
_gram._xx[i - _gram._diagN][j] -= interceptRow[j]*iMean;
for (int j = ns; j <= i; ++j) {
double jMean = _dinfo._normSub[j - ns] * _dinfo._normMul[j - ns];
_gram._xx[i - _gram._diagN][j] -= interceptRow[i]*jMean + interceptRow[j]*iMean - nobs * iMean * jMean;
}
}
if(_dinfo._intercept) { // do the intercept row
for(int j = ns; j < _dinfo.fullN(); ++j)
interceptRow[j] -= nobs * _dinfo._normSub[j-ns]*_dinfo._normMul[j-ns];
}
// and the xy vec as well
for(int i = ns; i < _dinfo.fullN(); ++i) {
_xy[i] -= _xy[_xy.length - 1] * _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
}
}
if(_val != null){
_val.computeAIC();
}
}
public boolean hasNaNsOrInf() {
return ArrayUtils.hasNaNsOrInfs(_xy) || _gram.hasNaNsOrInfs();
}
}
/* public static class GLMCoordinateDescentTask extends FrameTask2 {
final GLMParameters _params;
final double [] _betaw;
final double [] _betacd;
public double [] _temp;
public double [] _varsum;
public double _ws=0;
long _nobs;
public double _likelihoods;
public GLMCoordinateDescentTask(Key jobKey, DataInfo dinfo, double lambda, GLMModel.GLMParameters glm, boolean validate, double [] betaw,
double [] betacd, double ymu, Vec rowFilter, H2OCountedCompleter cmp) {
super(cmp,dinfo,jobKey,rowFilter);
_params = glm;
_betaw = betaw;
_betacd = betacd;
}
@Override public boolean handlesSparseData(){return false;}
@Override
public void chunkInit() {
_temp=MemoryManager.malloc8d(_dinfo.fullN()+1); // using h2o memory manager
_varsum=MemoryManager.malloc8d(_dinfo.fullN());
}
@Override
protected void processRow(Row r) {
if(r.bad || r.weight == 0) return;
++_nobs;
final double y = r.response(0);
assert ((_params._family != Family.gamma) || y > 0) : "illegal response column, y must be > 0 for family=Gamma.";
assert ((_params._family != Family.binomial) || (0 <= y && y <= 1)) : "illegal response column, y must be <0,1> for family=Binomial. got " + y;
final double w, eta, mu, var, z;
final int numStart = _dinfo.numStart();
double d = 1;
if( _params._family == Family.gaussian && _params._link == Link.identity){
w = r.weight;
z = y - r.offset;
mu = 0;
eta = mu;
} else {
eta = r.innerProduct(_betaw);
mu = _params.linkInv(eta + r.offset);
var = Math.max(1e-6, _params.variance(mu)); // avoid numerical problems with 0 variance
d = _params.linkDeriv(mu);
z = eta + (y-mu)*d;
w = r.weight/(var*d*d);
}
_likelihoods += r.weight*_params.likelihood(y,mu);
assert w >= 0|| Double.isNaN(w) : "invalid weight " + w; // allow NaNs - can occur if line-search is needed!
_ws+=w;
double xb = r.innerProduct(_betacd);
for(int i = 0; i < r.nBins; ++i) { // go over cat variables
_temp[r.binIds[i]] += (z - xb + _betacd[r.binIds[i]]) *w;
_varsum[r.binIds[i]] += w ;
}
for(int i = 0; i < r.nNums; ++i){ // num vars
int id = r.numIds == null?(i + numStart):r.numIds[i];
_temp[id] += (z- xb + r.get(id)*_betacd[id] )*(r.get(id)*w);
_varsum[id] += w*r.get(id)*r.get(id);
}
_temp[_temp.length-1] += w*(z-r.innerProduct(_betacd)+_betacd[_betacd.length-1]);
}
@Override
public void reduce(GLMCoordinateDescentTask git){ // adding contribution of all the chunks
ArrayUtils.add(_temp, git._temp);
ArrayUtils.add(_varsum, git._varsum);
_ws+= git._ws;
_nobs += git._nobs;
_likelihoods += git._likelihoods;
super.reduce(git);
}
}
*/
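// Naive (sequential) coordinate descent update task: the frame carries the IRLSM working weights w, working
// response z, and a running partial fit ztilda (first three chunks). For the coordinate currently being updated
// it accumulates _temp[level] = sum_i w_i * x_i * (z_i - ztilda_i), after ztilda has been adjusted to drop the
// old contribution of the current column and add the just-updated contribution of the previous column.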
public static class GLMCoordinateDescentTaskSeqNaive extends MRTask {
public double [] _normMulold;
public double [] _normSubold;
public double [] _normMulnew;
public double [] _normSubnew;
final double [] _betaold; // current old value at j
final double [] _betanew; // global beta @ j-1 that was just updated.
final int [] _catLvls_new; // sorted list of indices of active levels only for one categorical variable
final int [] _catLvls_old;
public double [] _temp;
boolean _skipFirst;
long _nobs;
int _cat_num; // 1: current and previous both categorical, 2: current numeric and previous categorical, 3: both numeric, 4: current categorical and previous numeric
boolean _interceptnew;
boolean _interceptold;
public GLMCoordinateDescentTaskSeqNaive(boolean interceptold, boolean interceptnew, int cat_num,
double [] betaold, double [] betanew, int [] catLvlsold, int [] catLvlsnew,
double [] normMulold, double [] normSubold, double [] normMulnew, double [] normSubnew,
boolean skipFirst) { // normMul/normSub are passed in explicitly (the working weights are already computed); they are null when standardization is off
_normMulold = normMulold;
_normSubold = normSubold;
_normMulnew = normMulnew;
_normSubnew = normSubnew;
_cat_num = cat_num;
_betaold = betaold;
_betanew = betanew;
_interceptold = interceptold; // if updating beta_1, then the intercept is the previous column
_interceptnew = interceptnew; // if currently updating the intercept value
_catLvls_old = catLvlsold;
_catLvls_new = catLvlsnew;
_skipFirst = skipFirst;
}
@Override
public void map(Chunk [] chunks) {
int cnt = 0;
Chunk wChunk = chunks[cnt++];
Chunk zChunk = chunks[cnt++];
Chunk ztildaChunk = chunks[cnt++];
Chunk xpChunk=null, xChunk=null;
_temp = new double[_betaold.length];
if (_interceptnew) {
xChunk = new C0DChunk(1,chunks[0]._len);
xpChunk = chunks[cnt++];
} else {
if (_interceptold) {
xChunk = chunks[cnt++];
xpChunk = new C0DChunk(1,chunks[0]._len);
}
else {
xChunk = chunks[cnt++];
xpChunk = chunks[cnt++];
}
}
// For each observation, add the corresponding term to temp. For a categorical variable, only the term for the observation's active level
// (and the active level of the most recently updated variable before it, if that one is also categorical) contributes.
// If the active level maps to an inactive column, its contribution is skipped - an inactive beta_j is never updated - and the same
// holds for an inactive level of the most recently updated variable.
for (int i = 0; i < chunks[0]._len; ++i) { // going over all the rows in the chunk
double betanew = 0; // most recently updated prev variable
double betaold = 0; // old value of current variable being updated
double w = wChunk.atd(i);
if(w == 0) continue;
++_nobs;
int observation_level = 0, observation_level_p = 0;
double val = 1, valp = 1;
if(_cat_num == 1) {
observation_level = (int) xChunk.at8(i); // only need to change one temp value per observation.
if (_catLvls_old != null)
observation_level = Arrays.binarySearch(_catLvls_old, observation_level);
observation_level_p = (int) xpChunk.at8(i); // both cat
if (_catLvls_new != null)
observation_level_p = Arrays.binarySearch(_catLvls_new, observation_level_p);
if(_skipFirst){
observation_level--;
observation_level_p--;
}
}
else if(_cat_num == 2){
val = xChunk.atd(i); // current num and previous cat
if (_normMulold != null && _normSubold != null)
val = (val - _normSubold[0]) * _normMulold[0];
observation_level_p = (int) xpChunk.at8(i);
if (_catLvls_new != null)
observation_level_p = Arrays.binarySearch(_catLvls_new, observation_level_p);
if(_skipFirst){
observation_level_p--;
}
}
else if(_cat_num == 3){
val = xChunk.atd(i); // both num
if (_normMulold != null && _normSubold != null)
val = (val - _normSubold[0]) * _normMulold[0];
valp = xpChunk.atd(i);
if (_normMulnew != null && _normSubnew != null)
valp = (valp - _normSubnew[0]) * _normMulnew[0];
}
else if(_cat_num == 4){
observation_level = (int) xChunk.at8(i); // current cat
if (_catLvls_old != null)
observation_level = Arrays.binarySearch(_catLvls_old, observation_level); // search to see if this level is active.
if(_skipFirst){
observation_level--;
}
valp = xpChunk.atd(i); //prev numeric
if (_normMulnew != null && _normSubnew != null)
valp = (valp - _normSubnew[0]) * _normMulnew[0];
}
if(observation_level >= 0)
betaold = _betaold[observation_level];
if(observation_level_p >= 0)
betanew = _betanew[observation_level_p];
if (_interceptnew) {
ztildaChunk.set(i, ztildaChunk.atd(i) - betaold + valp * betanew); //
_temp[0] += w * (zChunk.atd(i) - ztildaChunk.atd(i));
} else {
ztildaChunk.set(i, ztildaChunk.atd(i) - val * betaold + valp * betanew);
if(observation_level >=0 ) // if the active level for that observation is an "inactive column" don't want to add contribution to temp for that observation
_temp[observation_level] += w * val * (zChunk.atd(i) - ztildaChunk.atd(i));
}
}
}
@Override
public void reduce(GLMCoordinateDescentTaskSeqNaive git){
ArrayUtils.add(_temp, git._temp);
_nobs += git._nobs;
super.reduce(git);
}
}
public static class GLMCoordinateDescentTaskSeqIntercept extends MRTask {
final double [] _betaold;
public double _temp;
DataInfo _dinfo;
public GLMCoordinateDescentTaskSeqIntercept( double [] betaold, DataInfo dinfo) {
_betaold = betaold;
_dinfo = dinfo;
}
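// Accumulates the numerator of the intercept update: sum over active rows of w * (z - x*beta_old),
// where the filter chunk marks rows to be skipped.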
@Override
public void map(Chunk [] chunks) {
int cnt = 0;
Chunk wChunk = chunks[cnt++];
Chunk zChunk = chunks[cnt++];
Chunk filterChunk = chunks[cnt++];
Row r = _dinfo.newDenseRow();
for(int i = 0; i < chunks[0]._len; ++i) {
if(filterChunk.atd(i)==1) continue;
_dinfo.extractDenseRow(chunks,i,r);
_temp += wChunk.atd(i) * (zChunk.atd(i) - r.innerProduct(_betaold)); // accumulate over rows; the weight chunk holds doubles
}
}
@Override
public void reduce(GLMCoordinateDescentTaskSeqIntercept git){
_temp+= git._temp;
super.reduce(git);
}
}
public static class GLMGenerateWeightsTask extends MRTask {
final GLMParameters _params;
final double [] _betaw;
double [] denums;
double wsum,wsumu;
DataInfo _dinfo;
double _likelihood;
public GLMGenerateWeightsTask(Key jobKey, DataInfo dinfo, GLMModel.GLMParameters glm, double[] betaw) {
_params = glm;
_betaw = betaw;
_dinfo = dinfo;
}
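// map: for every row compute the IRLSM working weight w and working response z from the current beta
// (Gaussian/identity short-circuits to w = weight, z = y - offset), store them (plus ztilda = eta without the
// intercept term) back into the last three chunks of the frame, and accumulate the per-coefficient
// denominators sum(w * x^2) used by the coordinate descent updates.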
@Override
public void map(Chunk [] chunks) {
Chunk wChunk = chunks[chunks.length-3];
Chunk zChunk = chunks[chunks.length-2];
Chunk zTilda = chunks[chunks.length-1];
chunks = Arrays.copyOf(chunks,chunks.length-3);
denums = new double[_dinfo.fullN()+1]; // full N is expanded variables with categories
Row r = _dinfo.newDenseRow();
for(int i = 0; i < chunks[0]._len; ++i) {
_dinfo.extractDenseRow(chunks,i,r);
if (r.bad || r.weight == 0) {
wChunk.set(i,0);
zChunk.set(i,0);
zTilda.set(i,0);
continue;
}
final double y = r.response(0);
assert ((_params._family != Family.gamma) || y > 0) : "illegal response column, y must be > 0 for family=Gamma.";
assert ((_params._family != Family.binomial) || (0 <= y && y <= 1)) : "illegal response column, y must be <0,1> for family=Binomial. got " + y;
final double w, eta, mu, var, z;
final int numStart = _dinfo.numStart();
double d = 1;
eta = r.innerProduct(_betaw);
if (_params._family == Family.gaussian && _params._link == Link.identity) {
w = r.weight;
z = y - r.offset;
mu = 0;
} else {
mu = _params.linkInv(eta + r.offset);
var = Math.max(1e-6, _params.variance(mu)); // avoid numerical problems with 0 variance
d = _params.linkDeriv(mu);
z = eta + (y - mu) * d;
w = r.weight / (var * d * d);
}
_likelihood += _params.likelihood(y,mu);
zTilda.set(i,eta-_betaw[_betaw.length-1]);
assert w >= 0 || Double.isNaN(w) : "invalid weight " + w; // allow NaNs - can occur if line-search is needed!
wChunk.set(i,w);
zChunk.set(i,z);
wsum+=w;
wsumu+=r.weight; // just add the user observation weight for the scaling.
for(int j = 0; j < r.nBins; ++j) { // go over cat variables
denums[r.binIds[j]] += w; // binIds skips the zeros.
}
for(int j = 0; j < r.nNums; ++j){ // num vars
int id = r.numIds == null?(j + numStart):r.numIds[j];
denums[id]+= w*r.get(id)*r.get(id);
}
}
}
@Override
public void reduce(GLMGenerateWeightsTask git){ // adding contribution of all the chunks
ArrayUtils.add(denums, git.denums);
wsum+=git.wsum;
wsumu += git.wsumu;
_likelihood += git._likelihood;
super.reduce(git);
}
}
// public static class GLMValidationTask> extends MRTask {
// protected final GLMModel _model;
// protected GLMValidation _res;
// public final double _lambda;
// public boolean _improved;
// Key _jobKey;
// public static Key makeKey(){return Key.make("__GLMValidation_" + Key.make().toString());}
// public GLMValidationTask(GLMModel model, double lambda){this(model,lambda,null);}
// public GLMValidationTask(GLMModel model, double lambda, H2OCountedCompleter completer){super(completer); _lambda = lambda; _model = model;}
// @Override public void map(Chunk[] chunks){
// _res = new GLMValidation(null,_model._ymu,_model._parms,_model.rank(_lambda));
// final int nrows = chunks[0]._len;
// double [] row = MemoryManager.malloc8d(_model._output._names.length);
// float [] preds = MemoryManager.malloc4f(_model._parms._family == Family.binomial?3:1);
// OUTER:
// for(int i = 0; i < nrows; ++i){
// if(chunks[chunks.length-1].isNA(i))continue;
// for(int j = 0; j < chunks.length-1; ++j){
// if(chunks[j].isNA(i))continue OUTER;
// row[j] = chunks[j].atd(i);
// }
// _model.score0(row, preds);
// double response = chunks[chunks.length-1].atd(i);
// _res.add(response, _model._parms._family == Family.binomial?preds[2]:preds[0]);
// }
// }
// @Override public void reduce(GLMValidationTask gval){_res.add(gval._res);}
// @Override public void postGlobal(){
// _res.computeAIC();
// _res.computeAUC();
// }
// }
// use general score to reduce number of possible different code paths
// public static class GLMXValidationTask extends GLMValidationTask{
// protected final GLMModel [] _xmodels;
// protected GLMValidation [] _xvals;
// long _nobs;
// final float [] _thresholds;
// public static Key makeKey(){return Key.make("__GLMValidation_" + Key.make().toString());}
//
// public GLMXValidationTask(GLMModel mainModel,double lambda, GLMModel [] xmodels, float [] thresholds){this(mainModel,lambda,xmodels,thresholds,null);}
// public GLMXValidationTask(GLMModel mainModel,double lambda, GLMModel [] xmodels, float [] thresholds, final H2OCountedCompleter completer){
// super(mainModel, lambda,completer);
// _xmodels = xmodels;
// _thresholds = thresholds;
// }
// @Override public void map(Chunk [] chunks) {
// long gid = chunks[0].start();
// _xvals = new GLMValidation[_xmodels.length];
// for(int i = 0; i < _xmodels.length; ++i)
// _xvals[i] = new GLMValidation(null,_xmodels[i]._ymu,_xmodels[i]._parms,_xmodels[i]._output.rank(),_thresholds);
// final int nrows = chunks[0]._len;
// double [] row = MemoryManager.malloc8d(_xmodels[0]._output._names.length);
// float [] preds = MemoryManager.malloc4f(_xmodels[0]._parms._family == Family.binomial?3:1);
// OUTER:
// for(int i = 0; i < nrows; ++i){
// if(chunks[chunks.length-1].isNA(i))continue;
// for(int j = 0; j < chunks.length-1; ++j) {
// if(chunks[j].isNA(i))continue OUTER;
// row[j] = chunks[j].atd(i);
// }
// ++_nobs;
// final int mid = (int)((i + gid) % _xmodels.length);
// final GLMModel model = _xmodels[mid];
// final GLMValidation val = _xvals[mid];
// model.score0(row, preds);
// double response = chunks[chunks.length-1].at8(i);
// val.add(response, model._parms._family == Family.binomial?preds[2]:preds[0]);
// }
// }
// @Override public void reduce(GLMXValidationTask gval){
// _nobs += gval._nobs;
// for(int i = 0; i < _xvals.length; ++i)
// _xvals[i].add(gval._xvals[i]);}
//
// @Override public void postGlobal() {
// H2OCountedCompleter cmp = (H2OCountedCompleter)getCompleter();
// if(cmp != null)cmp.addToPendingCount(_xvals.length + 1);
// for (int i = 0; i < _xvals.length; ++i) {
// _xvals[i].computeAIC();
// _xvals[i].computeAUC();
// _xvals[i]._nobs = _nobs - _xvals[i]._nobs;
// GLMModel.setXvalidation(cmp, _xmodels[i]._key, _lambda, _xvals[i]);
// }
// GLMModel.setXvalidation(cmp, _model._key, _lambda, new GLMXValidation(_model, _xmodels, _xvals, _lambda, _nobs,_thresholds));
// }
// }
}