package hex.glm;

import hex.DataInfo;
import hex.DataInfo.Row;
import hex.FrameTask2;
import hex.glm.GLMModel.GLMParameters;
import hex.glm.GLMModel.GLMParameters.Family;
import hex.glm.GLMModel.GLMParameters.Link;
import hex.glm.GLMModel.GLMWeights;
import hex.glm.GLMModel.GLMWeightsFun;
import hex.gram.Gram;
import water.*;
import water.H2O.H2OCountedCompleter;
import water.fvec.C0DChunk;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.util.ArrayUtils;
import water.util.FrameUtils;
import water.util.MathUtils;
import water.util.MathUtils.BasicStats;

import java.util.Arrays;

import static hex.glm.GLMModel.GLMParameters.DispersionMethod.deviance;
import static hex.glm.GLMModel.GLMParameters.Family.gaussian;
import static hex.glm.GLMTask.DataAddW2AugXZ.getCorrectChunk;
import static hex.glm.GLMUtils.updateGradGam;
import static hex.glm.GLMUtils.updateGradGamMultinomial;
import static org.apache.commons.math3.special.Gamma.*;

/**
 * All GLM related distributed tasks:
 *
 * YMUTask           - computes response means on the actual dataset (some rows may be ignored, e.g. rows with NAs or rows excluded during cross-validation)
 * GLMGradientTask   - computes the gradient at a given beta; used by L-BFGS and for the KKT condition check
 * GLMLineSearchTask - computes residual deviance(s) at given beta(s); used by line search (both L-BFGS and IRLSM)
 * GLMIterationTask  - used by IRLSM to compute the Gram matrix t(X) %*% W %*% X and the vector t(X) %*% W %*% z
 *
 * @author tomasnykodym
 */
public abstract class GLMTask  {
  final static double EPS=1e-10;
  final static double ZEROEQUAL = 1e-8;
  final static double ONEEQUAL = 1-1e-8;
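  /**
   * NullDevTask computes the deviance of the null model, i.e. the model predicting the (weighted) response
   * mean for every row: nullDev = sum_i w_i * deviance(y_i, linkInv(link(ymu) + offset_i)).
   */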
  static class NullDevTask extends MRTask<NullDevTask> {
    double _nullDev;
    final double [] _ymu;
    final GLMWeightsFun _glmf;
    final boolean _hasWeights;
    final boolean _hasOffset;
    public NullDevTask(GLMWeightsFun glmf, double [] ymu, boolean hasWeights, boolean hasOffset) {
      _glmf = glmf;
      _ymu = ymu;
      _hasWeights = hasWeights;
      _hasOffset = hasOffset;
    }

    @Override public void map(Chunk [] chks) {
      int i = 0;
      int len = chks[0]._len;
      Chunk w = _hasWeights?chks[i++]:new C0DChunk(1.0,len);
      Chunk o = _hasOffset?chks[i++]:new C0DChunk(0.0,len);
      Chunk r = chks[i];
      if(_glmf._family != Family.multinomial) {
        double ymu = _glmf.link(_ymu[0]);
        for (int j = 0; j < len; ++j)
          _nullDev += w.atd(j)*_glmf.deviance(r.atd(j), _glmf.linkInv(ymu + o.atd(j)));
      } else {
        throw H2O.unimpl();
      }
    }
    @Override public void reduce(NullDevTask ndt) {_nullDev += ndt._nullDev;}
  }

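  /**
   * GLMResDevTask computes the residual deviance and likelihood of a GLM at a fixed coefficient vector beta,
   * accumulating sum_i w_i * deviance(y_i, mu_i) with mu_i = linkInv(x_i * beta + offset_i).
   */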
  static class GLMResDevTask extends FrameTask2<GLMResDevTask> {
    final GLMWeightsFun _glmf;
    final double [] _beta;
    double _resDev = 0;
    long _nobs;
    double _likelihood;

    public GLMResDevTask(Key jobKey, DataInfo dinfo,GLMParameters parms, double [] beta) {
      super(null,dinfo, jobKey);
      _glmf = new GLMWeightsFun(parms);
      _beta = beta;
      _sparseOffset = _sparse?GLM.sparseOffset(_beta,_dinfo):0;
    }
    private transient GLMWeights _glmw;
    private final double _sparseOffset;

    @Override public boolean handlesSparseData(){return true;}
    @Override
    public void chunkInit() {
      _glmw = new GLMWeights();
    }
    @Override
    protected void processRow(Row r) {
      _glmf.computeWeights(r.response(0), r.innerProduct(_beta) + _sparseOffset, r.offset, r.weight, _glmw);
      _resDev += _glmw.dev;
      _likelihood += _glmw.l;
      ++_nobs;
    }
    @Override public void reduce(GLMResDevTask gt) {_nobs += gt._nobs; _resDev += gt._resDev; _likelihood += gt._likelihood;}
    public double avgDev(){return _resDev/_nobs;}
    public double dev(){return _resDev;}

  }

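  /**
   * Residual deviance for ordinal regression: accumulates the negative log-likelihood of the cumulative
   * logit model; dev() reports the deviance as 2 * negative log-likelihood.
   */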
  static class GLMResDevTaskOrdinal extends FrameTask2<GLMResDevTaskOrdinal> {
    final double [][] _beta;
    double _likelihood;
    final int _nclasses;
    final int _lastClass;
    final int _secondToLast;
    long _nobs;

    public GLMResDevTaskOrdinal(Key jobKey, DataInfo dinfo, double [] beta, int nclasses) {
      super(null,dinfo, jobKey);
      _beta = ArrayUtils.convertTo2DMatrix(beta,beta.length/nclasses);
      _nclasses = nclasses;
      _lastClass = nclasses-1;
      _secondToLast = _lastClass - 1;

    }

    @Override public boolean handlesSparseData(){return true;}
    private transient double [] _sparseOffsets;

    @Override
    public void chunkInit() {
      _sparseOffsets = MemoryManager.malloc8d(_nclasses);
      if(_sparse)
        for(int c = 0; c < _nclasses; ++c)
          _sparseOffsets[c] = GLM.sparseOffset(_beta[c],_dinfo);
    }
    @Override
    protected void processRow(Row r) {
      _nobs++;
      int c = (int)r.response(0); // true response category
      if (c==0) { // for category 0
        double eta = r.innerProduct(_beta[0])+ _sparseOffsets[c];
        _likelihood -= r.weight * (eta-Math.log(1+Math.exp(eta)));
      } else if (c==_lastClass) { // for class nclass-1
        _likelihood += r.weight * Math.log(1+Math.exp(r.innerProduct(_beta[_secondToLast])+ _sparseOffsets[c]));
      } else { // for category from 1 to nclass-2
        double eta = Math.exp(r.innerProduct(_beta[c])+_sparseOffsets[c]);
        double etaM1 = Math.exp(r.innerProduct(_beta[c-1])+_sparseOffsets[c-1]);
        _likelihood -= r.weight * Math.log(eta/(1+eta)-etaM1/(1+etaM1));
      }
    }
    @Override public void reduce(GLMResDevTaskOrdinal gt) {_nobs += gt._nobs; _likelihood += gt._likelihood;}

    public double avgDev(){return _likelihood*2/_nobs;}
    public double dev(){return _likelihood*2;}
  }

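  /**
   * Residual deviance for multinomial regression: per row the negative log-likelihood is
   * -w * (eta_y - log(sum_c exp(eta_c))); dev() reports the deviance as 2 * negative log-likelihood.
   */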
  static class GLMResDevTaskMultinomial extends FrameTask2<GLMResDevTaskMultinomial> {
    final double [][] _beta;
    double _likelihood;
    final int _nclasses;
    long _nobs;

    public GLMResDevTaskMultinomial(Key jobKey, DataInfo dinfo, double [] beta, int nclasses) {
      super(null,dinfo, jobKey);
      _beta = ArrayUtils.convertTo2DMatrix(beta,beta.length/nclasses);
      _nclasses = nclasses;
    }

    @Override public boolean handlesSparseData(){return true;}
    private transient double [] _sparseOffsets;

    @Override
    public void chunkInit() {
      _sparseOffsets = MemoryManager.malloc8d(_nclasses);
      if(_sparse)
        for(int c = 0; c < _nclasses; ++c)
          _sparseOffsets[c] = GLM.sparseOffset(_beta[c],_dinfo);
    }
    @Override
    protected void processRow(Row r) {
      _nobs++;
      double sumExp = 0;
      for(int c = 0; c < _nclasses; ++c)
        sumExp += Math.exp(r.innerProduct(_beta[c]) + _sparseOffsets[c]);
      int c = (int)r.response(0);
      _likelihood -= r.weight * ((r.innerProduct(_beta[c]) + _sparseOffsets[c]) - Math.log(sumExp));
    }
    @Override public void reduce(GLMResDevTaskMultinomial gt) {_nobs += gt._nobs; _likelihood += gt._likelihood;}

    public double avgDev(){return _likelihood*2/_nobs;}
    public double dev(){return _likelihood*2;}
  }

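 /**
  * WeightedSDTask computes, for each numeric column, the (weighted) sum of squared deviations from the
  * supplied means; NAs are skipped, which matches both the skip and the impute-by-mean NA policies.
  */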
 static class WeightedSDTask extends MRTask<WeightedSDTask> {
   final int _weightId;
   final double [] _mean;
   public double [] _varSum;
   public WeightedSDTask(int wId, double [] mean){
     _weightId = wId;
     _mean = mean;
   }
   @Override public void map(Chunk [] chks){
     double [] weights = null;
     if(_weightId != - 1){
       weights = MemoryManager.malloc8d(chks[_weightId]._len);
       chks[_weightId].getDoubles(weights,0,weights.length);
       chks = ArrayUtils.remove(chks,_weightId);
     }
     _varSum = MemoryManager.malloc8d(_mean.length);
     double [] vals = MemoryManager.malloc8d(chks[0]._len);
     int [] ids = MemoryManager.malloc4(chks[0]._len);
     for(int c = 0; c < _mean.length; ++c){
       double mu = _mean[c];
       int n = chks[c].getSparseDoubles(vals,ids);
       double s = 0;
       for(int i = 0; i < n; ++i) {
         double d = vals[i];
         if(Double.isNaN(d)) // NAs are either skipped or replaced with mean (i.e. can also be skipped)
           continue;
         d = d - mu;
         if(_weightId != -1)
           s += weights[ids[i]]*d*d;
         else
           s += d*d;
       }
       _varSum[c] = s;
     }
   }
   public void reduce(WeightedSDTask t){
     ArrayUtils.add(_varSum,t._varSum);
   }
 }
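 /**
  * YMUTask computes the response mean(s), response min/max, and basic (weighted) statistics of the numeric
  * predictors. When skipNAs is set, rows containing any NA are excluded by zeroing out their weight.
  */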
 static public class YMUTask extends MRTask<YMUTask> {
   double _yMin = Double.POSITIVE_INFINITY, _yMax = Double.NEGATIVE_INFINITY;
   final int _responseId;
   final int _weightId;
   final int _offsetId;
   final int _nums; // number of numeric columns
   final int _numOff;
   final boolean _skipNAs;
   final boolean _computeWeightedMeanSigmaResponse;

   private BasicStats _basicStats;
   private BasicStats _basicStatsResponse;
   double [] _yMu;
   final int _nClasses;


   private double [] _predictorSDs;
   private final boolean _expandedResponse; // true iff family == multinomial and response has been manually expanded into binary columns

   public double [] predictorMeans(){return _basicStats.mean();}
   public double [] predictorSDs(){
     if(_predictorSDs != null) return _predictorSDs;
     return (_predictorSDs = _basicStats.sigma());
   }

   public double [] responseMeans(){return _basicStatsResponse.mean();}
   public double [] responseSDs(){
     return _basicStatsResponse.sigma();
   }

   public YMUTask(DataInfo dinfo, int nclasses, boolean computeWeightedMeanSigmaResponse, boolean skipNAs, boolean haveResponse, boolean expandedResponse) {
     _nums = dinfo._nums;
     _numOff = dinfo._cats;
     _responseId = haveResponse ? dinfo.responseChunkId(0) : -1;
     _weightId = dinfo._weights?dinfo.weightChunkId():-1;
     _offsetId = dinfo._offset?dinfo.offsetChunkId():-1;
     _nClasses = nclasses;
     _computeWeightedMeanSigmaResponse = computeWeightedMeanSigmaResponse;
     _skipNAs = skipNAs;
     _expandedResponse = _nClasses == 1 || expandedResponse;
   }

   @Override public void setupLocal(){}

   @Override public void map(Chunk [] chunks) {
     _yMu = new double[_nClasses];
     double [] ws = MemoryManager.malloc8d(chunks[0].len());
     if(_weightId != -1)
       chunks[_weightId].getDoubles(ws,0,ws.length);
     else
      Arrays.fill(ws,1);
     boolean changedWeights = false;
     if(_skipNAs) { // first find the rows to skip, need to go over all chunks including categoricals
       double [] vals = MemoryManager.malloc8d(chunks[0]._len);
       int [] ids = MemoryManager.malloc4(vals.length);
       for (int i = 0; i < chunks.length; ++i) {
         int n = vals.length;
         if(chunks[i].isSparseZero())
           n = chunks[i].getSparseDoubles(vals,ids);
         else
          chunks[i].getDoubles(vals,0,n);
         for (int r = 0; r < n; ++r) {
           if (ws[r] != 0 && Double.isNaN(vals[r])) {
             ws[r] = 0;
             changedWeights = true;
           }
         }
       }
       if(changedWeights && _weightId != -1)
         chunks[_weightId].set(ws);
     }
     Chunk response = _responseId < 0 ? null : chunks[_responseId];
     double [] numsResponse = null;
     _basicStats = new BasicStats(_nums);
     if(_computeWeightedMeanSigmaResponse) {
       _basicStatsResponse = new BasicStats(_nClasses);
       numsResponse = MemoryManager.malloc8d(_nClasses);
     }
     // compute basic stats for numeric predictors
     for(int i = 0; i < _nums; ++i) {
       Chunk c = chunks[i + _numOff];
       double w;
       for (int r = c.nextNZ(-1); r < c._len; r = c.nextNZ(r)) {
         if ((w = ws[r]) == 0) continue;
         double d = c.atd(r);
         _basicStats.add(d, w, i);
       }
     }
     if (response == null) return;
     long nobs = 0;
     double wsum = 0;
     for(double w:ws) {
       if(w != 0)++nobs;
       wsum += w;
     }
     _basicStats.setNobs(nobs,wsum);
     // compute the mean for the response
     // autoexpand categoricals into binary vecs
     for(int r = 0; r < response._len; ++r) {
       double w;
       if((w = ws[r]) == 0)
         continue;
       if(_computeWeightedMeanSigmaResponse) {
         //FIXME: Add support for subtracting offset from response
         if(_expandedResponse) {
           for (int i = 0; i < _nClasses; ++i)
             numsResponse[i] = chunks[chunks.length - _nClasses + i].atd(r);
         } else {
           Arrays.fill(numsResponse,0);
           double d = response.atd(r);
           if(Double.isNaN(d))
             Arrays.fill(numsResponse,Double.NaN);
           else
             numsResponse[(int)d] = 1;
         }
         _basicStatsResponse.add(numsResponse,w);
       }
       double d = response.atd(r);
       if(!Double.isNaN(d)) {
         if (_nClasses > 2)
           _yMu[(int) d] += w;
         else
           _yMu[0] += w*d;
         if (d < _yMin)
           _yMin = d;
         if (d > _yMax)
           _yMax = d;
       }
     }
     if(_basicStatsResponse != null)_basicStatsResponse.setNobs(nobs,wsum);
     for(int i = 0; i < _nums; ++i) {
       if(chunks[i+_numOff].isSparseZero())
         _basicStats.fillSparseZeros(i);
       else if(chunks[i+_numOff].isSparseNA())
         _basicStats.fillSparseNAs(i);
     }
   }
   @Override public void postGlobal() {
     ArrayUtils.mult(_yMu,1.0/_basicStats._wsum);
   }

   @Override public void reduce(YMUTask ymt) {
     if(_basicStats.nobs() > 0 && ymt._basicStats.nobs() > 0) {
       ArrayUtils.add(_yMu,ymt._yMu);
       if(_yMin > ymt._yMin)
         _yMin = ymt._yMin;
       if(_yMax < ymt._yMax)
         _yMax = ymt._yMax;
       _basicStats.reduce(ymt._basicStats);
       if(_computeWeightedMeanSigmaResponse)
         _basicStatsResponse.reduce(ymt._basicStatsResponse);
     } else if (_basicStats.nobs() == 0) {
       _yMu = ymt._yMu;
       _yMin = ymt._yMin;
       _yMax = ymt._yMax;
       _basicStats = ymt._basicStats;
       _basicStatsResponse = ymt._basicStatsResponse;
     }
   }
   public double wsum() {return _basicStats._wsum;}

   public long nobs() {return _basicStats.nobs();}
 }

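  /**
   * Numerically stable softmax over the class etas. Subtracting maxRow before exponentiation avoids
   * overflow, since softmax(eta)_c = exp(eta_c - m) / sum_k exp(eta_k - m) for any constant m. Fills
   * exps[1..K] with the class probabilities, stores the predicted class (argmax) in exps[0], and returns
   * log(sum_k exp(eta_k)).
   */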
  static double  computeMultinomialEtas(double [] etas, double [] exps) {
    double maxRow = ArrayUtils.maxValue(etas);
    double sumExp = 0;
    int K = etas.length;
    for(int c = 0; c < K; ++c) {
      double x = Math.exp(etas[c] - maxRow);
      sumExp += x;
      exps[c+1] = x;
    }
    double reg = 1.0/(sumExp);
    for(int c = 0; c < etas.length; ++c)
      exps[c+1] *= reg;
    exps[0] = 0;
    exps[0] = ArrayUtils.maxIndex(exps)-1;
    return Math.log(sumExp) + maxRow;
  }
  
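  /**
   * Base class for computing the gradient of the penalized negative log-likelihood. Each subclass supplies
   * computeGradientMultipliers(), which turns the per-row linear predictors (etas) into per-row derivatives
   * d(-llh)/d(eta); the gradient is then accumulated as t(X) times those multipliers, and the L2 (plus
   * optional GAM smoothness) penalty is added in postGlobal().
   */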
  static abstract class GLMGradientTask extends MRTask<GLMGradientTask> {
    final double [] _beta;
    public double [] _gradient;
    public double _likelihood;
    final transient  double _currentLambda;
    final transient double _reg;
    protected final DataInfo _dinfo;
    public double[][][] _penalty_mat; // for gam only
    public int[][] _gamBetaIndices; // for gam only
    
    protected GLMGradientTask(Key jobKey, DataInfo dinfo, double reg, double lambda, double[] beta){
      _dinfo = dinfo;
      _beta = beta.clone();
      _reg = reg;
      _currentLambda = lambda;
    }

    protected GLMGradientTask(Key jobKey, DataInfo dinfo, double reg, double lambda, double[] beta, 
                              double[][][] penaltyMat, int[][] gamBetaInd){
      this(jobKey, dinfo, reg, lambda, beta);
      _penalty_mat = penaltyMat;
      _gamBetaIndices = gamBetaInd;
    }
    
    protected abstract void computeGradientMultipliers(double [] es, double [] ys, double [] ws);

    private final void computeCategoricalEtas(Chunk [] chks, double [] etas, double [] vals, int [] ids) {
      // categoricals
      for(int cid = 0; cid < _dinfo._cats; ++cid){
        Chunk c = chks[cid];
        if(c.isSparseZero()) {
          int nvals = c.getSparseDoubles(vals,ids,-1);
          for(int i = 0; i < nvals; ++i){
            int id = _dinfo.getCategoricalId(cid,(int)vals[i]);
            if(id >=0) etas[ids[i]] += _beta[id];
          }
        } else {
          c.getIntegers(ids, 0, c._len,-1);
          for(int i = 0; i < ids.length; ++i){
            int id = _dinfo.getCategoricalId(cid,ids[i]);
            if(id >=0) etas[i] += _beta[id];
          }
        }
      }
    }

    private final void computeCategoricalGrads(Chunk [] chks, double [] etas, double [] vals, int [] ids) {
      // categoricals
      for(int cid = 0; cid < _dinfo._cats; ++cid){
        Chunk c = chks[cid];
        if(c.isSparseZero()) {
          int nvals = c.getSparseDoubles(vals,ids,-1);
          for(int i = 0; i < nvals; ++i){
            int id = _dinfo.getCategoricalId(cid,(int)vals[i]);
            if(id >=0) _gradient[id] += etas[ids[i]];
          }
        } else {
          c.getIntegers(ids, 0, c._len,-1);
          for(int i = 0; i < ids.length; ++i){
            int id = _dinfo.getCategoricalId(cid,ids[i]);
            if(id >=0) _gradient[id] += etas[i];
          }
        }
      }
    }

    private final void computeNumericEtas(Chunk [] chks, double [] etas, double [] vals, int [] ids) {
      int numOff = _dinfo.numStart();
      for(int cid = 0; cid < _dinfo._nums; ++cid){
        double scale = _dinfo._normMul != null?_dinfo._normMul[cid]:1;
        double off = _dinfo._normSub != null?_dinfo._normSub[cid]:0;
        double NA = _dinfo._numNAFill[cid];
        Chunk c = chks[cid+_dinfo._cats];
        double b = scale*_beta[numOff+cid];
        if(c.isSparseZero()){
          int nvals = c.getSparseDoubles(vals,ids,NA);
          for(int i = 0; i < nvals; ++i)
            etas[ids[i]] += vals[i] * b;
        } else if(c.isSparseNA()){
          int nvals = c.getSparseDoubles(vals,ids,NA);
          for(int i = 0; i < nvals; ++i)
            etas[ids[i]] += (vals[i] - off) * b;
        } else {
          c.getDoubles(vals,0,vals.length,NA);
          for(int i = 0; i < vals.length; ++i)
            etas[i] += (vals[i] - off) * b;
        }
      }
    }

    private final void computeNumericGrads(Chunk [] chks, double [] etas, double [] vals, int [] ids) {
      int numOff = _dinfo.numStart();
      for(int cid = 0; cid < _dinfo._nums; ++cid){
        double NA = _dinfo._numNAFill[cid];
        Chunk c = chks[cid+_dinfo._cats];
        double scale = _dinfo._normMul == null?1:_dinfo._normMul[cid];
        double offset = _dinfo._normSub == null?0:_dinfo._normSub[cid];
        if(c.isSparseZero()){
          double g = 0;
          int nVals = c.getSparseDoubles(vals,ids,NA);
          for(int i = 0; i < nVals; ++i)
            g += (vals[i]-offset)*scale*etas[ids[i]];
          _gradient[numOff+cid] = g;
        } else if(c.isSparseNA()){
          double off = _dinfo._normSub == null?0:_dinfo._normSub[cid];
          double g = 0;
          int nVals = c.getSparseDoubles(vals,ids,NA);
          for(int i = 0; i < nVals; ++i)
            g += (vals[i]-off)*scale*etas[ids[i]];
          _gradient[numOff+cid] = g;
        } else {
          double off = _dinfo._normSub == null?0:_dinfo._normSub[cid];
          c.getDoubles(vals,0,vals.length,NA);
          double g = 0;
          for(int i = 0; i < vals.length; ++i)
            g += (vals[i]-off)*scale*etas[i];
          _gradient[numOff+cid] = g;
        }
      }
    }

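    // With standardized sparse data the per-column centering (normSub) is skipped in the sparse dot products,
    // so the constant term sum_j beta_j * normSub_j * normMul_j is folded into a single sparseOffset added to
    // every row's eta; the matching correction to the numeric gradients is applied once the intercept
    // gradient (the sum of the multipliers) is known.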
    public void map(Chunk [] chks) {
      _gradient = MemoryManager.malloc8d(_beta.length);
      Chunk response = chks[chks.length-_dinfo._responses];
      Chunk weights = _dinfo._weights?chks[_dinfo.weightChunkId()]:new C0DChunk(1,response._len);
      double [] ws = weights.getDoubles(MemoryManager.malloc8d(weights._len),0,weights._len);
      double [] ys = response.getDoubles(MemoryManager.malloc8d(weights._len),0,response._len);
      double [] etas = MemoryManager.malloc8d(response._len);
      if(_dinfo._offset)
        chks[_dinfo.offsetChunkId()].getDoubles(etas,0,etas.length);
      double sparseOffset = 0;
      int numStart = _dinfo.numStart();
      if(_dinfo._normSub != null)
        for(int i = 0; i < _dinfo._nums; ++i)
          if(chks[_dinfo._cats + i].isSparseZero())
            sparseOffset -= _beta[numStart + i]*_dinfo._normSub[i]*_dinfo._normMul[i];
      ArrayUtils.add(etas,sparseOffset + _beta[_beta.length-1]);
      double [] vals = MemoryManager.malloc8d(response._len);
      int [] ids = MemoryManager.malloc4(response._len);
      computeCategoricalEtas(chks,etas,vals,ids);
      computeNumericEtas(chks,etas,vals,ids);
      computeGradientMultipliers(etas,ys,ws);
      // walk the chunks again, add to the gradient
      computeCategoricalGrads(chks,etas,vals,ids);
      computeNumericGrads(chks,etas,vals,ids);
      // add intercept
      _gradient[_gradient.length-1] = ArrayUtils.sum(etas);
      if(_dinfo._normSub != null) {
        double icpt = _gradient[_gradient.length-1];
        for(int i = 0; i < _dinfo._nums; ++i) {
          if(chks[_dinfo._cats+i].isSparseZero()) {
            double d = _dinfo._normSub[i] * _dinfo._normMul[i];
            _gradient[numStart + i] -= d * icpt;
          }
        }
      }
    }

    @Override
    public final void reduce(GLMGradientTask gmgt){
      ArrayUtils.add(_gradient,gmgt._gradient);
      _likelihood += gmgt._likelihood;
    }
    @Override public final void postGlobal(){
      ArrayUtils.mult(_gradient,_reg);
      for(int j = 0; j < _beta.length - 1; ++j)
        _gradient[j] += _currentLambda * _beta[j];  // add L2 constraint for gradient
      if ((_penalty_mat != null) && (_gamBetaIndices != null))  // update contribution from gam smoothness constraint
        updateGradGam(_gradient, _penalty_mat, _gamBetaIndices, _beta, _dinfo._activeCols);
    }
  }

  static class GLMGenericGradientTask extends GLMGradientTask {
    private final GLMWeightsFun _glmf;
    public GLMGenericGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta);
      _glmf = new GLMWeightsFun(parms);
    }

    public GLMGenericGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta, 
                                  double[][][] penaltyMat, int[][] gamCols) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta, penaltyMat, gamCols);
      _glmf = new GLMWeightsFun(parms);
    }

    @Override protected void computeGradientMultipliers(double [] es, double [] ys, double [] ws){
      double l = 0;
      for(int i = 0; i < es.length; ++i) {
        if (Double.isNaN(ys[i]) || ws[i] == 0) {
          es[i] = 0;
        } else {
          double mu = _glmf.linkInv(es[i]);
          mu = mu==0?hex.glm.GLMModel._EPS:mu;
          l += ws[i] * _glmf.likelihood(ys[i], mu);
          double var = _glmf.variance(mu);
          if (var < hex.glm.GLMModel._EPS) var = hex.glm.GLMModel._EPS; // es is the gradient without the predictor term
          if (_glmf._family.equals(Family.tweedie)) {
            _glmf._oneOeta = 1.0/(es[i]==0?hex.glm.GLMModel._EPS:es[i]);
            _glmf._oneOetaSquare = _glmf._oneOeta*_glmf._oneOeta;
            es[i] = ws[i]*_glmf.linkInvDeriv(mu)*(_glmf._var_power==1?(1-ys[i]/mu):
                    (_glmf._var_power==2?(1/mu-ys[i]*Math.pow(mu, -_glmf._var_power)):
                            (Math.pow(mu, _glmf._oneMinusVarPower)-ys[i]*Math.pow(mu, -_glmf._var_power))));
          } else {
            es[i] = ws[i] * (mu - ys[i]) / (var * _glmf.linkDeriv(mu));
          }
        }
      }
      _likelihood = l;
    }
  }

  static class GLMPoissonGradientTask extends GLMGradientTask {
    private final GLMWeightsFun _glmf;
    public GLMPoissonGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta);
      _glmf = new GLMWeightsFun(parms);
    }

    public GLMPoissonGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta, 
                                  double[][][] penaltyMat, int[][] gamCols) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta, penaltyMat, gamCols);
      _glmf = new GLMWeightsFun(parms);
    }
    @Override protected void computeGradientMultipliers(double [] es, double [] ys, double [] ws){
      double l = 0;
      for(int i = 0; i < es.length; ++i) {
        if (Double.isNaN(ys[i]) || ws[i] == 0) {
          es[i] = 0;
        } else {
          double eta = es[i];
          double mu = Math.exp(eta);
          double yr = ys[i];
          double diff = mu - yr;
          l += ws[i] * (yr == 0?mu:yr*Math.log(yr/mu) + diff);  // todo: looks wrong to me... double check
          es[i] = ws[i]*diff;
        }
      }
      _likelihood = 2*l;
    }
  }

  static class GLMNegativeBinomialGradientTask extends GLMGradientTask {
    private final GLMWeightsFun _glmf;
    public GLMNegativeBinomialGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta);
      _glmf = new GLMWeightsFun(parms);
    }

    public GLMNegativeBinomialGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, 
                                           double[] beta, double[][][] penaltyMat, int[][] gamCols) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta, penaltyMat, gamCols);
      _glmf = new GLMWeightsFun(parms);
    }
    
    @Override protected void computeGradientMultipliers(double [] es, double [] ys, double [] ws) {
      double l = 0;
      for(int i = 0; i < es.length; ++i) {
        if (Double.isNaN(ys[i]) || ws[i] == 0) {
          es[i] = 0;
        } else {
          double eta = es[i];
          double mu = _glmf.linkInv(eta);
          double yr = ys[i];
          if ((mu > 0) && (yr > 0)) {  // response and predictions are all nonzeros
            double invSum =1.0/(1.0+mu*_glmf._theta);
            double muDeriv = _glmf.linkInvDeriv(mu);
            es[i] = ws[i] * (invSum-yr/mu+_glmf._theta*yr*invSum) * muDeriv; // gradient of -llh.  CHECKED-log/CHECKED-identity
            l -= ws[i] * (sumOper(yr, _glmf._invTheta, 0)-(yr+_glmf._invTheta)*Math.log(1+_glmf._theta*mu)+
                  yr*Math.log(mu)+yr*Math.log(_glmf._theta)); // store the -llh, with everything.  CHECKED-Log/CHECKED-identity
          } else if ((mu > 0) && (yr==0)) {
            es[i] = ws[i]*(_glmf.linkInvDeriv(mu)/(1.0+_glmf._theta*mu)); // CHECKED-log/CHECKED-identity
            l += _glmf._invTheta*Math.log(1+_glmf._theta*mu);  //CHECKED-log/CHECKED-identity
          } // no update otherwise
        }
      }
      _likelihood = l;
    }
  }

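  // Helper for the negative binomial family. With opVal == 0 it returns
  // logGamma(y + multiplier) - logGamma(multiplier) - logGamma(y + 1) (multiplier = 1/theta), the log of the
  // binomial-coefficient term of the NB log-likelihood; the other opVal modes accumulate series that are
  // used elsewhere for dispersion (theta) related derivatives.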
  static double sumOper(double y, double multiplier, int opVal) {
    double summation = 0.0;
    if (opVal == 0){
      return logGamma(y + multiplier) - logGamma(multiplier) - logGamma(y + 1);
    }
    for (int val = 0; val < y; val++) {
      double temp = 1.0/(val*multiplier*multiplier+multiplier);
      summation += opVal==1?temp:(opVal==2?temp*temp*(2*val*multiplier+1):Math.log(multiplier+val));
    }
    return summation;
  }
  
  static class GLMQuasiBinomialGradientTask extends GLMGradientTask {
    private final GLMWeightsFun _glmf;
    public GLMQuasiBinomialGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta);
      _glmf = new GLMWeightsFun(parms);
    }

    public GLMQuasiBinomialGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double[] beta,
                                        double[][][] penaltyMat, int[][] gamCols) {
      super(jobKey, dinfo, parms._obj_reg, lambda, beta, penaltyMat, gamCols);
      _glmf = new GLMWeightsFun(parms);
    }
    @Override protected void computeGradientMultipliers(double [] es, double [] ys, double [] ws){
      double l = 0;
      for(int i = 0; i < es.length; ++i){
        double p = _glmf.linkInv(es[i]);
        if(p == 0) p = 1e-15;
        if(p == 1) p = 1 - 1e-15;
        es[i] = -ws[i]*(ys[i]-p);
        l += ys[i]*Math.log(p) + (1-ys[i])*Math.log(1-p);
      }
      _likelihood = -l;
    }
  }


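  // For binomial (and fractional binomial) with the logit link, the generic multiplier
  // w * (mu - y) / (var(mu) * g'(mu)) collapses to w * (mu - y), because var(mu) = mu(1-mu) and
  // g'(mu) = 1/(mu(1-mu)) cancel; hence the link assertions in the constructors below.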
  static class GLMBinomialGradientTask extends GLMGradientTask {
    public GLMBinomialGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double [] beta) {
      super(jobKey,dinfo,parms._obj_reg,lambda,beta);
      assert (parms._family == Family.binomial && parms._link == Link.logit) || (parms._family == Family.fractionalbinomial && parms._link == Link.logit);
    }

    public GLMBinomialGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double [] beta, 
                                   double[][][] penaltyMat, int[][] gamCol) {
      super(jobKey,dinfo,parms._obj_reg,lambda,beta, penaltyMat, gamCol);
      assert (parms._family == Family.binomial && parms._link == Link.logit) || (parms._family == Family.fractionalbinomial && parms._link == Link.logit);
    }

    @Override
    protected void computeGradientMultipliers(double[] es, double[] ys, double[] ws) {
      for(int i = 0; i < es.length; ++i) {
        if(Double.isNaN(ys[i]) || ws[i] == 0){es[i] = 0; continue;}
        double e = es[i], w = ws[i];
        double yr = ys[i];
        double ym = 1.0 / (Math.exp(-e) + 1.0);
        if(ym != yr) _likelihood += w*((MathUtils.y_log_y(yr, ym)) + MathUtils.y_log_y(1 - yr, 1 - ym));
        es[i] = ws[i] * (ym - yr);
      }
    }
  }

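  // For gaussian with the identity link the multiplier is simply w * (eta - y), and _likelihood
  // accumulates the weighted residual sum of squares.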
  public static class GLMGaussianGradientTask extends GLMGradientTask {
    public GLMGaussianGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double [] beta) {
      super(jobKey,dinfo,parms._obj_reg,lambda,beta);
      assert parms._family == gaussian && parms._link == Link.identity;
    }

    public GLMGaussianGradientTask(Key jobKey, DataInfo dinfo, GLMParameters parms, double lambda, double [] beta, 
                                   double[][][] penaltyMat, int[][] gamCol) {
      super(jobKey,dinfo,parms._obj_reg,lambda,beta, penaltyMat, gamCol);
      assert parms._family == gaussian && parms._link == Link.identity;
    }

    @Override
    protected void computeGradientMultipliers(double[] es, double[] ys, double[] ws) {
      for(int i = 0; i < es.length; ++i) {
        double w = ws[i];
        if(w == 0 || Double.isNaN(ys[i])){
          es[i] = 0;
          continue;
        }
        double e = es[i], y = ys[i];
        double d = (e-y);
        double wd = w*d;
        _likelihood += wd*d;
        es[i] = wd;
      }
    }
  }

  static class GLMMultinomialLikelihoodTask extends GLMMultinomialGradientBaseTask {
    public GLMMultinomialLikelihoodTask(Job job, DataInfo dinfo, double lambda, double[][] beta, double reg) {
      super(job, dinfo, lambda, beta, reg);
    }

    public GLMMultinomialLikelihoodTask(Job job, DataInfo dinfo, double lambda, double[][] beta, GLMParameters glmp) {
      super(job, dinfo, lambda, beta, glmp);
    }

    @Override
    public void calMultipliersNGradients(double[][] etas, double[][] etasOffset, double[] ws, double[] vals,
                                         int[] ids, Chunk response, Chunk[] chks, int M, int P, int numStart) {
      computeGradientMultipliers(etas, response.getDoubles(vals, 0, M), ws);
    }
  }

  // shared between multinomial and ordinal regression
  public static abstract class GLMMultinomialGradientBaseTask extends MRTask<GLMMultinomialGradientBaseTask> {
    final double[][] _beta;
    final transient double _currentLambda;
    final transient double _reg;
    public double[][] _gradient;  // this is arranged such that we have double[ncoeff][nclass]
    double _likelihood;
    Job _job;
    final boolean _sparse;
    final DataInfo _dinfo;
    // parameters used by ordinal regression
    Link _link;           // link function, e.g. ologit, ologlog, oprobit
    GLMParameters _glmp;  // parameter used to access linkinv and linkinvderiv functions
    int _secondToLast;    // denote class label nclass-2
    int _theLast;         // denote class label nclass-1
    int _interceptId;     // index of offset/intercept in double[][] _beta
    double[][][] _penaltyMat;
    int[][] _gamBetaIndices;

    /**
     * @param job    the owning job (may be null)
     * @param dinfo  data info describing the adapted training frame
     * @param lambda current L2 (ridge) penalty
     * @param beta   coefficients as 2D array [P][K]
     * @param reg    gradient scaling factor, typically the objective regularization obj_reg
     */
    public GLMMultinomialGradientBaseTask(Job job, DataInfo dinfo, double lambda, double[][] beta, double reg) {
      _currentLambda = lambda;
      _reg = reg;
      // need to flip the beta
      _beta = new double[beta[0].length][beta.length];
      for (int i = 0; i < _beta.length; ++i)
        for (int j = 0; j < _beta[i].length; ++j)
          _beta[i][j] = beta[j][i];
      _job = job;
      _sparse = FrameUtils.sparseRatio(dinfo._adaptedFrame) < .125;
      _dinfo = dinfo;
      if (_dinfo._offset) throw H2O.unimpl();
    }

    public GLMMultinomialGradientBaseTask(Job job, DataInfo dinfo, double lambda, double[][] beta, GLMParameters glmp) {
      this(job, dinfo, lambda, beta, glmp._obj_reg);
      _theLast = beta.length - 1;       // initialize ordinal regression parameters
      _secondToLast = _theLast - 1;
      _interceptId = _beta.length - 1;
      _link = glmp._link;
      _glmp = glmp;
    }

    public GLMMultinomialGradientBaseTask(Job job, DataInfo dinfo, double lambda, double[][] beta, GLMParameters glmp,
                                          double[][][] penaltyMat, int[][] gamCols) {
      this(job, dinfo, lambda, beta, glmp._obj_reg);
      _theLast = beta.length - 1;       // initialize ordinal regression parameters
      _secondToLast = _theLast - 1;
      _interceptId = _beta.length - 1;
      _link = glmp._link;
      _glmp = glmp;
      _penaltyMat = penaltyMat;
      _gamBetaIndices = gamCols;
    }

    // common between multinomial and ordinal
    public final void computeCategoricalEtas(Chunk[] chks, double[][] etas, double[] vals, int[] ids) {
      // categoricals
      for (int cid = 0; cid < _dinfo._cats; ++cid) {
        Chunk c = chks[cid];
        if (c.isSparseZero()) {
          int nvals = c.getSparseDoubles(vals, ids, -1);
          for (int i = 0; i < nvals; ++i) {
            int id = _dinfo.getCategoricalId(cid, (int) vals[i]);
            if (id >= 0) ArrayUtils.add(etas[ids[i]], _beta[id]);
          }
        } else {
          c.getIntegers(ids, 0, c._len, -1);
          for (int i = 0; i < ids.length; ++i) {
            int id = _dinfo.getCategoricalId(cid, ids[i]);
            if (id >= 0) ArrayUtils.add(etas[i], _beta[id]);
          }
        }
      }
    }

    public final void computeCategoricalGrads(Chunk[] chks, double[][] etas, double[] vals, int[] ids) {
      // categoricals
      for (int cid = 0; cid < _dinfo._cats; ++cid) {
        Chunk c = chks[cid];
        if (c.isSparseZero()) {
          int nvals = c.getSparseDoubles(vals, ids, -1);
          for (int i = 0; i < nvals; ++i) {
            int id = _dinfo.getCategoricalId(cid, (int) vals[i]);
            if (id >= 0) ArrayUtils.add(_gradient[id], etas[ids[i]]);
          }
        } else {
          c.getIntegers(ids, 0, c._len, -1);
          for (int i = 0; i < ids.length; ++i) {
            int id = _dinfo.getCategoricalId(cid, ids[i]);
            if (id >= 0) ArrayUtils.add(_gradient[id], etas[i]);
          }
        }
      }
    }

    public final void computeNumericEtas(Chunk[] chks, double[][] etas, double[] vals, int[] ids) {
      int numOff = _dinfo.numStart();
      for (int cid = 0; cid < _dinfo._nums; ++cid) {
        double[] b = _beta[numOff + cid];
        double scale = _dinfo._normMul != null ? _dinfo._normMul[cid] : 1;
        double NA = _dinfo._numNAFill[cid];
        Chunk c = chks[cid + _dinfo._cats];
        if (c.isSparseZero() || c.isSparseNA()) {
          int nvals = c.getSparseDoubles(vals, ids, NA);
          for (int i = 0; i < nvals; ++i) {
            double d = vals[i] * scale;
            ArrayUtils.wadd(etas[ids[i]], b, d);
          }
        } else {
          c.getDoubles(vals, 0, vals.length, NA);
          double off = _dinfo._normSub != null ? _dinfo._normSub[cid] : 0;
          for (int i = 0; i < vals.length; ++i) {
            double d = (vals[i] - off) * scale;
            ArrayUtils.wadd(etas[i], b, d);
          }
        }
      }
    }

    public final void computeNumericGrads(Chunk[] chks, double[][] etas, double[] vals, int[] ids) {
      int numOff = _dinfo.numStart();
      for (int cid = 0; cid < _dinfo._nums; ++cid) {
        double[] g = _gradient[numOff + cid];
        double NA = _dinfo._numNAFill[cid];
        Chunk c = chks[cid + _dinfo._cats];
        double scale = _dinfo._normMul == null ? 1 : _dinfo._normMul[cid];
        if (c.isSparseZero() || c.isSparseNA()) {
          int nVals = c.getSparseDoubles(vals, ids, NA);
          for (int i = 0; i < nVals; ++i)
            ArrayUtils.wadd(g, etas[ids[i]], vals[i] * scale);
        } else {
          double off = _dinfo._normSub == null ? 0 : _dinfo._normSub[cid];
          c.getDoubles(vals, 0, vals.length, NA);
          for (int i = 0; i < vals.length; ++i) {
            ArrayUtils.wadd(g, etas[i], (vals[i] - off) * scale);
          }
        }
      }
    }


    // This method computes the multipliers for the gradient calculation: for the betas in etas and for the
    // intercepts in etasOffset, one entry per row of data.
    final void computeGradientMultipliersLH(double [][] etas, double [][] etasOffset, double [] ys, double [] ws) {
      int K = _beta[0].length;    // number of classes
      double[] tempEtas = new double[K];  // store original etas
      int y;  // get and store response class category
      double yJ, yJm1;
      for (int row = 0; row < etas.length; row++) { // calculate the multiplier from each row
        double w = ws[row];
        if (w==0) {
          Arrays.fill(etas[row], 0);    // zero out etas for current row
          continue;
        }
        // note that the intercept (offset) differs per class while beta is shared across all classes
        System.arraycopy(etas[row], 0, tempEtas, 0, K); // copy data over to tempEtas
        Arrays.fill(etas[row], 0);    // zero out etas for current row
        y = (int) ys[row];  // get response class category
        if (y==0) { // response is in 0th category
          etasOffset[row][0] = _glmp.linkInv(tempEtas[0])-1;
          etas[row][0] = etasOffset[row][0];
          _likelihood -= w*(tempEtas[y]-Math.log(1+Math.exp(tempEtas[y])));
        } else if (y==_theLast) { // response is in last category
          etasOffset[row][_secondToLast] = _glmp.linkInv(tempEtas[_secondToLast]);
          etas[row][0] = etasOffset[row][_secondToLast];
          _likelihood += w*Math.log(1+Math.exp(tempEtas[_secondToLast]));
        } else {  // perform update for response between 1 to K-2, y can affect class y and y-1
          int lastC = y-1;  // previous class
          yJ = _glmp.linkInv(tempEtas[y]);
          yJm1 = _glmp.linkInv(tempEtas[lastC]);
          double den = yJ-yJm1;
          den = den==0.0?1e-10:den;
          _likelihood -= w*Math.log(den);
          etas[row][0] = yJ+yJm1-1.0; // for non-intercepts
          double oneMcdfPC = 1-yJm1;
          oneMcdfPC = oneMcdfPC==0.0?1e-10:oneMcdfPC;
          double oneOthreshold = 1-Math.exp(_beta[_interceptId][lastC]-_beta[_interceptId][y]);
          oneOthreshold = oneOthreshold==0.0?1e-10:oneOthreshold;
          double oneOverThreshold = 1.0/oneOthreshold;
          etasOffset[row][y] = (yJ-1)*oneOverThreshold/oneMcdfPC;
          yJ = yJ==0?1e-10:yJ;
          etasOffset[row][lastC] = yJm1*oneOverThreshold/yJ;
        }
        for (int c=1; c<K; c++)  // only the intercepts differ per class; non-intercept multipliers are shared
          etas[row][c] = etas[row][0];
      }
    }

    // This method computes the multipliers for the squared-error based gradient calculation: for the betas
    // in etas and for the intercepts in etasOffset, one entry per row of data.
    final void computeGradientMultipliersSQERR(double [][] etas, double [][] etasOffset, double [] ys, double [] ws) {
      int K = _beta[0].length;    // number of classes
      double[] tempEtas = new double[K];  // store original etas
      int y;  // response class category of the current row
      for (int row = 0; row < etas.length; row++) { // calculate the multiplier from each row
        double w = ws[row];
        if (w==0) {
          Arrays.fill(etas[row], 0);    // zero out etas for current row
          continue;
        }
        System.arraycopy(etas[row], 0, tempEtas, 0, K); // copy data over to tempEtas
        Arrays.fill(etas[row], 0);    // zero out etas for current row
        y = (int) ys[row];  // get response class category
        for (int c = 0; c < y; c++) { // class < yresp, eta should be negative
          if (tempEtas[c] > 0) {
            etasOffset[row][c] = tempEtas[c];
            etas[row][0] += tempEtas[c];
            _likelihood += w*0.5*tempEtas[c]*tempEtas[c];
          }
        }
        for (int c = y; c < _theLast; c++) {  // class >= yresp, should be positive
          if (tempEtas[c] <= 0) {
            etasOffset[row][c] = tempEtas[c];
            etas[row][0] += tempEtas[c];
            _likelihood += w*0.5*tempEtas[c]*tempEtas[c];
          }
        }
        for (int c=1; c<_theLast; c++)  // non-intercept multipliers are shared across classes
          etas[row][c] = etas[row][0];
      }
    }

    // multinomial: the per-row multiplier for class c is w * (softmax(eta)_c - 1{y == c})
    final void computeGradientMultipliers(double [][] etas, double [] ys, double [] ws) {
      int K = _beta[0].length;
      double [] exps = new double[K+1];
      for(int i = 0; i < etas.length; ++i) {
        double w = ws[i];
        if(w == 0) {
          Arrays.fill(etas[i], 0);
          continue;
        }
        int y = (int) ys[i];
        double logSumExp = computeMultinomialEtas(etas[i], exps);
        _likelihood -= w * (etas[i][y] - logSumExp);
        for(int c = 0; c < K; ++c)
          etas[i][c] = w * (exps[c+1] - (y == c ? 1 : 0));
      }
    }

    abstract void calMultipliersNGradients(double[][] etas, double[][] etasOffset, double[] ws, double[] vals,
                                           int[] ids, Chunk response, Chunk[] chks, int M, int P, int numStart);

    @Override public void map(Chunk[] chks) {
      int numStart = _dinfo.numStart();
      int K = _beta[0].length;// number of classes
      int P = _beta.length;   // number of coefficients per class (incl. intercept)
      int M = chks[0]._len;   // number of rows in this chunk of data
      _gradient = new double[P][K];
      double [][] etas = new double[M][K];
      double [][] etasOffset = new double[M][K];
      double [] vals = new double[M];
      int [] ids = new int[M];
      Chunk response = chks[_dinfo.responseChunkId(0)];
      double [] ws = MemoryManager.malloc8d(M);
      if(_dinfo._weights) ws = chks[_dinfo.weightChunkId()].getDoubles(ws,0,M);
      else Arrays.fill(ws,1);
      chks = Arrays.copyOf(chks,chks.length-1-(_dinfo._weights?1:0)); // drop response (and weight) chunks
      computeCategoricalEtas(chks,etas,vals,ids);
      computeNumericEtas(chks,etas,vals,ids);
      calMultipliersNGradients(etas, etasOffset, ws, vals, ids, response, chks, M, P, numStart);
    }

    @Override
    public void reduce(GLMMultinomialGradientBaseTask gmgt) {
      if (_gradient != gmgt._gradient)
        ArrayUtils.add(_gradient, gmgt._gradient);
      _likelihood += gmgt._likelihood;
    }

    @Override public void postGlobal() {
      ArrayUtils.mult(_gradient, _reg);
      int P = _beta.length;
      // add the L2 penalty gradient for all non-intercept coefficients
      if (_currentLambda > 0) {
        for (int c = 0; c < P - 1; ++c)
          for (int j = 0; j < _beta[0].length; ++j) // iterate over each class
            _gradient[c][j] += _currentLambda * _beta[c][j];
      }
      if ((_penaltyMat != null) && (_gamBetaIndices != null))
        updateGradGamMultinomial(_gradient, _penaltyMat, _gamBetaIndices, _beta); // beta is coeff indexed by class
    }

    /** Transposes _gradient, which is stored as coeffPerClass by nclasses, into a flat array ordered as
     *  nclasses by coeffPerClass. If only active columns are included, only those are returned; if both
     *  active and non-active columns are included, both are returned.
     *
     * @return the flattened, class-major gradient
     */
    public double [] gradient(){
      double [] res = MemoryManager.malloc8d(_gradient.length*_gradient[0].length);
      int P = _gradient.length;
      for(int k = 0; k < _gradient[0].length; ++k)
        for(int i = 0; i < _gradient.length; ++i)
          res[k*P + i] = _gradient[i][k];
      return res;
    }
  }

  // shared between multinomial and ordinal regression
  public static class GLMMultinomialGradientTask extends GLMMultinomialGradientBaseTask {
    public GLMMultinomialGradientTask(Job job, DataInfo dinfo, double lambda, double[][] beta, double reg) {
      super(job, dinfo, lambda, beta, reg);
    }

    public GLMMultinomialGradientTask(Job job, DataInfo dinfo, double lambda, double[][] beta, GLMParameters glmp) {
      super(job, dinfo, lambda, beta, glmp);
    }

    public GLMMultinomialGradientTask(Job job, DataInfo dinfo, double lambda, double[][] beta, GLMParameters glmp, 
                                      double[][][] penaltyMat, int[][] gamCols) {
      super(job, dinfo, lambda, beta, glmp, penaltyMat, gamCols);
    }
    @Override
    public void calMultipliersNGradients(double[][] etas, double[][] etasOffset, double[] ws, double[] vals,
                                         int[] ids, Chunk response, Chunk[] chks, int M, int P, int numStart) {
      if (_glmp != null && _link == Link.ologit && (_glmp._solver.equals(GLMParameters.Solver.AUTO) ||
              _glmp._solver.equals((GLMParameters.Solver.GRADIENT_DESCENT_LH))))  // gradient is stored in etas
        computeGradientMultipliersLH(etas, etasOffset, response.getDoubles(vals, 0, M), ws);
      else if (_glmp != null && _link == Link.ologit && _glmp._solver.equals(GLMParameters.Solver.GRADIENT_DESCENT_SQERR))
        computeGradientMultipliersSQERR(etas, etasOffset, response.getDoubles(vals, 0, M), ws);
      else
        computeGradientMultipliers(etas, response.getDoubles(vals, 0, M), ws);

      computeCategoricalGrads(chks, etas, vals, ids);
      computeNumericGrads(chks, etas, vals, ids);

      double [] g = _gradient[P-1]; // get the intercept gradient.
      // sum up the gradient over the data rows in this chk[]
      if (_link == Link.ologit) {
        for (int i = 0; i < etasOffset.length; ++i)
          ArrayUtils.add(g, etasOffset[i]);
      } else {
        for (int i = 0; i < etas.length; ++i)
          ArrayUtils.add(g, etas[i]);
      }
      if(_dinfo._normSub != null) {
        double [] icpt = _gradient[P-1];
        for(int i = 0; i < _dinfo._normSub.length; ++i) {
          if(chks[_dinfo._cats+i].isSparseZero())
            ArrayUtils.wadd(_gradient[numStart+i],icpt,-_dinfo._normSub[i]*_dinfo._normMul[i]);
        }
      }
    }
  }

//  public static class GLMCoordinateDescentTask extends MRTask<GLMCoordinateDescentTask> {
//    final double [] _betaUpdate;
//    final double [] _beta;
//    final double _xOldSub;
//    final double _xOldMul;
//    final double _xNewSub;
//    final double _xNewMul;
//
//    double [] _xy;
//
//    public GLMCoordinateDescentTask(double [] betaUpdate, double [] beta, double xOldSub, double xOldMul, double xNewSub, double xNewMul) {
//      _betaUpdate = betaUpdate;
//      _beta = beta;
//      _xOldSub = xOldSub;
//      _xOldMul = xOldMul;
//      _xNewSub = xNewSub;
//      _xNewMul = xNewMul;
//    }
//
//    public void map(Chunk [] chks) {
//      Chunk xOld = chks[0];
//      Chunk xNew = chks[1];
//      if(xNew.vec().isCategorical()){
//        _xy = MemoryManager.malloc8d(xNew.vec().domain().length);
//      } else
//      _xy = new double[1];
//      Chunk eta = chks[2];
//      Chunk weights = chks[3];
//      Chunk res = chks[4];
//      for(int i = 0; i < eta._len; ++i) {
//        double w = weights.atd(i);
//        double e = eta.atd(i);
//        if(_betaUpdate != null) {
//          if (xOld.vec().isCategorical()) {
//            int cid = (int) xOld.at8(i);
//            e = +_betaUpdate[cid];
//          } else
//            e += _betaUpdate[0] * (xOld.atd(i) - _xOldSub) * _xOldMul;
//          eta.set(i, e);
//        }
//        int cid = 0;
//        double x = w;
//        if(xNew.vec().isCategorical()) {
//          cid = (int) xNew.at8(i);
//          e -= _beta[cid];
//        } else {
//          x = (xNew.atd(i) - _xNewSub) * _xNewMul;
//          e -= _beta[0] * x;
//          x *= w;
//        }
//        _xy[cid] += x * (res.atd(i) - e);
//      }
//    }
//    @Override public void reduce(GLMCoordinateDescentTask t) {
//      ArrayUtils.add(_xy, t._xy);
//    }
//  }


//  /**
//   * Compute initial solution for multinomial problem (Simple weighted LR with all weights = 1/4)
//   */
//  public static final class GLMMultinomialInitTsk extends MRTask<GLMMultinomialInitTsk>  {
//    double [] _mu;
//    DataInfo _dinfo;
//    Gram _gram;
//    double [][] _xy;
//
//    @Override public void map(Chunk [] chks) {
//      Rows rows = _dinfo.rows(chks);
//      _gram = new Gram(_dinfo);
//      _xy = new double[_mu.length][_dinfo.fullN()+1];
//      int numStart = _dinfo.numStart();
//      double [] ds = new double[_mu.length];
//      for(int i = 0; i < ds.length; ++i)
//        ds[i] = 1.0/(_mu[i] * (1-_mu[i]));
//      for(int i = 0; i < rows._nrows; ++i) {
//        Row r = rows.row(i);
//        double y = r.response(0);
//        _gram.addRow(r,.25);
//        for(int c = 0; c < _mu.length; ++c) {
//          double iY = y == c?1:0;
//          double z = (y-_mu[c]) * ds[i];
//          for(int j = 0; j < r.nBins; ++j)
//            _xy[c][r.binIds[j]] += z;
//          for(int j = 0; j < r.nNums; ++j){
//            int id = r.numIds == null?(j + numStart):r.numIds[j];
//            double val = r.numVals[j];
//            _xy[c][id] += z*val;
//          }
//        }
//      }
//    }
//    @Override public void reduce(){
//
//    }
//  }

  /**
   * Task to compute t(X) %*% W %*%  X and t(X) %*% W %*% y
   */
  public static class LSTask extends FrameTask2<LSTask> {
    public double[] _xy;
    public Gram _gram;
    final int numStart;

    public LSTask(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey) {
      super(cmp, dinfo, jobKey);
      numStart = _dinfo.numStart();
    }

    @Override
    public void chunkInit() {
      _gram = new Gram(_dinfo.fullN(), _dinfo.largestCat(), _dinfo.numNums(), _dinfo._cats, true);
      _xy = MemoryManager.malloc8d(_dinfo.fullN() + 1);

    }

    @Override
    protected void processRow(Row r) {
      double wz = r.weight * (r.response(0) - r.offset);
      for (int i = 0; i < r.nBins; ++i) {
        _xy[r.binIds[i]] += wz;
      }
      for (int i = 0; i < r.nNums; ++i) {
        int id = r.numIds == null ? (i + numStart) : r.numIds[i];
        double val = r.numVals[i];
        _xy[id] += wz * val;
      }
      if (_dinfo._intercept)
        _xy[_xy.length - 1] += wz;
      _gram.addRow(r, r.weight);
    }

    @Override
    public void reduce(LSTask lst) {
      ArrayUtils.add(_xy, lst._xy);
      _gram.add(lst._gram);
    }

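    // With sparse standardized data the per-row centering was skipped, so the Gram is corrected here using
    // sum_r (x_i - m_i)(x_j - m_j) = sum_r x_i x_j - m_i * sum_r x_j - m_j * sum_r x_i + n * m_i * m_j,
    // where the weighted column sums sum_r x_j live in the intercept row and m_k = normSub_k * normMul_k.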
    @Override
    public void postGlobal() {
      if (_sparse && _dinfo._normSub != null) { // need to adjust gram for missing centering!
        int ns = _dinfo.numStart();
        int interceptIdx = _xy.length - 1;
        double[] interceptRow = _gram._xx[interceptIdx - _gram._diagN];
        double nobs = interceptRow[interceptRow.length - 1]; // weighted _nobs
        for (int i = ns; i < _dinfo.fullN(); ++i) {
          double iMean = _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
          for (int j = 0; j < ns; ++j)
            _gram._xx[i - _gram._diagN][j] -= interceptRow[j] * iMean;
          for (int j = ns; j <= i; ++j) {
            double jMean = _dinfo._normSub[j - ns] * _dinfo._normMul[j - ns];
            _gram._xx[i - _gram._diagN][j] -= interceptRow[i] * jMean + interceptRow[j] * iMean - nobs * iMean * jMean;
          }
        }
        if (_dinfo._intercept) { // do the intercept row
          for (int j = ns; j < _dinfo.fullN(); ++j)
            interceptRow[j] -= nobs * _dinfo._normSub[j - ns] * _dinfo._normMul[j - ns];
        }
        // and the xy vec as well
        for (int i = ns; i < _dinfo.fullN(); ++i) {
          _xy[i] -= _xy[_xy.length - 1] * _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
        }
      }
    }
  }

  public static class GLMWLSTask extends LSTask {
    final GLMWeightsFun _glmw;
    final double [] _beta;
    double _sparseOffset;

    public GLMWLSTask(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, GLMWeightsFun glmw, double [] beta) {
      super(cmp, dinfo, jobKey);
      _glmw = glmw;
      _beta = beta;
    }

    private transient GLMWeights _ws;

    @Override public void chunkInit(){
      super.chunkInit();
      _ws = new GLMWeights();
    }

    @Override
    public void processRow(Row r) {
      // update weights
      double eta = r.innerProduct(_beta) + _sparseOffset;
      _glmw.computeWeights(r.response(0),eta,r.weight,r.offset,_ws);
      r.weight = _ws.w;
      r.offset = 0; // handled offset here
      r.setResponse(0,_ws.z);
      super.processRow(r);
    }
  }

  public static class GLMMultinomialWLSTask extends LSTask {
    final GLMWeightsFun _glmw;
    final double [] _beta;
    double _sparseOffset;

    public GLMMultinomialWLSTask(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, GLMWeightsFun glmw, double [] beta) {
      super(cmp, dinfo, jobKey);
      _glmw = glmw;
      _beta = beta;
    }

    private transient GLMWeights _ws;

    @Override public void chunkInit(){
      super.chunkInit();
      _ws = new GLMWeights();
    }

    @Override
    public void processRow(Row r) {

      // update weights
      double eta = r.innerProduct(_beta) + _sparseOffset;
      _glmw.computeWeights(r.response(0),eta,r.weight,r.offset,_ws);
      r.weight = _ws.w;
      r.offset = 0; // handled offset here
      r.setResponse(0,_ws.z);
      super.processRow(r);
    }
  }


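  /**
   * One IRLSM iteration for a single class c of a multinomial model: each row carries the current sumExp and
   * maxRow in extra response columns, and the task accumulates the weighted Gram and t(X) %*% W %*% z for
   * class c only.
   */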
  public static class GLMIterationTaskMultinomial extends FrameTask2<GLMIterationTaskMultinomial> {
    final int _c;
    final double [] _beta; // current beta to compute update of predictors for the current class

    double [] _xy;
    Gram _gram;
    transient double _sparseOffset;

    public GLMIterationTaskMultinomial(DataInfo dinfo, Key jobKey, double [] beta, int c) {
      super(null, dinfo, jobKey);
      _beta = beta;
      _c = c;
    }

    @Override public void chunkInit(){
      // initialize
      _gram = new Gram(_dinfo.fullN(), _dinfo.largestCat(), _dinfo.numNums(), _dinfo._cats,true);
      _xy = MemoryManager.malloc8d(_dinfo.fullN()+1); // + 1 is for intercept
      if(_sparse)
        _sparseOffset = GLM.sparseOffset(_beta,_dinfo);
    }
    @Override
    protected void processRow(Row r) {
      double y = r.response(0);
      double sumExp = r.response(1);
      double maxRow = r.response(2);
      int numStart = _dinfo.numStart();
      y = (y == _c)?1:0;
      double eta = r.innerProduct(_beta) + _sparseOffset;
      if(eta > maxRow) maxRow = eta;
      double etaExp = Math.exp(eta-maxRow);
      sumExp += etaExp;
      double mu = (etaExp == Double.POSITIVE_INFINITY?1:(etaExp / sumExp));
      if(mu < 1e-16)
        mu = 1e-16;//
      double d = mu*(1-mu);
      double wz = r.weight * (eta * d + (y-mu));
      double w  = r.weight * d;
      for(int i = 0; i < r.nBins; ++i) {
        _xy[r.binIds[i]] += wz;
      }
      for(int i = 0; i < r.nNums; ++i){
        int id = r.numIds == null?(i + numStart):r.numIds[i];
        double val = r.numVals[i];
        _xy[id] += wz*val;
      }
      if(_dinfo._intercept)
        _xy[_xy.length-1] += wz;
      _gram.addRow(r, w);
    }

    @Override
    public void reduce(GLMIterationTaskMultinomial glmt) {
      ArrayUtils.add(_xy,glmt._xy);
      _gram.add(glmt._gram);
    }
  }

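  /**
   * Refreshes the cached per-row multinomial quantities after the betas of class c changed: writes eta_c
   * into the maxRow column and exp(eta_c - max) / sumExp (the class-c probability) into the sumExp column.
   */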
  public static class GLMMultinomialUpdate extends FrameTask2<GLMMultinomialUpdate> {
    private final double [][] _beta; // updated  value of beta
    private final int _c;
    private transient double [] _sparseOffsets;
    private transient double [] _etas;

    public GLMMultinomialUpdate(DataInfo dinfo, Key jobKey, double [] beta, int c) {
      super(null, dinfo, jobKey);
      _beta = ArrayUtils.convertTo2DMatrix(beta,dinfo.fullN()+1);
      _c = c;
    }

    @Override public void chunkInit(){
      // initialize
      _sparseOffsets = MemoryManager.malloc8d(_beta.length);
      _etas = MemoryManager.malloc8d(_beta.length);
      if(_sparse) {
        for(int i = 0; i < _beta.length; ++i)
          _sparseOffsets[i] = GLM.sparseOffset(_beta[i],_dinfo);
      }
    }

    private transient Chunk _sumExpChunk;
    private transient Chunk _maxRowChunk;

    @Override public void map(Chunk [] chks) {
      _sumExpChunk = chks[chks.length-2];
      _maxRowChunk = chks[chks.length-1];
      super.map(chks);
    }

    @Override
    protected void processRow(Row r) {
      double maxrow = 0;
      for(int i = 0; i < _beta.length; ++i) {
        _etas[i] = r.innerProduct(_beta[i]) + _sparseOffsets[i];
        if(_etas[i] > maxrow)
          maxrow = _etas[i];
      }
      double sumExp = 0;
      for(int i = 0; i < _beta.length; ++i)
//        if(i != _c)
          sumExp += Math.exp(_etas[i]-maxrow);
      _maxRowChunk.set(r.cid,_etas[_c]);
      _sumExpChunk.set(r.cid,Math.exp(_etas[_c]-maxrow)/sumExp);
    }
  }
  
  /**
   * One iteration of GLM: computes the weighted Gram matrix t(X) %*% W %*% X, the vector t(X) %*% W %*% z,
   * and the scalar t(y) %*% W %*% y.
   *
   * @author tomasnykodym
   */
  public static class GLMIterationTask extends FrameTask2<GLMIterationTask> {
    final GLMWeightsFun _glmf;
    double [][]_beta_multinomial;
    double []_beta;
    protected Gram  _gram; // wx%*%x
    double [] _xy; // wx^t%*%z,
    double _yy;
    final double [] _ymu;
    long _nobs;
    public double _likelihood;
    private transient GLMWeights _w;
    private transient GLMWeightsFun _glmfTweedie; // only needed for Tweedie
    //    final double _lambda;
    double wsum, sumOfRowWeights;
    double _sumsqe;
    int _c = -1;
    
    public double[] getXY() {
      return _xy;
    }
    
    public double getYY() {
      return _yy;
    }

    public  GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw,double [] beta) {
      super(null,dinfo,jobKey);
      _beta = beta;
      _ymu = null;
      _glmf = glmw;
    }
    
    public  GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw, double [] beta, int c) {
      super(null,dinfo,jobKey);
      _beta = beta;
      _ymu = null;
      _glmf = glmw;
      _c = c;
    }

    @Override public boolean handlesSparseData(){return true;}

    transient private double _sparseOffset;

    @Override
    public void chunkInit() {
      // initialize
      _gram = new Gram(_dinfo.fullN(), _dinfo.largestCat(), _dinfo.numNums(), _dinfo._cats,true);
      _xy = MemoryManager.malloc8d(_dinfo.fullN()+1); // + 1 is for intercept
      if(_sparse)
        _sparseOffset = GLM.sparseOffset(_beta,_dinfo);
      _w = new GLMWeights();
      if (_glmf._family.equals(Family.tweedie)) {
        _glmfTweedie = new GLMModel.GLMWeightsFun(_glmf._family, _glmf._link, _glmf._var_power, _glmf._link_power,
                _glmf._theta);
      }
    }
    
    public Gram getGram() {
      return _gram;
    }

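    // Standard IRLSM update per row: computeWeights() produces the working weight w (the hessian contribution
    // without the x_ij * x_ik term) and the working response z; the task accumulates t(X) %*% W %*% X into
    // the Gram and t(X) %*% W %*% z into _xy.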
    @Override
    protected void processRow(Row r) { // called for every row in the chunk
      if(r.isBad() || r.weight == 0) return;
      ++_nobs;
      double y = r.response(0);
      _yy += r.weight*y*y;
      final int numStart = _dinfo.numStart();
      double wz,w;
      if(_glmf._family == Family.multinomial) {
        y = (y == _c)?1:0;
        double mu = r.response(1);
        double eta = r.response(2);
        double d = mu*(1-mu);
        if(d == 0) d = 1e-10;
        wz = r.weight * (eta * d + (y-mu));
        w  = r.weight * d;
      } else if(_beta != null) {
        if (_glmf._family.equals(Family.tweedie))
          _glmfTweedie.computeWeights(y, r.innerProduct(_beta) + _sparseOffset, r.offset, r.weight, _w);
        else
          _glmf.computeWeights(y, r.innerProduct(_beta) + _sparseOffset, r.offset, r.weight, _w);
        w = _w.w; // hessian without the xij xik part
        if (_glmf._family.equals(Family.tweedie))  // already multiplied with w for w.z
          wz = _w.z;
        else
          wz = w*_w.z;
        _likelihood += _w.l;
      } else {
        w = r.weight;
        wz = w*(y - r.offset);
      }
      wsum+=w;
      sumOfRowWeights +=r.weight; // just add the user observation weight for the scaling.
      for(int i = 0; i < r.nBins; ++i)
        _xy[r.binIds[i]] += wz;
      for(int i = 0; i < r.nNums; ++i){
        int id = r.numIds == null?(i + numStart):r.numIds[i];
        double val = r.numVals[i];
        _xy[id] += wz*val;
      }
      if(_dinfo._intercept)
        _xy[_xy.length-1] += wz;
      _gram.addRow(r,w);
    }

    @Override
    public void chunkDone(){adjustForSparseStandardizedZeros();}

    @Override
    public void reduce(GLMIterationTask git){
      ArrayUtils.add(_xy, git._xy);
      _gram.add(git._gram);
      _nobs += git._nobs;
      wsum += git.wsum;
      sumOfRowWeights += git.sumOfRowWeights;
      _likelihood += git._likelihood;
      _sumsqe += git._sumsqe;
      _yy += git._yy;
      super.reduce(git);
    }

    private void adjustForSparseStandardizedZeros(){
      if(_sparse && _dinfo._normSub != null) { // need to adjust gram for missing centering!
        int ns = _dinfo.numStart();
        int interceptIdx = _xy.length - 1;
        double[] interceptRow = _gram._xx[interceptIdx - _gram._diagN];
        double nobs = interceptRow[interceptRow.length - 1]; // weighted _nobs
        for (int i = ns; i < _dinfo.fullN(); ++i) {
          double iMean = _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
          for (int j = 0; j < ns; ++j)
            _gram._xx[i - _gram._diagN][j] -= interceptRow[j] * iMean;
          for (int j = ns; j <= i; ++j) {
            double jMean = _dinfo._normSub[j - ns] * _dinfo._normMul[j - ns];
            _gram._xx[i - _gram._diagN][j] -= interceptRow[i] * jMean + interceptRow[j] * iMean - nobs * iMean * jMean;
          }
        }
        if (_dinfo._intercept) { // do the intercept row
          for (int j = ns; j < _dinfo.fullN(); ++j)
            interceptRow[j] -= nobs * _dinfo._normSub[j - ns] * _dinfo._normMul[j - ns];
        }
        // and the xy vec as well
        for (int i = ns; i < _dinfo.fullN(); ++i) {
          _xy[i] -= _xy[_xy.length - 1] * _dinfo._normSub[i - ns] * _dinfo._normMul[i - ns];
        }
      }
    }

    public boolean hasNaNsOrInf() {
      return ArrayUtils.hasNaNsOrInfs(_xy) || _gram.hasNaNsOrInfs();
    }
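    /*
     * Usage sketch (illustrative only; jobKey, dinfo, parms and beta are assumed
     * to exist in the caller):
     *
     *   GLMIterationTask git = new GLMIterationTask(jobKey, dinfo,
     *           new GLMWeightsFun(parms), beta).doAll(dinfo._adaptedFrame);
     *   Gram gram = git.getGram();  // t(X) W X
     *   double[] xy = git.getXY();  // t(X) W z
     *   double yy = git.getYY();    // t(y) W y
     */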
  }
  

 /* public static class GLMCoordinateDescentTask extends FrameTask2 {
    final GLMParameters _params;
    final double [] _betaw;
    final double [] _betacd;
    public double [] _temp;
    public double [] _varsum;
    public double _ws=0;
    long _nobs;
    public double _likelihoods;
    public  GLMCoordinateDescentTask(Key jobKey, DataInfo dinfo, double lambda, GLMModel.GLMParameters glm, boolean validate, double [] betaw,
                                     double [] betacd, double ymu, Vec rowFilter, H2OCountedCompleter cmp) {
      super(cmp,dinfo,jobKey,rowFilter);
      _params = glm;
      _betaw = betaw;
      _betacd = betacd;
    }


    @Override public boolean handlesSparseData(){return false;}


    @Override
    public void chunkInit() {
      _temp=MemoryManager.malloc8d(_dinfo.fullN()+1); // using h2o memory manager
      _varsum=MemoryManager.malloc8d(_dinfo.fullN());
    }

    @Override
    protected void processRow(Row r) {
      if(r.bad || r.weight == 0) return;
      ++_nobs;
      final double y = r.response(0);
      assert ((_params._family != Family.gamma) || y > 0) : "illegal response column, y must be > 0  for family=Gamma.";
      assert ((_params._family != Family.binomial) || (0 <= y && y <= 1)) : "illegal response column, y must be <0,1>  for family=Binomial. got " + y;
      final double w, eta, mu, var, z;
      final int numStart = _dinfo.numStart();
      double d = 1;
      if( _params._family == Family.gaussian && _params._link == Link.identity){
        w = r.weight;
        z = y - r.offset;
        mu = 0;
        eta = mu;
      } else {
        eta = r.innerProduct(_betaw);
        mu = _params.linkInv(eta + r.offset);
        var = Math.max(1e-6, _params.variance(mu)); // avoid numerical problems with 0 variance
        d = _params.linkDeriv(mu);
        z = eta + (y-mu)*d;
        w = r.weight/(var*d*d);
      }
      _likelihoods += r.weight*_params.likelihood(y,mu);
      assert w >= 0|| Double.isNaN(w) : "invalid weight " + w; // allow NaNs - can occur if line-search is needed!

      _ws+=w;
      double xb = r.innerProduct(_betacd);
      for(int i = 0; i < r.nBins; ++i)  { // go over cat variables
        _temp[r.binIds[i]] += (z - xb + _betacd[r.binIds[i]])  *w;
        _varsum[r.binIds[i]] += w ;
      }
      for(int i = 0; i < r.nNums; ++i){ // num vars
        int id = r.numIds == null?(i + numStart):r.numIds[i];
        _temp[id] += (z- xb + r.get(id)*_betacd[id] )*(r.get(id)*w);
        _varsum[id] += w*r.get(id)*r.get(id);
      }
        _temp[_temp.length-1] += w*(z-r.innerProduct(_betacd)+_betacd[_betacd.length-1]);
    }

    @Override
    public void reduce(GLMCoordinateDescentTask git){ // adding contribution of all the chunks
      ArrayUtils.add(_temp, git._temp);
      ArrayUtils.add(_varsum, git._varsum);
      _ws+= git._ws;
      _nobs += git._nobs;
      _likelihoods += git._likelihoods;
      super.reduce(git);
    }

  }
*/

  public static class RandColAddW2AugXZ extends MRTask {
    public int[] _cumRandCatLevels; // cumulative sum of random column categorical levels
    public int _randNumColStart;
    public long _randRowStart;  // index into absolute row number start in Frame _AugXZ
    Job _job;
    Frame _prior_weights_psi; // first column is prior weight, second column is wpsi, third is zmi
    public int _totAugxzColNumber;
    public int[] _weightID;

    public RandColAddW2AugXZ(Job job, int[] randCatLevels, Frame prior_weights_psi, int wpsiID, long randRowStart, 
                             int randNumColStart, int augXZColNum) {
      _job = job;
      _prior_weights_psi = prior_weights_psi;
      _weightID = new int[]{wpsiID};
      _cumRandCatLevels = ArrayUtils.cumsum(randCatLevels);
      _randRowStart = randRowStart;
      _randNumColStart = randNumColStart;
      _totAugxzColNumber = augXZColNum;
    }

    /***
     * Given a column index into the expanded random columns, this method determines which random
     * column that index belongs to.
     * @param cumrandCatLevels cumulative sum of the random columns' categorical levels
     * @param colIndex column index into the expanded random columns
     * @return index of the random column that colIndex belongs to
     */
    public static int findRandColIndex(int[] cumrandCatLevels, long colIndex) {
      int len = cumrandCatLevels.length;
      for (int index = 0; index < len; index++) {
        if (colIndex < cumrandCatLevels[index])
          return index;
      }
      return (len-1);
    }
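    /*
     * Worked example (hypothetical levels): for randCatLevels = {3, 4},
     * ArrayUtils.cumsum gives {3, 7}; expanded column indices 0..2 then map to
     * random column 0 and indices 3..6 to random column 1, so
     * findRandColIndex(new int[]{3, 7}, 5) returns 1.
     */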

    @Override
    public void map(Chunk[] chunks) {       // chunks will be AugXZ
      long chkStartIdx = chunks[0].start(); // absolute row number of AugXZ chunk
      // Note here, we are working on the lower rows of augXZ related to 0 | Iq.
      if ((chkStartIdx+chunks[0].len()) >= _randRowStart) { // only start working if we are looking at correct chunk
        // need to figure out which chunk of priorWeightsWpsi to take and where the row start should be as well
        Chunk[] priorWeightsWpsi = new Chunk[1];
        int chkRowStart = (int) (_randRowStart-chkStartIdx); // relative start in AugXZ
        chkRowStart  = chkRowStart  > 0?chkRowStart:0;  // whole chunk of AugXZ used to calculate lower part of AugXZ
        long chkWeightRowStart = chkRowStart+chkStartIdx-_randRowStart; // first row of absolute row index of weight
        int[] weightChunkInfo = getCorrectChunk(_prior_weights_psi, 0, chkWeightRowStart, priorWeightsWpsi,
                _weightID, null);  // start from 0 to total randColExpanded
        int chkWeightRelRow = weightChunkInfo[2];
        int psiColumnIndex = (int)priorWeightsWpsi[0].start()+_randNumColStart+chkWeightRelRow;// start of column for random Columns
        for (int index = chkRowStart; index < chunks[0]._len; index++) {  // go through each row of AugXZ
          if (chkWeightRelRow >= weightChunkInfo[1]) {  // need to grab a new weight chunk
            weightChunkInfo = getCorrectChunk(_prior_weights_psi, weightChunkInfo[0]+1,
                    chkWeightRelRow+priorWeightsWpsi[0].start(), priorWeightsWpsi, _weightID, weightChunkInfo);
            chkWeightRelRow = weightChunkInfo[2];
          }
          double wpsi = priorWeightsWpsi[0].atd(chkWeightRelRow);
          for (int colIndex=0; colIndex < psiColumnIndex; colIndex++) {
            chunks[colIndex].set(index, 0.0); // zero out columns to left of psiColumnIndex
          }
          chunks[psiColumnIndex].set(index, wpsi);        // update weight to AugXZ
          psiColumnIndex++;
          for (int colIndex=psiColumnIndex; colIndex < _totAugxzColNumber; colIndex++)
            chunks[colIndex].set(index, 0.0); // zero out columns to right of psiColumnIndex
          chkWeightRelRow++;
        }
      }
    }
  }

  /***
   *
   * This class calculates wpsi, zmi for frame _prior_weights_psi.  
   */
  public static class CalculateW4Rand extends MRTask {
    GLMParameters _parms;
    public int[] _cumRandCatLevels; // cumulative sum of random column categorical levels
    public double[] _psi;
    public double[] _phi;
    public int _numRandCol; // number of random columns specified by user
    Job _job;
    double[] _vi; // store random column coefficients

    public CalculateW4Rand(Job job, GLMParameters params, int[] randCatLevels, double[] psi, double[] phi, 
                           double[] vi) {
      _job = job;
      _parms = params;
      _numRandCol = _parms._random_columns.length;  // number of random columns specified by user
      _cumRandCatLevels = ArrayUtils.cumsum(randCatLevels);
      _psi = psi;
      _phi = phi;
      _vi = vi;
    }

    /***
     * Given a column index into the expanded random columns, this method determines which random
     * column that index belongs to.
     * @param cumrandCatLevels cumulative sum of the random columns' categorical levels
     * @param colIndex column index into the expanded random columns
     * @return index of the random column that colIndex belongs to
     */
    public static int findRandColIndex(int[] cumrandCatLevels, long colIndex) {
      int len = cumrandCatLevels.length;
      for (int index = 0; index < len; index++) {
        if (colIndex < cumrandCatLevels[index])
          return index;
      }
      return (len-1);
    }

    @Override
    public void map(Chunk[] chunks) {       // chunks will be wpsi_frame
      GLMWeightsFun[] glmfunRand = ReturnGLMMMERunInfo.getRandGLMFuns(null, _numRandCol, _parms);
      double temp, ui, zmi, wpsi;
      for (int index = 0; index < chunks[0]._len; index++) {  // go through each row of the wpsi frame
        int randIndex = findRandColIndex(_cumRandCatLevels, index);
        temp = glmfunRand[randIndex].linkInvDeriv(_phi[index]); // du_dv
        ui = glmfunRand[randIndex].linkInv(_vi[index]);
        zmi = _vi[index]+(_psi[index]-ui)/temp;
        chunks[2].set(index, zmi);
        wpsi = chunks[0].atd(index) * temp * temp / (glmfunRand[randIndex].variance(_psi[index]) * _phi[index]);
        chunks[1].set(index, Math.sqrt(wpsi));  // update weight frame with new weight
      }
    }
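    /*
     * A sketch of the update computed in map above, in the notation of the fields
     * (psi, phi and vi are the arrays passed to the constructor):
     *
     *   du_dv = linkInvDeriv(phi.i)
     *   u.i   = linkInv(v.i)
     *   zm.i  = v.i + (psi.i - u.i) / du_dv                       // written to column 2
     *   wpsi  = prior_wt.i * du_dv^2 / (variance(psi.i) * phi.i)
     *
     * and sqrt(wpsi) is what gets written back to column 1 of the frame.
     */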
    
  }
  
  public static class GenerateResid extends MRTask {
    public Job _job;
    double _oneOverSqrtSumDevONMP;
    int _hvColIdx;
    int _residColIdx;
    long _numDataRows;
    
    public GenerateResid(Job job, double oneOverSqrtSumDevONMP, int hvColIdx, int residColIdx, long numDataRows) {
      _job = job;
      _oneOverSqrtSumDevONMP = oneOverSqrtSumDevONMP;
      _hvColIdx = hvColIdx;
      _residColIdx = residColIdx;
      _numDataRows = numDataRows;
    }

    @Override
    public void map(Chunk[] chunks) { // chunk contains infos from GLMMME run
      long chkStartRowIdx = chunks[0].start();
      int chkRowNumber = chunks[0].len();
      for (int rowIndex=0; rowIndex < chkRowNumber; rowIndex++) {
        if (rowIndex+chkStartRowIdx >= _numDataRows)
          break;  // residuals are only generated for the data rows
        double temp = 1.0-chunks[_hvColIdx].atd(rowIndex);  // 1-hv
        double resid = chunks[_residColIdx].atd(rowIndex)*_oneOverSqrtSumDevONMP/Math.sqrt(temp);
        chunks[_residColIdx].set(rowIndex, resid);
      }
    }
  }

  public static class CalculateEtaInfo extends MRTask {
    public double _sumEtaDiffSq;
    public double _sumEtaSq;
    public int[] _etaOetaN;
    
    public CalculateEtaInfo(int[] etaOldetaNew) {
      _sumEtaDiffSq = 0;
      _sumEtaSq = 0;
      _etaOetaN = etaOldetaNew;
    }

    @Override
    public void map(Chunk[] chunks) {
      _sumEtaDiffSq = 0;
      _sumEtaSq = 0;
      int chkLen = chunks[0].len();
      for (int rowIndex=0; rowIndex < chkLen; rowIndex++) {
        double tempetaN = chunks[_etaOetaN[1]].atd(rowIndex); // grab etaNew value
        double tempetaDiff = chunks[_etaOetaN[0]].atd(rowIndex)-tempetaN;
        _sumEtaSq += tempetaN*tempetaN;
        _sumEtaDiffSq += tempetaDiff*tempetaDiff;
      }
    }

    @Override
    public void reduce(CalculateEtaInfo other){
      this._sumEtaDiffSq += other._sumEtaDiffSq;
      this._sumEtaSq += other._sumEtaSq;
    }
  }
  
  
  public static class ExtractFrameFromSourceWithProcess extends MRTask {
    public Frame _sourceFrame;
    int[] _devhvColIdx;
    long _startRowIndex;  // matches 0 row of dest chunk
    long _lengthToCopy;
    
    public ExtractFrameFromSourceWithProcess(Frame sourceFrame, int[] devHvColIdx, long startRowIndex, long lengthCopy) {
      _sourceFrame = sourceFrame;
      _devhvColIdx = devHvColIdx;
      _startRowIndex = startRowIndex;
      _lengthToCopy = lengthCopy;
    }

    @Override
    public void map(Chunk[] chunks) {
      long startChkIdx = chunks[0].start(); // absolute row index of chunks to copy to.
      int chkLen = chunks[0].len();
      long sourceChkIdx = _startRowIndex+startChkIdx; // absolute source chunk row index
      Chunk[] sourceChunks = new Chunk[_devhvColIdx.length];
      int[] fetchedChunkInfo = getCorrectChunk(_sourceFrame, 0, sourceChkIdx, sourceChunks, _devhvColIdx, 
              null);
      int fetchedRelRowIndex = fetchedChunkInfo[2];
      for (int rowIndex=0; rowIndex < chkLen; rowIndex++) {
        if (rowIndex+startChkIdx >= _lengthToCopy)
          break;
        if (fetchedRelRowIndex >= fetchedChunkInfo[1]) {
          fetchedChunkInfo = getCorrectChunk(_sourceFrame, fetchedChunkInfo[0]+1, 
                  fetchedRelRowIndex+sourceChunks[0].start(), sourceChunks, _devhvColIdx, fetchedChunkInfo);
          fetchedRelRowIndex = fetchedChunkInfo[2];
        }
        double temp = 1.0-sourceChunks[1].atd(fetchedRelRowIndex);
        chunks[0].set(rowIndex, sourceChunks[0].atd(fetchedRelRowIndex)/temp);  // set response
        chunks[2].set(rowIndex, temp/2);  // set weight
        fetchedRelRowIndex++;
      }
    }
  }

  /***
   * This class will copy columns from a source frame to columns in the destination frame
   * 
   */
  public static class CopyPartsOfFrame extends MRTask {
    public Frame _sourceFrame;
    public int[] _destColIndices;
    public int[] _sourceColIndices;
    public long _nrowsToCopy;

    public CopyPartsOfFrame(Frame fr, int[] destFrameColID, int[] sourceFrameColID, long numRows) {
      _sourceFrame = fr;
      if (sourceFrameColID==null) {
        int numCols = fr.numCols();
        _sourceColIndices = new int[numCols];
        for (int index=0; index < numCols; index++)
          _sourceColIndices[index] = index;
      } else
        _sourceColIndices = sourceFrameColID;
      
      if (destFrameColID == null) {
        int numCols = _sourceColIndices.length;
        _destColIndices = new int[numCols];
        for (int index=0; index < numCols; index++)
          _destColIndices[index]=index;
      } else
        _destColIndices = destFrameColID;
      
      assert _destColIndices.length==_sourceColIndices.length;
      _nrowsToCopy = numRows;
    }

    @Override
    public void map(Chunk[] chunks) { // chunk contains infos from GLMMME run
      int colLen = _sourceColIndices.length;
      long chkStartIdx = chunks[0].start(); // first row of destination frame
      Chunk[] sourceChunks = new Chunk[colLen]; // just fetch the needed columns from the source
      long lastRowIndex = chkStartIdx + chunks[0].len();
      if (chkStartIdx < _nrowsToCopy) { // only copy chunk when there are enough source chunks
        int rowLen = lastRowIndex > _nrowsToCopy ? ((int) (_nrowsToCopy - chkStartIdx)) : chunks[0].len();
        int[] fetchedChkInfo = getCorrectChunk(_sourceFrame, 0, chkStartIdx, sourceChunks, _sourceColIndices, null);
        int fetchedChkRelRow = fetchedChkInfo[2];
        for (int rowIndex = 0; rowIndex < rowLen; rowIndex++) {
          if (fetchedChkRelRow >= fetchedChkInfo[1]) {  // need new chunk
            fetchedChkInfo = getCorrectChunk(_sourceFrame, fetchedChkInfo[0] + 1,
                    fetchedChkRelRow + sourceChunks[0].start(), sourceChunks, _sourceColIndices, fetchedChkInfo);
            fetchedChkRelRow = fetchedChkInfo[2];
          }
          for (int colIndex = 0; colIndex < colLen; colIndex++) {
            chunks[_destColIndices[colIndex]].set(rowIndex, sourceChunks[colIndex].atd(fetchedChkRelRow));
          }
          fetchedChkRelRow++;
        }
      }
    }
  }

  public static class ReturnGLMMMERunInfo extends MRTask {
    public DataInfo _dinfo;
    public Frame _w_prior_wpsi;
    public Frame _qMatrix;
    Job _job;
    double _sumDev;
    double _sumEtaDiffSq;
    double _sumEtaSq;
    public int _totalaugXZCol;
    public int[] _dinfoWCol;  // columns to load from dinfo to calculate z and dev
    public int[] _wpriorwpsiCol;  // columns to load from _w_prior_wpsi to calculate z and dev
    public long _numDataRow;
    public int _maxdinfoCol;  // number of columns to load from dinfo._adaptedFrame
    GLMParameters _parms;
    public double[] _psi;
    public double[] _ubeta;
    public int[] _cumRandCatLevels; // cumulative sum of random column categorical levels
    public int _numRandCol;

    public ReturnGLMMMERunInfo(Job job, DataInfo datainfo, Frame wpriorwpsi, Frame qMatrix, int[] dinfoWCol, int[] wCol,
                               GLMParameters params, double[] psi, double[] ubeta, int[] cumRandCatLevels) {
      _job = job;
      _dinfo = datainfo;
      _w_prior_wpsi = wpriorwpsi;
      _qMatrix= qMatrix;
      _sumDev = 0;
      _sumEtaDiffSq = 0;
      _sumEtaSq = 0;
      _totalaugXZCol = qMatrix.numCols();
      _dinfoWCol = dinfoWCol;
      _wpriorwpsiCol = wCol;
      _numDataRow = _dinfo._adaptedFrame.numRows();
      _maxdinfoCol = _dinfo._weights?4:3;
      _parms = params;
      _psi = psi;
      _ubeta = ubeta;
      _cumRandCatLevels = cumRandCatLevels;
      _numRandCol = cumRandCatLevels.length;
    }

    public static GLMWeightsFun[] getRandGLMFuns(GLMWeightsFun[] randGLMs, int numRandFuncs, GLMParameters params) {
      if (randGLMs == null)
        randGLMs = new GLMWeightsFun[numRandFuncs];
      for (int index=0; index < numRandFuncs; index++) {
        Link randlink;
        if (params._rand_link==null)
          randlink = params._rand_family[index].defaultLink;
        else
          randlink = params._rand_link[index];
        randGLMs[index] = new GLMWeightsFun(params._rand_family[index], randlink,
                params._tweedie_variance_power, params._tweedie_link_power, 0);
      }
      return randGLMs;
    }

    @Override
    public void reduce(ReturnGLMMMERunInfo other){
      this._sumEtaDiffSq += other._sumEtaDiffSq;
      this._sumEtaSq += other._sumEtaSq;
    }

    @Override
    public void map(Chunk[] chunks) { // chunk contains infos from GLMMME run
      GLMWeightsFun glmfun=null;
      GLMWeightsFun[] glmfunRand = null;
      long chkStartRowIdx = chunks[0].start(); // first row number of chunk
      int chkRowNumber = chunks[0].len();
      Chunk[] chunksqMatrix = new Chunk[_totalaugXZCol]; // fetch chunk from AugXZ in order to calculate hv, Augz
      int[] qMatrixInfo = getCorrectChunk(_qMatrix, 0, chkStartRowIdx, chunksqMatrix, null, null);
      Chunk[] chunks4ZDev = new Chunk[4]; // potentially load response, zi, etai, weightID
      int[] zdevChunkInfo = new int[3];
      boolean usingWpsi;
      long rowOffset = chkStartRowIdx-_numDataRow;
      if (chkStartRowIdx >= _numDataRow) {  // load _w_prior_wpsi chunks, get wprior, zmi
        usingWpsi = true;
        zdevChunkInfo = getCorrectChunk(_w_prior_wpsi, 0, rowOffset, chunks4ZDev,
                _wpriorwpsiCol, zdevChunkInfo);
        glmfunRand = getRandGLMFuns(glmfunRand, _numRandCol, _parms);
      } else {  // load from dinfo: response, zi, etai and maybe weightID for prior_weight
        usingWpsi = false;
        glmfun = new GLMWeightsFun(_parms._family, _parms._link, _parms._tweedie_variance_power,
                _parms._tweedie_link_power, 0);
        zdevChunkInfo = getCorrectChunk(_dinfo._adaptedFrame, 0, chkStartRowIdx, chunks4ZDev, _dinfoWCol,
                zdevChunkInfo);
      }
      for (int rowIndex=0; rowIndex < chkRowNumber; rowIndex++) { // correct chunks are loaded for now
        int zdevAbsRelRowNumber = usingWpsi?(int)(rowIndex+rowOffset):rowIndex+zdevChunkInfo[2];  // offset into zdevChunks
        if (!usingWpsi && (zdevAbsRelRowNumber >= zdevChunkInfo[1])) {  // running out of rows with dinfo
          long rowAbsIndex = rowIndex+chkStartRowIdx;
          if (rowAbsIndex>=_numDataRow) { // load from wprior_wpsi
            usingWpsi=true;
            zdevChunkInfo = getCorrectChunk(_w_prior_wpsi, 0, rowAbsIndex-_numDataRow,
                    chunks4ZDev, _wpriorwpsiCol, zdevChunkInfo);
            if (glmfunRand==null) { // generate glmfunRand[] for the first time only
              glmfunRand = getRandGLMFuns(glmfunRand, _numRandCol, _parms);
            }
          } else {  // still load from dinfo
            zdevChunkInfo = getCorrectChunk(_dinfo._adaptedFrame, 0,
                    rowAbsIndex, chunks4ZDev, _dinfoWCol, zdevChunkInfo);
            if (glmfun==null)
              glmfun = new GLMWeightsFun(_parms._family, _parms._link, _parms._tweedie_variance_power,
                      _parms._tweedie_link_power, 0);
          }
          zdevAbsRelRowNumber = usingWpsi?(int)(rowIndex+rowOffset):rowIndex+zdevChunkInfo[2];
        }  else if (usingWpsi && (zdevAbsRelRowNumber-zdevChunkInfo[0]) >= zdevChunkInfo[1]) {  // load from wprior_wpsi
          zdevChunkInfo = getCorrectChunk(_w_prior_wpsi, 0, zdevAbsRelRowNumber, chunks4ZDev, _wpriorwpsiCol,
                  zdevChunkInfo);
          if (glmfunRand==null) { // generate glmfunRand[] for the first time only
            glmfunRand = getRandGLMFuns(glmfunRand, _numRandCol, _parms);
          }
          zdevAbsRelRowNumber = (int)(rowIndex+rowOffset);
        }
        _sumDev += calDev(usingWpsi, _cumRandCatLevels, zdevAbsRelRowNumber, chunks4ZDev, chunks, rowIndex, glmfun, glmfunRand, _psi, _ubeta);
        setHv(chunksqMatrix, chunks[1], rowIndex, qMatrixInfo[2]++);  // get hv from augXZ only
        if (qMatrixInfo[2] > qMatrixInfo[1]) {  // need to load in new chunk
          qMatrixInfo = getCorrectChunk(_qMatrix, 1+qMatrixInfo[0], chkStartRowIdx, chunksqMatrix, null, qMatrixInfo);
        }
      }
    }

    public static void setHv(Chunk[] qmat, Chunk hv, int relRowIndex, int qmatRelRowIndex) {
      int numCol = qmat.length;
      double rowSum = 0;
      for (int colIndex=0; colIndex < numCol; colIndex++) {
        double temp = qmat[colIndex].atd(qmatRelRowIndex);
        rowSum += temp*temp;
      }
      hv.set(relRowIndex, rowSum > ONEEQUAL?ONEEQUAL:rowSum);
    }
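    /*
     * hv computed above is the leverage of one row of the qMatrix: the squared row
     * norm sum_j q[j]^2, capped at ONEEQUAL = 1-1e-8 so that later divisions by
     * (1-hv) stay finite.  E.g. a row (0.6, 0.8) gives hv = 0.36+0.64 = 1.0,
     * stored as 1-1e-8.
     */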

    public  double calDev(boolean usingWpsi, int[] _cumRandCatLevels, int zdevAbsRelRowNumber, Chunk[] chunks4ZDev,
                          Chunk[] chunks, int rowIndex, GLMWeightsFun glmfun,
                          GLMWeightsFun[] glmfunRand, double[] psi, double[] ubeta) {
      if (usingWpsi) {
        int randIndex = RandColAddW2AugXZ.findRandColIndex(_cumRandCatLevels, zdevAbsRelRowNumber);
        return setZDevEta(chunks4ZDev, chunks, rowIndex, (int) (zdevAbsRelRowNumber-chunks[0].start()),
                (int) zdevAbsRelRowNumber, glmfunRand[randIndex], psi, ubeta);
      } else {          // get z, dev, eta from dinfo
        return setZDevEta(chunks4ZDev, chunks, rowIndex, zdevAbsRelRowNumber, glmfun);
      }
    }

    public static double setZDevEta(Chunk[] wpsiChunks, Chunk[] destChunk, int relRowIndex, int wpsiRowIndex,
                                    int abswpsiRowIndex, GLMWeightsFun glmfuns, double[] psi, double[] ubeta) {
      destChunk[0].set(relRowIndex, wpsiChunks[1].atd(wpsiRowIndex)); // set Z value
      double temp = psi[abswpsiRowIndex]-ubeta[abswpsiRowIndex];
      double devVal=wpsiChunks[0].atd(wpsiRowIndex)*temp*temp;
      destChunk[2].set(relRowIndex, devVal < ZEROEQUAL?ZEROEQUAL:devVal);
      return devVal;
    }

    public  double setZDevEta(Chunk[] dinfoChunks, Chunk[] destChunk, int relRowIndex, int dinfoRowIndex,
                              GLMWeightsFun glmfun) {
      destChunk[0].set(relRowIndex, dinfoChunks[1].atd(dinfoRowIndex)); // set AugZ value
      double eta = dinfoChunks[2].atd(dinfoRowIndex);
      destChunk[3].set(relRowIndex, eta); // set new eta value
      double temp2 = eta-destChunk[5].atd(relRowIndex);
      _sumEtaDiffSq += temp2*temp2;
      _sumEtaSq += eta*eta;
      double temp = dinfoChunks[0].atd(dinfoRowIndex)-glmfun.linkInv(eta);
      destChunk[4].set(relRowIndex, temp);  // set resid = (y-mu.i)
      double prior_weight = dinfoChunks[3]==null?1:dinfoChunks[3].atd(dinfoRowIndex);
      double devVal = prior_weight*temp*temp;
      destChunk[2].set(relRowIndex, devVal < ZEROEQUAL?ZEROEQUAL:devVal);
      return devVal;
    }
  }
  
  public static class ReturnGLMMMERunInfoRandCols extends MRTask {
    public DataInfo _dinfo;
    public Frame _w_prior_wpsi;
    public Frame _qMatrix;
    Job _job;
    double _sumDev;
    public int _totalqMatrixCols;
    public int[] _wpriorwpsiCol;  // columns to load from _w_prior_wpsi to calculate z and dev
    public long _numDataRow;
    GLMParameters _parms;
    public double[] _psi;
    public double[] _ubeta;
    public int[] _cumRandCatLevels; // cumulative sum of random column categorical levels
    public int _numRandCol;

    public ReturnGLMMMERunInfoRandCols(Job job, DataInfo datainfo, Frame wpriorwpsi, Frame qmatrix, int[] wCol,
                               GLMParameters params, double[] psi, double[] ubeta, int[] cumRandCatLevels) {
      _job = job;
      _w_prior_wpsi = wpriorwpsi;
      _qMatrix = qmatrix;
      _sumDev = 0;
      _totalqMatrixCols = qmatrix.numCols();
      _wpriorwpsiCol = wCol;
      _numDataRow = datainfo._adaptedFrame.numRows();
      _parms = params;
      _psi = psi;
      _ubeta = ubeta;
      _cumRandCatLevels = cumRandCatLevels;
      _numRandCol = cumRandCatLevels.length;
    }
    
    @Override
    public void map(Chunk[] chunks) { // chunk contains infos from GLMMME run
      GLMWeightsFun[] glmfunRand = null;
      long chkStartRowIdx = chunks[0].start(); // first row number of chunk
      long maxChkRowIdx = chunks[0].start() + chunks[0].len();
      if (chkStartRowIdx >= _numDataRow || _numDataRow < maxChkRowIdx) { // only update random column part
        int chkRowNumber = chunks[0].len();
        chkStartRowIdx = chkStartRowIdx >= _numDataRow?chkStartRowIdx:_numDataRow;  // absolute row start index
        Chunk[] chunksqMatrix = new Chunk[_totalqMatrixCols]; // fetch chunk from qMatrix in order to calculate hv, Augz
        int[] qMatrixInfo = getCorrectChunk(_qMatrix, 0, chkStartRowIdx, chunksqMatrix, null, null);
        Chunk[] chunks4ZDev = new Chunk[4]; // potentially load response, zi, etai, weightID
        int[] zdevChunkInfo = new int[3];

        long rowOffset = chkStartRowIdx - _numDataRow;
        zdevChunkInfo = getCorrectChunk(_w_prior_wpsi, 0, rowOffset, chunks4ZDev,
                  _wpriorwpsiCol, zdevChunkInfo);
        int zdevRelRowNumber = zdevChunkInfo[2];
        int qMatrixRelRow = qMatrixInfo[2];
        glmfunRand = getRandGLMFuns(glmfunRand, _numRandCol, _parms);
        int chunkRowStart = (int)(chkStartRowIdx-chunks[0].start());
        for (int rowIndex = chunkRowStart; rowIndex < chkRowNumber; rowIndex++) { // correct chunks are loaded for now
          if (zdevRelRowNumber >= zdevChunkInfo[1]) {
            zdevChunkInfo = getCorrectChunk(_w_prior_wpsi, zdevChunkInfo[0]+1, 
                    zdevRelRowNumber+chunks4ZDev[0].start(), chunks4ZDev, _wpriorwpsiCol,
                    zdevChunkInfo);
            zdevRelRowNumber = zdevChunkInfo[2];
            if (glmfunRand == null) { // generate glmfunRand[] for the first time only
              glmfunRand = getRandGLMFuns(glmfunRand, _numRandCol, _parms);
            }
          }
          if (qMatrixRelRow >= qMatrixInfo[1]) {
            qMatrixInfo = getCorrectChunk(_qMatrix, qMatrixInfo[0]+1, qMatrixRelRow+chunksqMatrix[0].start(),
                    chunksqMatrix, null, qMatrixInfo);
            qMatrixRelRow=qMatrixInfo[2];
          }
          int randIndex = RandColAddW2AugXZ.findRandColIndex(_cumRandCatLevels, zdevRelRowNumber);
          _sumDev += setZDevEta(chunks4ZDev, chunks, rowIndex, zdevRelRowNumber, 
                  (int) (zdevRelRowNumber+chunks4ZDev[0].start()), _psi, _ubeta);
          setHv(chunksqMatrix, chunks[1], rowIndex, qMatrixRelRow);  // get hv from augXZ only
          qMatrixRelRow++;
          zdevRelRowNumber++;
        }
      }
    }


    public static GLMWeightsFun[] getRandGLMFuns(GLMWeightsFun[] randGLMs, int numRandFuncs, GLMParameters params) {
      if (randGLMs == null)
        randGLMs = new GLMWeightsFun[numRandFuncs];
      for (int index=0; index < numRandFuncs; index++) {
        Link randlink;
        if (params._rand_link==null)
          randlink = params._rand_family[index].defaultLink;
        else
          randlink = params._rand_link[index];
        randGLMs[index] = new GLMWeightsFun(params._rand_family[index], randlink,
                params._tweedie_variance_power, params._tweedie_link_power, 0);
      }
      return randGLMs;
    }
    
    @Override
    public void reduce(ReturnGLMMMERunInfoRandCols other){
      this._sumDev += other._sumDev;
    }


    public static void setHv(Chunk[] qmat, Chunk hv, int relRowIndex, int qmatRelRowIndex) {
      int numCol = qmat.length;
      double rowSum = 0;
      for (int colIndex=0; colIndex < numCol; colIndex++) {
        double temp = qmat[colIndex].atd(qmatRelRowIndex);
        rowSum += temp*temp;
      }
      hv.set(relRowIndex, rowSum > ONEEQUAL?ONEEQUAL:rowSum);
    }
    
    public static double setZDevEta(Chunk[] wpsiChunks, Chunk[] destChunk, int relRowIndex, int wpsiRowIndex, 
                                    int abswpsiRowIndex,  double[] psi, double[] ubeta) {
      destChunk[0].set(relRowIndex, wpsiChunks[1].atd(wpsiRowIndex)); // set Z value
      double temp = psi[abswpsiRowIndex]-ubeta[abswpsiRowIndex];
      double devVal=wpsiChunks[0].atd(wpsiRowIndex)*temp*temp;
      destChunk[2].set(relRowIndex, devVal < ZEROEQUAL?ZEROEQUAL:devVal);
      return devVal;
    }
  }

  /***
   * Fill in the return frame from the data-row portion only.
   */
  public static class ReturnGLMMMERunInfoData extends MRTask {
    public DataInfo _dinfo;
    public Frame _qMatrix;
    Job _job;
    double _sumDev;
    double _sumEtaDiffSq;
    double _sumEtaSq;
    public int _totalaugXZCol;
    public int[] _dinfoWCol;  // columns to load from dinfo to calculate z and dev
    public long _numDataRow;
    GLMParameters _parms;

    public ReturnGLMMMERunInfoData(Job job, DataInfo datainfo, Frame qMatrix, int[] dinfoWCol,
                               GLMParameters params) {
      _job = job;
      _dinfo = datainfo;
      _qMatrix = qMatrix;
      _sumDev = 0;
      _sumEtaDiffSq = 0;
      _sumEtaSq = 0;
      _totalaugXZCol = qMatrix.numCols();
      _dinfoWCol = dinfoWCol;
      _numDataRow = _dinfo._adaptedFrame.numRows();
      _parms = params;
    }
    
    @Override
    public void map(Chunk[] chunks) { // chunk contains infos from GLMMME run
      long chkStartRowIdx = chunks[0].start(); // first row number of chunk
      if (chkStartRowIdx < _numDataRow) { // only look at chunks corresponding to the data rows
        GLMWeightsFun glmfun = null;

        long maxRowIndex = chkStartRowIdx+chunks[0].len();
        int chkRowNumber = maxRowIndex>=_numDataRow?chunks[0].len()-(int)(maxRowIndex-_numDataRow):chunks[0].len(); // number of row to consider
        Chunk[] chunksqMatrix = new Chunk[_totalaugXZCol]; // fetch chunk from qMatrix in order to calculate hv, Augz
        int[] qMatrixInfo = getCorrectChunk(_qMatrix, 0, chkStartRowIdx, chunksqMatrix, null, null);
        int qMatrixRelRow = qMatrixInfo[2];
        Chunk[] chunks4ZDev = new Chunk[4]; // potentially load response, zi, etai, weightID
        int[] zdevChunkInfo = new int[3];

        glmfun = new GLMWeightsFun(_parms._family, _parms._link, _parms._tweedie_variance_power,
                _parms._tweedie_link_power, 0);
        zdevChunkInfo = getCorrectChunk(_dinfo._adaptedFrame, 0, chkStartRowIdx, chunks4ZDev, _dinfoWCol,
                zdevChunkInfo);
        int zdevAbsRelRowNumber = zdevChunkInfo[2];
        for (int rowIndex = 0; rowIndex < chkRowNumber; rowIndex++) { // correct chunks are loaded for now
          if (zdevAbsRelRowNumber >= zdevChunkInfo[1]) { // exceeds chunk limit, grab next one
            zdevChunkInfo = getCorrectChunk(_dinfo._adaptedFrame, zdevChunkInfo[0] + 1,
                    zdevAbsRelRowNumber + chunks4ZDev[0].start(), chunks4ZDev, _dinfoWCol, zdevChunkInfo);
            zdevAbsRelRowNumber = zdevChunkInfo[2];
          }
          if (qMatrixRelRow  > qMatrixInfo[1]) {  // need to load in new chunk
            qMatrixInfo = getCorrectChunk(_qMatrix, 1 + qMatrixInfo[0], qMatrixRelRow+chunksqMatrix[0].start(), chunksqMatrix, null, qMatrixInfo);
            qMatrixRelRow = qMatrixInfo[2];
          }
          if (glmfun == null)
            glmfun = new GLMWeightsFun(_parms._family, _parms._link, _parms._tweedie_variance_power,
                    _parms._tweedie_link_power, 0);
          _sumDev += setZDevEta(chunks4ZDev, chunks, rowIndex,  zdevAbsRelRowNumber, glmfun);
          setHv(chunksqMatrix, chunks[1], rowIndex, qMatrixRelRow);  // get hv from qmatrix only
          qMatrixRelRow++;
          zdevAbsRelRowNumber++;
        }
      }
    }

    @Override
    public void reduce(ReturnGLMMMERunInfoData other){
      this._sumEtaDiffSq += other._sumEtaDiffSq;
      this._sumEtaSq += other._sumEtaSq;
      this._sumDev += other._sumDev;
    }

    public static void setHv(Chunk[] qmat, Chunk hv, int relRowIndex, int qmatRelRowIndex) {
      int numCol = qmat.length;
      double rowSum = 0;
      for (int colIndex=0; colIndex < numCol; colIndex++) {
        double temp = qmat[colIndex].atd(qmatRelRowIndex);
        rowSum += temp*temp;
      }
      hv.set(relRowIndex, rowSum > ONEEQUAL?ONEEQUAL:rowSum);
    }
    
    public double setZDevEta(Chunk[] dinfoChunks, Chunk[] destChunk, int relRowIndex, int dinfoRowIndex,
                             GLMWeightsFun glmfun) {
      destChunk[0].set(relRowIndex, dinfoChunks[1].atd(dinfoRowIndex)); // set AugZ value
      double eta = dinfoChunks[2].atd(dinfoRowIndex);
      destChunk[3].set(relRowIndex, eta); // set new eta value
      double temp2 = eta-destChunk[5].atd(relRowIndex);
      _sumEtaDiffSq += temp2*temp2;
      _sumEtaSq += eta*eta;
      double temp = dinfoChunks[0].atd(dinfoRowIndex)-glmfun.linkInv(eta);
      destChunk[4].set(relRowIndex, temp);  // set resid = (y-mu.i)
      double prior_weight = dinfoChunks[3]==null?1:dinfoChunks[3].atd(dinfoRowIndex);
      double devVal = prior_weight*temp*temp;
      destChunk[2].set(relRowIndex, devVal < ZEROEQUAL?ZEROEQUAL:devVal);
      return devVal;
    }
  }
  public static class ExpandRandomColumns extends MRTask {
    Job _job;
    int[] _randomColIndices;
    int[] _randomColLevels;
    int _numRandCols;
    int _startRandomExpandedColumn;
    public ExpandRandomColumns(Job job, int[] randomColIndices, int[] randomColLevels, int startExpandedCol) {
      _job = job;
      _randomColIndices = randomColIndices;
      _randomColLevels = randomColLevels;
      _startRandomExpandedColumn = startExpandedCol;
      _numRandCols = randomColIndices.length;
    }

    @Override
    public void map(Chunk[] chunks) {
      int chunkRowLen = chunks[0].len();
      int columnOffset = _startRandomExpandedColumn;
      for (int colIndex = 0; colIndex < _numRandCols; colIndex++) { // expand each random column for each row
        for (int rowIndex = 0; rowIndex < chunkRowLen; rowIndex++) {
          int randColVal = ((int) chunks[_randomColIndices[colIndex]].atd(rowIndex)) + columnOffset;
          chunks[randColVal].set(rowIndex, 1);
        }
        columnOffset += _randomColLevels[colIndex];
      }
    }
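    /*
     * Illustrative expansion (hypothetical layout): with a single random column of
     * 4 levels and _startRandomExpandedColumn = 5, a row whose random column holds
     * level 2 sets chunks[5+2] = chunks[7] to 1, i.e. the random column is one-hot
     * encoded into columns 5..8.
     */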
  }

  // generate AugZ*W and store it in the first column of the frame being mapped over
  public static class CalculateAugZWData extends MRTask {
    public DataInfo _dinfo; // contains X and Z in response
    public long _numDataRows;
    Job _job;
    public int[] _dinfoWCol;

    public  CalculateAugZWData(Job job, DataInfo dInfo, int dinfoRespColStart) { // norm mul and norm sub are already applied in the weights
      _job = job;
      _dinfo = dInfo;
      _numDataRows = _dinfo._adaptedFrame.numRows();  // number of data rows
      _dinfoWCol = new int[]{dinfoRespColStart, dinfoRespColStart+1};
    }

    @Override
    public void map(Chunk[] chunks) { // chunks from AugZ
      long chkStartIdx = chunks[0].start();// first row number of chunks
      if (chkStartIdx < _numDataRows) { // only deal with data portion
        long lastChkIdx = chkStartIdx+chunks[0]._len;
        int chunkLen = lastChkIdx < _numDataRows?chunks[0]._len:(int)(_numDataRows-chkStartIdx);
        Chunk[] augzwChunks = new Chunk[2]; // loaded from _dinfo or _prior_weight_psi
        int[] extraChkInfo = new int[3];
        extraChkInfo = getCorrectChunk(_dinfo._adaptedFrame, 0, chkStartIdx, augzwChunks,
                _dinfoWCol, extraChkInfo);

        int extraRelRow = extraChkInfo[2];
        for (int rowIndex = 0; rowIndex < chunkLen; rowIndex++) {
          if (extraRelRow >= extraChkInfo[1]) { // need to load new chunk
            long chkAbsRowNumber = rowIndex + chkStartIdx;
            extraChkInfo = getCorrectChunk(_dinfo._adaptedFrame, extraChkInfo[0], chkAbsRowNumber, augzwChunks,
                    _dinfoWCol, extraChkInfo);

            extraRelRow = extraChkInfo[2];
          }
          chunks[0].set(rowIndex, augzwChunks[0].atd(extraRelRow) * augzwChunks[1].atd(extraRelRow));
          extraRelRow++;
        }
      }
    }
  }

  // generate AugZ*W and store it in the first column of the frame being mapped over
  public static class CalculateAugZWRandCols extends MRTask {
    public long _numDataRows;
    Job _job;
    Frame _prior_weight_psi;  // contains prior_weight, wpsi, zmi for random effects/columns
    public int[] _weightWCol;

    public  CalculateAugZWRandCols(Job job, Frame prior_weight_psi, int weightColStart,
                                   long numDataRows) { // norm mul and norm sub are already applied in the weights
      _job = job;
      _prior_weight_psi = prior_weight_psi;
      _numDataRows = numDataRows;  // number of data rows
      _weightWCol = new int[]{weightColStart, weightColStart+1};
    }

    @Override
    public void map(Chunk[] chunks) { // chunks from AugZW
      long chkStartIdx = chunks[0].start();// first row number of chunks
      long chkEndIdx = chkStartIdx+chunks[0]._len;
      if (chkStartIdx > _numDataRows || chkEndIdx > _numDataRows) {
        int chunkLen = chunks[0]._len;
        int chunkStartRow = chkStartIdx > _numDataRows?0:(int)(_numDataRows-chkStartIdx);
        Chunk[] augzwChunks = new Chunk[2]; // loaded from _dinfo or _prior_weight_psi
        int[] extraChkInfo = new int[3];
        extraChkInfo = getCorrectChunk(_prior_weight_psi, 0, 
                chunkStartRow+chkStartIdx - _numDataRows, augzwChunks, _weightWCol, extraChkInfo);
        int extraRelRow = extraChkInfo[2];
        for (int rowIndex = chunkStartRow; rowIndex < chunkLen; rowIndex++) {
          if (extraRelRow >= extraChkInfo[1]) { // need to load new chunk
            extraChkInfo = getCorrectChunk(_prior_weight_psi, extraChkInfo[0]+1, 
                    extraRelRow+augzwChunks[0].start(), augzwChunks, _weightWCol, extraChkInfo);
            extraRelRow = extraChkInfo[2];
          }
          chunks[0].set(rowIndex, augzwChunks[0].atd(extraRelRow) * augzwChunks[1].atd(extraRelRow));
          extraRelRow++;
        }
      }
    }
  }
  
  // generate AugZ*W and store it in the first column of the frame being mapped over
  public static class CalculateAugZW extends MRTask {
    GLMParameters _parms;
    public DataInfo _dinfo; // contains X and Z in response
    public int[] _random_columnsID;
    public int _augZID;
    public int _dataColNumber;
    public int _randColNumber;
    public int _numColStart;
    public int _numRandCol;
    public long _numDataRows;
    Job _job;
    Frame _prior_weight_psi;  // contains prior_weight, wpsi, zmi for random effects/columns
    public int[] _dinfoWCol;
    public int[] _weightWCol;

    public  CalculateAugZW(Job job, DataInfo dInfo, GLMParameters params, Frame prior_weight_psi, int randCatLevels,
                           int dinfoRespColStart, int weightColStart) { // norm mul and norm sub are already applied in the weights
      _job = job;
      _dinfo = dInfo;
      _parms = params;
      _prior_weight_psi = prior_weight_psi;
      _augZID = _dinfo.responseChunkId(2);  // 0: response, 1: wdata, 2: zi
      _dataColNumber = _dinfo.fullN()+1; // add 1 for intercept at the beginning
      _numColStart = _dinfo._cats==0?0:_dinfo._catOffsets[_dinfo._cats];
      _numRandCol = _parms._random_columns.length;
      _random_columnsID = new int[_numRandCol];
      System.arraycopy(_parms._random_columns, 0, _random_columnsID, 0, _numRandCol);
      _randColNumber = randCatLevels; // total number of random columns expanded
      _numDataRows = _dinfo._adaptedFrame.numRows();  // number of data rows
      _dinfoWCol = new int[]{dinfoRespColStart, dinfoRespColStart+1};
      _weightWCol = new int[]{weightColStart, weightColStart+1};
    }

    @Override
    public void map(Chunk[] chunks) { // chunks from AugZ
      long chkStartIdx = chunks[0].start();// first row number of chunks
      int chunkLen = chunks[0]._len;
      Chunk[] augzwChunks = new Chunk[2]; // loaded from _dinfo or _prior_weight_psi
      int[] extraChkInfo = new int[3];
      if (chkStartIdx < _numDataRows) { // grab wdata and zi from _dinfo._adaptedFrame.
        extraChkInfo = getCorrectChunk(_dinfo._adaptedFrame, 0, chkStartIdx, augzwChunks,
                _dinfoWCol, extraChkInfo);
      } else {  // grab weight from _prior_weight_psi
        extraChkInfo = getCorrectChunk(_prior_weight_psi, 0, chkStartIdx-_numDataRows, augzwChunks,
                _weightWCol, extraChkInfo);
      }
      int extraRelRow = extraChkInfo[2];
      for (int rowIndex=0; rowIndex < chunkLen; rowIndex++) {
        if (extraRelRow >= extraChkInfo[1]) { // need to load new chunk
          long chkAbsRowNumber = rowIndex+chkStartIdx;
          if (chkAbsRowNumber < _numDataRows) { // need to load from dinfo
            extraChkInfo = getCorrectChunk(_dinfo._adaptedFrame, extraChkInfo[0], chkAbsRowNumber, augzwChunks,
                    _dinfoWCol, extraChkInfo);
          } else { // need to load from w_prior_psi
            extraChkInfo = getCorrectChunk(_prior_weight_psi, extraChkInfo[0], chkAbsRowNumber-_numDataRows,
                    augzwChunks, _weightWCol, extraChkInfo);
          }
          extraRelRow = extraChkInfo[2];
        }
        chunks[0].set(rowIndex, augzwChunks[0].atd(extraRelRow)*augzwChunks[1].atd(extraRelRow));
        extraRelRow++;
      }
    }
  }
  
  public static class HelpercAIC extends MRTask {
    final double TWOPI = 2*Math.PI; // 2*pi constant used in the log-likelihood terms
    final double _logOneO2pisd = -Math.log(Math.sqrt(TWOPI));
    public double _p; // stores sum(hv)
    public double _devOphi; // stores sum(dev/glm.phi)
    public double _constT;  // stores sum(log(2*pi*glm.phi))
    boolean _weightPresent;  // indicates whether a prior-weight column is present
    final double _varFix;
    
    public HelpercAIC(boolean weightP, double varFix) {
      _weightPresent = weightP;
      _varFix = varFix;
    }

    @Override
    public void map(Chunk[] chunks) {
      _p = 0;
      _devOphi = 0;
      _constT = 0;
      int chunkLen = chunks[0].len();
      
      for (int rowIndex=0; rowIndex < chunkLen; rowIndex++) {
        double weight = _weightPresent?chunks[2].atd(rowIndex):1;
        double glm_phi = _varFix/weight;
        _constT += Math.log(TWOPI*glm_phi);
        _p += chunks[0].atd(rowIndex);
        _devOphi += chunks[1].atd(rowIndex)/glm_phi;
      }
    }
    
    @Override public void reduce(HelpercAIC other) {
      _p += other._p;
      _constT += other._constT;
      _devOphi += other._devOphi;
    }
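    /*
     * Usage sketch (illustrative only; fr is assumed to hold hv in column 0, the
     * deviance in column 1 and, when present, the prior weight in column 2):
     *
     *   HelpercAIC aic = new HelpercAIC(hasWeights, varFix).doAll(fr);
     *   // aic._p       -> sum(hv)
     *   // aic._constT  -> sum(log(2*pi*glm.phi))
     *   // aic._devOphi -> sum(dev/glm.phi)
     */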
  }

  /***
   * This class given the weights generated and stored in _dinfo and wpsi, will multiply the weights to AugXZ and
   * store them in AugXZ.
   */
  public static class CalculateW4Data extends MRTask {
    GLMParameters _parms;
    public DataInfo _dinfo;
    public int _prior_weightID; // column ID of prior-weights for data rows
    public int _wdataID;        // column ID to store weight info for data rows
    public int _offsetID;       // column ID for offsets
    public int[] _random_columnsID; // column ID where random column values are stored
    public int[] _randCatLevels;  // categorical levels for random columns
    public int _augZID;           // column ID where zi is stored
    public int _etaOldID;         // column ID where old eta.i value is stored
    public int _dataColNumber;  // fixed column number
    public int _numColStart;    // numerical fixed column index start
    public double[] _beta;    // store fixed coefficients
    public double[] _ubeta;   // store random coefficients
    public double[] _psi;
    public double[] _phi;
    public double _tau;
    public int _numRandCol; // number of random effects/columns
    Job _job;
    public double _sumEtaDiffSq;  // store sum of (eta.i-eta.old)^2
    public double _sumEtaSq;      // store sum(eta.i^2)
    public double _HL_correction; // correction to the hierarchical likelihood, determined by the distribution used

    public CalculateW4Data(Job job, DataInfo dInfo, GLMParameters params, int[] randCatLevels,
                           double[] beta, double[] ubeta, double[] psi, double[] phi, double tau, double hlCorrection) { // norm mul and norm sub are already applied in the weights
      _job = job;
      _dinfo = dInfo;
      _parms = params;
      _prior_weightID = _dinfo._weights?_dinfo.weightChunkId():-1;
      _augZID = _dinfo.responseChunkId(2);  // 0: response, 1: wdata, 2: zi, 3: etaOld
      _wdataID = _dinfo.responseChunkId(1);
      _etaOldID = _dinfo.responseChunkId(3);
      _offsetID = _dinfo._offset?_dinfo.offsetChunkId():-1;
      _dataColNumber = _dinfo.fullN()+1; // add 1 for intercept at the beginning
      _numColStart = _dinfo.numCats()==0?0:_dinfo._catOffsets[_dinfo._cats];
      _numRandCol = _parms._random_columns.length;
      _random_columnsID = _parms._random_columns;
      _randCatLevels = randCatLevels;
      _beta = beta;
      _ubeta = ubeta;
      _psi = psi;
      _phi = phi;
      _tau = tau;
      _HL_correction=hlCorrection;
      _sumEtaDiffSq=0;
      _sumEtaSq=0;
    }

    @Override
    public void map(Chunk[] chunks) { // chunks from _dinfo._adaptedFrame
      GLMWeightsFun glmfun = new GLMWeightsFun(_parms._family, _parms._link, _parms._tweedie_variance_power,
              _parms._tweedie_link_power, 0);
      Row row = _dinfo.newDenseRow(); // one row of fixed effects/columns
      double eta, mu, temp, zi, wdata;
      for (int i = 0; i < chunks[0]._len; ++i) { // going over all the rows in the chunk of _dinfo._adaptedFrame
        _dinfo.extractDenseRow(chunks, i, row);
        if (!row.isBad() && row.weight != 0) {
          eta = row.innerProduct(_beta) + row.offset;
          for (int index=0; index < _numRandCol; index++) {
            eta += _ubeta[(int)row.response(4+index)];
          }
          if (Double.isNaN(eta))
            throw H2O.fail("GLM.MME diverged! Try different starting values.");
          double etaDiff = eta - row.response(3);
          chunks[_etaOldID].set(i, eta);  // save current eta as etaOld for next round
          _sumEtaDiffSq += etaDiff * etaDiff;
          _sumEtaSq += eta * eta;
          mu = glmfun.linkInv(eta);
          temp = glmfun.linkInvDeriv(mu);
          zi = eta - row.offset + (row.response(0) - mu) / temp - _HL_correction;
          chunks[_augZID].set(i, zi);
          wdata = row.weight * temp * temp / (glmfun.variance(mu) * _tau);
          chunks[_wdataID].set(i, Math.sqrt(wdata)); // set the new weight back to _dinfo.
        }
      }
    }

    @Override
    public void reduce(CalculateW4Data other){
      this._sumEtaDiffSq += other._sumEtaDiffSq;
      this._sumEtaSq += other._sumEtaSq;
    }

    /***
     * This method will calculate wdata and store it in the dinfo response columns.  In addition, it will accumulate
     * sum((eta.i-eta.o)^2) and sum(eta.i^2).  It returns sqrt(wdata), computed with the same formula as the R
     * implementation.
     *
     * @param glmfun
     * @param beta
     * @param ubeta
     * @param tau
     * @param row
     * @param chunks: chunks from dinfo
     * @param rowIndex
     * @return
     */
/*    public double getWeights(GLMWeightsFun glmfun, double[] beta, double[] ubeta, double tau, Row row, Chunk[] chunks,
                             int rowIndex) {
      double eta = row.innerProduct(beta) + row.offset;
      for (int index=0; index < _numRandCol; index++) {
        eta += ubeta[(int)row.response(4+index)];
      }
      if (Double.isNaN(eta))
        throw H2O.fail("GLM.MME diverged! Try different starting values.");
      double etaDiff = eta - row.response(3);
      chunks[_etaOldID].set(rowIndex, eta);  // save current eta as etaOld for next round
      _sumEtaDiffSq += etaDiff * etaDiff;
      _sumEtaSq += eta * eta;
      double mu = glmfun.linkInv(eta);
      double temp = glmfun.linkInvDeriv(mu);
      double zi = eta - row.offset + (row.response(0) - mu) / temp - _HL_correction;
      chunks[_augZID].set(rowIndex, zi);
      double wdata = row.weight * temp * temp / (glmfun.variance(mu) * tau);
      return Math.sqrt(wdata);
    }*/
  }

  /***
   * This class updates the frame AugXZ, which contains Ta*sqrt(W inverse) from the documentation:
   *
   * - it multiplies the generated weight value into Ta and stores the result in AugXZ.
   */
  public static class DataAddW2AugXZ extends MRTask {
    public DataInfo _dinfo;
    public int _wdataID;        // column ID to store weight info for data rows
    public int[] _randCatLevels;  // categorical levels for random columns
    public int _dataColNumber;  // fixed column number
    public int _numColStart;    // numerical fixed column index start
    public int _numRandCol; // number of random effects/columns
    Job _job;
    public long _dataRows;

    public DataAddW2AugXZ(Job job, DataInfo dInfo, int[] randCatLevels) { // norm mul and norm sub are already applied in the weights
      _job = job;
      _dinfo = dInfo;
      _wdataID = 1;
      _dataColNumber = _dinfo.fullN()+1; // add 1 for intercept at the beginning
      _numColStart = _dinfo._cats==0?0:_dinfo._catOffsets[_dinfo._cats];
      _numRandCol = randCatLevels.length;
      _randCatLevels = randCatLevels;
      _dataRows = _dinfo._adaptedFrame.numRows();
    }

    @Override
    public void map(Chunk[] chunks) { // chunks from augXZ but only takes care of data part
      long chkStartIdx = chunks[0].start(); // first row number of chunks of augXZ
      if (chkStartIdx < _dataRows) {  // only process data of augXZ upto dataRow
        int numColAugXZ = chunks.length;
        Chunk[] dinfoChunks = new Chunk[_dinfo._adaptedFrame.numCols()];
        double[] processedRow = new double[chunks.length]; // store one row of AugXZ: wdata*(intercept, x, z (expanded random columns))
        int[] dinfoChunkInfo = getCorrectChunk(_dinfo._adaptedFrame, 0, chkStartIdx, dinfoChunks, null, null);
        int dinfoChunkRelRow = dinfoChunkInfo[2];
        int chunkLen = chkStartIdx + chunks[0]._len >= _dataRows ? (int) (_dataRows - chkStartIdx) : chunks[0]._len;
        Row row = _dinfo.newDenseRow(); // one row of fixed effects/columns
        double wdata;
        for (int index = 0; index < chunkLen; index++) {
          if (dinfoChunkRelRow >= dinfoChunkInfo[1]) {
            dinfoChunkInfo = getCorrectChunk(_dinfo._adaptedFrame, dinfoChunkInfo[0]+1,
                    dinfoChunkRelRow+dinfoChunks[0].start(), dinfoChunks, null, null);
            dinfoChunkRelRow = dinfoChunkInfo[2];
          }
          _dinfo.extractDenseRow(dinfoChunks, dinfoChunkRelRow, row);
          Arrays.fill(processedRow, 0.0);
          wdata = row.response[_wdataID];
          row.scalarProduct(wdata, processedRow, _numColStart); // generate wdata*X
          int offset = _dataColNumber;
          for (int randColIndex = 0; randColIndex < _numRandCol; randColIndex++) { // generate x*Z
            int processRowIdx = offset + (int) row.response[4 + randColIndex];  // 0: response, 1: weight, 2: zi, 3: etai, 4 or more: z
            processedRow[processRowIdx] = wdata;  // save wdata as
            offset += _randCatLevels[randColIndex]; // write to next random column value
          }
          for (int colIndex = 0; colIndex < numColAugXZ; colIndex++) {      // assign the rows to the AugXZ
            chunks[colIndex].set(index, processedRow[colIndex]); // set w*X for intercept, data, random columns
          }
          dinfoChunkRelRow++;

        }
      }
    }
    
    /***
     * Given the chkIdx, this method will fetch the chunks with columns specified in vecIdx
     * @param augXZ: Frame from which chunks are fetched
     * @param chkIdx: chunk index to fetch
     * @param chks: array to store the fetched chunks
     * @param vecIdx: if null, fetch all columns; else, contains the column indices of interest to fetch
     */
    public static void getAllChunks(Frame augXZ, int chkIdx, Chunk[] chks, int[] vecIdx) {
      if (vecIdx==null) { // copy all vectors of the chunk; chks[0] was already fetched by getOneSingleChunk
        int chkLen = chks.length;
        for (int chkIndex = 1; chkIndex < chkLen; chkIndex++)
          chks[chkIndex] = augXZ.vec(chkIndex).chunkForChunkIdx(chkIdx);
      } else {
        int veclen = vecIdx.length;
        for (int index=1; index < veclen; index++)
          chks[index] = augXZ.vec(vecIdx[index]).chunkForChunkIdx(chkIdx);
      }
    }

    /***
     * Given the absolute row index of interest, this method will check whether chunk chkIdx of augXZ contains
     * that row.
     *
     * @param augXZ: Frame where chunks will be fetched
     * @param chkIdx: chunk index to check if it contains the absolute row index of interest
     * @param currentRowAbs: absolute row index of interest
     * @param chks: array whose element 0 stores the fetched chunk
     * @param vecIdx: column indices to fetch.  If null, fetch all columns
     * @return -1 if chunk chkIdx contains the row; chkIdx-1 to search lower chunks; chkIdx+1 to search higher chunks
     */
    public static int getOneSingleChunk(Frame augXZ, int chkIdx, long currentRowAbs, Chunk[] chks, int[] vecIdx) {
      chkIdx = chkIdx>=augXZ.vec(0).nChunks()?0:chkIdx;
      if (vecIdx==null) { // copy all vectors of the chunk
        // fetch one vector and check if it contains the correct rows
        chks[0] = augXZ.vec(0).chunkForChunkIdx(chkIdx);
      } else {
        chks[0] = augXZ.vec(vecIdx[0]).chunkForChunkIdx(chkIdx);
      }
      // find correct row offset into chunk.
      long strow = chks[0].start();
      long endrow = chks[0].len()+strow;
      if ((currentRowAbs >= strow) && (currentRowAbs< endrow))
        return -1;
      else if (currentRowAbs < strow)
        return (chkIdx-1);
      else
        return (chkIdx+1);
    }
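    /*
     * Worked example of the return convention: if chunk chkIdx spans absolute rows
     * [100, 200), then currentRowAbs = 150 returns -1 (found), currentRowAbs = 50
     * returns chkIdx-1 (search left) and currentRowAbs = 250 returns chkIdx+1
     * (search right); getCorrectChunk below loops on this until it sees -1.
     */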
    
    /**
     * This method, given the absolute row index of interest, will grab the correct chunk from augXZ containing the
     * same absolute row index of interest.  The chunks of augXZ will be stored in chks.  In addition, an integer
     * array will be returned that contain the following information about the fetched chunk of augXZ:
     * - index 0: chunk index;
     * - index 1: number of rows of fetched chunk;
     * - index 2: relative starting index of fetched chunk that will correspond to the absolute row index of interest
     *            passed to this method.
     *
     * @param augXZ: Frame from which chunks will be grabbed
     * @param chkIdx: starting chunk index to look at
     * @param currentRowAbs: absolute row index of first row of interest
     * @param chks: array to store the fetched chunks
     * @param vecIdx: null if all columns should be fetched.  Else, contains the columns to be fetched
     * @param returnInfo: information about the fetched chunk (allocated here if null)
     * @return returnInfo, filled in as described above
     */
    public static int[] getCorrectChunk(Frame augXZ, int chkIdx, long currentRowAbs, Chunk[] chks, int[] vecIdx,
                                        int[] returnInfo) {
      assert currentRowAbs < augXZ.numRows();
      int currentIdx = chkIdx >= augXZ.vec(0).nChunks()?0:chkIdx;
      while (currentIdx >= 0) { // getOneSingleChunk returns -1 once the correct chunk is found
        currentIdx = getOneSingleChunk(augXZ, currentIdx, currentRowAbs, chks, vecIdx); // find chunk that contains currentRowAbs
      }
      getAllChunks(augXZ, chks[0].cidx(), chks, vecIdx); // fetch the remaining chunks of augXZ into chks
      if (returnInfo == null) {
        returnInfo = new int[3];
      }
      returnInfo[0] = chks[0].cidx(); // chunk index of fetched chunks
      returnInfo[1] = chks[0].len();  // number of rows in fetched chunks
      returnInfo[2] = (int) (currentRowAbs-chks[0].start());  // relative row start of first row of fetched chunk
      return returnInfo;
    }
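
    // A minimal usage sketch (hypothetical names rowAbs/info): fetch the chunks holding absolute
    // row rowAbs and read the chunk-relative offset out of the returned array.
    //
    //   Chunk[] chks = new Chunk[augXZ.numCols()];
    //   int[] info = getCorrectChunk(augXZ, 0, rowAbs, chks, null, null);
    //   int relRow = info[2];               // rowAbs relative to the start of the fetched chunk
    //   double val = chks[0].atd(relRow);   // first column value at the row of interest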
  }
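
  /**
   * One pass of naive sequential coordinate descent.  For the coordinate j currently being
   * updated, the task maintains the partial residual ztilda (removing the old beta_j
   * contribution and adding in the just-updated beta_{j-1} contribution) and accumulates
   * _temp[j] = sum_i w_i * x_ij * (z_i - ztilda_i).  A hedged note on how _temp is consumed:
   * the coordinate-descent driver (not part of this task) typically divides the accumulated
   * _temp by the weighted column norm (see denums in GLMGenerateWeightsTask), with
   * soft-thresholding when an L1 penalty is active.
   */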
  
  public static class GLMCoordinateDescentTaskSeqNaive extends MRTask {
    public double [] _normMulold;
    public double [] _normSubold;
    public double [] _normMulnew;
    public double [] _normSubnew;
    final double [] _betaold; // current old value at j
    final double [] _betanew; // global beta @ j-1 that was just updated.
    final int [] _catLvls_new; // sorted list of indices of active levels only for one categorical variable
    final int [] _catLvls_old;
    public double [] _temp;
    boolean _skipFirst;
    long _nobs;
    int _cat_num; // 1: current and previous both categorical, 2: current numeric and previous categorical, 3: both numeric, 4: current categorical and previous numeric.
    boolean _interceptnew;
    boolean _interceptold;

    public GLMCoordinateDescentTaskSeqNaive(boolean interceptold, boolean interceptnew, int cat_num,
                                            double [] betaold, double [] betanew, int [] catLvlsold, int [] catLvlsnew,
                                            double [] normMulold, double [] normSubold, double [] normMulnew, double [] normSubnew,
                                            boolean skipFirst) {
      // pass in normMul and normSub (the standardization scale and offset);
      // both will be null without standardization.
      _normMulold = normMulold;
      _normSubold = normSubold;
      _normMulnew = normMulnew;
      _normSubnew = normSubnew;
      _cat_num = cat_num;
      _betaold = betaold;
      _betanew = betanew;
      _interceptold = interceptold; // if updating beta_1, then the intercept is the previous column
      _interceptnew = interceptnew; // if currently updating the intercept value
      _catLvls_old = catLvlsold;
      _catLvls_new = catLvlsnew;
      _skipFirst = skipFirst;
    }

    @Override
    public void map(Chunk [] chunks) {
      int cnt = 0;
      Chunk wChunk = chunks[cnt++];
      Chunk zChunk = chunks[cnt++];
      Chunk ztildaChunk = chunks[cnt++];
      Chunk xpChunk=null, xChunk=null;

      _temp = new double[_betaold.length];
      if (_interceptnew) {
        xChunk = new C0DChunk(1,chunks[0]._len);
        xpChunk = chunks[cnt++];
      } else {
        if (_interceptold) {
          xChunk = chunks[cnt++];
          xpChunk = new C0DChunk(1,chunks[0]._len);
        }
        else {
          xChunk = chunks[cnt++];
          xpChunk = chunks[cnt++];
        }
      }

      // For each observation, add the corresponding term to _temp.  For a categorical variable,
      // only the term for its active level is added, together with the active level of the most
      // recently updated variable before it (if that one is also categorical).  If an observation's
      // level maps to an inactive column (in either variable), we skip its contribution by leaving
      // the level index negative: we won't be updating a beta_j that is inactive.
      for (int i = 0; i < chunks[0]._len; ++i) { // going over all the rows in the chunk
        double betanew = 0; // most recently updated prev variable
        double betaold = 0; // old value of current variable being updated
        double w = wChunk.atd(i);
        if(w == 0) continue;
        ++_nobs;
        int observation_level = 0, observation_level_p = 0;
        double val = 1, valp = 1;
        if(_cat_num == 1) {
          observation_level = (int) xChunk.at8(i); // only need to change one temp value per observation.
          if (_catLvls_old != null)
            observation_level = Arrays.binarySearch(_catLvls_old, observation_level);

          observation_level_p = (int) xpChunk.at8(i); // both cat
          if (_catLvls_new != null)
            observation_level_p = Arrays.binarySearch(_catLvls_new, observation_level_p);

          if(_skipFirst){
            observation_level--;
            observation_level_p--;
          }
        }
        else if(_cat_num == 2){
          val = xChunk.atd(i); // current num and previous cat
          if (_normMulold != null && _normSubold != null)
            val = (val - _normSubold[0]) * _normMulold[0];

          observation_level_p = (int) xpChunk.at8(i);
          if (_catLvls_new != null)
            observation_level_p = Arrays.binarySearch(_catLvls_new, observation_level_p);

          if(_skipFirst){
            observation_level_p--;
          }
        }
        else if(_cat_num == 3){
          val = xChunk.atd(i); // both num
          if (_normMulold != null && _normSubold != null)
            val = (val - _normSubold[0]) * _normMulold[0];
          valp = xpChunk.atd(i);
          if (_normMulnew != null && _normSubnew != null)
            valp = (valp - _normSubnew[0]) * _normMulnew[0];
        }
        else if(_cat_num == 4){
          observation_level = (int) xChunk.at8(i); // current cat
          if (_catLvls_old != null)
            observation_level = Arrays.binarySearch(_catLvls_old, observation_level); // search to see if this level is active.
          if(_skipFirst){
            observation_level--;
          }

          valp = xpChunk.atd(i); //prev numeric
          if (_normMulnew != null && _normSubnew != null)
            valp = (valp - _normSubnew[0]) * _normMulnew[0];
        }

        if(observation_level >= 0)
         betaold = _betaold[observation_level];
        if(observation_level_p >= 0)
         betanew = _betanew[observation_level_p];

        if (_interceptnew) {
          ztildaChunk.set(i, ztildaChunk.atd(i) - betaold + valp * betanew);
          _temp[0] += w * (zChunk.atd(i) - ztildaChunk.atd(i));
        } else {
          ztildaChunk.set(i, ztildaChunk.atd(i) - val * betaold + valp * betanew);
          if (observation_level >= 0) // skip when the observation's active level is an inactive column
            _temp[observation_level] += w * val * (zChunk.atd(i) - ztildaChunk.atd(i));
        }
      }
    }

    @Override
    public void reduce(GLMCoordinateDescentTaskSeqNaive git){
      ArrayUtils.add(_temp, git._temp);
      _nobs += git._nobs;
      super.reduce(git);
    }
  }


  public static class GLMCoordinateDescentTaskSeqIntercept extends MRTask {
    final double [] _betaold;
    public double _temp;
    DataInfo _dinfo;

    public  GLMCoordinateDescentTaskSeqIntercept( double [] betaold, DataInfo dinfo) {
      _betaold = betaold;
      _dinfo = dinfo;
    }

    @Override
    public void map(Chunk [] chunks) {
      int cnt = 0;
      Chunk wChunk = chunks[cnt++];
      Chunk zChunk = chunks[cnt++];
      Chunk filterChunk = chunks[cnt++];
      Row r = _dinfo.newDenseRow();
      for(int i = 0; i < chunks[0]._len; ++i) {
        if(filterChunk.atd(i)==1) continue;
        _dinfo.extractDenseRow(chunks,i,r);
        _temp += wChunk.at8(i) * (zChunk.atd(i) - r.innerProduct(_betaold)); // accumulate; reduce() sums _temp across chunks
      }

    }

    @Override
    public void reduce(GLMCoordinateDescentTaskSeqIntercept git){
      _temp+= git._temp;
      super.reduce(git);
    }

  }


  public static class GLMGenerateWeightsTask extends MRTask {
    final GLMParameters _params;
    final double [] _betaw;
    double [] denums;
    double wsum,wsumu;
    DataInfo _dinfo;
    double _likelihood;

    public GLMGenerateWeightsTask(Key jobKey, DataInfo dinfo, GLMModel.GLMParameters glm, double[] betaw) {
      _params = glm;
      _betaw = betaw;
      _dinfo = dinfo;
    }

    @Override
    public void map(Chunk [] chunks) {
      Chunk wChunk = chunks[chunks.length-3];
      Chunk zChunk = chunks[chunks.length-2];
      Chunk zTilda = chunks[chunks.length-1];
      chunks = Arrays.copyOf(chunks,chunks.length-3);
      denums = new double[_dinfo.fullN()+1]; // full N is expanded variables with categories

      Row r = _dinfo.newDenseRow();
      for(int i = 0; i < chunks[0]._len; ++i) {
        _dinfo.extractDenseRow(chunks,i,r);
        if (r.isBad() || r.weight == 0) {
          wChunk.set(i,0);
          zChunk.set(i,0);
          zTilda.set(i,0);
          continue;
        }
        final double y = r.response(0);
        assert ((_params._family != Family.gamma) || y > 0) : "illegal response column, y must be > 0  for family=Gamma.";
        assert ((_params._family != Family.binomial) || (0 <= y && y <= 1)) : "illegal response column, y must be in [0,1] for family=Binomial, got " + y;
        final double w, eta, mu, var, z;
        final int numStart = _dinfo.numStart();
        double d = 1;
        eta = r.innerProduct(_betaw);
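        // IRLS working response and weights (general branch below): with mean mu = linkInv(eta +
        // offset), variance function var(mu) and link derivative d = dEta/dMu evaluated at mu,
        // the working response is z = eta + (y - mu) * d and the working weight is
        // w = weight / (var(mu) * d^2).  The gaussian/identity branch is the shortcut where
        // d = 1 and var = 1, so z reduces to y - offset.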
        if (_params._family == gaussian && _params._link == Link.identity) {
          w = r.weight;
          z = y - r.offset;
          mu = 0;
        } else {
          mu = _params.linkInv(eta + r.offset);
          var = Math.max(1e-6, _params.variance(mu)); // avoid numerical problems with 0 variance
          d = _params.linkDeriv(mu);
          z = eta + (y - mu) * d;
          w = r.weight / (var * d * d);
        }
        _likelihood += _params.likelihood(y,mu);
        zTilda.set(i,eta-_betaw[_betaw.length-1]);
        assert w >= 0 || Double.isNaN(w) : "invalid weight " + w; // allow NaNs - can occur if line-search is needed!
        wChunk.set(i,w);
        zChunk.set(i,z);

        wsum+=w;
        wsumu+=r.weight; // just add the user observation weight for the scaling.

        for(int j = 0; j < r.nBins; ++j)  { // go over cat variables
          denums[r.binIds[j]] +=  w; // binIds skips the zeros.
        }
        for(int j = 0; j < r.nNums; ++j){ // num vars
          int id = r.numIds == null?(j + numStart):r.numIds[j];
          denums[id]+= w*r.get(id)*r.get(id);
        }

      }
    }

    @Override
    public void reduce(GLMGenerateWeightsTask git){ // accumulate contributions from all chunks
      ArrayUtils.add(denums, git.denums);
      wsum+=git.wsum;
      wsumu += git.wsumu;
      _likelihood += git._likelihood;
      super.reduce(git);
    }
  }
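
  /**
   * Accumulates, at the given coefficients, either the total deviance or the weighted sum of
   * squared (Pearson-style) residuals, depending on _dispersion_parameter_method, with a special
   * case for the Tweedie family.  A hedged note: the dispersion estimate itself is typically
   * obtained by the caller dividing _sumsqe by the residual degrees of freedom; that division
   * does not happen in this task.
   */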

  public static class ComputeSEorDEVIANCETsk extends FrameTask2 {
    final double [] _betaNew;
    double _sumsqe;
    double _wsum;
    GLMParameters _parms;
    GLMModel _model;

    public ComputeSEorDEVIANCETsk(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, double [] betaNew, GLMParameters parms,
                                  GLMModel model) {
      super(cmp, dinfo, jobKey);
      _glmf = new GLMWeightsFun(parms);
      _betaNew = betaNew;
      _parms = parms;
      _model = model;
    }
    
    transient double _sparseOffsetNew = 0;
    final GLMWeightsFun _glmf;
    transient GLMWeights _glmw;
    @Override public void chunkInit(){
      if(_sparse) {
        _sparseOffsetNew = GLM.sparseOffset(_betaNew, _dinfo);
      }
      _glmw = new GLMWeights();
    }

    @Override
    protected void processRow(Row r) {
      double z = r.response(0) - r.offset;
      double w = r.weight;
      if (_glmf._family != gaussian) {
        double etaOld = r.innerProduct(_betaNew) + _sparseOffsetNew;
        _glmf.computeWeights(r.response(0), etaOld, r.offset, r.weight, _glmw);
        z = _glmw.z;
        w = _glmw.w;
      }
      double eta = _glmf._family.equals(Family.tweedie) ? r.innerProduct(_betaNew) + _sparseOffsetNew + r.offset : r.innerProduct(_betaNew) + _sparseOffsetNew;
      double xmu = _glmf._family.equals(Family.tweedie) ? _glmf.linkInv(eta) : 0;
      if (deviance.equals(_parms._dispersion_parameter_method)) {  // deviance option
        if (!gaussian.equals(_glmf._family)) {
          _sumsqe += Double.isNaN(_glmw.dev) ? 0 : _glmw.dev;
        } else {
          double d = _model.deviance(r.weight, z, _glmf.linkInv(eta));
          _sumsqe += Double.isNaN(d) ? 0 : d;
        }
      } else { // default or pearson
        _sumsqe += _glmf._family.equals(Family.tweedie) ?
                ((r.response(0) - xmu) * (r.response(0) - xmu)) * r.weight / Math.pow(xmu, _glmf._var_power) :
                w * (eta - z) * (eta - z);
      }
      _wsum += Math.sqrt(w);
    }
    
    @Override
    public void reduce(ComputeSEorDEVIANCETsk c){_sumsqe += c._sumsqe; _wsum += c._wsum;}
  }

  /***
   * This task accumulates the sufficient statistics used to estimate the Gamma dispersion
   * factor by maximum likelihood.
   */
  public static class ComputeGammaMLSETsk extends FrameTask2 {
    final double [] _betaNew;
    double _sumlnyiOui;
    double _sumyiOverui;
    double _wsum;

    public ComputeGammaMLSETsk(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, double [] betaNew, GLMParameters parms) {
      super(cmp, dinfo, jobKey);
      _glmf = new GLMWeightsFun(parms);
      _betaNew = betaNew;
    }
    
    transient double _sparseOffsetNew = 0;
    final GLMWeightsFun _glmf;
    transient GLMWeights _glmw;
    @Override public void chunkInit(){
      if(_sparse) {
        _sparseOffsetNew = GLM.sparseOffset(_betaNew, _dinfo);
      }
      _glmw = new GLMWeights();
    }

    @Override
    protected void processRow(Row r) {
      double z = r.response(0) - r.offset;  // response
      double w = r.weight;
      if (z > 0 && w > 0) {
        double eta = _glmf._family.equals(Family.tweedie) ? r.innerProduct(_betaNew) + _sparseOffsetNew + r.offset
                : r.innerProduct(_betaNew) + _sparseOffsetNew;
        double xmu = _glmf.linkInv(eta); // ui
        double temp = w * z / xmu;
        _sumyiOverui += temp;
        _sumlnyiOui += w*Math.log(temp);
        _wsum += w;
      }
    }
    @Override
    public void reduce(ComputeGammaMLSETsk c){
      _sumlnyiOui += c._sumlnyiOui;
      _sumyiOverui += c._sumyiOverui;
      _wsum += c._wsum;
    }
  }
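
  // A hedged sketch of how the sums above are typically used: for a Gamma GLM, the
  // maximum-likelihood equation for the dispersion balances _sumyiOverui (the weighted sum of
  // y_i/mu_i) against _sumlnyiOui (the weighted sum of the corresponding log terms) and the
  // total weight _wsum.  The Newton iteration solving that equation also needs the
  // digamma/trigamma sums from ComputeDiTriGammaTsk below; the solve itself lives in the
  // dispersion-estimation driver, not in this task.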

  /***
   * This task accumulates the weighted digamma and trigamma sums used in the maximum-likelihood
   * estimation of the Gamma dispersion factor.
   */
  public static class ComputeDiTriGammaTsk extends FrameTask2 {
    double _sumDigamma;
    double _sumTrigamma;
    double _alpha;
    double[] _betaNew;

    public ComputeDiTriGammaTsk(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, double[] betaNew, GLMParameters parms, double alpha) {
      super(cmp, dinfo, jobKey);
      _glmf = new GLMWeightsFun(parms);
      _alpha = alpha;
      _betaNew = betaNew;
    }

    transient double _sparseOffsetNew = 0;
    final GLMWeightsFun _glmf;
    transient GLMWeights _glmw;
    @Override public void chunkInit(){
      if(_sparse) {
        _sparseOffsetNew = GLM.sparseOffset(_betaNew, _dinfo);
      }
      _glmw = new GLMWeights();
    }

    @Override
    protected void processRow(Row r) {
      double z = r.response(0) - r.offset;  // response
      double w = r.weight;
      if (z > 0 && w > 0) {
        _sumDigamma += w*digamma(w*_alpha);
        _sumTrigamma += w*w*trigamma(w*_alpha);
      }
    }
    @Override
    public void reduce(ComputeDiTriGammaTsk c){
      _sumDigamma += c._sumDigamma;
      _sumTrigamma += c._sumTrigamma;
    }
  }
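
  // A hedged sketch of the Newton step this task supports; gradAlpha/hessAlpha are hypothetical
  // helpers standing in for the driver's gradient/Hessian assembly, which combines these sums
  // with the statistics from ComputeGammaMLSETsk:
  //
  //   ComputeDiTriGammaTsk t = new ComputeDiTriGammaTsk(null, dinfo, jobKey, beta, parms, alpha)
  //           .doAll(dinfo._adaptedFrame);
  //   double grad = gradAlpha(alpha, t._sumDigamma /*, ... */);
  //   double hess = hessAlpha(alpha, t._sumTrigamma /*, ... */);
  //   alpha -= grad / hess; // Newton update of the Gamma shape/dispersion parameter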

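  /**
   * Incremental Gram update: computes only the rows of the weighted Gram matrix (X^T W X) and of
   * X^T W z that correspond to the newly activated columns in _newCols, so the existing Gram does
   * not need to be rebuilt from scratch when the active set grows.
   */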
  static class GLMIncrementalGramTask extends MRTask {
    final int[] _newCols;
    final DataInfo _dinfo;
    double[][] _gram;
    double [] _xy;
    final double [] _beta;
    final GLMWeightsFun _glmf;

    public GLMIncrementalGramTask(int [] newCols, DataInfo dinfo, GLMWeightsFun glmf, double [] beta){
      this._newCols = newCols;
      _glmf = glmf;
      _dinfo = dinfo;
      _beta = beta;
    }
    @Override
    public void map(Chunk[] chks) {
      GLMWeights glmw = new GLMWeights();
      double [] wsum = new double[_dinfo.fullN()+1];
      double ywsum = 0;
      DataInfo.Rows rows = _dinfo.rows(chks);
      double [][] gram = new double[_newCols.length][_dinfo.fullN() + 1];
      double [] xy = new double[_newCols.length];
      final int ns = _dinfo.numStart();
      double sparseOffset = rows._sparse?GLM.sparseOffset(_beta,_dinfo):0;
      for (int rid = 0; rid < rows._nrows; ++rid) {
        int j = 0;
        Row r = rows.row(rid);
        if(r.weight == 0) continue;
        if(_beta != null) {
          _glmf.computeWeights(r.response(0), r.innerProduct(_beta) + sparseOffset, r.offset, r.weight, glmw);
        } else {
          glmw.w = r.weight;
          glmw.z = r.response(0);
        }
        r.addToArray(glmw.w,wsum);
        ywsum += glmw.z*glmw.w;
        // first cats
        for (int i = 0; i < r.nBins; i++) {
          while (j < _newCols.length && _newCols[j] < r.binIds[i])
            j++;
          if (j == _newCols.length || _newCols[j] >= ns)
            break;
          if (r.binIds[i] == _newCols[j]) {
            r.addToArray(glmw.w, gram[j]);
            xy[j] += glmw.w*glmw.z;
            j++;
          }
        }
        while (j < _newCols.length && _newCols[j] < ns)
          j++;
        // nums
        if (r.numIds != null) { // sparse
          for (int i = 0; i < r.nNums; i++) {
            while (j < _newCols.length && _newCols[j] < r.numIds[i])
              j++;
            if (j == _newCols.length) break;
            if (r.numIds[i] == _newCols[j]) {
              double wx = glmw.w * r.numVals[i];
              r.addToArray(wx, gram[j]);
              xy[j] += wx*glmw.z;
              j++;
            }
          }
        } else { // dense
          for (; j < _newCols.length; j++) {
            int id = _newCols[j];
            double x = r.numVals[id - _dinfo.numStart()];
            if(x == 0) continue;
            double wx = glmw.w * x;
            r.addToArray(wx, gram[j]);
            xy[j] += wx*glmw.z;
          }
          assert j == _newCols.length;
        }
      }
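      // Correction for sparse standardization: sparse rows carry scaled but uncentered values
      // (zero entries were skipped), so the loop above accumulated weighted sums of x_i*x_j
      // instead of (x_i - m_i)*(x_j - m_j), with the scales s folded in.  Expanding the centered
      // product, (x_i - m_i)(x_j - m_j) = x_i*x_j - m_j*x_i - m_i*x_j + m_i*m_j, the missing
      // terms can be recovered from the weighted column sums in wsum and the total weight
      // wsum[wsum.length-1]; that is exactly what the adjustment below subtracts and adds back.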
      if(rows._sparse && _dinfo._normSub != null){ // adjust for sparse zeros (skipped centering)
        int numstart = Arrays.binarySearch(_newCols,ns);
        if(numstart < 0) numstart = -numstart-1;
        for(int k = 0; k < numstart; ++k){
          int i = _newCols[k];
          double [] row = gram[k];
          for(int j = ns; j < row.length-1; ++j){
            double mean_j = _dinfo.normSub(j-ns);
            double scale_j = _dinfo.normMul(j-ns);
            gram[k][j] = gram[k][j] - mean_j*scale_j*wsum[i];
          }
        }
        for(int k = numstart; k < gram.length; ++k){
          int i = _newCols[k];
          double mean_i = _dinfo.normSub(i-ns);
          double scale_i = _dinfo.normMul(i-ns);
          // categoricals
          for(int j = 0; j < _dinfo.numStart(); ++j){
            gram[k][j]-=mean_i*scale_i*wsum[j];
          }
          //nums
          for(int j = ns; j < gram[k].length-1; ++j){
            double mean_j = _dinfo.normSub(j-ns);
            double scale_j = _dinfo.normMul(j-ns);
            gram[k][j] = gram[k][j] - mean_j*scale_j*wsum[i] - mean_i*scale_i*wsum[j] + mean_i*mean_j*scale_i*scale_j*wsum[wsum.length-1];
          }
          gram[k][gram[k].length-1] -= mean_i*scale_i*wsum[gram[k].length-1];
          xy[k] -= ywsum * mean_i * scale_i;
        }
      }
      _gram = gram;
      _xy = xy;
    }

    @Override
    public void reduce(GLMIncrementalGramTask gt) {
      ArrayUtils.add(_xy,gt._xy);
      for(int i = 0; i< _gram.length; ++i)
        ArrayUtils.add(_gram[i],gt._gram[i]);
    }
  }
}



