hex.tree.DTree Maven / Gradle / Ivy

Go to download
package hex.tree;

import hex.Distribution;
import hex.genmodel.utils.DistributionFamily;
import hex.tree.uplift.Divergence;
import jsr166y.RecursiveAction;
import org.apache.log4j.Logger;
import water.AutoBuffer;
import water.H2O;
import water.Iced;
import water.MemoryManager;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.util.*;

import java.util.*;

import static hex.tree.SharedTreeModel.SharedTreeParameters.HistogramType;

/** A Decision Tree, laid over a Frame of Vecs, and built distributed.
 *
 *  This class defines an explicit Tree structure, as a collection of {@code
 *  DTree} {@code Node}s.  The Nodes are numbered with a unique {@code _nid}.
 *  Users need to maintain their own mapping from their data to a {@code _nid},
 *  where the obvious technique is to have a Vec of {@code _nid}s (ints), one
 *  per each element of the data Vecs.
 *
 *  
Each {@code Node} has a {@code DHistogram}, describing summary data
 *  about the rows.  The DHistogram requires a pass over the data to be filled
 *  in, and we expect to fill in all rows for Nodes at the same depth at the
 *  same time.  i.e., a single pass over the data will fill in all leaf Nodes'
 *  DHistograms at once.
 *
 *  @author Cliff Click
 */
public class DTree extends Iced {

  private static final Logger LOG = Logger.getLogger(DTree.class);

  final String[] _names; // Column names
  final int _ncols;      // Active training columns
  final long _seed;      // RNG seed; drives sampling seeds if necessary
  private Node[] _ns;    // All the nodes in the tree.  Node 0 is the root.
  public int _len;       // Resizable array
  // Public stats about tree
  public int _leaves;
  public int _depth;
  public final int _mtrys;           // Number of columns to choose amongst in splits (at every split)
  public final int _mtrys_per_tree;  // Number of columns to choose amongst in splits (once per tree)
  public final transient Random _rand; // RNG for split decisions & sampling
  public final transient int[] _cols; // Per-tree selection of columns to consider for splits
  public transient SharedTreeModel.SharedTreeParameters _parms;


  // compute the effective number of columns to sample
  public int actual_mtries() {
    return Math.min(Math.max(1,(int)((double)_mtrys * Math.pow(_parms._col_sample_rate_change_per_level, _depth))),_ncols);
  }

  public DTree(Frame fr, int ncols, int mtrys, int mtrys_per_tree, long seed, SharedTreeModel.SharedTreeParameters parms) {
    _names = fr.names();
    _ncols = ncols;
    _parms = parms;
    _ns = new Node[1];
    _mtrys = mtrys;
    _mtrys_per_tree = mtrys_per_tree;
    _seed = seed;
    _rand = RandomUtils.getRNG(seed);
    int[] activeCols=new int[_ncols];
    for (int i=0;i= 4 levels), split into 2 non-contiguous groups
    final byte _equal;          // Split is 0: <, 2: == with group split (<= 32 levels), 3: == with group split (> 32 levels)
    final double _se;           // Squared error without a split
    final double _se0, _se1;    // Squared error of each subsplit
    final double _n0,  _n1;     // (Weighted) Rows in each final split
    final double _p0,  _p1;     // Predicted value for each split
    final double _tree_p0, _tree_p1;
    final double _p0Treat, _p0Contr, _p1Treat, _p1Contr; // uplift predictions
    final double _n0Treat, _n0Contr, _n1Treat, _n1Contr;

    public Split(int col, int bin, DHistogram.NASplitDir nasplit, IcedBitSet bs, byte equal, double se, double se0, double se1, double n0, double n1, double p0, double p1, double tree_p0, double tree_p1) {
      assert nasplit != DHistogram.NASplitDir.None;
      assert nasplit != DHistogram.NASplitDir.NAvsREST || bs == null : "Split type NAvsREST shouldn't have a bitset";
      assert equal != 1; //no longer done
      // FIXME: Disabled for testing PUBDEV-6495:
      // assert se > se0+se1 || se==Double.MAX_VALUE; // No point in splitting unless error goes down
      assert col >= 0;
      assert bin >= 0;
      _col = col;  _bin = bin; _nasplit = nasplit; _bs = bs;  _equal = equal;  _se = se;
      _n0 = n0;  _n1 = n1;  _se0 = se0;  _se1 = se1;
      _p0 = p0;  _p1 = p1;
      _tree_p0 = tree_p0; _tree_p1 = tree_p1;
      _p0Treat = _p0Contr = _p1Treat = _p1Contr = 0;
      _n0Treat = _n0Contr = _n1Treat = _n1Contr = 0;
    }

    public Split(int col, int bin, DHistogram.NASplitDir nasplit, IcedBitSet bs, byte equal, double se, double se0, double se1, double n0, double n1, double p0, double p1, double tree_p0, double tree_p1,
                 double p0Treat, double p0Contr, double p1Treat, double p1Contr, double n0Treat, double n0Contr, double n1Treat, double n1Contr) {
      assert(nasplit!= DHistogram.NASplitDir.None);
      assert(equal!=1); //no longer done
      // FIXME: Disabled for testing PUBDEV-6495:
      // assert se > se0+se1 || se==Double.MAX_VALUE; // No point in splitting unless error goes down
      assert(col>=0);
      assert(bin>=0);
      _col = col;  _bin = bin; _nasplit = nasplit; _bs = bs;  _equal = equal;  _se = se;
      _n0 = n0;  _n1 = n1;  _se0 = se0;  _se1 = se1;
      _p0 = p0;  _p1 = p1;
      _tree_p0 = tree_p0; _tree_p1 = tree_p1;
      _p0Treat = p0Treat; _p0Contr = p0Contr; _p1Treat = p1Treat; _p1Contr = p1Contr;
      _n0Treat = n0Treat; _n0Contr = n0Contr; _n1Treat = n1Treat; _n1Contr = n1Contr;
    }
    public final double pre_split_se() { return _se; }
    public final double se() { return _se0+_se1; }
    public final int   col() { return _col; }
    public final int   bin() { return _bin; }
    public final DHistogram.NASplitDir naSplitDir() { return _nasplit; }
    public final double n0() { return _n0; }
    public final double n1() { return _n1; }

    /**
     * Returns an optimal numeric split point for numerical splits,
     * -1 for bitwise splits and Float.NaN if a split should be abandoned.
     * @param hs histograms
     * @return "split at" value
     */
    float splat(DHistogram[] hs) {
      return isNumericSplit() ? splatNumeric(hs[_col]) : -1f; // Split-at value (-1 for group-wise splits)
    }

    boolean isNumericSplit() {
      return _nasplit != DHistogram.NASplitDir.NAvsREST && (_equal == 0 || _equal == 1);
    }

    // Split-at dividing point.  Don't use the step*bin+bmin, due to roundoff
    // error we can have that point be slightly higher or lower than the bin
    // min/max - which would allow values outside the stated bin-range into the
    // split sub-bins.  Always go for a value which splits the nearest two
    // elements.
    float splatNumeric(final DHistogram h) {
      assert _nasplit != DHistogram.NASplitDir.NAvsREST : "Shouldn't be called for NA split type 'NA vs REST'";
      assert _bin > 0 && _bin < h.nbins();
      assert _bs==null : "Dividing point is a bitset, not a bin#, so don't call splat() as result is meaningless";
      assert _equal != 1;
      assert _equal==0; // not here for bitset splits, just range splits
      // Find highest non-empty bin below the split
      int x=_bin-1;
      while( x >= 0 && h.bins(x)==0 ) x--;
      // Find lowest  non-empty bin above the split
      int n=_bin;
      while( n < h.nbins() && h.bins(n)==0 ) n++;
      // Lo is the high-side of the low non-empty bin, rounded to int for int columns
      // Hi is the low -side of the hi  non-empty bin, rounded to int for int columns

      // Example: Suppose there are no empty bins, and we are splitting an
      // integer column at 48.4 (more than nbins, so step != 1.0, perhaps
      // step==1.8).  The next lowest non-empty bin is from 46.6 to 48.4, and
      // we set lo=48.4.  The next highest non-empty bin is from 48.4 to 50.2
      // and we set hi=48.4.  Since this is an integer column, we round lo to
      // 48 (largest integer below the split) and hi to 49 (smallest integer
      // above the split).  Finally we average them, and split at 48.5.
      double lo = h.binAt(x+1);
      double hi = h.binAt(n  );
      if (h._isInt > 0) {
        lo = h._step==1 ? lo-1 : Math.floor(lo);
        hi = h._step==1 ? hi   : Math.ceil (hi);
      }
      final float splitAt = (float) ((lo + hi) / 2.0);
      // abandon split if rounding errors could cause observations being incorrectly
      // assigned to child nodes at scoring time
      // this will typically happen when bin lengths are very small (eg. 1e-6)
      // we will abandon a split if `lo` is not a true lower bound to a float `splitAt`
      // (and symmetrically for `hi`)
      if (h._checkFloatSplits && lo != hi && (lo > splitAt || hi < splitAt)) {
        return Float.NaN;
      }
      return splitAt;
    }


    /**
     * Prepare children histograms, one per column.
     * Typically, histograms are created with a level-dependent binning strategy.
     * For the histogram of the current split decision, the children histograms are left/right range-adjusted.
     *
     * Any histgoram can null if there is no point in splitting
     * further (such as there's fewer than min_row elements, or zero
     * error in the response column).  Return an array of DHistograms (one
     * per column), which are bounded by the split bin-limits.  If the column
     * has constant data, or was not being tracked by a prior DHistogram
     * (for being constant data from a prior split), then that column will be
     * null in the returned array.
     * @param currentHistos Histograms for all applicable columns computed for the previous split finding process
     * @param way 0 (left) or 1 (right)
     * @param splat Split point for previous split (if applicable)
     * @param parms user-given parameters (will use nbins, min_rows, etc.)
     * @return Array of histograms to be used for the next level of split finding
     */
    public DHistogram[] nextLevelHistos(DHistogram[] currentHistos, int way, double splat, SharedTreeModel.SharedTreeParameters parms, Constraints cs, BranchInteractionConstraints bcs) {
      double n = way==0 ? _n0 : _n1;
      if( n < parms._min_rows ) {
        if (LOG.isTraceEnabled()) LOG.trace("Not splitting: too few observations left: " + n);
        return null; // Too few elements
      }
      double se = way==0 ? _se0 : _se1;
      if( se <= 1e-30 ) {
        LOG.trace("Not splitting: pure node (perfect prediction).");
        return null; // No point in splitting a perfect prediction
      }

      // Build a next-gen split point from the splitting bin
      int cnt=0;                  // Count of possible splits
      DHistogram nhists[] = new DHistogram[currentHistos.length]; // A new histogram set
      boolean checkBranchInteractions = bcs != null;
      for(int j = 0; j < currentHistos.length; j++ ) { // For every column in the new split
        // Check branch interaction constraint if it is not null 
        if (checkBranchInteractions && !bcs.isAllowedIndex(j)) {
          // Column is denied by branch interaction constraints -> the histogram is set to null
          continue;
        }
        DHistogram h = currentHistos[j];            // old histogram of column
        if( h == null )
          continue;        // Column was not being tracked?
        final int adj_nbins      = Math.max(h.nbins()>>1,parms._nbins); //update number of bins dependent on level depth

        // min & max come from the original column data, since splitting on an
        // unrelated column will not change the j'th columns min/max.
        // Tighten min/max based on actual observed data for tracked columns
        double min, maxEx;
        if( h._vals == null || _equal > 1) { // Not tracked this last pass? For bitset, always keep the full range of factors
          min = h._min;         // Then no improvement over last go
          maxEx = h._maxEx;
        } else {                // Else pick up tighter observed bounds
          min = h.find_min();   // Tracked inclusive lower bound
          if( h.find_maxIn() == min )
            continue; // This column will not split again
          maxEx = h.find_maxEx(); // Exclusive max
        }
        if (_nasplit== DHistogram.NASplitDir.NAvsREST) {
          if (way==1) continue; //no histogram needed - we just split NAs away
          // otherwise leave the min/max alone, and make another histogram (but this time, there won't be any NAs)
        }

        // Tighter bounds on the column getting split: exactly each new
        // DHistogram's bound are the bins' min & max.
        if( _col==j ) {
          switch( _equal ) {
          case 0:  // Ranged split; know something about the left & right sides
            if (_nasplit != DHistogram.NASplitDir.NAvsREST) {
              if (h._vals[h._vals_dim*_bin] == 0)
                throw H2O.unimpl(); // Here I should walk up & down same as split() above.
            }
            assert _bs==null : "splat not defined for BitSet splits";
            double split = splat;
            if( h._isInt > 0 ) split = (float)Math.ceil(split);
            if (_nasplit != DHistogram.NASplitDir.NAvsREST) {
              if (way == 0) maxEx = split;
              else min = split;
            }
            break;
          case 1:               // Equality split; no change on unequals-side
            if( way == 1 )
              continue; // but know exact bounds on equals-side - and this col will not split again
            break;
          case 2:               // BitSet (small) split
          case 3:               // BitSet (big)   split
            break;
          default: throw H2O.fail();
          }
        }
        if( min >  maxEx )
          continue; // Happens for all-NA subsplits
        if( MathUtils.equalsWithinOneSmallUlp(min, maxEx) )
          continue; // This column will not split again
        if( Double.isInfinite(adj_nbins/(maxEx-min)) )
          continue;
        if( h._isInt > 0 && !(min+1 < maxEx ) )
          continue; // This column will not split again
        assert min < maxEx && adj_nbins > 1 : ""+min+"<"+maxEx+" nbins="+adj_nbins;

        // only count NAs if we have any going our way (note: NAvsREST doesn't build a histo for the NA direction)
        final boolean hasNAs = (_nasplit == DHistogram.NASplitDir.NALeft && way == 0 || 
                _nasplit == DHistogram.NASplitDir.NARight && way == 1) && h.hasNABin();

        double[] customSplitPoints = h._customSplitPoints;
        if (parms._histogram_type == HistogramType.UniformRobust && 
                j != _col && // don't apply if we were able to split on the column with the current bins
                GuidedSplitPoints.isApplicableTo(h)
        ) {
          final int nonEmptyBins = h.nonEmptyBins();
          final double density = nonEmptyBins / ((double) h.nbins());

          if (density <= GuidedSplitPoints.LOW_DENSITY_THRESHOLD) {
            customSplitPoints = GuidedSplitPoints.makeSplitPoints(h, adj_nbins, min, maxEx);
          }
        }

        nhists[j] = DHistogram.make(h._name, adj_nbins, h._isInt, min, maxEx, h._intOpt, hasNAs,
                h._seed*0xDECAF+(way+1), parms,
                h._globalSplitPointsKey, cs, h._checkFloatSplits, customSplitPoints);
        cnt++;                    // At least some chance of splitting
      }
      return cnt == 0 ? null : nhists;
    }

    public Constraints nextLevelConstraints(Constraints currentConstraints, int way, double splat, SharedTreeModel.SharedTreeParameters parms) {
      int constraint = currentConstraints.getColumnConstraint(_col);
      if (constraint == 0) {
        return currentConstraints; // didn't split on a column with constraints => no need to modify them
      }
      double mid = (_tree_p0 + _tree_p1) / 2;
      return currentConstraints.withNewConstraint(_col, way, mid);
    }

    @Override public String toString() {
      return "Splitting: col=" + _col + " type=" + ((int)_equal == 0 ? " < " : "bitset")
              + ", splitpoint=" + _bin + ", nadir=" + _nasplit.toString() + ", se0=" + _se0 + ", se1=" + _se1 + ", n0=" + _n0 + ", n1=" + _n1;
    }
  }

  // --------------------------------------------------------------------------
  // An UndecidedNode: Has a DHistogram which is filled in (in parallel
  // with other histograms) in a single pass over the data.  Does not contain
  // any split-decision.
  public static class UndecidedNode extends Node {
    public transient DHistogram[] _hs; //(up to) one histogram per column
    public transient Constraints _cs;
    public transient BranchInteractionConstraints _bics;
    public final int _scoreCols[];      // A list of columns to score; could be null for all
    public UndecidedNode( DTree tree, int pid, DHistogram[] hs, Constraints cs, BranchInteractionConstraints bics) {
      super(tree,pid);
      assert hs.length==tree._ncols;
      _hs = hs; //these histograms have no bins yet (just constructed)
      _cs = cs;
      _bics = bics;
      _scoreCols = scoreCols();
    }

    public UndecidedNode(UndecidedNode node, DTree tree){
      super(tree, node._pid, node._nid, true);
      _hs = node._hs; //these histograms have no bins yet (just constructed)
      _cs = node._cs;
      _bics = node._bics;
      _scoreCols = node._scoreCols;
    }

    // Pick a random selection of columns to compute best score.
    // Can return null for 'all columns'.
    public int[] scoreCols() {
      DTree tree = _tree;
      if (tree.actual_mtries() == _hs.length && tree._mtrys_per_tree == _hs.length)
        return null;

      // per-tree pre-selected columns
      int[] activeCols = tree._cols;
      if (LOG.isTraceEnabled()) LOG.trace("For tree with seed " + tree._seed + ", out of " + _hs.length + " cols, the following cols are activated via mtry_per_tree=" + tree._mtrys_per_tree + ": " + Arrays.toString(activeCols));

      int[] cols = new int[activeCols.length];
      int len=0;

      // collect columns that can be split (non-constant, large enough to split, etc.)
      for(int i = 0; i< activeCols.length; i++ ) {
        int idx = activeCols[i];
        assert(idx == i || tree._mtrys_per_tree < _hs.length);
        if( _hs[idx]==null ) continue; // Ignore not-tracked cols
        assert _hs[idx]._min < _hs[idx]._maxEx && _hs[idx].actNBins() > 1 : "broken histo range "+_hs[idx];
        cols[len++] = idx;        // Gather active column
      }
      if (LOG.isTraceEnabled()) LOG.trace("These columns can be split: " + Arrays.toString(Arrays.copyOfRange(cols, 0, len)));
      int choices = len;        // Number of columns I can choose from

      int mtries = tree.actual_mtries();
      if (choices > 0) { // It can happen that we have no choices, because this node cannot be split any more (all active columns are constant, for example).
        // Draw up to mtry columns at random without replacement.
        for (int i = 0; i < mtries; i++) {
          if (len == 0) break;   // Out of choices!
          int idx2 = tree._rand.nextInt(len);
          int col = cols[idx2];     // The chosen column
          cols[idx2] = cols[--len]; // Compress out of array; do not choose again
          cols[len] = col;          // Swap chosen in just after 'len'
        }
        assert len < choices;
      }
      if (LOG.isTraceEnabled()) LOG.trace("Picking these (mtry=" + mtries + ") columns to evaluate for splitting: " + Arrays.toString(Arrays.copyOfRange(cols, len, choices)));
      return Arrays.copyOfRange(cols, len, choices);
    }

    // Make the parent of this Node use UNINTIALIZED NIDs for its children to prevent the split that this
    // node otherwise induces.  Happens if we find out too-late that we have a
    // perfect prediction here, and we want to turn into a leaf.
    public void doNotSplit( ) {
      if( _pid == NO_PARENT) return; // skip root
      DecidedNode dn = _tree.decided(_pid);
      for( int i=0; i nbins ) nbins = hs.nbins();

      for( int i=0; i w )
        s = String.format("%4.1f",d);
      if( s.length() > w )
        s = String.format("%4.0f",d);
      return p(sb,s,w);
    }

    @Override public StringBuilder toString2(StringBuilder sb, int depth) {
      for( int d=0; d=
    //       T         |  !=   ==
    public final int _nids[];          // Children NIDS for the split LEFT, RIGHT

    transient byte _nodeType; // Complex encoding: see the compressed struct comments
    transient int _size = 0;  // Compressed byte size of this subtree
    transient int _nnodes = 0; // Number of nodes in this subtree

    public DecidedNode(DecidedNode node, DTree tree){
      super(tree, node._pid, node._nid, true);
      _split = node._split;
      _splat = node._splat;
      _nids = node._nids;
      _nodeType = node._nodeType;
      _size = node._size;
      _nnodes = node._nnodes;
    }

    // Make a correctly flavored Undecided
    public UndecidedNode makeUndecidedNode(DHistogram hs[], Constraints cs, BranchInteractionConstraints bics) {
      return new UndecidedNode(_tree, _nid, hs, cs, bics);
    }

    // Pick the best column from the given histograms
    public Split bestCol(UndecidedNode u, DHistogram hs[], Constraints cs) {
      DTree.Split best = null;
      if( hs == null ) return null;
      final int maxCols = u._scoreCols == null /* all cols */ ? hs.length : u._scoreCols.length;
      List findSplits = new ArrayList<>();
      //total work is to find the best split across sum_over_cols_to_split(nbins)
      long nbinsSum = 0;
      for( int i=0; i= _splat ? 1 : 0;
//        else if (_split._equal == 1)
//          bin = d == _splat ? 1 : 0;
        }
        else if (_split._equal >= 2) {
          int b = (int)d;
          if (_split._bs.isInRange(b)) {
            bin = _split._bs.contains(b) ? 1 : 0; // contains goes right
          } else {
            isNA = true;
          }
        }
      }

      // NA handling
      if (isNA) {
        if (_split._nasplit== DHistogram.NASplitDir.NALeft || _split._nasplit == DHistogram.NASplitDir.Left) {
          bin = 0;
        } else if (_split._nasplit == DHistogram.NASplitDir.NARight || _split._nasplit == DHistogram.NASplitDir.Right || _split._nasplit == DHistogram.NASplitDir.NAvsREST) {
          bin = 1;
        } else if (_split._nasplit == DHistogram.NASplitDir.None) {
          bin = 1; // if no NAs in training, but NAs in testing -> go right TODO: Pick optimal direction
        } else throw H2O.unimpl();
      }
      return _nids[bin];
    }

    public double pred( int nid ) {
      return nid==0 ? _split._p0 : _split._p1;
    }

    public double predTreatment( int nid ) {
      return nid==0 ? _split._p0Treat : _split._p1Treat;
    }

    public double predControl( int nid ) {
      return nid==0 ? _split._p0Contr : _split._p1Contr;
    }

    @Override public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("DecidedNode:\n");
      sb.append("_nid: " + _nid + "\n");
      sb.append("_nids (children): " + Arrays.toString(_nids) + "\n");
      if (_split!=null)
        sb.append("_split:" + _split.toString() + "\n");
      sb.append("_splat:" + _splat + "\n");
      if( _split == null ) {
        sb.append(" col = -1\n");
      } else {
        int col = _split._col;
        if (_split._equal == 1) {
          sb.append(_tree._names[col] + " != " + _splat + "\n" +
                  _tree._names[col] + " == " + _splat + "\n");
        } else if (_split._equal == 2 || _split._equal == 3) {
          sb.append(_tree._names[col] + " not in " + _split._bs.toString() + "\n" +
                  _tree._names[col] + "  is in " + _split._bs.toString() + "\n");
        } else {
          sb.append(
                  _tree._names[col] + " < " + _splat + "\n" +
                          _splat + " >=" + _tree._names[col] + "\n");
        }
      }
      return sb.toString();
    }

    StringBuilder printChild( StringBuilder sb, int nid ) {
      int i = _nids[0]==nid ? 0 : 1;
      assert _nids[i]==nid : "No child nid "+nid+"? " +Arrays.toString(_nids);
      sb.append("[").append(_tree._names[_split._col]);
      sb.append(_split._equal != 0
                ? (i==0 ? " != " : " == ")
                : (i==0 ? " <  " : " >= "));
      sb.append((_split._equal == 2 || _split._equal == 3) ? _split._bs.toString() : _splat).append("]");
      return sb;
    }

    @Override public StringBuilder toString2(StringBuilder sb, int depth) {
      assert(_nids.length==2);
      for( int i=0; i<_nids.length; i++ ) {
        for( int d=0; d= "));
              if (_split._nasplit == DHistogram.NASplitDir.NALeft || _split._nasplit == DHistogram.NASplitDir.Left)
                sb.append(_split._equal != 0 ? (i == 0 ? " is NA or != " : " == ") : (i == 0 ? " is NA or <  " : " >= "));
            } else {
              sb.append(i == 0 ? " not in " : "  is in ");
            }
            sb.append((_split._equal == 2 || _split._equal == 3) ? _split._bs.toString() : _splat).append("\n");
          }
        }
        if( _nids[i] >= 0 && _nids[i] < _tree._len )
          _tree.node(_nids[i]).toString2(sb,depth+1);
      }
      return sb;
    }

    // Size of this subtree; sets _nodeType also
    @Override public final int size(){
      if( _size != 0 ) return _size; // Cached size

      assert _nodeType == 0:"unexpected node type: " + _nodeType;
      if(_split._equal != 0)
        _nodeType |= _split._equal == 1 ? 4 : (_split._equal == 2 ? 8 : 12);

      // int res = 7;  // 1B node type + flags, 2B colId, 4B float split val
      // 1B node type + flags, 2B colId, 4B split val/small group or (2B offset + 4B size) + large group
      int res = _split._equal == 3 ? 9 + _split._bs.numBytes() : 7;

      // NA handling correction
      res++; //1 byte for NA split dir
      if (_split._nasplit == DHistogram.NASplitDir.NAvsREST) {
        assert _split._equal == 0;
        res -= 4; // we don't need to represent the actual split value
      }

      Node left = _tree.node(_nids[0]);
      int lsz = left.size();
      res += lsz;
      if( left instanceof LeafNode ) _nodeType |= (byte)48;
      else {
        int slen = lsz < 256 ? 0 : (lsz < 65535 ? 1 : (lsz<(1<<24) ? 2 : 3));
        _nodeType |= slen; // Set the size-skip bits
        res += (slen+1); //
      }

      Node right = _tree.node(_nids[1]);
      if( right instanceof LeafNode ) _nodeType |= (byte)(48 << 2);
      res += right.size();
      assert (_nodeType&0x33) != 51;
      assert res != 0;
      return (_size = res);
    }

    @Override
    protected int numNodes() {
      if (_nnodes > 0)
        return _nnodes;
      _nnodes = 1 + _tree.node(_nids[0]).numNodes() + _tree.node(_nids[1]).numNodes();
      return _nnodes;
    }

    // Compress this tree into the AutoBuffer
    @Override public AutoBuffer compress(AutoBuffer ab, AutoBuffer abAux) {
      int pos = ab.position();
      if( _nodeType == 0 ) size(); // Sets _nodeType & _size both
      ab.put1(_nodeType);          // Includes left-child skip-size bits
      assert _split != null;    // Not a broken root non-decision?
      assert _split._col >= 0;
      ab.put2((short)_split._col);
      ab.put1((byte)_split._nasplit.value());

      // Save split-at-value or group
      if (_split._nasplit!= DHistogram.NASplitDir.NAvsREST) {
        if (_split._equal == 0 || _split._equal == 1) ab.put4f(_splat);
        else if(_split._equal == 2) _split._bs.compress2(ab);
        else _split._bs.compress3(ab);
      }
      if (abAux != null) {
        abAux.put4(_nid);
        abAux.put4(_tree.node(_nids[0]).numNodes()); // number of nodes in the left subtree; this used to be 'parent node id'
        abAux.put4f((float)_split._n0);
        abAux.put4f((float)_split._n1);
        abAux.put4f((float)_split._p0);
        abAux.put4f((float)_split._p1);
        abAux.put4f((float)_split._se0);
        abAux.put4f((float)_split._se1);
        abAux.put4(_nids[0]);
        abAux.put4(_nids[1]);
      }

      Node left = _tree.node(_nids[0]);
      if( (_nodeType&48) == 0 ) { // Size bits are optional for left leaves !
        int sz = left.size();
        if(sz < 256)            ab.put1(       sz);
        else if (sz < 65535)    ab.put2((short)sz);
        else if (sz < (1<<24))  ab.put3(       sz);
        else                    ab.put4(       sz); // 1<<31-1
      }
      // now write the subtree in
      left.compress(ab, abAux);
      Node rite = _tree.node(_nids[1]);
      rite.compress(ab, abAux);
      assert _size == ab.position()-pos:"reported size = " + _size + " , real size = " + (ab.position()-pos);
      return ab;
    }
  }

  public final static class LeafNode extends Node {
    public float _pred;
    public LeafNode( DTree tree, int pid ) { super(tree,pid); tree._leaves++; }
    public LeafNode( DTree tree, int pid, int nid ) { super(tree,pid,nid); tree._leaves++; }
    public LeafNode( LeafNode node, DTree tree) {
      super(tree,node._pid, node._nid, true);
      _pred = node._pred;
    }
    @Override public String toString() { return "Leaf#"+_nid+" = "+_pred; }
    @Override public final StringBuilder toString2(StringBuilder sb, int depth) {
      for( int d=0; d= 1;

    final boolean hasPreds = hs.hasPreds();
    final boolean hasDenom = hs.hasDenominator();
    final boolean hasNomin = hs.hasNominator();

    // Histogram arrays used for splitting, these are either the original bins
    // (for an ordered predictor), or sorted by the mean response (for an
    // unordered predictor, i.e. categorical predictor).
    double[]   vals =   hs._vals;
    final int vals_dim = hs._vals_dim; 
    int idxs[] = null;          // and a reverse index mapping

    // For categorical (unordered) predictors, sort the bins by average
    // prediction then look for an optimal split.
    if( hs._isInt == 2 && hs._step == 1 ) {
      // Sort the index by average response
      idxs = MemoryManager.malloc4(nbins+1); // Reverse index
      for( int i=0; i=0; b-- ) {
      double n0 =   whi[b+1], n1 = vals[vals_dim*b];
      if( n0==0 && n1==0 )
        continue;
      double m0 =  wYhi[b+1], m1 = vals[vals_dim*b+1];
      double s0 = wYYhi[b+1], s1 = vals[vals_dim*b+2];
      whi[b] = n0+n1;
      wYhi[b] = m0+m1;
      wYYhi[b] = s0+s1;
      if (hasPreds) {
        double p10 = pr1hi[b + 1], p11 = vals[vals_dim * b + 3];
        double p20 = pr2hi[b + 1], p21 = vals[vals_dim * b + 4];
        pr1hi[b] = p10 + p11;
        pr2hi[b] = p20 + p21;
        if (hasDenom) {
          double d0 = denhi[b + 1], d1 = vals[vals_dim * b + 5];
          denhi[b] = d0 + d1;
        }
        if (hasNomin) {
          double d0 = nomhi[b + 1], d1 = vals[vals_dim * b + 6];
          nomhi[b] = d0 + d1;
        }
      }
      assert MathUtils.compare(wlo[b]+ whi[b]+wNA,tot,1e-5,1e-5);
    }

    double best_seL=Double.MAX_VALUE;   // squared error for left side of the best split (so far)
    double best_seR=Double.MAX_VALUE;   // squared error for right side of the best split (so far)
    DHistogram.NASplitDir nasplit = DHistogram.NASplitDir.None;

    // squared error of all non-NAs
    double seNonNA = wYYhi[0] - wYhi[0]* wYhi[0]/ whi[0]; // Squared Error with no split
    if (seNonNA < 0) seNonNA = 0;
    double seBefore = seNonNA;

    double nLeft = 0;
    double nRight = 0;
    double predLeft = 0;
    double predRight = 0;
    double tree_p0 = 0;
    double tree_p1 = 0;

    // if there are any NAs, then try to split them from the non-NAs
    if (wNA>=min_rows) {
      double seAll = (wYYhi[0] + wYYNA) - (wYhi[0] + wYNA) * (wYhi[0] + wYNA) / (whi[0] + wNA);
      double seNA = wYYNA - wYNA * wYNA / wNA;
      if (seNA < 0) seNA = 0;
      best_seL = seNonNA;
      best_seR = seNA;
      nasplit = DHistogram.NASplitDir.NAvsREST;
      seBefore = seAll;
      nLeft = whi[0]; //all non-NAs
      predLeft = wYhi[0];
      nRight = wNA;
      predRight = wYNA;
      if(hasDenom){
        tree_p0 = nomhi[0] /denhi[0];
        tree_p1 = nomNA / denNA;
      } else {
        tree_p0 = predLeft / nLeft;
        tree_p1 = predRight / nRight;
      }
    }

    // Now roll the split-point across the bins.  There are 2 ways to do this:
    // split left/right based on being less than some value, or being equal/
    // not-equal to some value.  Equal/not-equal makes sense for categoricals
    // but both splits could work for any integral datatype.  Do the less-than
    // splits first.
    int best=0;                         // The no-split
    byte equal=0;                       // Ranged check
    for( int b=1; b<=nbins-1; b++ ) {
      if( vals[vals_dim*b] == 0 ) continue; // Ignore empty splits
      if( wlo[b]+wNA < min_rows ) continue;
      if( whi[b]+wNA < min_rows ) break; // w1 shrinks at the higher bin#s, so if it fails once it fails always
      // We're making an unbiased estimator, so that MSE==Var.
      // Then Squared Error = MSE*N = Var*N
      //                    = (wYY/N - wY^2)*N
      //                    = wYY - N*wY^2
      //                    = wYY - N*(wY/N)(wY/N)
      //                    = wYY - wY^2/N

      // no NAs
      if (wNA==0) {
        double selo = wYYlo[b] - wYlo[b] * wYlo[b] / wlo[b];
        double sehi = wYYhi[b] - wYhi[b] * wYhi[b] / whi[b];
        if (selo < 0) selo = 0;    // Roundoff error; sometimes goes negative
        if (sehi < 0) sehi = 0;    // Roundoff error; sometimes goes negative
        if ((selo + sehi < best_seL + best_seR) || // Strictly less error?
                // Or tied MSE, then pick split towards middle bins
                (selo + sehi == best_seL + best_seR &&
                        Math.abs(b - (nbins >> 1)) < Math.abs(best - (nbins >> 1)))) {
          double tmpPredLeft;
          double tmpPredRight;
          if(constraint != 0 && dist._family.equals(DistributionFamily.quantile)) { 
            int quantileBinLeft = 0;
            int quantileBinRight = 0;
            for (int bin = 1; bin <= nbins; bin++) {
              // left tree prediction quantile
              if (bin <= b) {
                double n = wlo[b];
                double quantilePosition = dist._quantileAlpha * n;
                if(quantilePosition < wlo[bin]){
                  quantileBinLeft = bin;
                  bin = b+1;
                }
                // right tree prediction quantile
              } else {
                double n = (wlo[nbins] - wlo[b]);
                double quantilePosition = dist._quantileAlpha * n;
                if (quantilePosition < wlo[bin] - wlo[b]) {
                  quantileBinRight = bin;
                  break;
                }
              }
            }
            tmpPredLeft = wYlo[quantileBinLeft];
            tmpPredRight = wYlo[quantileBinRight] - wYlo[b]; 
          } else {
            tmpPredLeft = hasDenom ? nomlo[b] / denlo[b] : wYlo[b] / wlo[b];
            tmpPredRight = hasDenom ? nomhi[b] / denhi[b] : wYhi[b] / whi[b];
          }
          if (constraint == 0 || (constraint * tmpPredLeft <= constraint * tmpPredRight)) {
            best_seL = selo;
            best_seR = sehi;
            best = b;
            nLeft = wlo[best];
            nRight = whi[best];
            predLeft = wYlo[best];
            predRight = wYhi[best];
            tree_p0 = tmpPredLeft;
            tree_p1 = tmpPredRight;
          }
        }
      } else {
        // option 1: split the numeric feature and throw NAs to the left
        {
          double selo = wYYlo[b] + wYYNA - (wYlo[b] + wYNA) * (wYlo[b] + wYNA) / (wlo[b] + wNA);
          double sehi = wYYhi[b] - wYhi[b] * wYhi[b] / whi[b];
          if (selo < 0) selo = 0;    // Roundoff error; sometimes goes negative
          if (sehi < 0) sehi = 0;    // Roundoff error; sometimes goes negative
          if ((selo + sehi < best_seL + best_seR) || // Strictly less error?
                  // Or tied SE, then pick split towards middle bins
                  (selo + sehi == best_seL + best_seR &&
                          Math.abs(b - (nbins >> 1)) < Math.abs(best - (nbins >> 1)))) {
            if((wlo[b] + wNA) >= min_rows && whi[b] >= min_rows) {
              double tmpPredLeft;
              double tmpPredRight;
              if(constraint != 0 && dist._family.equals(DistributionFamily.quantile)) {
                int quantileBinLeft = 0;
                int quantileBinRight = 0;
                for (int bin = 1; bin <= nbins; bin++) {
                  // left tree prediction quantile
                  if (bin <= b) {
                    double n = wlo[b];
                    double quantilePosition = dist._quantileAlpha * n;
                    if(quantilePosition < wlo[bin]){
                      quantileBinLeft = bin;
                      bin = b+1;
                    }
                    // right tree prediction quantile
                  } else {
                    double n = (wlo[nbins] - wlo[b]);
                    double quantilePosition = dist._quantileAlpha * n;
                    if (quantilePosition < wlo[bin] - wlo[b]) {
                      quantileBinRight = bin;
                      break;
                    }
                  }
                }
                tmpPredLeft = wYlo[quantileBinLeft] + wYNA;
                tmpPredRight = wYlo[quantileBinRight] - wYlo[b];
              } else { 
                tmpPredLeft = hasDenom ? (nomlo[b] + nomNA) / (denlo[b] + denNA) : (wYlo[b] + wYNA) / (wlo[b] + wNA);
                tmpPredRight = hasDenom ? nomhi[b] / denhi[b] : wYhi[b] / whi[b];
              }
              if (constraint == 0 || (constraint * tmpPredLeft <= constraint * tmpPredRight)) {
                best_seL = selo;
                best_seR = sehi;
                best = b;
                nLeft = wlo[best] + wNA;
                nRight = whi[best];
                predLeft = wYlo[best] + wYNA;
                predRight = wYhi[best];
                nasplit = DHistogram.NASplitDir.NALeft;
                tree_p0 = tmpPredLeft;
                tree_p1 = tmpPredRight;
              }
            }
          }
        }

        // option 2: split the numeric feature and throw NAs to the right
        {
          double selo = wYYlo[b] - wYlo[b] * wYlo[b] / wlo[b];
          double sehi = wYYhi[b]+wYYNA - (wYhi[b]+wYNA) * (wYhi[b]+wYNA) / (whi[b]+wNA);
          if (selo < 0) selo = 0;    // Roundoff error; sometimes goes negative
          if (sehi < 0) sehi = 0;    // Roundoff error; sometimes goes negative
          if ((selo + sehi < best_seL + best_seR) || // Strictly less error?
                  // Or tied SE, then pick split towards middle bins
                  (selo + sehi == best_seL + best_seR &&
                          Math.abs(b - (nbins >> 1)) < Math.abs(best - (nbins >> 1)))) {
            if( wlo[b] >= min_rows && (whi[b] + wNA) >= min_rows ) {
              double tmpPredLeft;
              double tmpPredRight;
              if(constraint != 0 && dist._family.equals(DistributionFamily.quantile)) {
                int quantileBinLeft = 0;
                int quantileBinRight = 0;
                double ratio = 1;
                double delta = 1;
                for (int bin = 1; bin <= nbins; bin++) {
                  // left tree prediction quantile
                  if (bin <= b) {
                    double n = wlo[b];
                    double quantilePosition = dist._quantileAlpha * n;
                    if(quantilePosition < wlo[bin]){
                      quantileBinLeft = bin;
                      bin = b+1;
                    }
                    // right tree prediction quantile
                  } else {
                    double n = (wlo[nbins] - wlo[b]);
                    double quantilePosition = dist._quantileAlpha * n;
                    if (quantilePosition < wlo[bin] - wlo[b]) {
                      quantileBinRight = bin;
                      break;
                    }
                  }
                }
                tmpPredLeft = wYlo[quantileBinLeft];
                tmpPredRight = wYlo[quantileBinRight] - wYlo[b] + wYNA;
              } else {
                tmpPredLeft = hasDenom ? nomlo[b] / denlo[b] : wYlo[b] / wlo[b];
                tmpPredRight = hasDenom ? (nomhi[b] + nomNA) / (denhi[b] + denNA) : (wYhi[b] + wYNA) / (whi[b] + wNA);
              }
              if (constraint == 0 || (constraint * tmpPredLeft <= constraint * tmpPredRight)) {
                best_seL = selo;
                best_seR = sehi;
                best = b;
                nLeft = wlo[best];
                nRight = whi[best] + wNA;
                predLeft = wYlo[best];
                predRight = wYhi[best] + wYNA;
                nasplit = DHistogram.NASplitDir.NARight;
                tree_p0 = tmpPredLeft;
                tree_p1 = tmpPredRight;
              }
            }
          }
        }
      }
    }

    if( best==0 && nasplit== DHistogram.NASplitDir.None) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": no optimal split found:\n" + hs);
      return null;
    }

    //if( se <= best_seL+best_se1) return null; // Ultimately roundoff error loses, and no split actually helped
    if (!(best_seL+ best_seR < seBefore * (1- hs._minSplitImprovement))) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": not enough relative improvement: " + (1-(best_seL + best_seR) / seBefore) + "\n" + hs);
      return null;
    }

    assert(Math.abs(tot - (nRight + nLeft)) < 1e-5*tot);

    if( MathUtils.equalsWithinOneSmallUlp((float)(predLeft / nLeft),(float)(predRight / nRight)) ) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": Predictions for left/right are the same.");
      return null;
    }

    if (nLeft < min_rows || nRight < min_rows) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": split would violate min_rows limit.");
      return null;
    }

    // FIXME (PUBDEV-7553): these asserts do not hold because histogram doesn't skip rows with NA response
    // assert hasNomin || nomLeft == predLeft;
    // assert hasNomin || nomRight == predRight;

    final double node_p0 = predLeft / nLeft;
    final double node_p1 = predRight / nRight;

    if (constraint != 0) {
      if (constraint * tree_p0 > constraint * tree_p1) {
        if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": split would violate monotone constraint.");
        return null;
      }
    }

    if (!Double.isNaN(min)) {
      if (tree_p0 < min) {
        if (! useBounds) {
          if (LOG.isTraceEnabled()) LOG.trace("minimum constraint violated in the left split of " + hs._name + ": node will not split");
          return null;
        }
        if (LOG.isTraceEnabled()) LOG.trace("minimum constraint violated in the left split of " + hs._name + ": left node will predict minimum bound: " + min);
        tree_p0 = min;
        if (nasplit == DHistogram.NASplitDir.NAvsREST) {
          best_seL = pr1hi[0];
        } else if (nasplit == DHistogram.NASplitDir.NALeft) {
          best_seL = pr1lo[best] + hs.seP1NA();
        } else {
          best_seL = pr1lo[best];
        }
      }
      if (tree_p1 < min) {
        if (! useBounds) {
          if (LOG.isTraceEnabled()) LOG.trace("minimum constraint violated in the right split of " + hs._name + ": node will not split");
          return null;
        }
        if (LOG.isTraceEnabled()) LOG.trace("minimum constraint violated in the right split of " + hs._name + ": right node will predict minimum bound: " + min);
        tree_p1 = min;
        if (nasplit == DHistogram.NASplitDir.NAvsREST) {
          best_seR = hs.seP1NA();
        } else if (nasplit == DHistogram.NASplitDir.NARight) {
          best_seR = pr1hi[best] + hs.seP1NA();
        } else {
          best_seR = pr1hi[best];
        }
      }
    }
    if (!Double.isNaN(max)) {
      if (tree_p0 > max) {
        if (! useBounds) {
          if (LOG.isTraceEnabled()) LOG.trace("minimum constraint violated in the left split of " + hs._name + ": node will not split");
          return null;
        }
        if (LOG.isTraceEnabled()) LOG.trace("maximum constraint violated in the left split of " + hs._name + ": left node will predict maximum bound: " + max);
        tree_p0 = max;
        if (nasplit == DHistogram.NASplitDir.NAvsREST) {
          best_seL = pr2hi[0];
        } else if (nasplit == DHistogram.NASplitDir.NALeft) {
          best_seL = pr2lo[best] + hs.seP2NA();
        } else {
          best_seL = pr2lo[best];
        }
      }
      if (tree_p1 > max) {
        if (! useBounds) {
          if (LOG.isTraceEnabled()) LOG.trace("minimum constraint violated in the right split of " + hs._name + ": node will not split");
          return null;
        }
        if (LOG.isTraceEnabled()) LOG.trace("maximum constraint violated in the right split of " + hs._name + ": right node will predict maximum bound: " + max);
        tree_p1 = max;
        if (nasplit == DHistogram.NASplitDir.NAvsREST) {
          best_seR = hs.seP2NA();
        } else if (nasplit == DHistogram.NASplitDir.NARight) {
          best_seR = pr2hi[best] + hs.seP2NA();
        } else {
          best_seR = pr2hi[best];
        }
      }
    }

    if (!(best_seL + best_seR < seBefore * (1- hs._minSplitImprovement))) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": not enough relative improvement: " + (1-(best_seL + best_seR) / seBefore) + "\n" + hs);
      return null;
    }

    if( MathUtils.equalsWithinOneSmallUlp((float) tree_p0,(float) tree_p1) ) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": Predictions for left/right are the same.");
      return null;
    }


    // For categorical (unordered) predictors, we sorted the bins by average
    // prediction then found the optimal split on sorted bins
    IcedBitSet bs = null;       // In case we need an arbitrary bitset
    if (idxs != null            // We sorted bins; need to build a bitset
            && nasplit != DHistogram.NASplitDir.NAvsREST) { // NA vs REST don't need a bitset
      final int off = (int) hs._min;
      bs = new IcedBitSet(nbins, off);
      equal = fillBitSet(hs, off, idxs, best, nbins, bs);
      if (equal < 0)
        return null;
    }

    // if still undecided (e.g., if there are no NAs in training), pick a good default direction for NAs in test time
    if (nasplit == DHistogram.NASplitDir.None) {
      nasplit = nLeft > nRight ? DHistogram.NASplitDir.Left : DHistogram.NASplitDir.Right;
    }
    
    assert constraint == 0 || constraint * tree_p0 <= constraint * tree_p1;
    assert (Double.isNaN(min) || min <= tree_p0) && (Double.isNaN(max) || tree_p0 <= max);
    assert (Double.isNaN(min) || min <= tree_p1) && (Double.isNaN(max) || tree_p1 <= max);

    Split split = new Split(col, best, nasplit, bs, equal, seBefore, best_seL, best_seR, nLeft, nRight, node_p0, node_p1, tree_p0, tree_p1);
    if (LOG.isTraceEnabled()) LOG.trace("splitting on " + hs._name + ": " + split);
    return split;
  }

  static Split findBestSplitPointUplift(DHistogram hs, int col, double min_rows) {
    if(hs._valsUplift == null) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": histogram not filled yet.");
      return null;
    }
    final int nbins = hs.nbins();
    assert nbins >= 1;

    final Divergence upliftMetric = hs._upliftMetric;

    // Histogram arrays used for splitting, these are either the original bins
    // (for an ordered predictor), or sorted by the mean response (for an
    // unordered predictor, i.e. categorical predictor).
    double[]   vals =   hs._vals;
    final int vals_dim = hs._vals_dim;
    double[] valsUplift = hs._valsUplift;
    final int valsUpliftDim = 4;
    int idxs[] = null;          // and a reverse index mapping

    // For categorical (unordered) predictors, sort the bins by average
    // prediction then look for an optimal split.
    if( hs._isInt == 2 && hs._step == 1 ) {
      // Sort the index by average response
      idxs = MemoryManager.malloc4(nbins+1); // Reverse index
      for( int i=0; i= 0; b-- ) {
      int id = vals_dim * b;
      double n0 = whi[b+1], n1 = vals[id];
      if( n0==0 && n1==0 )
        continue;
      double m0 =  wYhi[b + 1], m1 = vals[id + 1];
      double s0 = wYYhi[b + 1], s1 = vals[id + 2];
      whi[b] = n0+n1;
      wYhi[b] = m0+m1;
      wYYhi[b] = s0+s1;
      id = valsUpliftDim * b;
      double nt0 = numhiTreat[b + 1], nt1 = valsUplift[id];
      numhiTreat[b] = nt0 + nt1;
      double dt0 = resphiTreat[b + 1], dt1 = valsUplift[id + 1];
      resphiTreat[b] = dt0 + dt1;
      double nc0 = numhiContr[b + 1], nc1 = valsUplift[id + 2];
      numhiContr[b] = nc0 + nc1;
      double dc0 = resphiContr[b + 1], dc1 = valsUplift[id + 3];
      resphiContr[b] = dc0 + dc1;
      assert MathUtils.compare(wlo[b]+ whi[b]+wNA,tot,1e-5,1e-5);
    }

    double best_seL=Double.MAX_VALUE;   // squared error for left side of the best split (so far)
    double best_seR=Double.MAX_VALUE;   // squared error for right side of the best split (so far)
    DHistogram.NASplitDir nasplit = DHistogram.NASplitDir.None;

    // squared error of all non-NAs
    double seNonNA = wYYhi[0] - wYhi[0]* wYhi[0]/ whi[0]; // Squared Error with no split
    if (seNonNA < 0) seNonNA = 0;
    double seBefore = seNonNA;

    double nLeft = 0;
    double nRight = 0;
    double predLeft = 0;
    double predRight = 0;
    double tree_p0 = 0;
    double tree_p1 = 0;

    double  numTreatNA = hs.numTreatmentNA();
    double  respTreatNA = hs.respTreatmentNA();
    double  numContrNA = hs.numControlNA();
    double  respContrNA = hs.respControlNA();

    double bestNLCT1 = 0;
    double bestNLCT0 = 0;
    double bestNRCT1 = 0;
    double bestNRCT0 = 0;
    double bestPrLY1CT1 = 0;
    double bestPrLY1CT0 = 0;
    double bestPrRY1CT1 = 0;
    double bestPrRY1CT0 = 0;

    double nCT1 = numhiTreat[0];
    double nCT0 = numhiContr[0];
    double prY1CT1 = resphiTreat[0]/nCT1;
    double prY1CT0 = resphiContr[0]/nCT0;
    double bestUpliftGain = upliftMetric.node(prY1CT1 , prY1CT0);
    // if there are any NAs, then try to split them from the non-NAs
    if (wNA>=min_rows) {
      double prCT1All = (nCT1 + numTreatNA + 1)/(nCT0 + numContrNA + nCT1 + numTreatNA + 2);
      double prCT0All = 1-prCT1All;
      double prY1CT1All = (resphiTreat[0] + respTreatNA) / (nCT1 + numTreatNA);
      double prY1CT0All = (resphiContr[0] + respContrNA) / (nCT0 + numContrNA);
      double prLCT1 = (nCT1 + 1)/(nCT0 + nCT1 + 2);
      double prLCT0 = 1 - prLCT1;
      double prL = prLCT1 * prCT1All + prLCT0 * prCT0All;
      double prR = 1 - prL;
      double nLCT1 = numhiTreat[0];
      double nLCT0 = numhiContr[0];
      double prLY1CT1 = (resphiTreat[0] + 1) / (numhiTreat[0] + 2);
      double prLY1CT0 = (resphiContr[0] + 1) / (numhiContr[0] + 2);
      double nRCT1 = numTreatNA;
      double nRCT0 = numContrNA;
      double prRY1CT1 = (respTreatNA + 1) / (numTreatNA + 2);
      double prRY1CT0 = (respContrNA + 1) / (numContrNA + 2);
      bestUpliftGain = upliftMetric.value(prY1CT1All, prY1CT0All, prL, prLY1CT1, prLY1CT0, prR, prRY1CT1, prRY1CT0, prCT1All, prCT0All, prLCT1, prLCT0);
      bestNLCT1 = nLCT1;
      bestNLCT0 = nLCT0;
      bestNRCT1 = nRCT1;
      bestNRCT0 = nRCT0;
      bestPrLY1CT1 = prLY1CT1;
      bestPrLY1CT0 = prLY1CT0;
      bestPrRY1CT1 = prRY1CT1;
      bestPrRY1CT0 = prRY1CT0;
      double seAll = (wYYhi[0] + wYYNA) - (wYhi[0] + wYNA) * (wYhi[0] + wYNA) / (whi[0] + wNA);
      double seNA = wYYNA - wYNA * wYNA / wNA;
      if (seNA < 0) seNA = 0;
      best_seL = seNonNA;
      best_seR = seNA;
      nasplit = DHistogram.NASplitDir.NAvsREST;
      seBefore = seAll;
      nLeft = whi[0]; //all non-NAs
      predLeft = wYhi[0];
      nRight = wNA;
      predRight = wYNA;
      tree_p0 = predLeft / nLeft;
      tree_p1 = predRight / nRight;
    }

    // Now roll the split-point across the bins.  There are 2 ways to do this:
    // split left/right based on being less than some value, or being equal/
    // not-equal to some value.  Equal/not-equal makes sense for categoricals
    // but both splits could work for any integral datatype.  Do the less-than
    // splits first.
    int best=0;                         // The no-split
    byte equal=0;                       // Ranged check
    for( int b=1; b<=nbins-1; b++ ) {
      if( vals[vals_dim*b] == 0 ) continue; // Ignore empty splits
      if( wlo[b]+wNA < min_rows ) continue;
      if( whi[b]+wNA < min_rows ) break; // w1 shrinks at the higher bin#s, so if it fails once it fails always
      // We're making an unbiased estimator, so that MSE==Var.
      // Then Squared Error = MSE*N = Var*N
      //                    = (wYY/N - wY^2)*N
      //                    = wYY - N*wY^2
      //                    = wYY - N*(wY/N)(wY/N)
      //                    = wYY - wY^2/N

      // no NAs
      if (wNA==0) {
        double selo = wYYlo[b] - wYlo[b] * wYlo[b] / wlo[b];
        double sehi = wYYhi[b] - wYhi[b] * wYhi[b] / whi[b];
        if (selo < 0) selo = 0;    // Roundoff error; sometimes goes negative
        if (sehi < 0) sehi = 0;    // Roundoff error; sometimes goes negative
        nCT1 = numhiTreat[b];
        nCT0 = numhiContr[b];
        double prCT1 = (nCT1 + 1)/(nCT0 + nCT1 + 2);
        double prCT0 = 1-prCT1;
        double prLCT1 = (numloTreat[b] + 1)/(numloTreat[b] + numhiTreat[b] + 2);
        double prLCT0 = 1 - prLCT1;
        double prL = prLCT1 * prCT1 + prLCT0 * prCT0;
        double prR = 1 - prL;
        double nLCT1 = numloTreat[b];
        double nLCT0 = numloContr[b];
        double prLY1CT1 = (resploTreat[b] + 1) / (numloTreat[b] + 2);
        double prLY1CT0 = (resploContr[b] + 1) / (numloContr[b] + 2);
        double nRCT1 = numhiTreat[b];
        double nRCT0 = numhiContr[b];
        double prRY1CT1 = (resphiTreat[b] + 1) / (numhiTreat[b] + 2);
        double prRY1CT0 = (resphiContr[b] + 1) / (numhiContr[b] + 2);
        double upliftGain = upliftMetric.value(prY1CT1, prY1CT0, prL, prLY1CT1, prLY1CT0, prR, prRY1CT1, prRY1CT0, prCT1, prCT0, prLCT1, prLCT0);
        if (upliftGain > bestUpliftGain) {
          double tmpPredLeft =  wYlo[b] / wlo[b];
          double tmpPredRight =  wYhi[b] / whi[b];
          best_seL = selo;
          best_seR = sehi;
          best = b;
          nLeft = wlo[best];
          nRight = whi[best];
          predLeft = wYlo[best];
          predRight = wYhi[best];
          tree_p0 = tmpPredLeft;
          tree_p1 = tmpPredRight;
          bestUpliftGain = upliftGain;
          bestNLCT1 = nLCT1;
          bestNLCT0 = nLCT0;
          bestNRCT1 = nRCT1;
          bestNRCT0 = nRCT0;
          bestPrLY1CT1 = prLY1CT1;
          bestPrLY1CT0 = prLY1CT0;
          bestPrRY1CT1 = prRY1CT1;
          bestPrRY1CT0 = prRY1CT0;
        }
      } else {
        // option 1: split the numeric feature and throw NAs to the left
        {
          double selo = wYYlo[b] + wYYNA - (wYlo[b] + wYNA) * (wYlo[b] + wYNA) / (wlo[b] + wNA);
          double sehi = wYYhi[b] - wYhi[b] * wYhi[b] / whi[b];
          if (selo < 0) selo = 0;    // Roundoff error; sometimes goes negative
          if (sehi < 0) sehi = 0;    // Roundoff error; sometimes goes negative
          nCT1 = numhiTreat[b] + numTreatNA;
          nCT0 = numhiContr[b] + numContrNA;
          double prCT1 = (nCT1 + 1)/(nCT0 + nCT1 + 2);
          double prCT0 = 1 - prCT1;
          double prLCT1 = (numloTreat[b] + numTreatNA + 1)/(numloTreat[b] + numTreatNA + numhiTreat[b] + 2);
          double prLCT0 = 1 - prLCT1;
          double prL = prLCT1 * prCT1 + prLCT0 * prCT0;
          double prR = 1 - prL;
          double nLCT1 = numloTreat[b] + numTreatNA;
          double nLCT0 = numloContr[b] + numContrNA;
          double prLY1CT1 = (resploTreat[b] + respTreatNA + 1) / (numloTreat[b] + numTreatNA + 2);
          double prLY1CT0 = (resploContr[b] + respContrNA + 1) / (numloContr[b] + numContrNA + 2);
          double nRCT1 = numhiTreat[b];
          double nRCT0 = numhiContr[b];
          double prRY1CT1 = (resphiTreat[b] + 1) / (numhiTreat[b] + 2);
          double prRY1CT0 = (resphiContr[b] + 1) / (numhiContr[b] + 2);
          double upliftGain = upliftMetric.value(prY1CT1, prY1CT0, prL, prLY1CT1, prLY1CT0, prR, prRY1CT1, prRY1CT0, prCT1, prCT0, prLCT1, prLCT0);
          if (upliftGain > bestUpliftGain) {
            if((wlo[b] + wNA) >= min_rows && whi[b] >= min_rows) {
              double tmpPredLeft = (wYlo[b] + wYNA) / (wlo[b] + wNA);
              double tmpPredRight = wYhi[b] / whi[b];
              best_seL = selo;
              best_seR = sehi;
              best = b;
              nLeft = wlo[best] + wNA;
              nRight = whi[best];
              predLeft = wYlo[best] + wYNA;
              predRight = wYhi[best];
              nasplit = DHistogram.NASplitDir.NALeft;
              tree_p0 = tmpPredLeft;
              tree_p1 = tmpPredRight;
              bestUpliftGain = upliftGain;
              bestNLCT1 = nLCT1;
              bestNLCT0 = nLCT0;
              bestNRCT1 = nRCT1;
              bestNRCT0 = nRCT0;
              bestPrLY1CT1 = prLY1CT1;
              bestPrLY1CT0 = prLY1CT0;
              bestPrRY1CT1 = prRY1CT1;
              bestPrRY1CT0 = prRY1CT0;
            }
          }
        }

        // option 2: split the numeric feature and throw NAs to the right
        {
          double selo = wYYlo[b] - wYlo[b] * wYlo[b] / wlo[b];
          double sehi = wYYhi[b]+wYYNA - (wYhi[b]+wYNA) * (wYhi[b]+wYNA) / (whi[b]+wNA);
          if (selo < 0) selo = 0;    // Roundoff error; sometimes goes negative
          if (sehi < 0) sehi = 0;    // Roundoff error; sometimes goes negative
          nCT1 = numhiTreat[b] + numTreatNA;
          nCT0 = numhiContr[b] + numContrNA;
          double prCT1 = (nCT1 + 1)/(nCT0 + nCT1 + 2);
          double prCT0 = 1 - prCT1;
          double prLCT1 = (numloTreat[b] + 1)/(numloTreat[b] + numhiTreat[0] + numTreatNA + 2);
          double prLCT0 = 1 - prLCT1;
          double prL = prLCT1 * prCT1 + prLCT0 * prCT0;
          double prR = 1 - prL;
          double nLCT1 = numloTreat[b];
          double nLCT0 = numloContr[b];
          double prLY1CT1 = (resploTreat[b] + respTreatNA + 1) / (numloTreat[b] + 2);
          double prLY1CT0 = (resploContr[b] + respContrNA + 1) / (numloContr[b] + 2);
          double nRCT1 = numhiTreat[b] + numTreatNA;
          double nRCT0 = numhiContr[b] + numContrNA;
          double prRY1CT1 = (resphiTreat[b] + 1) / (numhiTreat[b] + numTreatNA + 2);
          double prRY1CT0 = (resphiContr[b] + 1) / (numhiContr[b] + numContrNA + 2);
          double upliftGain = upliftMetric.value(prY1CT1, prY1CT0, prL, prLY1CT1, prLY1CT0, prR, prRY1CT1, prRY1CT0, prCT1, prCT0, prLCT1, prLCT0);
          if (upliftGain > bestUpliftGain) {
            if( wlo[b] >= min_rows && (whi[b] + wNA) >= min_rows ) {
              double tmpPredLeft = wYlo[b] / wlo[b];
              double tmpPredRight = (wYhi[b] + wYNA) / (whi[b] + wNA);
              best_seL = selo;
              best_seR = sehi;
              best = b;
              nLeft = wlo[best];
              nRight = whi[best] + wNA;
              predLeft = wYlo[best];
              predRight = wYhi[best] + wYNA;
              nasplit = DHistogram.NASplitDir.NARight;
              tree_p0 = tmpPredLeft;
              tree_p1 = tmpPredRight;
              bestNLCT1 = nLCT1;
              bestNLCT0 = nLCT0;
              bestNRCT1 = nRCT1;
              bestNRCT0 = nRCT0;
              bestPrLY1CT1 = prLY1CT1;
              bestPrLY1CT0 = prLY1CT0;
              bestPrRY1CT1 = prRY1CT1;
              bestPrRY1CT0 = prRY1CT0;
            }
          }
        }
      }
    }

    if( best==0 && nasplit== DHistogram.NASplitDir.None) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": no optimal split found:\n" + hs);
      return null;
    }

    if (!(best_seL+ best_seR < seBefore * (1- hs._minSplitImprovement))) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": not enough relative improvement: " + (1-(best_seL + best_seR) / seBefore) + "\n" + hs);
      return null;
    }

    assert(Math.abs(tot - (nRight + nLeft)) < 1e-5*tot);

    if( MathUtils.equalsWithinOneSmallUlp((float)(predLeft / nLeft),(float)(predRight / nRight)) ) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": Predictions for left/right are the same.");
      return null;
    }

    if (nLeft < min_rows || nRight < min_rows) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": split would violate min_rows limit.");
      return null;
    }

    final double node_p0 = predLeft / nLeft;
    final double node_p1 = predRight / nRight;

    if( MathUtils.equalsWithinOneSmallUlp((float) tree_p0,(float) tree_p1) ) {
      if (LOG.isTraceEnabled()) LOG.trace("can't split " + hs._name + ": Predictions for left/right are the same.");
      return null;
    }
    
    // For categorical (unordered) predictors, we sorted the bins by average
    // prediction then found the optimal split on sorted bins
    IcedBitSet bs = null;       // In case we need an arbitrary bitset
    if( idxs != null ) {        // We sorted bins; need to build a bitset
      final int off = (int) hs._min;
      bs = new IcedBitSet(nbins, off);
      equal = fillBitSet(hs, off, idxs, best, nbins, bs);
      if (equal < 0)
        return null;
    }

    // if still undecided (e.g., if there are no NAs in training), pick a good default direction for NAs in test time
    if (nasplit == DHistogram.NASplitDir.None) {
      nasplit = nLeft > nRight ? DHistogram.NASplitDir.Left : DHistogram.NASplitDir.Right;
    }

    Split split = new Split(col, best, nasplit, bs, equal, seBefore, best_seL, best_seR, nLeft, nRight, node_p0, node_p1, tree_p0, tree_p1, bestPrLY1CT1, bestPrLY1CT0, bestPrRY1CT1, bestPrRY1CT0, bestNLCT1, bestNLCT0, bestNRCT1, bestNRCT0);
    if (LOG.isTraceEnabled()) LOG.trace("splitting on " + hs._name + ": " + split);
    return split;
  }

  private static byte fillBitSet(DHistogram hs, int off, int[] idxs, int best, int nbins, IcedBitSet bs) {
    for( int i=best; i 0) {
        if (bs.contains(i + off))
          nonEmptyThatWentRight++;
        else
          nonEmptyThatWentLeft++;
      }
    }
    boolean shouldGoLeft = nonEmptyThatWentLeft >= nonEmptyThatWentRight;
    for (int i=0; i