All Downloads are FREE. Search and download functionalities are using the official Maven repository.

water.rapids.ASTQPFPC Maven / Gradle / Ivy

package water.rapids;

import hex.quantile.Quantile;
import hex.quantile.QuantileModel;
import jsr166y.CountedCompleter;
import water.*;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.fvec.Vec;
import water.util.ArrayUtils;

/**
 * Created by spencer on 5/2/15.
 *
 * Quantile Per Feature Per Class: quantiles of every feature column, computed separately within each class.
 */
public class ASTQPFPC extends ASTUniPrefixOp {
  int _cidx;  // the class index: position of the class column in the input frame's vecs (set by parse_impl, read by apply)
  double _probs[];  // quantile probabilities taken from the AST: a double list or a single number; null for any other argument type
  // ast => (qpfpc %fr #c.idx (dlist ...))
  /** Name of this operator as it appears at the head of the AST, i.e. {@code (qpfpc ...)}. */
  @Override
  String opStr() {
    return "qpfpc";
  }
  /** Returns a new, not-yet-parsed instance of this operator. */
  @Override
  ASTOp make() {
    return new ASTQPFPC();
  }
  /**
   * Parses the operands of {@code (qpfpc %fr #c.idx (dlist ...))}: the frame
   * expression, the class column index, and the probabilities.
   *
   * The probabilities may be given as a double list or as a single number; any
   * other argument type leaves {@code _probs} null.
   *
   * @param E the parser state to read the operands from
   * @return a clone of this op carrying the parsed state, with the frame
   *         expression as its only entry in {@code _asts}
   */
  ASTQPFPC parse_impl(Exec E) {
    final AST frameArg = E.parse();
    _cidx = (int) E.nextDbl();

    final AST probsArg = E.parse();
    final double[] probs;
    if (probsArg instanceof ASTDoubleList) {
      probs = ((ASTDoubleList) probsArg)._d;
    } else if (probsArg instanceof ASTNum) {
      probs = new double[]{((ASTNum) probsArg)._d};
    } else {
      probs = null;
    }
    _probs = probs;
    E.eatEnd();

    // _cidx and _probs are set on this instance first so the clone carries them.
    final ASTQPFPC parsed = (ASTQPFPC) clone();
    parsed._probs = _probs;
    parsed._asts = new AST[]{frameArg};
    return parsed;
  }
  @Override void apply(Env e) {
    // For each Vec, subset to class ID and compute quantiles
    // Blow out if Vec is not numeric.
    Frame f = e.popAry();
    Vec classVec = f.vecs()[_cidx];
    for( Vec v: f.vecs() ) {
      if( v==classVec ) continue;
      if(!v.isNumeric() ) throw new IllegalArgumentException("Columns must be numeric!");
    }
    // must unfortunately create the frames -- could be painful data copy move... TODO: make quantile accept conditionals
    final int nclasses = classVec.domain().length;
    long[] selectValues = new long[nclasses];
    for(int i=0;i {
    final long _selectValue;  // rows are kept when column 1 of _fr equals this value
    final Frame _fr;          // input frame: column 0 holds the values to copy, column 1 the value tested against _selectValue
    final Key _key;           // key of the single-column output frame; also the key it is put into the DKV under

    /**
     * @param cc  completer handed to the superclass constructor
     * @param s   value that column 1 of {@code fr} must equal for a row to be kept
     * @param fr  frame to filter
     * @param key key for the filtered output frame
     */
    SubsetTask(H2O.H2OCountedCompleter cc, long s, Frame fr, Key key) {
      super(cc);
      _selectValue = s;
      _fr = fr;
      _key = key;
    }
    /**
     * Copies the column-0 values of every row of {@code _fr} whose column-1
     * value equals {@code _selectValue} into a new one-column frame, stores
     * that frame in the DKV under {@code _key}, then calls tryComplete().
     */
    @Override
    protected void compute2() {
      MRTask finished = new MRTask() {
        @Override public void map(Chunk[] cols, NewChunk out) {
          final int rows = cols[0]._len;
          for (int row = 0; row < rows; ++row) {
            if (cols[1].at8(row) != _selectValue) continue;  // row belongs to another class
            out.addNum(cols[0].atd(row));
          }
        }
      }.doAll(1, _fr, false/*run_local*/);
      // The output column keeps the name of the input's first column.
      Frame subset = finished.outputFrame(_key, new String[]{_fr.names()[0]}, null);
      DKV.put(_key, subset);
      tryComplete();
    }
  }

  /**
   * For a single feature column, builds one subset frame per class value and
   * computes the requested quantiles on each subset.
   *
   * Per class the chain is: SubsetTask -> QuantileCallback (forks the quantile
   * task) -> QuantileCallback2 (stores the result and removes the subset frame).
   */
  private static class ParallelSubsets extends H2O.H2OCountedCompleter {
    // runs over each class for the given Vec and creates the subset.
    // TODO: call back to quantile computation and finally return the result up the chain...

    // IN
    final Vec _feature;           // the feature vector
    final Vec _class;             // the class vector
    final long[] _selectValues;   // which value to subset on
    final Key[] _frameKeys;       // array of Keys for the subsetted Frames
    final double _probs[];        // quantile probabilities to compute
    final String[] _classLabels;  // label of each class, parallel to _selectValues
    final String _colLabel;       // label of the feature column

    // OUT
    double[/*classes*/][/*probs*/] _dist; // results for this feature over the set of classes
    String[] _rownames;  // row labels of the form "<classLabel>_<colLabel>"

    /**
     * @param cc           completer handed to the superclass constructor
     * @param f            the feature vector
     * @param c            the class vector
     * @param selectValues class values to subset on, one subset per entry
     * @param frameKeys    keys for the subset frames, parallel to selectValues
     * @param probs        quantile probabilities
     * @param classLabels  class labels, parallel to selectValues
     * @param colLabel     label of the feature column
     */
    ParallelSubsets(H2O.H2OCountedCompleter cc, Vec f, Vec c, long[] selectValues, Key[] frameKeys, double[] probs, String classLabels[], String colLabel) {
      super(cc); _feature=f; _class=c; _selectValues=selectValues; _frameKeys=frameKeys; _probs=probs; _dist = new double[_selectValues.length][_probs.length];
      _classLabels=classLabels; _colLabel=colLabel; _rownames=new String[_selectValues.length];
    }

    /** Forks one SubsetTask per class value; results arrive through the callbacks below. */
    @Override protected void compute2() {
      final int nclasses = _selectValues.length;
      if( nclasses==0 ) {
        // Nothing gets forked, so nothing would ever signal completion: finish right away
        // instead of setting the pending count to -1.
        tryComplete();
        return;
      }
      // Two callbacks are created per class (QuantileCallback and QuantileCallback2), both
      // constructed with this task as their target -- presumably each signals this task once,
      // hence 2*n pending signals minus the one that performs the final completion.
      addToPendingCount(2*nclasses-1);
      for( int i=0; i<nclasses; ++i ) {
        Frame f = new Frame(_feature, _class);
        _rownames[i]=_classLabels[i]+"_"+_colLabel;
        new SubsetTask(new QuantileCallback(i), _selectValues[i], f, _frameKeys[i]).fork();
      }
    }

    /** Callback for the i-th SubsetTask: launches the quantile task on the i-th subset frame. */
    private class QuantileCallback extends H2O.H2OCallback {
      int _i;             // class index
      Quantile.QTask _q;  // the forked quantile task; read back by QuantileCallback2
      public QuantileCallback(int i) { super(ParallelSubsets.this); _i=i; }
      @Override public void callback(H2O.H2OCountedCompleter cc) {
        // launch Quantile task on ith _frameKey
        Frame subset = (Frame)DKV.getGet(_frameKeys[_i]);
        _q = new Quantile.QTask(new QuantileCallback2(_i,this), _probs, subset, QuantileModel.CombineMethod.INTERPOLATE);
        _q.fork();
      }
    }

    /** Callback for the i-th quantile task: records its quantiles and removes the subset frame. */
    private class QuantileCallback2 extends H2O.H2OCallback {
      int _i;                     // class index
      QuantileCallback _parent;   // the callback that forked the quantile task and holds a reference to it
      public QuantileCallback2(int i, QuantileCallback q) { super(ParallelSubsets.this); _i = i; _parent=q; }
      @Override public void callback(H2O.H2OCountedCompleter cc) {
        // The subset frame has a single column, so its quantiles are at index 0.
        _dist[_i] = _parent._q._quantiles[0];
        Keyed.remove(_frameKeys[_i]);  // toss the subset frame
      }
    }
  }

  private static class ParallelSubsetTask extends H2O.H2OCountedCompleter {
    // launch a SubsetMRTask for each (feature, class) pair.
    // On callback, toss into the Quantile computation

    // IN
    final Vec[] _features;        // the feature vectors; _dist and _rownames get one entry per feature
    final Vec   _classes;         // the class vector
    final long[] _selectValues;   // the class values to subset on
    final double[] _probs;        // quantile probabilities
    final String[] _colLabels;    // column labels -- presumably one per feature; confirm against caller
    final String[] _classLabels;  // class labels -- presumably parallel to _selectValues; confirm against caller

    // OUT:
    // Quantiles indexed [feature][class][prob]. Intent per the original note: limit probs to at most 100
    // so the results fit easily in memory and can be turned into a Frame. At 100 probs of 8 bytes each:
    // 5000 features x 500 classes ~ 2 GB; 100 features x 20 classes ~ 1.6 MB.
    // NOTE(review): the original note said long[][][] and "~ 16 MBs"; the array is double[][][] and the
    // arithmetic gives 1.6 MB. No limit on probs is enforced in the constructor.
    double[/*features*/][/*classes*/][/*probs*/] _dist;
    String[][] _rownames;  // row labels per feature; outer array sized in the constructor, inner arrays filled elsewhere
    Key[] _fkeys;          // NOTE(review): not assigned in the visible code (compute2 uses a local Key[][] fkeys) -- confirm usage
    ParallelSubsets[] _ps; // one ParallelSubsets per feature, allocated in compute2

    ParallelSubsetTask(Vec[] features, Vec classes, long[] selectValues, double[] probs, String[] colLabels, String[] classLabels) {
      _features=features; _classes=classes; _selectValues=selectValues; _probs=probs;
      _dist=new double[features.length][][]; _colLabels=colLabels; _classLabels=classLabels; _rownames=new String[features.length][];
    }

    @Override protected void compute2() {
      addToPendingCount(_features.length-1);
      // loop over Vecs
      _ps=new ParallelSubsets[_features.length];
      Key[][] fkeys = new Key[_features.length][_selectValues.length];
      for(int i=0;i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy