All downloads are free. The search and download functionality uses the official Maven repository.

hex.tree.Sample Maven / Gradle / Ivy

There is a newer version: 3.46.0.6
Show newest version
package hex.tree;

import water.MRTask;
import water.fvec.C4VolatileChunk;
import water.fvec.Chunk;
import water.util.RandomUtils;

import java.util.Random;

// Deterministic sampling
public class Sample extends MRTask {
  final long _seed;
  final int _useMarker;
  final int _ignoreMarker;
  final double _rate;
  final double[] _rate_per_class;

  public Sample(DTree tree, double rate, double[] rate_per_class) {
    this(tree._seed, rate, rate_per_class, 0, ScoreBuildHistogram.OUT_OF_BAG);
  }

  public Sample(long seed, double rate, double[] rate_per_class, int useMarker,int ignoreMarker) {
    _seed = seed;
    _useMarker = useMarker;
    _ignoreMarker = ignoreMarker;
    _rate = rate;
    _rate_per_class = rate_per_class;
  }

  @Override
  public void map(Chunk nids, Chunk ys) {
    C4VolatileChunk nids2 = (C4VolatileChunk) nids;
    Random rand = RandomUtils.getRNG(_seed);
    int [] is = nids2.getValues();
    for (int row = 0; row < nids._len; row++) {
      boolean skip = ys.isNA(row);
      if (!skip) {
        double rate = _rate_per_class==null ? _rate : _rate_per_class[(int)ys.at8(row)];
        rand.setSeed(_seed + row + nids.start()); //seeding is independent of chunking
        skip = rand.nextFloat() >= rate; //float is good enough, half as much cost
      }
      if (skip) is[row] = _ignoreMarker;     // Flag row as being ignored by sampling
      else if (_useMarker != 0) is[row] = _useMarker;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy