// hex.tree.Sample (Maven / Gradle / Ivy)
package hex.tree;
import water.MRTask;
import water.fvec.C4VolatileChunk;
import water.fvec.Chunk;
import water.util.RandomUtils;
import java.util.Random;
// Deterministic sampling
/**
 * Task that flags rows as out-of-bag according to a sampling rate.
 *
 * <p>For every row of the node-id chunk, the random generator is re-seeded from the tree seed
 * plus the row's absolute position (chunk start + row offset), so the keep/drop decision for a
 * row does not depend on how the data is split into chunks.
 *
 * <p>NOTE(review): {@code MRTask} is used here without a type argument; if {@code MRTask} is
 * generic in this code base, confirm whether a type parameter is missing.
 */
public class Sample extends MRTask {
  /** Tree whose {@code _seed} drives the sampling decisions. */
  final DTree _tree;
  /** Sampling rate applied to every row when {@link #_rate_per_class} is {@code null}. */
  final double _rate;
  /** Optional per-class sampling rates, indexed by the response value read as an integer. */
  final double[] _rate_per_class;

  /**
   * @param tree tree providing the seed
   * @param rate global sampling rate, used when {@code rate_per_class} is {@code null}
   * @param rate_per_class per-class sampling rates, or {@code null} to use {@code rate}
   */
  public Sample(DTree tree, double rate, double[] rate_per_class) {
    this._tree = tree;
    this._rate = rate;
    this._rate_per_class = rate_per_class;
  }

  /**
   * Writes {@code ScoreBuildHistogram.OUT_OF_BAG} into the node-id slot of every row that is
   * not sampled. Rows whose response is NA are always flagged.
   *
   * @param nids node-id chunk; must be a {@link C4VolatileChunk}, its value array is updated
   *     in place
   * @param ys response chunk, consulted for NA-ness and (with per-class rates) the class index
   */
  @Override
  public void map(Chunk nids, Chunk ys) {
    final C4VolatileChunk volatileNids = (C4VolatileChunk) nids;
    final Random rng = RandomUtils.getRNG(_tree._seed);
    final int[] nodeIds = volatileNids.getValues();

    for (int r = 0; r < nids._len; r++) {
      // Rows with a missing response are dropped unless the draw below says otherwise.
      boolean outOfBag = true;
      if (!ys.isNA(r)) {
        final double keepRate;
        if (_rate_per_class != null) {
          keepRate = _rate_per_class[(int) ys.at8(r)];
        } else {
          keepRate = _rate;
        }
        // Seed from the absolute row position so the result is independent of chunking.
        rng.setSeed(_tree._seed + r + nids.start());
        // A float draw is precise enough here and cheaper than a double.
        outOfBag = rng.nextFloat() >= keepRate;
      }
      if (outOfBag) {
        // Flag row as being ignored by sampling.
        nodeIds[r] = ScoreBuildHistogram.OUT_OF_BAG;
      }
    }
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy