hex.aggregator.AggregatorModel Maven / Gradle / Ivy
package hex.aggregator;
import hex.*;
import hex.pca.PCAModel;
import hex.util.LinearAlgebraUtils;
import water.*;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.fvec.Vec;
import water.udf.CFuncRef;
import water.util.ArrayUtils;
import water.util.FrameUtils;
import water.util.VecUtils;
import java.util.Arrays;
public class AggregatorModel extends Model implements Model.ExemplarMembers {
/**
 * Supplies the converter this model uses to turn a column into its eigen encoding.
 *
 * @return the shared {@code LinearAlgebraUtils.toEigen} instance
 */
@Override
public ToEigenVec getToEigenVec() {
  final ToEigenVec eigenConverter = LinearAlgebraUtils.toEigen;
  return eigenConverter;
}
/**
 * Parameter set for the Aggregator algorithm.
 * Field order and defaults are kept exactly as declared.
 */
public static class AggregatorParameters extends Model.Parameters {
  /** @return the algorithm name, {@code "Aggregator"} */
  public String algoName() {
    return "Aggregator";
  }

  /** @return the full algorithm name, {@code "Aggregator"} */
  public String fullName() {
    return "Aggregator";
  }

  /** @return the fully qualified class name of {@link AggregatorModel} */
  public String javaName() {
    return AggregatorModel.class.getName();
  }

  /**
   * Progress units reported for a job: {@code 5 + 2 * nChunks - 1}, where {@code nChunks}
   * is read from any vec of the training frame.
   * Author's rationale: nChunks maps and nChunks-1 reduces, multiply by two for main job overhead.
   */
  @Override
  public long progressUnits() {
    return 5 + 2 * train().anyVec().nChunks() - 1;
  }

  // Disabled parameters kept for reference:
  //public double _radius_scale=1.0;
  // public int _max_iterations = 1000; // Max iterations for SVD

  /** Data transformation. */
  public DataInfo.TransformType _transform = DataInfo.TransformType.NORMALIZE;
  /** Method for dimensionality reduction. */
  public PCAModel.PCAParameters.Method _pca_method = PCAModel.PCAParameters.Method.Power;
  /** Number of principal components. */
  public int _k = 1;
  /** Defaults to 5000; presumably the desired number of exemplars -- confirm against the builder. */
  public int _target_num_exemplars = 5000;
  /** Defaults to 0.5; presumably a relative tolerance around the target exemplar count -- confirm against the builder. */
  public double _rel_tol_num_exemplars = 0.5;
  /** When expanding categoricals, should first level be kept or dropped? */
  public boolean _use_all_factor_levels = false;
  /** Defaults to false; presumably controls whether a mapping frame is stored -- confirm against the builder. */
  public boolean _save_mapping_frame = false;
  /** Defaults to 500; how it is consumed is not visible in this file. */
  public int _num_iteration_without_new_exemplar = 500;
}
/**
 * Output of an Aggregator model: the keys of the frames it produced.
 */
public static class AggregatorOutput extends Model.Output {
  /**
   * @param b the builder this output belongs to; passed straight to the superclass
   */
  public AggregatorOutput(Aggregator b) {
    super(b);
  }

  /**
   * @return the number of columns of the output frame, minus one for the counts column
   */
  @Override
  public int nfeatures() {
    return _output_frame.get().numCols() - 1 /*counts*/;
  }

  /** @return {@code ModelCategory.Clustering} */
  @Override
  public ModelCategory getModelCategory() {
    return ModelCategory.Clustering;
  }

  // Typed as Key<Frame> so that get() yields a Frame; with a raw Key the
  // numCols() call in nfeatures() has no Frame to resolve against.
  /** Key of the output frame read by {@link #nfeatures()}. */
  public Key<Frame> _output_frame;
  /** Key of the mapping frame; not read anywhere in this class. */
  public Key<Frame> _mapping_frame;
}
// Exemplars held by this model; createFrameOfExemplars sizes its row-selection array from this.
public Aggregator.Exemplar[] _exemplars;
// Per-exemplar counts; createFrameOfExemplars writes _counts[i] into row i of the "counts" column.
public long[] _counts;
// Key removed in remove_impl when the model is deleted.
// NOTE(review): declared as a raw Key here -- a type argument may have been lost; confirm against upstream.
public Key _exemplar_assignment_vec_key;
/**
 * Creates the model by handing all three arguments to the superclass constructor.
 *
 * @param selfKey key of this model
 * @param parms   parameters the model was built with
 * @param output  output object of the model
 */
public AggregatorModel(Key selfKey,
                       AggregatorParameters parms,
                       AggregatorOutput output) {
  super(selfKey, parms, output);
}
/**
 * Scoring hook. Every argument is ignored: the method always returns a
 * {@code PredictScoreResult} constructed from three {@code null}s.
 */
@Override
protected PredictScoreResult predictScoreImpl(Frame orig,
                                              Frame adaptedFr,
                                              String destination_key,
                                              final Job j,
                                              boolean computeMetrics,
                                              CFuncRef customMetricFunc) {
  final PredictScoreResult emptyResult = new PredictScoreResult(null, null, null);
  return emptyResult;
}
/**
 * Removes what is stored under {@code _exemplar_assignment_vec_key}, then lets the
 * superclass perform its own removal.
 *
 * @param fs      futures handed on to the superclass
 * @param cascade cascade flag handed on to the superclass
 * @return whatever the superclass {@code remove_impl} returns
 */
@Override
protected Futures remove_impl(Futures fs, boolean cascade) {
  // Drop the model-owned key first; the superclass call comes second, as before.
  Keyed.remove(_exemplar_assignment_vec_key);
  final Futures pending = super.remove_impl(fs, cascade);
  return pending;
}
/**
 * This model supplies no metric builder.
 *
 * @param domain ignored
 * @return always {@code null}
 */
@Override
public ModelMetrics.MetricBuilder makeMetricBuilder(String[] domain) {
  final ModelMetrics.MetricBuilder none = null;
  return none;
}
/**
 * Row-scoring hook that performs no computation.
 *
 * @param data  input row; not read
 * @param preds prediction buffer; not modified
 * @return the same {@code preds} array that was passed in
 */
@Override
protected double[] score0(double[] data, double[] preds) {
  final double[] untouched = preds;
  return untouched;
}
/**
 * Builds a frame of rows selected from {@code orig}, appends a "counts" column filled from
 * {@code _counts}, publishes the frame in the DKV under {@code destination_key} and returns it.
 *
 * NOTE(review): part of this method's text is missing in this copy (see the marked line
 * below); this description covers only the statements that are still visible.
 *
 * @param orig            frame whose names, types, domains and vecs feed the selection
 * @param destination_key key under which the resulting frame is stored
 * @return the selected frame including the appended "counts" column
 */
public Frame createFrameOfExemplars(Frame orig, Key destination_key) {
// One slot per exemplar.
final long[] keep = new long[_exemplars.length];
// NOTE(review): the next line looks truncated -- the loop header, the declarations of
// `exAssignment` and `booleanCol`, and the task that provides chunk `c2` are not visible.
// Restore them from the upstream source before relying on this method.
for (int i=0;i= c2.start()+c2._len) continue;
// Write 1 at the chunk-local position of keep[i] (keep[i] minus the chunk's start).
c2.set((int)(keep[i]-c2.start()), 1);
}
}
// The task runs over a one-column frame holding a zeroed vec made from exAssignment;
// that column (index 0) is what gets taken as the result.
}.doAll(new Frame(new Vec[]{exAssignment.makeZero()}))._fr.vec(0);
// NOTE(review): `vecs` is filled here but never read again in the visible code.
Vec[] vecs = Arrays.copyOf(orig.vecs(), orig.vecs().length+1);
vecs[vecs.length-1] = booleanCol;
// Frame made of orig's columns plus the marker column under the name "predicate".
Frame ff = new Frame(orig.names(), orig.vecs());
ff.add("predicate", booleanCol);
// Presumably DeepSelect keeps the rows whose predicate is set -- confirm against Frame.DeepSelect.
Frame res = new Frame.DeepSelect().doAll(orig.types(),ff).outputFrame(destination_key, orig.names(), orig.domains());
FrameUtils.shrinkDomainsToObservedSubset(res);
// The marker column is temporary; remove it once the selection is done.
booleanCol.remove();
// The result is expected to hold exactly one row per exemplar.
assert(res.numRows()==_exemplars.length);
// Start from a zeroed vec derived from the result, then write _counts[i] into row i.
Vec cnts = res.anyVec().makeZero();
Vec.Writer vw = cnts.open();
for (int i=0;i<_counts.length;++i)
vw.set(i, _counts[i]);
vw.close();
res.add("counts", cnts);
// Store the frame again so the DKV copy includes the "counts" column added above.
DKV.put(destination_key, res);
return res;
}
public Frame createMappingOfExemplars(Key destinationKey){
final long[] keep = MemoryManager.malloc8(_exemplars.length);
for (int i=0;i destination_key, final int exemplarIdx) {
Vec booleanCol = new MRTask() {
@Override
public void map(Chunk c, NewChunk nc) {
for (int i=0;i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy