// hex.svd.SVDModel Maven / Gradle / Ivy
package hex.svd;
import hex.*;
import water.DKV;
import water.Key;
import water.MRTask;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.Vec;
public class SVDModel extends Model {
// User-settable options for an SVD model build. Every field is public and is initialised
// to its default value in place.
public static class SVDParameters extends Model.Parameters {
public DataInfo.TransformType _transform = DataInfo.TransformType.NONE; // Data transformation (demean to compare with PCA); defaults to NONE
public int _nv = 1; // Number of right singular vectors to calculate
public int _max_iterations = 1000; // Maximum number of iterations
public long _seed = System.nanoTime(); // RNG seed; taken from System.nanoTime() when the object is constructed unless overwritten
public boolean _keep_u = true; // Should left singular vectors be saved in memory? (Only applies if _only_v = false)
// public Key _u_key; // Frame key for left singular vectors (U)
public String _u_name; // NOTE(review): undocumented; presumably the name given to the frame of left singular vectors (U) - confirm against the builder
public boolean _only_v = false; // Compute only right singular vectors? (Faster if true)
public boolean _use_all_factor_levels = true; // When expanding categoricals, keep every factor level? NOTE(review): the name implies false is the setting that drops the first level - confirm
}
/**
 * Result of an SVD build: the decomposition (V, D and a key for U) together with
 * layout and normalisation data describing the adapted training frame.
 */
public static class SVDOutput extends Model.Output {
  /** Right singular vectors (V). */
  public double[][] _v;

  /** Singular values (diagonal of D). */
  public double[] _d;

  /** Frame key for left singular vectors (U). */
  public Key _u_key;

  /** Number of categorical columns. */
  public int _ncats;

  /** Number of numeric columns. */
  public int _nnums;

  /** Number of good rows in training frame (not skipped). */
  public long _nobs;

  /** Categorical offset vector. */
  public int[] _catOffsets;

  /** If standardized, mean of each numeric data column. */
  public double[] _normSub;

  /** If standardized, one over standard deviation of each numeric data column. */
  public double[] _normMul;

  /** Permutation matrix mapping training col indices to adaptedFrame. */
  public int[] _permutation;

  /** Expanded column names of adapted training frame. */
  public String[] _names_expanded;

  /**
   * Creates the output by passing the builder to the superclass constructor.
   *
   * @param b the SVD builder this output belongs to
   */
  public SVDOutput(SVD b) {
    super(b);
  }

  /** @return {@code ModelCategory.DimReduction}, always */
  @Override
  public ModelCategory getModelCategory() {
    return ModelCategory.DimReduction;
  }
}
/**
 * Creates an SVD model; all three arguments are passed unchanged to the superclass constructor.
 *
 * @param selfKey key for this model
 * @param parms   build parameters
 * @param output  build output
 */
public SVDModel(Key selfKey, SVDParameters parms, SVDOutput output) {
  super(selfKey, parms, output);
}
/**
 * Creates the metric builder for this model.
 *
 * @param domain not consulted by this implementation
 * @return a new {@code SVDModelMetrics} constructed with the configured number of
 *         singular vectors ({@code _parms._nv})
 */
@Override
public ModelMetrics.MetricBuilder makeMetricBuilder(String[] domain) {
  final int dims = _parms._nv;
  return new ModelMetricsSVD.SVDModelMetrics(dims);
}
/**
 * Metrics object for SVD models. It carries no SVD-specific values: the constructor
 * passes {@code Double.NaN} as the third argument to the unsupervised-metrics superclass.
 */
public static class ModelMetricsSVD extends ModelMetricsUnsupervised {
  public ModelMetricsSVD(Model model, Frame frame) {
    super(model, frame, Double.NaN);
  }

  /** SVD currently does not have any model metrics to compute during scoring. */
  public static class SVDModelMetrics extends MetricBuilderUnsupervised {
    /**
     * @param dims length of the {@code _work} array allocated for this builder
     */
    public SVDModelMetrics(int dims) {
      _work = new double[dims];
    }

    /** Accumulates nothing; the incoming data row is returned as-is. */
    @Override
    public double[] perRow(double[] dataRow, float[] preds, Model m) {
      return dataRow;
    }

    /**
     * Builds a {@code ModelMetricsSVD} for the model/frame pair and registers it on the
     * model's output. {@code sigma} is not used.
     */
    @Override
    public ModelMetrics makeModelMetrics(Model m, Frame f, double sigma) {
      ModelMetricsSVD metrics = new ModelMetricsSVD(m, f);
      return m._output.addModelMetrics(metrics);
    }
  }
}
// Scores an adapted frame: copies it, appends one zero-filled "PC<k>" column per requested
// singular vector (_parms._nv of them) and starts an MRTask that scores each chunk row.
//
// NOTE(review): this listing is corrupted. On the line beginning `for( int c=0; c` the text
// between `c` and `= _output._catOffsets[j+1])` is missing (it looks as if everything between
// a '<' and a later '>' was stripped). The rest of scoreImpl, and the header and opening
// statements of the per-row scoring routine that follows it (the code that declares i, j,
// level, numStart and data - likely a score0 overload, confirm), are therefore not visible
// here. Restore this span from the upstream source; it does not compile as shown.
@Override protected Frame scoreImpl(Frame orig, Frame adaptedFr, String destination_key) {
Frame adaptFrm = new Frame(adaptedFr);
// One output column per singular vector, named PC1..PC<_nv>
for(int i = 0; i < _parms._nv; i++)
adaptFrm.add("PC"+String.valueOf(i+1),adaptFrm.anyVec().makeZero());
new MRTask() {
@Override public void map( Chunk chks[] ) {
// Scratch buffers handed to score0 for every row: tmp has one slot per _output._names entry,
// preds one slot per singular vector
double tmp [] = new double[_output._names.length];
double preds[] = new double[_parms._nv];
for( int row = 0; row < chks[0]._len; row++) {
double p[] = score0(chks, row, tmp, preds);
// NOTE(review): corrupted line - two different statements are spliced together here (see the note at the top)
for( int c=0; c= _output._catOffsets[j+1]) continue; // Skip categorical levels not in training frame
// Categorical contribution: add the V entry for row (_catOffsets[j] + level), component i
preds[i] += _output._v[_output._catOffsets[j]+level][i];
}
// Numeric columns: data is indexed through _permutation starting at position _ncats,
// V rows are walked starting at numStart
int dcol = _output._ncats;
int vcol = numStart;
for (int j = 0; j < _output._nnums; j++) {
// (value - _normSub[j]) * _normMul[j], weighted by the matching V entry
preds[i] += (data[_output._permutation[dcol]] - _output._normSub[j]) * _output._normMul[j] * _output._v[vcol][i];
dcol++; vcol++;
}
}
return preds;
}
/**
 * Scores {@code fr} with this model.
 *
 * <p>A copy of the frame is adapted with {@code adaptTestForTrain}, handed to
 * {@code scoreImpl}, and then passed to {@code cleanup_adapt} together with the original
 * frame. The cleanup call sits in a {@code finally} block so that it also runs when
 * {@code scoreImpl} throws; previously a scoring failure skipped it.
 * NOTE(review): {@code cleanup_adapt} is defined outside this file - presumably it releases
 * the temporary columns created by adaptation; confirm it is safe to call after a failed score.
 *
 * @param fr              frame to score; it is copied, not modified directly, by this method
 * @param destination_key passed through to {@code scoreImpl}
 * @return the frame produced by {@code scoreImpl}
 */
@Override
public Frame score(Frame fr, String destination_key) {
  Frame adaptFr = new Frame(fr);
  adaptTestForTrain(adaptFr, true, false); // Adapt
  try {
    return scoreImpl(fr, adaptFr, destination_key); // Score
  } finally {
    // Runs after scoreImpl has produced its result (or thrown), before this method exits
    cleanup_adapt(adaptFr, fr);
  }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy