
hex.deeplearning.DeepLearningModelInfo

package hex.deeplearning;

import hex.DataInfo;
import hex.genmodel.utils.DistributionFamily;
import static java.lang.Double.isNaN;

import hex.Model;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters;
import water.*;
import water.fvec.Frame;
import water.util.*;

import java.util.Arrays;
import java.util.Random;


/**
 * This class contains the state of the Deep Learning model.
 * It is shared: one instance per node.
 */
final public class DeepLearningModelInfo extends Iced {

  public TwoDimTable summaryTable;

  public DataInfo data_info;
  public DataInfo data_info() {
    return data_info;
  }

  // model is described by parameters and the following arrays
  private Storage.DenseRowMatrix[] dense_row_weights; //one 2D weight matrix per layer (stored as a 1D array each)
  private Storage.DenseVector[] biases; //one 1D bias array per layer
  private Storage.DenseVector[] avg_activations; //one 1D array per hidden layer
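
  // A DenseRowMatrix flattens each (rows x cols) weight matrix into a single
  // row-major 1D array. Minimal indexing sketch, assuming a raw float[] backing
  // store (hypothetical helper; the real accessors live in Storage):
  //
  //   float get(float[] data, int cols, int row, int col) {
  //     return data[row * cols + col];  // row-major offset
  //   }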

  // helpers for storing previous step deltas
  // Note: These two arrays *could* be made transient and then initialized freshly in makeNeurons() and in DeepLearningTask.initLocal()
  // But then, after each reduction, the momenta would be lost and would have to be rebuilt from scratch -> not *exactly* right, but close...
  private Storage.DenseRowMatrix[] dense_row_weights_momenta;
  private Storage.DenseVector[] biases_momenta;
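
  // Minimal sketch of how these momenta are typically applied per weight w
  // during an update (classical momentum; the actual update code lives in the
  // backpropagation logic, not in this class):
  //
  //   double delta = -rate * gradient + momentum * weights_momenta[w];
  //   weights[w]         += delta;
  //   weights_momenta[w]  = delta;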

  // helpers for AdaDelta
  private Storage.DenseRowMatrix[] dense_row_ada_dx_g;
  private Storage.DenseVector[] biases_ada_dx_g;
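
  // Minimal sketch of the AdaDelta recurrence these accumulators support (after
  // Zeiler 2012): each weight keeps two running averages, E[g^2] and E[dx^2],
  // which is why one "ada_dx_g" structure holds both:
  //
  //   g  = rho * g  + (1 - rho) * grad * grad;                   // E[g^2]
  //   double delta = -Math.sqrt((dx + eps) / (g + eps)) * grad;
  //   dx = rho * dx + (1 - rho) * delta * delta;                 // E[dx^2]
  //   weight += delta;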

  private boolean[] _saw_missing_cats;  // whether missing value was encountered for each categorical predictor - needed for varimp

  // compute model size (number of model parameters required for making predictions)
  // momenta are not counted here, but they are needed for model building
  public long size() {
    long siz = 0;
    for (Storage.DenseRowMatrix w : dense_row_weights) if (w != null) siz += w.size();
    for (Storage.Vector b : biases) siz += b.size();
    return siz;
  }
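
  // Worked example, assuming a hypothetical 784-200-10 fully-connected network
  // (no autoencoder):
  //   weights: 784*200 + 200*10 = 158,800
  //   biases:        200 +   10 =     210
  //   size()                    = 159,010 model parameters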

  /**
   * Record, for each categorical predictor, whether a missing value was encountered
   * @param cats activation of categorical buckets for a given row
   */
  void checkMissingCats(int[] cats)  {
    if (cats == null) return;
    if (_saw_missing_cats == null) return;
    for (int i = 0; i < cats.length; ++i)
      if (cats[i] == data_info()._catOffsets[i + 1] - 1) // the last bucket of each categorical holds its missing values
        _saw_missing_cats[i] = true;
  }

  private DeepLearningParameters parameters; // the model's (cloned, sanitized) parameters
  private Key<Model> _model_id;
  public final DeepLearningParameters get_params() { return parameters; }
  public final void set_params(DeepLearningParameters p, Key<Model> model_id) {
    parameters = (DeepLearningParameters) p.clone();
    _model_id = model_id;
  }

  private double[] mean_rate;
  private double[] rms_rate;
  private double[] mean_bias;
  private double[] rms_bias;
  private double[] mean_weight;
  public double[] rms_weight;
  public double[] mean_a;

  private volatile boolean unstable = false;
  public boolean isUnstable() { return unstable; }
  public void setUnstable() {
    if (!unstable) computeStats();
    unstable = true;
  }

  private long processed_global;
  public synchronized long get_processed_global() { return processed_global; }
  public synchronized void set_processed_global(long p) { processed_global = p; }
  public synchronized void add_processed_global(long p) { processed_global += p; }
  private long processed_local;
  public synchronized long get_processed_local() { return processed_local; }
  public synchronized void set_processed_local(long p) { processed_local = p; }
  public synchronized void add_processed_local(long p) { processed_local += p; }
  public synchronized long get_processed_total() { return processed_global + processed_local; }
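
  // Sketch of the intended bookkeeping, assuming the usual map/reduce training
  // pass (the actual call sites are in DeepLearningTask, not shown here):
  //
  //   model_info.add_processed_local(1);   // per training row, on each node
  //   // after reduction on the driver:
  //   model_info.add_processed_global(model_info.get_processed_local());
  //   model_info.set_processed_local(0);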

  // package local helpers
  int[] units; //number of neurons per layer, extracted from the parameters and the DataInfo

  final boolean _classification; // Classification cache (nclasses>1)
  final Frame _train;         // Prepared training frame
  final Frame _valid;         // Prepared validation frame

  /**
   * Dummy constructor, only to be used for deserialization from an AutoBuffer
   */
  private DeepLearningModelInfo() {
    super(); // key is null
    _classification = false;
    _train = _valid = null;
  }

  /**
   * Main constructor
   * @param params Model parameters
   * @param model_id Model key
   * @param dinfo Data Info
   * @param nClasses number of classes (1 for regression, 0 for autoencoder)
   * @param train User-given training data frame, prepared by AdaptTestTrain
   * @param valid User-specified validation data frame, prepared by AdaptTestTrain
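   *
   * <p>Minimal construction sketch (hypothetical variable names; both frames are
   * assumed to be already adapted, and nClasses is 1 for regression, 0 for autoencoders):
   * <pre>{@code
   * DeepLearningModelInfo mi = new DeepLearningModelInfo(
   *     parms, modelKey, dinfo, nClasses, adaptedTrain, adaptedValid);
   * }</pre>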
   */
  public DeepLearningModelInfo(final DeepLearningParameters params, Key model_id, final DataInfo dinfo, int nClasses, Frame train, Frame valid) {
    _classification = nClasses > 1;
    _train = train;
    _valid = valid;
    data_info = dinfo;
    parameters = (DeepLearningParameters) params.clone(); //make a copy, don't change model's parameters
    _model_id = model_id;
    DeepLearningParameters.Sanity.modifyParms(parameters, parameters, nClasses); //sanitize the model_info's parameters

    final int num_input = dinfo.fullN();
    final int num_output = get_params()._autoencoder ? num_input :
            (_classification && parameters._distribution != DistributionFamily.modified_huber ? train.vec(parameters._response_column).cardinality() : 1);
    if (!get_params()._autoencoder) assert(num_output == nClasses || parameters._distribution == DistributionFamily.modified_huber );

    _saw_missing_cats = dinfo._cats > 0 ? new boolean[data_info._cats] : null;
    assert (num_input > 0);
    assert (num_output > 0);
    if (has_momenta() && adaDelta())
      throw new IllegalArgumentException("Cannot have non-zero momentum and adaptive rate at the same time.");
    final int layers = get_params()._hidden.length;
    // units (# neurons for each layer)
    units = new int[layers + 2];
    // guard against integer overflow: _max_categorical_features defaults to Integer.MAX_VALUE
    if (get_params()._max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
      units[0] = Math.min(dinfo._nums + get_params()._max_categorical_features, num_input);
    else
      units[0] = num_input;
    System.arraycopy(get_params()._hidden, 0, units, 1, layers);
    units[layers + 1] = num_output;
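
    // Worked example, assuming hypothetical parameters hidden={200, 100},
    // num_input=784, num_output=10, and no max_categorical_features cap:
    //   units = {784, 200, 100, 10}   // input, hidden layers, output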

    boolean printLevels = units[0] > 1000L;
    boolean warn = units[0] > 100000L;
    if (printLevels) {
      final String[][] domains = dinfo._adaptedFrame.domains();
      if (warn) {
        Log.warn("===================================================================================================================================");
        Log.warn(num_input + " input features" + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "") + ". Can be slow and require a lot of memory.");
      }
      FrameUtils.printTopCategoricalLevels(dinfo._adaptedFrame, warn, 10);
      if (warn) {
        Log.warn("Suggestions:");
        Log.warn(" *) Limit the size of the first hidden layer");
        if (dinfo._cats > 0) {
          Log.warn(" *) Limit the total number of one-hot encoded features by setting 'categorical_encoding=\"enum_limited\"'");
          Log.warn(" *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features' (experimental)");
          Log.warn(" *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/deep-learning.html#faq");
        }
        Log.warn("===================================================================================================================================");
      }
    }

    int[] mult = new int[layers + 1];
    // per-layer weight-row multiplier (reconstruction of a garbled span, assuming
    // the usual H2O convention: Maxout keeps 2 weight vectors per neuron)
    for (int i = 0; i < layers; ++i)
      mult[i] = (get_params()._activation == DeepLearningParameters.Activation.Maxout
              || get_params()._activation == DeepLearningParameters.Activation.MaxoutWithDropout) ? 2 : 1;
    mult[layers] = 1; // the output layer is never Maxout

    // ... (remainder of the class is truncated in this listing)



