package hex.deeplearning;
import hex.FrameTask;
import water.H2O;
import water.H2O.H2OCountedCompleter;
import water.Key;
import water.util.Log;
import java.util.Arrays;
import java.util.Random;
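
/**
 * Distributed training pass for H2O Deep Learning: a map/reduce task that runs
 * forward/backward propagation over the local chunks of a Frame, accumulating
 * updates into a per-node copy of the model, and averages those copies across
 * the cloud in reduce()/postGlobal().
 */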
public class DeepLearningTask extends FrameTask<DeepLearningTask> {
  final private boolean _training;
  private hex.deeplearning.DeepLearningModel.DeepLearningModelInfo _input;
  hex.deeplearning.DeepLearningModel.DeepLearningModelInfo _output;
  final public hex.deeplearning.DeepLearningModel.DeepLearningModelInfo model_info() { return _output; }
  transient Neurons[] _neurons;
  int _chunk_node_count = 1;
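
  /**
   * @param jobKey   key of the training job that owns this task
   * @param input    shared model state (weights, biases, parameters) to train against
   * @param fraction fraction of the local rows to process in this training pass
   */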
  public DeepLearningTask(Key jobKey, hex.deeplearning.DeepLearningModel.DeepLearningModelInfo input, float fraction) {
    this(jobKey, input, fraction, null);
  }

  private DeepLearningTask(Key jobKey, hex.deeplearning.DeepLearningModel.DeepLearningModelInfo input, float fraction, H2OCountedCompleter cmp) {
    super(jobKey, input.data_info(), cmp);
    _training = true;
    _input = input;
    _useFraction = fraction;
    _shuffle = _input.get_params()._shuffle_training_data;
    assert(_output == null);
  }

  // transfer ownership from input to output (which will be worked on)
  @Override protected void setupLocal() {
    super.setupLocal();
    _output = _input; //faster, good enough in this case (since the input was freshly deserialized by the Weaver)
    _input = null;
    _output.set_processed_local(0L);
  }

  // create local workspace (neurons)
  // and link them to shared weights
  @Override protected void chunkInit() {
    _neurons = makeNeuronsForTraining(_output);
  }
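
  // Process one row: pick a seed (offset by the number of rows processed so far
  // when _reproducible is set, otherwise freshly random), load the row into the
  // input layer, and run one forward/backward pass against the shared weights.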
  @Override public final void processRow(long seed, final double[] nums, final int numcats, final int[] cats, double[] responses) {
    if (model_info().get_params()._reproducible) {
      seed += model_info().get_processed_global(); //avoid periodicity
    } else {
      seed = new Random().nextLong();
    }
    ((Neurons.Input) _neurons[0]).setInput(seed, nums, numcats, cats);
    step(seed, _neurons, _output, _training, responses);
  }
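
  // Book-keeping after each chunk: count the rows this node has processed.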
  @Override protected void chunkDone(long n) {
    if (_training) _output.add_processed_local(n);
  }
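
  // Merge two per-node model copies: adopt the other copy if this node saw no
  // local data; otherwise accumulate the weights (postGlobal() later divides by
  // _chunk_node_count to turn the sum into an average).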
  @Override public void reduce(DeepLearningTask other) {
    if (other._output.get_processed_local() > 0 //other NNTask was active (its model_info should be used for averaging)
        && other._output != _output) //other NNTask worked on a different model_info
    {
      // avoid adding remote model info to unprocessed local data, still random
      // (this can happen if we have no chunks on the master node)
      if (_output.get_processed_local() == 0) {
        _output = other._output;
        _chunk_node_count = other._chunk_node_count;
      } else {
        _output.add(other._output);
        _chunk_node_count += other._chunk_node_count;
      }
    }
    if (other._output.unstable()) _output.set_unstable();
  }

  static long _lastWarn;
  static long _warnCount;
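
  // After the final reduce: warn (rate-limited via _lastWarn/_warnCount) if some
  // nodes contributed no updates, then finish the averaging by dividing the
  // accumulated weights by the number of contributing nodes.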
  @Override protected void postGlobal() {
    if (H2O.CLOUD.size() > 1 && !_output.get_params()._replicate_training_data) {
      long now = System.currentTimeMillis();
      if (_chunk_node_count < H2O.CLOUD.size() && (now - _lastWarn > 5000) && _warnCount < 3) {
        // Log.info("Synchronizing across " + _chunk_node_count + " H2O node(s).");
        Log.warn(H2O.CLOUD.size() - _chunk_node_count + " node(s) (out of " + H2O.CLOUD.size()
            + ") are not contributing to model updates. Consider setting replicate_training_data to true or using a larger training dataset (or fewer H2O nodes).");
        _lastWarn = now;
        _warnCount++;
      }
    }
    if (!_output.get_params()._replicate_training_data || H2O.CLOUD.size() == 1) {
      _output.div(_chunk_node_count);
      _output.add_processed_global(_output.get_processed_local());
      _output.set_processed_local(0L);
    }
    assert(_input == null);
  }
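
  // Factory methods: build the Neurons[] workspace (input + hidden + output
  // layers) wired to the shared weights in minfo; the boolean selects the
  // training vs. scoring behavior passed to each layer's init().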
  public static Neurons[] makeNeuronsForTraining(final DeepLearningModel.DeepLearningModelInfo minfo) {
    return makeNeurons(minfo, true);
  }

  public static Neurons[] makeNeuronsForTesting(final DeepLearningModel.DeepLearningModelInfo minfo) {
    return makeNeurons(minfo, false);
  }

  // Helper: instantiate one Neurons object per layer according to the configured activation function
  private static Neurons[] makeNeurons(final DeepLearningModel.DeepLearningModelInfo minfo, boolean training) {
    DataInfo dinfo = minfo.data_info();
    final DeepLearningModel.DeepLearningParameters params = minfo.get_params();
    final int[] h = params._hidden;
    Neurons[] neurons = new Neurons[h.length + 2]; // input + hidden + output
    // input
    neurons[0] = new Neurons.Input(dinfo.fullN(), dinfo);
    // hidden
    for (int i = 0; i < h.length; i++) {
      switch (params._activation) {
        case Tanh:
          neurons[i+1] = new Neurons.Tanh(h[i]);
          break;
        case TanhWithDropout:
          neurons[i+1] = new Neurons.TanhDropout(h[i]);
          break;
        case Rectifier:
          neurons[i+1] = new Neurons.Rectifier(h[i]);
          break;
        case RectifierWithDropout:
          neurons[i+1] = new Neurons.RectifierDropout(h[i]);
          break;
        case Maxout:
          neurons[i+1] = new Neurons.Maxout(h[i]);
          break;
        case MaxoutWithDropout:
          neurons[i+1] = new Neurons.MaxoutDropout(h[i]);
          break;
      }
    }
    // output
    if (minfo._classification)
      neurons[neurons.length - 1] = new Neurons.Softmax(minfo.units[minfo.units.length - 1]);
    else
      neurons[neurons.length - 1] = new Neurons.Linear(1);
    //copy parameters from NN, and set previous/input layer links
    for (int i = 0; i < neurons.length; i++)
      neurons[i].init(neurons, i, params, minfo, training);
    // // debugging
    // for (Neurons n : neurons) Log.info(n.toString());
    return neurons;
  }

  // forward/backward propagation
  // assumption: layer 0 has _a filled with (horizontalized categoricals) double values
  public static void step(long seed, Neurons[] neurons, DeepLearningModel.DeepLearningModelInfo minfo, boolean training, double[] responses) {
    try {
      // forward propagation through the hidden layers
      for (int i = 1; i < neurons.length - 1; ++i)
        neurons[i].fprop(seed, training);
      // forward propagation through the output layer, then (when training) set up
      // the output error from the response before backpropagating
      if (minfo._classification) {
        ((Neurons.Softmax) neurons[neurons.length - 1]).fprop();
        if (training) {
          for (int i = 1; i < neurons.length - 1; i++)
            Arrays.fill(neurons[i]._e.raw(), 0); // reset accumulated hidden-layer errors
          assert ((double) (int) responses[0] == responses[0]); // classification needs integer labels
          ((Neurons.Softmax) neurons[neurons.length - 1]).bprop((int) responses[0]);
        }
      } else {
        ((Neurons.Linear) neurons[neurons.length - 1]).fprop();
        if (training) {
          for (int i = 1; i < neurons.length - 1; i++)
            Arrays.fill(neurons[i]._e.raw(), 0); // reset accumulated hidden-layer errors
          ((Neurons.Linear) neurons[neurons.length - 1]).bprop((float) responses[0]);
        }
      }
      if (training) {
        // backward propagation through the hidden layers, updating the shared weights
        for (int i = neurons.length - 2; i > 0; --i)
          neurons[i].bprop();
      }
    }
    catch (RuntimeException ex) {
      Log.warn(ex.getMessage());
      minfo.set_unstable();
      throw new RuntimeException("Canceling job due to numerical instability.");
    }
  }
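
  // A minimal scoring sketch (hypothetical caller; assumes the output layer's
  // activation vector _a exposes its values via raw(), as the error vector _e
  // does in step() above):
  //   Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
  //   ((Neurons.Input) neurons[0]).setInput(seed, nums, numcats, cats);
  //   DeepLearningTask.step(seed, neurons, model_info, false /*training*/, null /*no response*/);
  //   double[] prediction = neurons[neurons.length - 1]._a.raw(); // class probabilities or the regression value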
}