package hex.deeplearning;
import hex.genmodel.utils.DistributionFamily;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters;
import hex.DataInfo;
import hex.FrameTask;
import water.DKV;
import water.H2O;
import water.IcedUtils;
import water.Key;
import water.util.Log;
import water.util.RandomUtils;
import java.util.Arrays;
import java.util.Random;
public class DeepLearningTask extends FrameTask<DeepLearningTask> {
final private boolean _training;
private DeepLearningModelInfo _localmodel; //per-node state (to be reduced)
private DeepLearningModelInfo _sharedmodel; //input/output
transient Neurons[] _neurons;
transient Random _dropout_rng;
int _chunk_node_count = 1;
/**
* Accessor to the object containing the (final) state of the Deep Learning model.
* Should only be queried after calling this.doAll(Frame training).
* @return "The" final model after one Map/Reduce iteration
*/
final public DeepLearningModelInfo model_info() {
assert(_sharedmodel != null);
return _sharedmodel;
}
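// Hedged usage sketch (driver side). Names such as jobKey, modelInfo, trainFrame and the literal
// fraction/iteration values are illustrative, not taken from this file:
//   DeepLearningTask dlt = new DeepLearningTask(jobKey, modelInfo, 1.0f /*fraction*/, 3 /*iteration*/);
//   dlt.doAll(trainFrame);                            // one Map/Reduce pass over the training frame
//   DeepLearningModelInfo updated = dlt.model_info(); // only valid after doAll() has returned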
/**
* Construct a DeepLearningTask for one Map/Reduce training iteration
* @param jobKey Key of the job that owns this task
* @param inputModel Initial model state
* @param fraction Fraction of the rows of the training frame to train with
* @param iteration Current Map/Reduce iteration
*/
public DeepLearningTask(Key jobKey, DeepLearningModelInfo inputModel, float fraction, int iteration){
this(jobKey,inputModel,fraction,iteration,null);
}
public DeepLearningTask(Key jobKey, DeepLearningModelInfo inputModel, float fraction, int iteration, H2O.H2OCountedCompleter cmp){
super(jobKey, inputModel.data_info(), inputModel.get_params()._seed + inputModel.get_processed_global(), iteration, inputModel.get_params()._sparse, cmp);
assert(inputModel.get_processed_local() == 0);
_training=true;
_sharedmodel = inputModel;
// if (model_info().get_params()._elastic_averaging)
// DKV.put(_sharedmodel.elasticAverageModelInfoKey(), _sharedmodel);
_useFraction=fraction;
_shuffle = model_info().get_params()._shuffle_training_data;
}
/**
* Transfer ownership from the global (shared) model to the local model that will be worked on
*/
@Override protected void setupLocal(){
assert(_localmodel == null);
super.setupLocal();
if (model_info().get_params()._elastic_averaging) {
//Load my local model from DKV, to continue training
_localmodel = DKV.getGet(_sharedmodel.localModelInfoKey(H2O.SELF));
if (_localmodel != null) {
if (!Arrays.equals(_localmodel.units, _sharedmodel.units)) {
_localmodel = IcedUtils.deepCopy(_sharedmodel);
} else {
//Make sure that the local model has the right global (shared) parameters after checkpoint restart!
_localmodel.set_params(_sharedmodel.get_params(), _sharedmodel._model_id);
_localmodel.set_processed_global(_sharedmodel.get_processed_global());
}
}
else {
// first time around - use the randomized initial weights and don't spread the shared (random) model
_localmodel = IcedUtils.deepCopy(_sharedmodel);
_sharedmodel = null;
}
} else {
_localmodel = _sharedmodel;
_sharedmodel = null;
}
_localmodel.set_processed_local(0);
}
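// Summary of the ownership transfer above (derived from the code, for readability):
//  - elastic averaging ON, local model found in DKV: keep training this node's _localmodel and keep
//    _sharedmodel around as the consensus model (used as _wEA/_bEA during fprop)
//  - elastic averaging ON, first pass on this node : start from a deep copy of the shared
//    (randomly initialized) weights and drop the shared reference
//  - elastic averaging OFF                         : train directly on the shared weights
//    (_localmodel and _sharedmodel were the same object; _sharedmodel is nulled)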
// Create local workspace (neurons) and link them to shared weights
@Override protected boolean chunkInit(){
if (_localmodel.get_processed_local() >= _useFraction * _fr.numRows())
return false;
_neurons = makeNeuronsForTraining(_localmodel);
_dropout_rng = RandomUtils.getRNG(System.currentTimeMillis());
return true;
}
/**
* Process one training row at a time (online learning)
* @param seed Seed is only used if reproducible mode is enabled
* @param r Row (must be dense for now)
* @param mb mini-batch internal index
*/
@Override public final void processRow(long seed, DataInfo.Row r, int mb) {
if (_localmodel.get_params()._reproducible) {
seed += _localmodel.get_processed_global(); //avoid periodicity
} else {
seed = _dropout_rng.nextLong(); // non-reproducible case - make a fast & good random number
}
_localmodel.checkMissingCats(r.binIds);
((Neurons.Input) _neurons[0]).setInput(seed, r.isSparse() ? r.numIds : null, r.numVals, r.nBins, r.binIds, mb);
}
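// Note on the call above: sparse rows hand the input layer their numerical column indices (r.numIds)
// together with the values, while dense rows pass null for the index array and rely on r.numVals alone.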
/**
* Apply the gradient to update the weights
* @param seed Seed is only used if reproducible mode is enabled
* @param responses Target values for the rows in this mini-batch
* @param offsets Per-row offsets (in link space), if any
* @param n number of trained examples in this last mini batch (usually == mini_batch_size, but can be less)
*/
@Override public void processMiniBatch(long seed, double[] responses, double[] offsets, int n) {
assert(_training);
if (_localmodel.get_params()._reproducible) {
seed += _localmodel.get_processed_global(); //avoid periodicity
} else {
seed = _dropout_rng.nextLong(); // non-reproducible case - make a fast & good random number
}
fpropMiniBatch(seed, _neurons, _localmodel, _localmodel.get_params()._elastic_averaging ? _sharedmodel : null, _training, responses, offsets, n);
bpropMiniBatch(_neurons, n);
}
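// Rough calling pattern, as driven by the FrameTask superclass (sketch; names and loop are illustrative):
//   for (int mb = 0; mb < n; ++mb)
//     processRow(seed, rows[mb], mb);              // load row mb into the input layer's slot mb
//   processMiniBatch(seed, responses, offsets, n); // forward + backward pass over the n loaded rows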
/**
* Helper to apply back-propagation without clearing out the gradients afterwards
* Used for gradient checking
* @param neurons Layer workspace created by makeNeuronsForTraining/makeNeuronsForTesting
* @param n number of trained examples in this last mini batch (usually == mini_batch_size, but can be less)
*/
static public void bpropMiniBatch(Neurons[] neurons, int n) {
neurons[neurons.length - 1].bpropOutputLayer(n);
for (int i = neurons.length - 2; i > 0; --i)
neurons[i].bprop(n);
}
/**
* Average the per-node models (one per node that actually trained on local data)
* This is a no-op between F/J worker threads that share the same local model
* @param other Another DeepLearningTask to reduce into this one
*/
@Override public void reduce(DeepLearningTask other) {
if (_localmodel != null && other._localmodel != null
&& other._localmodel.get_processed_local() > 0 //other DLTask was active (its model_info should be used for averaging)
&& other._localmodel != _localmodel) //other DLTask worked on a different model_info
{
// avoid adding remote model info to unprocessed local data, still random
// (this can happen if we have no chunks on the master node)
if (_localmodel.get_processed_local() == 0) {
_localmodel = other._localmodel;
_chunk_node_count = other._chunk_node_count;
} else {
_localmodel.add(other._localmodel);
_chunk_node_count += other._chunk_node_count;
}
if (other._localmodel.isUnstable()) _localmodel.setUnstable();
}
}
static long _lastWarn;
static long _warnCount;
/**
* After all reduces are done, the driver node calls this method to clean up
* This is only needed if we're not inside a DeepLearningTask2 (which will do the reduction between replicated data workers).
* So if replication is disabled, and every node works on partial data, then we have work to do here (model averaging).
*/
@Override protected void postGlobal(){
DeepLearningParameters dlp = _localmodel.get_params();
if (H2O.CLOUD.size() > 1 && !dlp._replicate_training_data) {
long now = System.currentTimeMillis();
if (_chunk_node_count < H2O.CLOUD.size() && (now - _lastWarn > 5000) && _warnCount < 3) {
// Log.info("Synchronizing across " + _chunk_node_count + " H2O node(s).");
Log.warn(H2O.CLOUD.size() - _chunk_node_count + " node(s) (out of " + H2O.CLOUD.size()
+ ") are not contributing to model updates. Consider setting replicate_training_data to true or using a larger training dataset (or fewer H2O nodes).");
_lastWarn = now;
_warnCount++;
}
}
// Check that we're not inside a DeepLearningTask2
assert ((!dlp._replicate_training_data || H2O.CLOUD.size() == 1) == !_run_local);
if (!_run_local) {
_localmodel.add_processed_global(_localmodel.get_processed_local()); //move local sample counts to global ones
_localmodel.set_processed_local(0L);
// model averaging
if (_chunk_node_count > 1)
_localmodel.div(_chunk_node_count);
if (_localmodel.get_params()._elastic_averaging)
_sharedmodel = DeepLearningModelInfo.timeAverage(_localmodel);
} else {
//Get ready for reduction in DeepLearningTask2
//Just swap the local and global models
_sharedmodel = _localmodel;
}
if (_sharedmodel == null)
_sharedmodel = _localmodel;
_localmodel = null;
}
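// Model-averaging arithmetic performed above when every node trained on partial data (sketch):
//   reduce()     : accumulates  W_sum = W_node1 + ... + W_nodek   and   _chunk_node_count = k
//   postGlobal() : divides      W_avg = W_sum / k                 via _localmodel.div(_chunk_node_count)
// With elastic averaging enabled, the averaged model is additionally run through
// DeepLearningModelInfo.timeAverage() to produce the new consensus model.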
public static Neurons[] makeNeuronsForTraining(final DeepLearningModelInfo minfo) {
return makeNeurons(minfo, true);
}
public static Neurons[] makeNeuronsForTesting(final DeepLearningModelInfo minfo) {
return makeNeurons(minfo, false);
}
// Helper
private static Neurons[] makeNeurons(final DeepLearningModelInfo minfo, boolean training) {
DataInfo dinfo = minfo.data_info();
final DeepLearningParameters params = minfo.get_params();
final int[] h = params._hidden;
Neurons[] neurons = new Neurons[h.length + 2]; // input + hidden + output
// input
neurons[0] = new Neurons.Input(params, minfo.units[0], dinfo);
// hidden
for( int i = 0; i < h.length + (params._autoencoder ? 1 : 0); i++ ) {
int n = params._autoencoder && i == h.length ? minfo.units[0] : h[i];
switch( params._activation ) {
case Tanh:
neurons[i+1] = new Neurons.Tanh(n);
break;
case TanhWithDropout:
neurons[i+1] = params._autoencoder && i == h.length ? new Neurons.Tanh(n) : new Neurons.TanhDropout(n);
break;
case Rectifier:
neurons[i+1] = new Neurons.Rectifier(n);
break;
case RectifierWithDropout:
neurons[i+1] = params._autoencoder && i == h.length ? new Neurons.Rectifier(n) : new Neurons.RectifierDropout(n);
break;
case Maxout:
neurons[i+1] = new Neurons.Maxout(params,(short)2,n);
break;
case MaxoutWithDropout:
neurons[i+1] = params._autoencoder && i == h.length ? new Neurons.Maxout(params,(short)2,n) : new Neurons.MaxoutDropout(params,(short)2,n);
break;
case ExpRectifier:
neurons[i+1] = new Neurons.ExpRectifier(n);
break;
case ExpRectifierWithDropout:
neurons[i+1] = params._autoencoder && i == h.length ? new Neurons.ExpRectifier(n) : new Neurons.ExpRectifierDropout(n);
break;
}
}
if(!params._autoencoder) {
if (minfo._classification && minfo.get_params()._distribution != DistributionFamily.modified_huber)
neurons[neurons.length - 1] = new Neurons.Softmax(minfo.units[minfo.units.length - 1]);
else
neurons[neurons.length - 1] = new Neurons.Linear();
}
//copy parameters from NN, and set previous/input layer links
for( int i = 0; i < neurons.length; i++ ) {
neurons[i].init(neurons, i, params, minfo, training);
neurons[i]._input = neurons[0];
}
// // debugging
// for (Neurons n : neurons) Log.info(n.toString());
return neurons;
}
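// Example of the resulting layout (sketch): params._hidden = {200, 200}, Rectifier activation,
// 3-class classification, no autoencoder =>
//   neurons = { Input, Rectifier(200), Rectifier(200), Softmax(3) }
// For an autoencoder, the loop above instead appends a reconstruction layer of size minfo.units[0]
// (same activation, no dropout) and the Softmax/Linear output layer is skipped.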
/**
* Forward propagation
* assumption: layer 0 has _a filled with (horizontalized categoricals) double values
* @param seed RNG seed for dropout
* @param neurons Layer workspace
* @param minfo Model state (weights/biases) used for forward propagation
* @param consensus_minfo Consensus model for elastic averaging (or null)
* @param training Whether this is a training pass (also computes the output-layer gradient)
* @param responses Target values for the rows in this mini-batch
* @param offset Per-row offsets (in link space), or null
* @param n Number of actually trained samples in this mini-batch
*/
public static void fpropMiniBatch(long seed, Neurons[] neurons, DeepLearningModelInfo minfo,
DeepLearningModelInfo consensus_minfo, boolean training, double[] responses, double[] offset, int n) {
// Forward propagation
for (int i=1; i<neurons.length; ++i)
neurons[i].fprop(seed, training, n);
// Add offset (in link space) if applicable
for (int mb=0; mb<n; ++mb) {
if (offset != null && offset[mb] > 0) {
assert (!minfo._classification); // Regression
double[] m = minfo.data_info()._normRespMul;
double[] s = minfo.data_info()._normRespSub;
double mul = m == null ? 1 : m[0];
double sub = s == null ? 0 : s[0];
neurons[neurons.length - 1]._a[mb].add(0, ((offset[mb] - sub) * mul));
}
if (training) {
// Compute the gradient at the output layer
// auto-encoder: pass a dummy "response" (ignored)
// otherwise: class label or regression target
neurons[neurons.length - 1].setOutputLayerGradient(responses[mb], mb, n);
// Elastic Averaging - set up helpers needed during back-propagation
if (consensus_minfo != null) {
for (int i = 1; i < neurons.length; i++) {
neurons[i]._wEA = consensus_minfo.get_weights(i - 1);
neurons[i]._bEA = consensus_minfo.get_biases(i - 1);
}
}
}
}
}
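// Offset handling above, written out (regression only): with a standardized response the stored
// normalization is applied to the offset as well, i.e.
//   a_out[mb] += (offset[mb] - _normRespSub[0]) * _normRespMul[0]
// so that the offset lives in the same (standardized, link-space) units as the network's output.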
}