All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.djl.training.optimizer.Adagrad Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
 * with the License. A copy of the License is located at
 *
 * http://aws.amazon.com/apache2.0/
 *
 * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
 * OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package ai.djl.training.optimizer;

import ai.djl.Device;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.internal.NDArrayEx;
import ai.djl.ndarray.types.SparseFormat;
import ai.djl.training.tracker.ParameterTracker;
import ai.djl.training.tracker.Tracker;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * {@code Adagrad} is an AdaGrad {@link Optimizer}.
 *
 * 

This class implements * *

Adagrad updates the weights using:
*
* \( grad = clip(grad * resc_grad, clip_grad) + wd * weight \)
* \( history += grad^2 \)
* \( weight -= lr * grad / (sqrt(history) + epsilon) \)
*
* where grad represents the gradient, wd represents weight decay, and lr represents learning rate. * * @see The D2L chapter on * Adagrad */ public class Adagrad extends Optimizer { private ParameterTracker learningRateTracker; private float epsilon; private Map> history; /** * Creates a new instance of {@code Adam} optimizer. * * @param builder the builder to create a new instance of {@code Adam} optimizer */ protected Adagrad(Builder builder) { super(builder); learningRateTracker = builder.learningRateTracker; epsilon = builder.epsilon; history = new ConcurrentHashMap<>(); } /** {@inheritDoc} */ @Override public void update(String parameterId, NDArray weight, NDArray grad) { int t = updateCount(parameterId); float newLearningRate = learningRateTracker.getNewValue(parameterId, t); float weightDecay = getWeightDecay(); if (Float.isNaN(newLearningRate) || Float.isNaN(weightDecay) || Float.isInfinite(newLearningRate) || Float.isInfinite(weightDecay)) { throw new IllegalStateException("learning rate or weight decay is nan or infinite"); } NDList inputs = new NDList( weight, grad.toSparse(SparseFormat.ROW_SPARSE), // FIXME: add regular adagrad MxNet withDefaultState( history, parameterId, weight.getDevice(), k -> weight.zerosLike())); NDList weights = new NDList(weight); NDArrayEx ex = weight.getNDArrayInternal(); // TODO: change to our own implementation ex.adagradUpdate( inputs, weights, newLearningRate, weightDecay, rescaleGrad, clipGrad, epsilon); } /** * Creates a builder to build a {@code Adam}. * * @return a new builder */ public static Builder builder() { return new Builder(); } /** The Builder to construct an {@link Adagrad} object. */ public static final class Builder extends OptimizerBuilder { private ParameterTracker learningRateTracker = Tracker.fixed(0.001f); private float epsilon = 1e-8f; Builder() {} /** {@inheritDoc} */ @Override protected Builder self() { return this; } /** * Sets the {@link ParameterTracker} for this optimizer. * * @param learningRateTracker the {@link ParameterTracker} to be set * @return this {@code Builder} */ public Builder optLearningRateTracker(ParameterTracker learningRateTracker) { this.learningRateTracker = learningRateTracker; return this; } /** * Sets \(epsilon\) - a small quantity for numerical stability. * * @param epsilon a small quantity for numerical stability * @return this {@code Builder} */ public Builder optEpsilon(float epsilon) { this.epsilon = epsilon; return this; } /** * Builds a {@link Adagrad} block. * * @return the {@link Adagrad} block */ public Adagrad build() { return new Adagrad(this); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy