deepboof.backward.DFunctionBatchNorm Maven / Gradle / Ivy

Go to download
/*
 * Copyright (c) 2016, Peter Abeles. All Rights Reserved.
 *
 * This file is part of DeepBoof
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package deepboof.backward;

import deepboof.Tensor;
import deepboof.forward.FunctionBatchNorm;

import java.util.List;

/**
 * Implementation of {@link FunctionBatchNorm Batch Normalization} for training networks.  This has distinctly
 * different behavior from forward only implementations.  In this learning implementation, statistics of
 * the input parameters are recomputed every time {@link #forward} is invoked.  While for the forward only
 * implementation those statistics are known already and not recomputed.
 *
 * The above described change in behavior also changes how parameters are specified.  mean and variance
 * are no longer input parameters but are computed dynamically in the forwards pass.
 *
 * NOTES:
 * 
 *     Variance is computed the unbiased formulation, i.e. divide by N-1 instead of N
 * 
 *
 * @author Peter Abeles
 */
public interface DFunctionBatchNorm>
        extends DBatchNorm {
    /**
     * Applies batch normalization to each variable in the input.
     *
     * There is only a parameter tensor if {@link #hasGammaBeta()} returns true.  If true then
     * gamma, and beta are encoded in a single tensor in an interleaved fashion (gamma, beta).
     *
     *      * Summary Table
     * -------------------------------------------------
     * Input   shape = (N, d[i], ... , d[k])
     * Output  shape = (N, d[i], ... , d[k])
     * Params  shape = (d[i], ... , d[k], 2)
     * -------------------------------------------------
     * N    = Size of mini-batch
     * d[i] = length of a dimension
     * 
     *
     * NOTE: Interleaving is used in the parameters instead of multiple tensors to improve memory locality,
     * which reduces cache misses.
     *
     * @param input Input tensor.  Tensor with a shape of (N, d[i], ... , d[k]), where N is mini-batch size
     * @param output Output tensor. Same shape as input tensor Modified.
     */
    @Override
    void forward(T input , T output );

    /**
     * See {@link #forward} for a description of parameters.
     *
     * @param parameters Variable tensor.  (d[i], ... , d[k], 2). Not modified.
     */
    @Override
    void setParameters(List parameters );
}