
/*
 *  ******************************************************************************
 *  *
 *  *
 *  * This program and the accompanying materials are made available under the
 *  * terms of the Apache License, Version 2.0 which is available at
 *  * https://www.apache.org/licenses/LICENSE-2.0.
 *  *
 *  *  See the NOTICE file distributed with this work for additional
 *  *  information regarding copyright ownership.
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 *  * License for the specific language governing permissions and limitations
 *  * under the License.
 *  *
 *  * SPDX-License-Identifier: Apache-2.0
 *  *****************************************************************************
 */

package org.deeplearning4j.nn.conf.layers;

import lombok.Data;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.api.TrainingConfig;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.dropout.IDropout;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.learning.config.IUpdater;
import org.nd4j.linalg.learning.regularization.Regularization;
import org.nd4j.shade.jackson.annotation.JsonTypeInfo;

import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.*;

/**
 * A neural network layer configuration. This is the abstract base class for all layer
 * configurations; concrete subclasses are built via their {@link Builder} and converted into
 * runtime layers via {@link #instantiate}.
 */

@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
@Data
@NoArgsConstructor
public abstract class Layer implements TrainingConfig, Serializable, Cloneable {

    protected String layerName;
    protected IDropout iDropout;
    protected List<LayerConstraint> constraints;


    public Layer(Builder builder) {
        this.layerName = builder.layerName;
        this.iDropout = builder.iDropout;
    }

    /**
     * Initialize the weight constraints. Should be called last, in the outer-most constructor
     */
    protected void initializeConstraints(Builder builder) {
        //Note: this has to be done AFTER all constructors have finished - otherwise the required
        // fields may not be set yet
        List<LayerConstraint> allConstraints = new ArrayList<>();
        if (builder.allParamConstraints != null && !initializer().paramKeys(this).isEmpty()) {
            for (LayerConstraint c : builder.allParamConstraints) {
                LayerConstraint c2 = c.clone();
                c2.setParams(new HashSet<>(initializer().paramKeys(this)));
                allConstraints.add(c2);
            }
        }

        if (builder.weightConstraints != null && !initializer().weightKeys(this).isEmpty()) {
            for (LayerConstraint c : builder.weightConstraints) {
                LayerConstraint c2 = c.clone();
                c2.setParams(new HashSet<>(initializer().weightKeys(this)));
                allConstraints.add(c2);
            }
        }

        if (builder.biasConstraints != null && !initializer().biasKeys(this).isEmpty()) {
            for (LayerConstraint c : builder.biasConstraints) {
                LayerConstraint c2 = c.clone();
                c2.setParams(new HashSet<>(initializer().biasKeys(this)));
                allConstraints.add(c2);
            }
        }
        if (!allConstraints.isEmpty()) {
            this.constraints = allConstraints;
        } else {
            this.constraints = null;
        }
        this.iDropout = builder.iDropout;
    }
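
    // Illustrative note (hypothetical example, not from the original source): for a layer whose
    // ParamInitializer reports parameter keys {"W", "b"}, a single constraint passed via
    // Builder#constrainAllParameters is cloned above and applied to both keys, whereas the same
    // constraint passed via Builder#constrainWeights would be cloned with params = {"W"} only.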

    /**
     * Reset the learning-related configuration of the layer to its defaults. When the layer is
     * subsequently instantiated with a global neural network configuration, the values specified
     * in that global configuration will be used. For internal use with the transfer learning API;
     * users should not need to call this method directly.
     */
    public void resetLayerDefaultConfig() {
        //clear the learning related params for all layers in the origConf and set to defaults
        this.iDropout = null;
        this.constraints = null;
    }
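
    // Sketch of the transfer-learning path that relies on this reset (hedged: the fluent calls
    // below are assumptions based on the DL4J transfer learning API, and are not part of this file):
    //
    //   FineTuneConfiguration ftc = new FineTuneConfiguration.Builder()
    //           .updater(new Adam(1e-4))      // new learning-related settings to apply globally
    //           .build();
    //   MultiLayerNetwork fineTuned = new TransferLearning.Builder(pretrainedNetwork)
    //           .fineTuneConfiguration(ftc)   // layer defaults are reset, then ftc is applied
    //           .build();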

    @Override
    public Layer clone() {
        try {
            Layer ret = (Layer) super.clone();
            //Let's check for any INDArray fields and dup them (in case the cloned layer will be used on
            // different threads/devices with CUDA - we don't want it being relocated constantly between devices)
            Class<?> c = getClass();
            while (c != Object.class) {
                Field[] fields = c.getDeclaredFields();
                for (Field f : fields) {
                    if (f.getType() == INDArray.class) {
                        f.setAccessible(true);
                        INDArray toClone;
                        try {
                            toClone = (INDArray) f.get(this);
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                        if (toClone != null) {
                            try {
                                f.set(this, toClone.dup());
                            } catch (Exception e) {
                                throw new RuntimeException(e);
                            }
                        }
                    }
                }

                c = c.getSuperclass();
            }

            return ret;
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
    }

    public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
                                                                Collection<TrainingListener> trainingListeners, int layerIndex, INDArray layerParamsView,
                                                                boolean initializeParams, DataType networkDataType);

    /**
     * @return The parameter initializer for this model
     */
    public abstract ParamInitializer initializer();

    /**
     * For a given type of input to this layer, what is the type of the output?
     *
     * @param layerIndex Index of the layer
     * @param inputType Type of input for the layer
     * @return Type of output from the layer
     * @throws IllegalStateException if input type is invalid for this layer
     */
    public abstract InputType getOutputType(int layerIndex, InputType inputType);
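
    // Hypothetical usage sketch (DenseLayer and the concrete nIn/nOut values are assumptions, not
    // part of this abstract class): the output type is derived from the input type and the layer's
    // own configuration.
    //
    //   Layer dense = new DenseLayer.Builder().nIn(784).nOut(100).build();
    //   InputType out = dense.getOutputType(0, InputType.feedForward(784));
    //   // for a feed-forward layer, 'out' would be InputType.feedForward(100)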

    /**
     * Set the nIn value (number of inputs, or input channels for CNNs) based on the given input
     * type
     *
     * @param inputType Input type for this layer
     * @param override If false: only set the nIn value if it's not already set. If true: set it
     * regardless of whether it's already set or not.
     * @throws IllegalStateException if input type is invalid for this layer
     */
    public abstract void setNIn(InputType inputType, boolean override);
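
    // Hypothetical sketch (the list-builder calls below are assumptions for illustration): in
    // typical use nIn is not set by hand. Setting an input type on the network configuration
    // causes DL4J to call setNIn on each layer, so unset nIn values are inferred automatically.
    //
    //   new NeuralNetConfiguration.Builder()
    //           .list()
    //           .layer(new DenseLayer.Builder().nOut(100).build())   // nIn left unset
    //           .setInputType(InputType.feedForward(784))            // infers nIn = 784
    //           .build();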


    /**
     * For the given type of input to this layer, what preprocessor (if any) is required?
     * Returns null if no preprocessor is required; otherwise returns an appropriate
     * {@link InputPreProcessor} for this layer, such as a
     * {@link org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor}
     *
     * @param inputType InputType to this layer
     * @return Null if no preprocessor is required, otherwise the type of preprocessor necessary
     * for this layer/input combination
     * @throws IllegalStateException if input type is invalid for this layer
     */
    public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType);

    /**
     * Get the regularization types (l1/l2/weight decay) for the given parameter. Different
     * parameters may have different regularization types.
     *
     * @param paramName Parameter name ("W", "b" etc)
     * @return Regularization types (if any) for the specified parameter
     */
    public abstract List<Regularization> getRegularizationByParam(String paramName);

    /**
     * Is the specified parameter a layerwise pretraining only parameter?
     * For example, visible bias params in an autoencoder (or, decoder params in a variational
     * autoencoder) aren't used during supervised backprop.
     * Layers (like DenseLayer, etc) with no pretrainable parameters will return false for all
     * (valid) inputs.
     *
     * @param paramName Parameter name/key
     * @return True if the parameter is for layerwise pretraining only, false otherwise
     */
    public abstract boolean isPretrainParam(String paramName);

    /**
     * Get the updater for the given parameter. Typically the same updater will be used for all
     * parameters, but this is not necessarily the case
     *
     * @param paramName Parameter name
     * @return IUpdater for the parameter
     */
    public IUpdater getUpdaterByParam(String paramName) {
        throw new UnsupportedOperationException(
                "Not supported: all layers with parameters should override this method");
    }

    @Override
    public void setDataType(DataType dataType) {
        //No-op for most layers
    }

    /**
     * This is a report of the estimated memory consumption for the given layer
     *
     * @param inputType Input type to the layer. Memory consumption is often a function of the
     * input type
     * @return Memory report for the layer
     */
    public abstract LayerMemoryReport getMemoryReport(InputType inputType);

    @SuppressWarnings("unchecked")
    @Getter
    @Setter
    public abstract static class Builder<T extends Builder<T>> {

        protected String layerName = null;
        protected List<LayerConstraint> allParamConstraints;
        protected List<LayerConstraint> weightConstraints;
        protected List<LayerConstraint> biasConstraints;
        protected IDropout iDropout;

        /**
         * Set the layer name. Assigning names allows easier differentiation between layers.
         */
        public T name(String layerName) {
            this.setLayerName(layerName);
            return (T) this;
        }

        /**
         * Dropout probability. This is the probability of retaining each input activation value
         * for a layer: dropOut(x) will keep an input activation with probability x, and set it to
         * 0 with probability 1-x.
         * dropOut(0.0) is a special value / special case: when set to 0.0, dropout is disabled
         * (not applied). Note that a dropout value of 1.0 is functionally equivalent to no
         * dropout: i.e., 100% probability of retaining each input activation.
         * When useDropConnect(boolean) is set to true (false by default), this method sets the
         * drop connect probability instead.
         * <p>
         * Note 1: Dropout is applied at training time only - and is automatically not applied at
         * test time (for evaluation, etc).
         * Note 2: This sets the probability per-layer. Care should be taken when setting lower
         * values for complex networks (too much information may be lost with aggressive (very
         * low) dropout values).
         * Note 3: Frequently, dropout is not applied to (or has a higher retain probability for)
         * input (first) layers. Dropout is also often not applied to output layers. This needs to
         * be handled MANUALLY by the user - set .dropOut(0) on those layers when using a global
         * dropout setting.
         * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as
         * described here: http://cs231n.github.io/neural-networks-2/
         * </p>
         *
         * @param inputRetainProbability Dropout probability (probability of retaining each input
         * activation value for a layer)
         * @see #dropOut(IDropout)
         */
        public T dropOut(double inputRetainProbability) {
            if (inputRetainProbability == 0.0) {
                return dropOut(null);
            }
            return dropOut(new Dropout(inputRetainProbability));
        }

        /**
         * Set the dropout for all layers in this network
         *
         * @param dropout Dropout, such as {@link Dropout},
         * {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout},
         * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc
         */
        public T dropOut(IDropout dropout) {
            this.setIDropout(dropout);
            return (T) this;
        }

        /**
         * Set constraints to be applied to this layer. Default: no constraints.
         * Constraints can be used to enforce certain conditions (non-negativity of parameters,
         * max-norm regularization, etc). These constraints are applied at each iteration, after
         * the parameters have been updated.
         *
         * @param constraints Constraints to apply to all parameters of this layer
         */
        public T constrainAllParameters(LayerConstraint... constraints) {
            this.setAllParamConstraints(Arrays.asList(constraints));
            return (T) this;
        }

        /**
         * Set constraints to be applied to bias parameters of this layer. Default: no constraints.
         * Constraints can be used to enforce certain conditions (non-negativity of parameters,
         * max-norm regularization, etc). These constraints are applied at each iteration, after
         * the parameters have been updated.
         *
         * @param constraints Constraints to apply to all bias parameters of this layer
         */
        public T constrainBias(LayerConstraint... constraints) {
            this.setBiasConstraints(Arrays.asList(constraints));
            return (T) this;
        }

        /**
         * Set constraints to be applied to the weight parameters of this layer. Default: no
         * constraints.
         * Constraints can be used to enforce certain conditions (non-negativity of parameters,
         * max-norm regularization, etc). These constraints are applied at each iteration, after
         * the parameters have been updated.
         *
         * @param constraints Constraints to apply to all weight parameters of this layer
         */
        public T constrainWeights(LayerConstraint... constraints) {
            this.setWeightConstraints(Arrays.asList(constraints));
            return (T) this;
        }

        public abstract <E extends Layer> E build();
    }
}
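
// Usage sketch (illustrative only; DenseLayer and MaxNormConstraint are concrete classes from
// other DL4J packages, assumed here for the example): a concrete subclass is configured through
// its Builder, which extends Layer.Builder and therefore inherits name(...), dropOut(...) and the
// constrain*(...) methods defined above.
//
//   Layer layer = new DenseLayer.Builder()
//           .name("hidden-1")
//           .nIn(784).nOut(256)
//           .dropOut(0.8)                                    // retain probability of 0.8
//           .constrainWeights(new MaxNormConstraint(2.0, 1)) // example max-norm weight constraint
//           .build();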



