All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.deeplearning4j.arbiter.BaseNetworkSpace Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta7
Show newest version
/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

package org.deeplearning4j.arbiter;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import org.deeplearning4j.arbiter.adapter.ActivationParameterSpaceAdapter;
import org.deeplearning4j.arbiter.conf.dropout.DropoutSpace;
import org.deeplearning4j.arbiter.layers.LayerSpace;
import org.deeplearning4j.arbiter.optimize.api.AbstractParameterSpace;
import org.deeplearning4j.arbiter.optimize.api.ParameterSpace;
import org.deeplearning4j.arbiter.optimize.parameter.FixedValue;
import org.deeplearning4j.arbiter.optimize.serde.jackson.JsonMapper;
import org.deeplearning4j.arbiter.optimize.serde.jackson.YamlMapper;
import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.*;
import org.deeplearning4j.nn.conf.distribution.Distribution;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.dropout.IDropout;
import org.deeplearning4j.nn.conf.stepfunctions.StepFunction;
import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.learning.config.IUpdater;
import org.nd4j.shade.jackson.annotation.JsonTypeInfo;
import org.nd4j.shade.jackson.core.JsonProcessingException;

import java.util.*;

/**
 * This is an abstract ParameterSpace for both MultiLayerNetworks (MultiLayerSpace) and ComputationGraph (ComputationGraphSpace)
 * 

* Functionality here should match {@link org.deeplearning4j.nn.conf.NeuralNetConfiguration.Builder} * * @param Type of network (MultiLayerNetwork or ComputationGraph) * @author Alex Black */ @EqualsAndHashCode(callSuper = false) @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") @Data public abstract class BaseNetworkSpace extends AbstractParameterSpace { protected Long seed; protected ParameterSpace optimizationAlgo; protected ParameterSpace activationFunction; protected ParameterSpace biasInit; protected ParameterSpace weightInit; protected ParameterSpace dist; protected ParameterSpace maxNumLineSearchIterations; protected ParameterSpace miniBatch; protected ParameterSpace minimize; protected ParameterSpace stepFunction; protected ParameterSpace l1; protected ParameterSpace l2; protected ParameterSpace l1Bias; protected ParameterSpace l2Bias; protected ParameterSpace updater; protected ParameterSpace biasUpdater; protected ParameterSpace weightNoise; private ParameterSpace dropout; protected ParameterSpace gradientNormalization; protected ParameterSpace gradientNormalizationThreshold; protected ParameterSpace convolutionMode; protected List layerSpaces = new ArrayList<>(); //NeuralNetConfiguration.ListBuilder/MultiLayerConfiguration.Builder options: protected ParameterSpace backpropType; protected ParameterSpace tbpttFwdLength; protected ParameterSpace tbpttBwdLength; protected ParameterSpace> allParamConstraints; protected ParameterSpace> weightConstraints; protected ParameterSpace> biasConstraints; protected int numEpochs = 1; static { JsonMapper.getMapper().registerSubtypes(ComputationGraphSpace.class, MultiLayerSpace.class); YamlMapper.getMapper().registerSubtypes(ComputationGraphSpace.class, MultiLayerSpace.class); } @SuppressWarnings("unchecked") protected BaseNetworkSpace(Builder builder) { this.seed = builder.seed; this.optimizationAlgo = builder.optimizationAlgo; this.activationFunction = builder.activationFunction; this.biasInit = builder.biasInit; this.weightInit = builder.weightInit; this.dist = builder.dist; this.maxNumLineSearchIterations = builder.maxNumLineSearchIterations; this.miniBatch = builder.miniBatch; this.minimize = builder.minimize; this.stepFunction = builder.stepFunction; this.l1 = builder.l1; this.l2 = builder.l2; this.l1Bias = builder.l1Bias; this.l2Bias = builder.l2Bias; this.updater = builder.updater; this.biasUpdater = builder.biasUpdater; this.weightNoise = builder.weightNoise; this.dropout = builder.dropout; this.gradientNormalization = builder.gradientNormalization; this.gradientNormalizationThreshold = builder.gradientNormalizationThreshold; this.convolutionMode = builder.convolutionMode; this.allParamConstraints = builder.allParamConstraints; this.weightConstraints = builder.weightConstraints; this.biasConstraints = builder.biasConstraints; this.backpropType = builder.backpropType; this.tbpttFwdLength = builder.tbpttFwdLength; this.tbpttBwdLength = builder.tbpttBwdLength; this.numEpochs = builder.numEpochs; } protected BaseNetworkSpace() { //Default constructor for Jackson json/yaml serialization } protected NeuralNetConfiguration.Builder randomGlobalConf(double[] values) { //Create MultiLayerConfiguration... NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder(); if (seed != null) builder.seed(seed); if (optimizationAlgo != null) builder.optimizationAlgo(optimizationAlgo.getValue(values)); if (activationFunction != null) builder.activation(activationFunction.getValue(values)); if (biasInit != null) builder.biasInit(biasInit.getValue(values)); if (weightInit != null) builder.weightInit(weightInit.getValue(values)); if (dist != null) builder.dist(dist.getValue(values)); if (maxNumLineSearchIterations != null) builder.maxNumLineSearchIterations(maxNumLineSearchIterations.getValue(values)); if (miniBatch != null) builder.miniBatch(miniBatch.getValue(values)); if (minimize != null) builder.minimize(minimize.getValue(values)); if (stepFunction != null) builder.stepFunction(stepFunction.getValue(values)); if (l1 != null) builder.l1(l1.getValue(values)); if (l2 != null) builder.l2(l2.getValue(values)); if (l1Bias != null) builder.l1Bias(l1Bias.getValue(values)); if (l2Bias != null) builder.l2Bias(l2Bias.getValue(values)); if (updater != null) builder.updater(updater.getValue(values)); if (biasUpdater != null) builder.biasUpdater(biasUpdater.getValue(values)); if (weightNoise != null) builder.weightNoise(weightNoise.getValue(values)); if (dropout != null) builder.dropOut(dropout.getValue(values)); if (gradientNormalization != null) builder.gradientNormalization(gradientNormalization.getValue(values)); if (gradientNormalizationThreshold != null) builder.gradientNormalizationThreshold(gradientNormalizationThreshold.getValue(values)); if (convolutionMode != null) builder.convolutionMode(convolutionMode.getValue(values)); if (allParamConstraints != null){ List c = allParamConstraints.getValue(values); if(c != null){ builder.constrainAllParameters(c.toArray(new LayerConstraint[c.size()])); } } if (weightConstraints != null){ List c = weightConstraints.getValue(values); if(c != null){ builder.constrainWeights(c.toArray(new LayerConstraint[c.size()])); } } if (biasConstraints != null){ List c = biasConstraints.getValue(values); if(c != null){ builder.constrainBias(c.toArray(new LayerConstraint[c.size()])); } } return builder; } @Override public List collectLeaves() { Map global = getNestedSpaces(); //Note: Results on previous line does NOT include the LayerSpaces, therefore we need to add these manually... //This is because the type is a list, not a ParameterSpace LinkedList stack = new LinkedList<>(); stack.add(this); for (LayerConf layerConf : layerSpaces) { LayerSpace ls = layerConf.getLayerSpace(); stack.addAll(ls.collectLeaves()); } List out = new ArrayList<>(); while (!stack.isEmpty()) { ParameterSpace next = stack.removeLast(); if (next.isLeaf()) { out.add(next); } else { Map m = next.getNestedSpaces(); ParameterSpace[] arr = m.values().toArray(new ParameterSpace[m.size()]); for (int i = arr.length - 1; i >= 0; i--) { stack.add(arr[i]); } } } return out; } @Override public boolean isLeaf() { return false; } @Override public void setIndices(int... indices) { throw new UnsupportedOperationException("Cannot set indices for non leaf"); } @Override public String toString() { StringBuilder sb = new StringBuilder(); for (Map.Entry e : getNestedSpaces().entrySet()) { sb.append(e.getKey()).append(": ").append(e.getValue()).append("\n"); } int i = 0; for (LayerConf conf : layerSpaces) { sb.append("Layer config ").append(i++).append(": (Number layers:").append(conf.numLayers) .append(", duplicate: ").append(conf.duplicateConfig).append("), ") .append(conf.layerSpace.toString()).append("\n"); } return sb.toString(); } @AllArgsConstructor @Data @NoArgsConstructor public static class LayerConf { protected LayerSpace layerSpace; protected String layerName; protected String[] inputs; protected ParameterSpace numLayers; protected boolean duplicateConfig; protected InputPreProcessor preProcessor; } @SuppressWarnings("unchecked") protected abstract static class Builder> { private Long seed; private ParameterSpace optimizationAlgo; private ParameterSpace activationFunction; private ParameterSpace biasInit; private ParameterSpace weightInit; private ParameterSpace dist; private ParameterSpace maxNumLineSearchIterations; private ParameterSpace miniBatch; private ParameterSpace minimize; private ParameterSpace stepFunction; private ParameterSpace l1; private ParameterSpace l2; private ParameterSpace l1Bias; private ParameterSpace l2Bias; private ParameterSpace updater; private ParameterSpace biasUpdater; private ParameterSpace weightNoise; private ParameterSpace dropout; private ParameterSpace gradientNormalization; private ParameterSpace gradientNormalizationThreshold; private ParameterSpace convolutionMode; private ParameterSpace> allParamConstraints; private ParameterSpace> weightConstraints; private ParameterSpace> biasConstraints; //NeuralNetConfiguration.ListBuilder/MultiLayerConfiguration.Builder options: private ParameterSpace backpropType; private ParameterSpace tbpttFwdLength; private ParameterSpace tbpttBwdLength; //Early stopping configuration / (fixed) number of epochs: private EarlyStoppingConfiguration earlyStoppingConfiguration; private int numEpochs = 1; protected boolean validateOutputLayerConfig = true; public T seed(long seed) { this.seed = seed; return (T) this; } public T optimizationAlgo(OptimizationAlgorithm optimizationAlgorithm) { return optimizationAlgo(new FixedValue<>(optimizationAlgorithm)); } public T optimizationAlgo(ParameterSpace parameterSpace) { this.optimizationAlgo = parameterSpace; return (T) this; } public T activation(Activation activationFunction) { return activation(new FixedValue<>(activationFunction)); } public T activation(ParameterSpace activationFunction) { return activationFn(new ActivationParameterSpaceAdapter(activationFunction)); } public T activationFn(ParameterSpace activationFunction) { this.activationFunction = activationFunction; return (T) this; } public T biasInit(double biasInit){ return biasInit(new FixedValue<>(biasInit)); } public T biasInit(ParameterSpace biasInit){ this.biasInit = biasInit; return (T) this; } public T weightInit(WeightInit weightInit) { return weightInit(new FixedValue<>(weightInit)); } public T weightInit(ParameterSpace weightInit) { this.weightInit = weightInit; return (T) this; } public T dist(Distribution dist) { return dist(new FixedValue<>(dist)); } public T dist(ParameterSpace dist) { this.dist = dist; return (T) this; } public T maxNumLineSearchIterations(int maxNumLineSearchIterations) { return maxNumLineSearchIterations(new FixedValue<>(maxNumLineSearchIterations)); } public T maxNumLineSearchIterations(ParameterSpace maxNumLineSearchIterations) { this.maxNumLineSearchIterations = maxNumLineSearchIterations; return (T) this; } public T miniBatch(boolean minibatch) { return miniBatch(new FixedValue<>(minibatch)); } public T miniBatch(ParameterSpace miniBatch) { this.miniBatch = miniBatch; return (T) this; } public T minimize(boolean minimize) { return minimize(new FixedValue<>(minimize)); } public T minimize(ParameterSpace minimize) { this.minimize = minimize; return (T) this; } public T stepFunction(StepFunction stepFunction) { return stepFunction(new FixedValue<>(stepFunction)); } public T stepFunction(ParameterSpace stepFunction) { this.stepFunction = stepFunction; return (T) this; } public T l1(double l1) { return l1(new FixedValue<>(l1)); } public T l1(ParameterSpace l1) { this.l1 = l1; return (T) this; } public T l2(double l2) { return l2(new FixedValue<>(l2)); } public T l2(ParameterSpace l2) { this.l2 = l2; return (T) this; } public T l1Bias(double l1Bias) { return l1Bias(new FixedValue<>(l1Bias)); } public T l1Bias(ParameterSpace l1Bias) { this.l1Bias = l1Bias; return (T) this; } public T l2Bias(double l2Bias) { return l2Bias(new FixedValue<>(l2Bias)); } public T l2Bias(ParameterSpace l2Bias) { this.l2Bias = l2Bias; return (T) this; } public T updater(IUpdater updater){ return updater(new FixedValue<>(updater)); } public T updater(ParameterSpace updater) { this.updater = updater; return (T) this; } public T biasUpdater(IUpdater biasUpdater){ return biasUpdater(new FixedValue<>(biasUpdater)); } public T biasUpdater(ParameterSpace biasUpdater){ this.biasUpdater = biasUpdater; return (T)this; } public T weightNoise(IWeightNoise weightNoise){ return weightNoise(new FixedValue<>(weightNoise)); } public T weightNoise(ParameterSpace weightNoise){ this.weightNoise = weightNoise; return (T) this; } public T dropOut(double dropout){ return idropOut(new Dropout(dropout)); } public T dropOut(ParameterSpace dropOut){ return idropOut(new DropoutSpace(dropOut)); } public T idropOut(IDropout idropOut){ return idropOut(new FixedValue<>(idropOut)); } public T idropOut(ParameterSpace idropOut){ this.dropout = idropOut; return (T) this; } public T gradientNormalization(GradientNormalization gradientNormalization) { return gradientNormalization(new FixedValue<>(gradientNormalization)); } public T gradientNormalization(ParameterSpace gradientNormalization) { this.gradientNormalization = gradientNormalization; return (T) this; } public T gradientNormalizationThreshold(double threshold) { return gradientNormalizationThreshold(new FixedValue<>(threshold)); } public T gradientNormalizationThreshold(ParameterSpace gradientNormalizationThreshold) { this.gradientNormalizationThreshold = gradientNormalizationThreshold; return (T) this; } public T convolutionMode(ConvolutionMode convolutionMode) { return convolutionMode(new FixedValue(convolutionMode)); } public T convolutionMode(ParameterSpace convolutionMode) { this.convolutionMode = convolutionMode; return (T) this; } public T backpropType(BackpropType backpropType) { return backpropType(new FixedValue<>(backpropType)); } public T backpropType(ParameterSpace backpropType) { this.backpropType = backpropType; return (T) this; } public T tbpttFwdLength(int tbpttFwdLength) { return tbpttFwdLength(new FixedValue<>(tbpttFwdLength)); } public T tbpttFwdLength(ParameterSpace tbpttFwdLength) { this.tbpttFwdLength = tbpttFwdLength; return (T) this; } public T tbpttBwdLength(int tbpttBwdLength) { return tbpttBwdLength(new FixedValue<>(tbpttBwdLength)); } public T tbpttBwdLength(ParameterSpace tbpttBwdLength) { this.tbpttBwdLength = tbpttBwdLength; return (T) this; } public T constrainWeights(LayerConstraint... constraints){ return constrainWeights(new FixedValue>(Arrays.asList(constraints))); } public T constrainWeights(ParameterSpace> constraints){ this.weightConstraints = constraints; return (T) this; } public T constrainBias(LayerConstraint... constraints){ return constrainBias(new FixedValue>(Arrays.asList(constraints))); } public T constrainBias(ParameterSpace> constraints){ this.biasConstraints = constraints; return (T) this; } public T constrainAllParams(LayerConstraint... constraints){ return constrainAllParams(new FixedValue>(Arrays.asList(constraints))); } public T constrainAllParams(ParameterSpace> constraints){ this.allParamConstraints = constraints; return (T) this; } public T validateOutputLayerConfig(boolean validate){ this.validateOutputLayerConfig = validate; return (T) this; } /** * Fixed number of training epochs. Default: 1 * Note if both EarlyStoppingConfiguration and number of epochs is present, early stopping will be used in preference. */ public T numEpochs(int numEpochs) { this.numEpochs = numEpochs; return (T) this; } public abstract E build(); } /** * Return a json configuration of this configuration space. * * @return */ public String toJson() { try { return JsonMapper.getMapper().writeValueAsString(this); } catch (JsonProcessingException e) { throw new RuntimeException(e); } } /** * Return a yaml configuration of this configuration space. * * @return */ public String toYaml() { try { return YamlMapper.getMapper().writeValueAsString(this); } catch (JsonProcessingException e) { throw new RuntimeException(e); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy