/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.layers.samediff;
import lombok.val;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.api.TrainingConfig;
import org.deeplearning4j.nn.conf.layers.samediff.SDVertexParams;
import org.deeplearning4j.nn.conf.layers.samediff.SameDiffVertex;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex;
import org.deeplearning4j.nn.params.SameDiffParamInitializer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.nd4j.autodiff.samediff.SDVariable;
import org.nd4j.autodiff.samediff.SameDiff;
import org.nd4j.autodiff.samediff.array.SingleThreadArrayHolder;
import org.nd4j.autodiff.samediff.internal.InferenceSession;
import org.nd4j.autodiff.samediff.internal.SessionMemMgr;
import org.nd4j.autodiff.util.SameDiffUtils;
import org.nd4j.common.base.Preconditions;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.common.primitives.Pair;
import java.util.*;
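/**
 * A graph vertex backed by a SameDiff graph, as defined by a {@link SameDiffVertex} configuration.
 * The SameDiff instance is built lazily on the first forward/backward pass, and its memory management
 * is routed through DL4J's workspaces via {@link LayerWorkspaceMgr}.
 */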
public class SameDiffGraphVertex extends BaseGraphVertex {
protected SameDiffVertex config;
protected SameDiff sameDiff;
protected SDVariable outputVar;
protected ExternalErrorsFunction fn;
protected String outputKey;
protected Map<String, SDVariable> inputVars;
protected INDArray[] maskArrays;
protected INDArray params;
protected INDArray gradients;
protected Map<String, INDArray> paramTable;
protected Map<String, INDArray> gradTable;
private MaskState currentMaskState;
private int minibatchSize;
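/**
 * The flat {@code paramsView} array is split and reshaped into per-parameter views by
 * {@link SameDiffParamInitializer}; when {@code initParams} is true, the vertex configuration
 * initializes those parameter views in place.
 */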
public SameDiffGraphVertex(SameDiffVertex config, ComputationGraph graph, String name, int vertexIndex,
INDArray paramsView, boolean initParams, DataType dataType) {
super(graph, name, vertexIndex, null, null, dataType);
this.config = config;
SDVertexParams vp = config.getVertexParams();
paramTable = SameDiffParamInitializer.getInstance().subsetAndReshape(vp.getParameterKeys(),
vp.getParamShapes(), paramsView, null, config);
if(initParams){
config.initializeParameters(paramTable);
}
this.params = paramsView;
}
@Override
public String toString() {
return null;
}
@Override
public boolean hasLayer() {
return false;
}
@Override
public Layer getLayer() {
return null;
}
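/**
 * Forward pass: lazily builds the SameDiff graph on first use, feeds the inputs (and masks) as
 * placeholders, and returns the vertex output duplicated into the DL4J activations workspace.
 */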
@Override
public INDArray doForward(boolean training, LayerWorkspaceMgr workspaceMgr) {
try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
if (sameDiff == null) {
doInit();
}
}
Map<String, INDArray> phMap = new HashMap<>();
config.validateInput(inputs);
for(int i = 0; i < inputs.length; i++){
    String name = config.getVertexParams().getInputs().get(i);
    phMap.put(name, inputs[i]);
    //Always provide a mask placeholder; use an all-ones mask if none was set for this input
    if(maskArrays != null && maskArrays[i] != null){
        phMap.put(name + "_mask", maskArrays[i]);
    } else {
        phMap.put(name + "_mask", createMask(dataType, inputs[i].shape()));
    }
}
//Configure memory management for SameDiff instance - use DL4J workspaces
String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.FF_WORKING_MEM);
String wsNameOutput = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS);
WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.FF_WORKING_MEM);
WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS);
boolean actScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATIONS);
Preconditions.checkState(actScopedOut || wsNameOutput != null, "Activations must have a workspace or be scoped out");
SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameOutput, confWorking, confOutput);
Map<Long, InferenceSession> sessionMap = sameDiff.getSessions();
if(!sessionMap.containsKey(Thread.currentThread().getId())){
    sessionMap.put(Thread.currentThread().getId(), SameDiff.getInferenceFactory().create(sameDiff));
}
sessionMap.get(Thread.currentThread().getId()).setMmgr(mmgr);
//Associate the current parameter views with the SameDiff variables before running the graph
for(Map.Entry<String, INDArray> e : paramTable.entrySet()){
    sameDiff.associateArrayWithVariable(e.getValue(), e.getKey());
}
INDArray result = sameDiff.output(phMap, outputKey).get(outputKey);
//Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
sameDiff.clearPlaceholders(true);
sameDiff.clearOpInputs();
return workspaceMgr.dup(ArrayType.ACTIVATIONS, result);
}
/**
 * Backward pass: feeds the inputs, masks and the external error (epsilon) as placeholders, calculates
 * gradients with respect to parameters and inputs, copies the parameter gradients into the DL4J gradient
 * views, and returns the input gradients.
 */
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
Gradient g = new DefaultGradient();
try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
if (sameDiff == null) {
doInit();
}
}
List<String> inputNames = config.getVertexParams().getInputs();
if(!sameDiff.hasGradientFunction()) {
//Create when scoped out, to ensure any arrays are not in WS
String[] inArr = inputNames.toArray(new String[inputNames.size()]);
sameDiff.createGradFunction(inArr);
}
config.validateInput(inputs);
//Configure memory management for SameDiff instance - use DL4J workspaces
Map<Long, InferenceSession> sessionMap = sameDiff.getFunction("grad").getSessions();
if(!sessionMap.containsKey(Thread.currentThread().getId())){
sessionMap.put(Thread.currentThread().getId(), SameDiff.getInferenceFactory().create(sameDiff.getFunction("grad")));
}
String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.BP_WORKING_MEM);
String wsNameActGrad = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATION_GRAD);
WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.BP_WORKING_MEM);
WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATION_GRAD);
boolean actGradScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATION_GRAD);
Preconditions.checkState(actGradScopedOut || wsNameActGrad != null, "Activation gradients must have a workspace or be scoped out");
SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameActGrad, confWorking, confOutput);
sessionMap.get(Thread.currentThread().getId()).setMmgr(mmgr);
Map<String, INDArray> phMap = new HashMap<>();
List<String> inputs = config.getVertexParams().getInputs();
int i=0;
for(String s : inputs){
phMap.put(s, this.inputs[i++]);
}
for(int j = 0; j < inputs.size(); j++){
    //Always provide a mask placeholder; use an all-ones mask if none was set for this input
    if(maskArrays != null && maskArrays[j] != null){
        phMap.put(inputs.get(j) + "_mask", maskArrays[j]);
    } else {
        phMap.put(inputs.get(j) + "_mask", createMask(dataType, this.inputs[j].shape()));
    }
}
//Provide the external error (dL/dOutput) via the gradient placeholder
phMap.put(fn.getGradPlaceholderName(), epsilon);
List<String> required = new ArrayList<>(config.getVertexParams().getInputs()); //Ensure that the input placeholder gradients are calculated
required.addAll(paramTable.keySet());
Map<String, INDArray> gradsMap = sameDiff.calculateGradients(phMap, required);
for(String s : paramTable.keySet() ){
INDArray sdGrad = gradsMap.get(s);
INDArray dl4jGrad = gradTable.get(s);
dl4jGrad.assign(sdGrad); //TODO OPTIMIZE THIS
g.gradientForVariable().put(s, dl4jGrad);
}
INDArray[] dLdIns = new INDArray[inputs.size()];
String fnName = fn.getGradPlaceholderName();
for(int j = 0; j < inputs.size(); j++){
    String name = inputs.get(j);
    dLdIns[j] = sameDiff.grad(name).getArr();
    if(dLdIns[j] == null && fnName.equals(sameDiff.grad(name).name())){
        //Edge case for identity-like vertices: the input gradient is the external error placeholder itself
        dLdIns[j] = epsilon;
    }
    dLdIns[j] = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIns[j]);
}
//Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
sameDiff.clearPlaceholders(true);
sameDiff.clearOpInputs();
return new Pair<>(g, dLdIns);
}
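/**
 * Splits and reshapes the flat backprop gradient view into per-parameter gradient views,
 * which doBackward() fills on each backward pass.
 */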
@Override
public void setBackpropGradientsViewArray(INDArray backpropGradientsViewArray) {
SDVertexParams vp = config.getVertexParams();
gradTable = SameDiffParamInitializer.getInstance().subsetAndReshape(vp.getParameterKeys(),
vp.getParamShapes(), backpropGradientsViewArray, null, config);
}
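/**
 * Stores the mask arrays and mask state for use during the forward/backward passes, then delegates
 * mask propagation to the vertex configuration.
 */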
@Override
public Pair<INDArray, MaskState> feedForwardMaskArrays(INDArray[] maskArrays, MaskState currentMaskState, int minibatchSize) {
this.maskArrays = maskArrays;
this.currentMaskState = currentMaskState;
return config.feedForwardMaskArrays(maskArrays, currentMaskState, minibatchSize);
}
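/**
 * Lazily builds the SameDiff graph: creates a placeholder per input (batch dimension set to -1) plus a
 * mask placeholder, creates a variable per parameter, delegates the graph definition to the configuration's
 * defineVertex(), and attaches an ExternalErrorsFunction so the epsilon (dL/dOutput) can be fed in during backprop.
 */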
protected void doInit(){
try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
sameDiff = SameDiff.create();
//Use SingleThreadArrayHolder so we can use views (also don't need multithreading here, DL4J is not thread safe)
sameDiff.setArrayHolders(new SingleThreadArrayHolder(), new SingleThreadArrayHolder(), false);
inputVars = new LinkedHashMap<>();
LinkedHashMap<String, SDVariable> maskVars = new LinkedHashMap<>();
int i=0;
for(String s : config.getVertexParams().getInputs()){
val inputShape = inputs[i++].shape().clone();
INDArray maskTemp = createMask(dataType, inputShape);
inputShape[0] = -1;
SDVariable inputVar = sameDiff.placeHolder(s, dataType, inputShape);
inputVars.put(s, inputVar);
long[] maskShape = maskTemp.shape().clone();
maskShape[0] = -1;
SDVariable maskVar = sameDiff.placeHolder(s + "_mask", maskTemp.dataType(), maskShape);
maskVars.put(s, maskVar);
}
Map<String, long[]> paramShapes = config.getVertexParams().getParamShapes();
Map<String, SDVariable> params = new LinkedHashMap<>();
for (String s : paramShapes.keySet()) {
val ps = paramShapes.get(s);
SDVariable v = sameDiff.var(s, dataType, ps);
params.put(s, v);
}
SDVariable layerOutput = config.defineVertex(sameDiff, inputVars, params, maskVars);
Preconditions.checkNotNull(layerOutput, "Invalid output: layer output is null");
outputVar = layerOutput;
for (Map.Entry<String, INDArray> e : paramTable.entrySet()) {
sameDiff.associateArrayWithVariable(e.getValue(), sameDiff.getVariable(e.getKey()));
}
//Define the function for external errors:
fn = SameDiffUtils.externalErrors(sameDiff, null, layerOutput);
fn.outputVariable();
this.outputKey = outputVar.name();
}
}
@Override
public void clearVertex() {
clear();
}
@Override
public Map<String, INDArray> paramTable(boolean backpropOnly) {
return paramTable;
}
@Override
public TrainingConfig getConfig() {
return config;
}
@Override
public INDArray params() {
return params;
}
@Override
public INDArray getGradientsViewArray() {
return gradients;
}
//Package private
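//Creates an all-ones mask for the given input shape: [mb, 1] for FF input, [mb, seqLength] for RNN, [mb, 1, 1, 1] for CNN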
static INDArray createMask(DataType dataType, long[] shape){
switch (shape.length){
case 2: // FF-Type input
return Nd4j.ones(dataType,shape[0], 1);
case 3: // RNN-Type input
return Nd4j.ones(dataType, shape[0], shape[2]);
case 4: //CNN input
return Nd4j.ones(dataType, shape[0], 1, 1, 1);
default:
Preconditions.throwEx("Can not create all-ones-mask for given input shape %s.", Arrays.toString(shape));
return null;
}
}
}