/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.layers.ocnn;
import lombok.Getter;
import lombok.Setter;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.BaseOutputLayer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.activations.impl.ActivationReLU;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Broadcast;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.ops.transforms.Transforms;
import org.nd4j.common.primitives.Pair;
import static org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer.R_KEY;
import static org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer.V_KEY;
import static org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer.W_KEY;
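/**
 * Output layer implementing the one-class neural network (OC-NN) objective for
 * unsupervised anomaly detection, as described in Chalapathy, Menon and Chawla,
 * "Anomaly Detection using One-Class Neural Networks" (2018),
 * https://arxiv.org/abs/1802.06360.
 *
 * The layer ignores labels: each example's score is yHat = g(x * V) * w, where g is the
 * configured activation function, and the loss pushes scores above a threshold r that is
 * re-estimated once per epoch as the nu-quantile of a window of recent scores.
 *
 * A minimal configuration sketch (builder method names follow the corresponding
 * {@link org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer} configuration class and
 * should be treated as assumptions if your version differs):
 * <pre>{@code
 * MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
 *         .list()
 *         .layer(new org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer.Builder()
 *                 .nIn(numFeatures)       //number of input features
 *                 .nu(0.1)                //quantile parameter controlling the r threshold
 *                 .hiddenLayerSize(32)    //size of the V projection
 *                 .build())
 *         .build();
 * }</pre>
 */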
public class OCNNOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer> {
@Setter
@Getter
private IActivation activation = new ActivationReLU();
private static IActivation relu = new ActivationReLU();
private ILossFunction lossFunction;
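//rolling window of recent output scores; once per epoch, r is re-estimated as the
//nu-quantile of this window. batchWindowSizeIndex tracks the current write position.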
private int batchWindowSizeIndex;
private INDArray window;
public OCNNOutputLayer(NeuralNetConfiguration conf, DataType dataType) {
super(conf, dataType);
this.lossFunction = new OCNNLossFunction();
org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = (org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf.getLayer();
ocnnOutputLayer.setLossFn(this.lossFunction);
}
@Override
public void setLabels(INDArray labels) {
//no-op
}
/** Compute score after labels and input have been set.
* @param fullNetRegTerm Regularization score term for the entire network
* @param training whether score should be calculated at train or test time (this affects things like application of
* dropout, etc)
* @return score (loss function)
*/
@Override
public double computeScore(double fullNetRegTerm, boolean training, LayerWorkspaceMgr workspaceMgr) {
if (input == null)
throw new IllegalStateException("Cannot calculate score without input and labels " + layerId());
INDArray preOut = preOutput2d(training, workspaceMgr);
ILossFunction lossFunction = layerConf().getLossFn();
double score = lossFunction.computeScore(getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM), preOut,
layerConf().getActivationFn(), maskArray,false);
if(conf().isMiniBatch())
score /= getInputMiniBatchSize();
score += fullNetRegTerm;
this.score = score;
return score;
}
@Override
public boolean needsLabels() {
return false;
}
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
Pair<Gradient, INDArray> pair = getGradientsAndDelta(preOutput2d(true, workspaceMgr), workspaceMgr); //Returns Gradient and delta^(this), not Gradient and epsilon^(this-1)
//nIn of this layer, e.g. 150
long inputShape = (( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) this.getConf().getLayer()).getNIn();
INDArray delta = pair.getSecond();
//epsilon for the layer below: shape [minibatch, nIn] after the transpose, e.g. 4 x 150
INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), new long[]{inputShape, delta.length()}, 'f');
epsilonNext = epsilonNext.assign(delta.broadcast(epsilonNext.shape())).transpose();
//Normally we would clear weightNoiseParams here - but we want to reuse them for forward + backward + score
// So this is instead done in MultiLayerNetwork/CompGraph backprop methods
return new Pair<>(pair.getFirst(), epsilonNext);
}
/** Returns tuple: {Gradient, Delta} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
ILossFunction lossFunction = layerConf().getLossFn();
INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);
org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer conf = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf().getLayer();
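//Maintain the quantile estimate for the r threshold:
// - during the first epoch, accumulate the current output scores into the window
// - when the epoch counter advances, set r to the nu-quantile of the window and reset the index
// - otherwise, keep accumulating scores for the current epoch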
if(conf.getLastEpochSinceRUpdated() == 0 && epochCount == 0) {
INDArray currentR = doOutput(false,workspaceMgr);
if(window == null) {
window = Nd4j.createUninitializedDetached(preOut.dataType(), conf.getWindowSize()).assign(0.0);
}
if(batchWindowSizeIndex < window.length() - currentR.length()) {
window.put(new INDArrayIndex[]{NDArrayIndex.interval(batchWindowSizeIndex,batchWindowSizeIndex + currentR.length())},currentR);
}
else if(batchWindowSizeIndex < window.length()) {
int windowIdx = (int) window.length() - batchWindowSizeIndex;
window.put(new INDArrayIndex[]{NDArrayIndex.interval(window.length() - windowIdx,window.length())},currentR.get(NDArrayIndex.interval(0,windowIdx)));
}
batchWindowSizeIndex += currentR.length();
conf.setLastEpochSinceRUpdated(epochCount);
}
else if(conf.getLastEpochSinceRUpdated() != epochCount) {
double percentile = window.percentileNumber(100.0 * conf.getNu()).doubleValue();
getParam(R_KEY).putScalar(0,percentile);
conf.setLastEpochSinceRUpdated(epochCount);
batchWindowSizeIndex = 0;
}
else {
//track a running window of scores per minibatch within the epoch;
//the r value quantile is recalculated once the epoch changes
INDArray currentR = doOutput(false,workspaceMgr);
window.put(new INDArrayIndex[]{NDArrayIndex.interval(batchWindowSizeIndex,batchWindowSizeIndex + currentR.length())},currentR);
}
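//Gradients of the OC-NN objective (see OCNNLossFunction.computeScore):
// dL/dw = (1/nu) * mean(-g(xV) * delta) + w
// dL/dV = (1/nu) * mean(-w * g'(xV) * delta, outer product with x) + V
// dL/dr = (1/nu) * mean(delta) - 1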
Gradient gradient = new DefaultGradient();
INDArray vGradView = gradientViews.get(V_KEY);
double oneDivNu = 1.0 / layerConf().getNu();
INDArray xTimesV = input.mmul(getParam(V_KEY));
INDArray derivW = layerConf().getActivationFn().getActivation(xTimesV.dup(),true).negi();
INDArray w = getParam(W_KEY);
derivW = derivW.muliColumnVector(delta).mean(0).muli(oneDivNu).addi(w.reshape(w.length()));
gradient.setGradientFor(W_KEY,gradientViews.get(W_KEY).assign(derivW));
//dG -> derivative of the activation function (scaled by -w below)
INDArray firstVertDerivV = layerConf().getActivationFn()
.backprop(xTimesV.dup(),Nd4j.ones(input.dataType(), xTimesV.shape()))
.getFirst().muliRowVector(getParam(W_KEY).neg());
firstVertDerivV = firstVertDerivV.muliColumnVector(delta)
.reshape('f',input.size(0),1,layerConf().getHiddenSize());
INDArray secondTermDerivV = input.reshape('f',
input.size(0),getParam(V_KEY).size(0),1);
long[] shape = new long[firstVertDerivV.shape().length];
for(int i = 0; i < firstVertDerivV.rank(); i++) {
shape[i] = Math.max(firstVertDerivV.size(i),secondTermDerivV.size(i));
}
INDArray firstDerivVBroadcast = Nd4j.createUninitialized(input.dataType(), shape);
INDArray mulResult = firstVertDerivV.broadcast(firstDerivVBroadcast);
int[] bcDims = {0,1};
Broadcast.mul(mulResult, secondTermDerivV, mulResult, bcDims);
INDArray derivV = mulResult
.mean(0).muli(oneDivNu).addi(getParam(V_KEY));
gradient.setGradientFor(V_KEY,vGradView.assign(derivV));
INDArray derivR = Nd4j.scalar(delta.meanNumber()).muli(oneDivNu).addi(-1);
gradient.setGradientFor(R_KEY,gradientViews.get(R_KEY).assign(derivR));
clearNoiseWeightParams();
delta = backpropDropOutIfPresent(delta);
return new Pair<>(gradient, delta);
}
@Override
public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) {
this.input = input;
return doOutput(training,workspaceMgr);
}
/**{@inheritDoc}
*/
@Override
public double f1Score(INDArray examples, INDArray labels) {
throw new UnsupportedOperationException();
}
@Override
public Type type() {
return Type.FEED_FORWARD;
}
@Override
protected INDArray preOutput2d(boolean training, LayerWorkspaceMgr workspaceMgr) {
return doOutput(training,workspaceMgr);
}
@Override
protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) {
return labels;
}
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
return doOutput(training,workspaceMgr);
}
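/**
 * Forward pass: computes the per-example score yHat = g(x * V) * w, where V is the
 * [nIn, hiddenSize] projection matrix, g the configured activation function, and w the
 * hidden-to-output weight vector. The [minibatch]-shaped result is also stored as the
 * labels field, so the unsupervised loss can be computed without external labels.
 */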
private INDArray doOutput(boolean training,LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray w = getParamWithNoise(W_KEY,training,workspaceMgr);
INDArray v = getParamWithNoise(V_KEY,training,workspaceMgr);
applyDropOutIfNecessary(training, workspaceMgr);
INDArray first = Nd4j.createUninitialized(input.dataType(), input.size(0), v.size(1));
input.mmuli(v, first);
INDArray act2d = layerConf().getActivationFn().getActivation(first, training);
INDArray output = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.dataType(), input.size(0));
act2d.mmuli(w.reshape(w.length()), output);
this.labels = output;
return output;
}
/**Compute the score for each example individually, after labels and input have been set.
*
* @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include regularization)
* @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith example
*/
@Override
public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) {
//For RNN: need to sum up the score over each time step before returning.
if (input == null || labels == null)
throw new IllegalStateException("Cannot calculate score without input and labels " + layerId());
INDArray preOut = preOutput2d(false, workspaceMgr);
ILossFunction lossFunction = layerConf().getLossFn();
INDArray scoreArray =
lossFunction.computeScoreArray(getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM), preOut,
layerConf().getActivationFn(), maskArray);
INDArray summedScores = scoreArray.sum(1);
if (fullNetRegTerm != 0.0) {
summedScores.addi(fullNetRegTerm);
}
return summedScores;
}
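/**
 * Loss function implementing the OC-NN objective:
 * L = 0.5 * ||w||^2 + 0.5 * ||V||^2 + (1/nu) * mean(max(0, r - yHat)) - r
 * where yHat is the layer's output score and r is the learned decision threshold.
 */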
public class OCNNLossFunction implements ILossFunction {
@Override
public double computeScore(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask, boolean average) {
double wSum = Transforms.pow(getParam(W_KEY),2).sumNumber().doubleValue() * 0.5;
double vSum = Transforms.pow(getParam(V_KEY),2).sumNumber().doubleValue() * 0.5;
org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = (org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf().getLayer();
INDArray rSubPre = preOutput.rsub(getParam(R_KEY).getDouble(0));
INDArray rMeanSub = relu.getActivation(rSubPre,true);
double rMean = rMeanSub.meanNumber().doubleValue();
double rSum = getParam(R_KEY).getDouble(0);
double nuDiv = (1 / ocnnOutputLayer.getNu()) * rMean;
double lastTerm = -rSum;
return (wSum + vSum + nuDiv + lastTerm);
}
@Override
public INDArray computeScoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
INDArray r = getParam(R_KEY).sub(preOutput);
return r;
}
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
INDArray preAct = preOutput.rsub(getParam(R_KEY).getDouble(0));
INDArray target = relu.backprop(preAct,Nd4j.ones(preOutput.dataType(), preAct.shape())).getFirst();
return target;
}
@Override
public Pair<Double, INDArray> computeGradientAndScore(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask, boolean average) {
//TODO: probably a more efficient way to do this...
return new Pair<>(computeScore(labels, preOutput, activationFn, mask, average),
computeGradient(labels, preOutput, activationFn, mask));
}
@Override
public String name() {
return "OCNNLossFunction";
}
}
}