/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.layers.convolution;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.layers.Convolution1D;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.ConvolutionUtils;
import org.nd4j.common.base.Preconditions;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1D;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1DDerivative;
import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv1DConfig;
import org.nd4j.linalg.api.ops.impl.layers.convolution.config.PaddingMode;
import org.nd4j.linalg.api.shape.LongShapeDescriptor;
import org.nd4j.linalg.factory.Broadcast;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.common.primitives.Pair;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import java.util.Arrays;
import java.util.List;
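
/**
 * 1D (temporal) convolution layer implementation. Accepts rank-3 input in either NCW
 * ([minibatchSize, channels, sequenceLength]) or NWC ([minibatchSize, sequenceLength, channels])
 * format, as configured via the layer's {@link RNNFormat}; computation is delegated to the ND4J
 * {@link Conv1D} and {@link Conv1DDerivative} ops, with NWC input permuted to NCW internally.
 * <p>
 * A minimal configuration sketch, assuming typical DL4J builder usage (the parameter values are
 * illustrative only):
 * <pre>{@code
 * org.deeplearning4j.nn.conf.layers.Convolution1DLayer conf1d =
 *         new org.deeplearning4j.nn.conf.layers.Convolution1DLayer.Builder(3) // kernel size 3
 *                 .nIn(16)    // input channels
 *                 .nOut(32)   // output channels (number of filters)
 *                 .stride(1)
 *                 .build();
 * }</pre>
 */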
public class Convolution1DLayer extends ConvolutionLayer {
public Convolution1DLayer(NeuralNetConfiguration conf, DataType dataType) {
super(conf, dataType);
}
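
    /**
     * Computes the backprop gradient for this layer: applies the activation function derivative
     * to the incoming epsilon, then runs {@link Conv1DDerivative} to fill the weight (and,
     * where present, bias) gradient views and the epsilon to pass to the layer below.
     */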
@Override
    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
if (epsilon.rank() != 3)
throw new DL4JInvalidInputException("Got rank " + epsilon.rank()
+ " array as epsilon for Convolution1DLayer backprop with shape "
+ Arrays.toString(epsilon.shape())
+ ". Expected rank 3 array with shape [minibatchSize, features, length]. " + layerId());
        Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
IActivation afn = layerConf().getActivationFn();
INDArray delta = afn.backprop(fwd.getFirst(), epsilon).getFirst(); //TODO handle activation function params
org.deeplearning4j.nn.conf.layers.Convolution1DLayer c = layerConf();
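        // Build the 1D convolution config; the computation always runs in NCW format, so NWC
        // input is permuted before the op executes (see below)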
Conv1DConfig conf = Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(ConvolutionUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY),
RNNFormat.NCW);
INDArray[] inputArrs;
INDArray[] outputArrs;
INDArray wg = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
getRnnDataFormat());
        // Note: "input" on the next line still refers to the layer's input field; the cast local
        // declared immediately after shadows it for the rest of the method
        INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
        INDArray input = this.input.castTo(dataType);
        if(layerConf().getRnnDataFormat() == RNNFormat.NWC) {
            input = input.permute(0, 2, 1); //NWC to NCW
        }
if(layerConf().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputArrs = new INDArray[]{input, w, b, delta};
INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
bg = bg.reshape(bg.length());
outputArrs = new INDArray[]{epsOut, wg, bg};
} else {
inputArrs = new INDArray[]{input, w, delta};
outputArrs = new INDArray[]{epsOut, wg};
}
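        // Single fused op computes all backprop outputs: the epsilon for the layer below plus the
        // weight (and bias) gradients, written directly into the gradient views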
Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
Nd4j.exec(op);
Gradient retGradient = new DefaultGradient();
if(layerConf().hasBias()) {
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
}
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), 'c');
if (getRnnDataFormat() == RNNFormat.NWC) {
epsOut = epsOut.permute(0, 2, 1);
}
return new Pair<>(retGradient, epsOut);
}
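
    /**
     * Returns the 2D-convolution-style preoutput: computes the rank-3 preoutput, then reshapes it
     * to rank 4 by appending a size-1 trailing dimension, for compatibility with the 2D superclass.
     */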
@Override
    protected Pair<INDArray, INDArray> preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
        Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
        INDArray p3d = preOutput.getFirst();
        INDArray p = p3d.reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
preOutput.setFirst(p);
return preOutput;
}
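
    /**
     * Computes the layer preoutput (activations before the activation function is applied) by
     * executing the ND4J {@link Conv1D} op. NWC input is permuted to NCW for the computation, and
     * the output is permuted back afterwards. The second element of the returned pair is null.
     */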
@Override
    protected Pair<INDArray, INDArray> preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray input = this.input.castTo(dataType);
        if(layerConf().getRnnDataFormat() == RNNFormat.NWC) {
            if(input.rank() == 3)
                input = input.permute(0, 2, 1); //NWC to NCW
            else if(input.rank() == 4) {
                input = input.permute(0, 2, 3, 1); //move the channels dimension for rank-4 input
            }
        }
org.deeplearning4j.nn.conf.layers.Convolution1DLayer c = layerConf();
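        // Same NCW conv config as in backpropGradient; input has already been permuted to NCW
        // above where necessary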
Conv1DConfig conf = Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(ConvolutionUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY)
,RNNFormat.NCW);
INDArray[] inputs;
if(layerConf().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputs = new INDArray[]{input, w, b};
} else {
inputs = new INDArray[]{input, w};
}
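        // Create the op, calculate its output shape, and pre-allocate the output buffer before executing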
Conv1D op = new Conv1D(inputs, null, conf);
        List<LongShapeDescriptor> outShape = op.calculateOutputShape();
op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
Nd4j.exec(op);
INDArray output = op.getOutputArgument(0);
if(getRnnDataFormat() == RNNFormat.NWC) {
output = output.permute(0,2,1);
}
return new Pair<>(output, null);
}
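
    /**
     * Performs the forward pass via the 2D superclass, squeezes the result back to rank 3, and
     * applies the (downsampled) feed-forward mask multiplicatively where a mask is present.
     */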
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
INDArray act4d = super.activate(training, workspaceMgr);
INDArray act3d = act4d.rank() > 3 ?
act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
if(maskArray != null) {
INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int)act3d.size(0)).getFirst();
Preconditions.checkState(act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
"Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
act3d.shape(), maskOut.shape());
Broadcast.mul(act3d, maskOut, act3d, 0, 2);
}
return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, act3d); //Should be zero copy most of the time
}
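
    /**
     * Reduces the input mask along the sequence dimension to match this layer's output length,
     * using the configured kernel size, stride, padding, dilation and convolution mode.
     */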
@Override
    public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState,
                                                          int minibatchSize) {
INDArray reduced = ConvolutionUtils.cnn1dMaskReduction(maskArray, layerConf().getKernelSize()[0],
layerConf().getStride()[0], layerConf().getPadding()[0], layerConf().getDilation()[0],
layerConf().getConvolutionMode());
return new Pair<>(reduced, currentMaskState);
}
@Override
public org.deeplearning4j.nn.conf.layers.Convolution1DLayer layerConf() {
return (org.deeplearning4j.nn.conf.layers.Convolution1DLayer) conf().getLayer();
}
    private RNNFormat getRnnDataFormat() {
return layerConf().getRnnDataFormat();
}
}