/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.api.dl
import caffe.Caffe.LayerParameter
import scala.collection.JavaConversions._
import org.apache.sysml.parser.LanguageException
import java.util.HashSet
import java.io.File
import org.apache.sysml.api.DMLScript
import org.apache.sysml.runtime.util.ConvolutionUtils
import caffe.Caffe.EltwiseParameter.EltwiseOp
import org.apache.sysml.runtime.DMLRuntimeException;
import java.util.ArrayList
trait CaffeLayer extends BaseDMLGenerator {
// -------------------------------------------------
// Any layer that wants to reuse SystemML-NN has to override the following methods, which help in generating the DML for the given layer:
def sourceFileName:String;
def init(dmlScript:StringBuilder):Unit;
def forward(dmlScript:StringBuilder, isPrediction:Boolean):Unit;
def backward(dmlScript:StringBuilder, outSuffix:String):Unit;
var computedOutputShape:(String, String, String) = null
def outputShape:(String, String, String) = {
if(computedOutputShape == null) computedOutputShape = bottomLayerOutputShape
computedOutputShape
}
// -------------------------------------------------
var computedBottomLayerOutputShape:(String, String, String) = null
def bottomLayerOutputShape:(String, String, String) = {
if(computedBottomLayerOutputShape == null) {
// Note: if you get org.apache.sysml.parser.LanguageException: Map is null exception
// from org.apache.sysml.api.dl.CaffeNetwork.org$apache$sysml$api$dl$CaffeNetwork$$convertLayerParameterToCaffeLayer
// you are attempting to traverse the network (for example: bottomLayerOutputShape) before it is created.
val ret = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
if(ret.size == 0) throw new LanguageException("Expected at least 1 bottom layer for " + param.getName)
computedBottomLayerOutputShape = ret(0).outputShape
}
computedBottomLayerOutputShape
}
def param:LayerParameter
def id:Int
def net:CaffeNetwork
// --------------------------------------------------------------------------------------
// No need to override these methods in subclasses
// Exception: only the Data layer overrides the "out" method to use 'Xb' for consistency
// Naming of the below methods is consistent with the nn library:
// X (feature map from the previous layer) ----> Forward pass ----> out (feature map to the next layer)
// dX (errors to the previous layer) <---- Backward pass <---- dout (errors from the next layer)
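// For example (illustrative ids): for a layer with id=3 whose bottom layer has id=2 and whose top
// layer has id=4, X resolves to "out2", out to "out3", dout to "dOut4_3", and dX(2) to "dOut3_2".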
def out = "out" + id
var computedX:String = null
def X:String = {
if(computedX == null) {
val ret = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
if(ret.size == 0) throw new LanguageException("Expected at least 1 bottom layer for " + param.getName)
else if(ret.size == 1) computedX = ret(0).out
else computedX = sum(new StringBuilder, ret.map(_.out).toList).toString()
}
computedX
}
var computedDout:String = null
def dout: String = {
if(computedDout == null) {
val ret = net.getTopLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
if(ret.size == 0) throw new LanguageException("Expected at least 1 top layer for " + param.getName)
else if(ret.size == 1) computedDout = ret(0).dX(id)
else computedDout = sum(new StringBuilder, ret.map(_.dX(id)).toList).toString()
}
computedDout
}
def dX(bottomLayerID:Int) = "dOut" + id + "_" + bottomLayerID
// --------------------------------------------------------------------------------------
// No need to override these methods in subclasses; instead, classes that have weights and biases
// should implement the HasWeight and HasBias traits.
def dWeight():String = throw new DMLRuntimeException("dWeight is not implemented in super class")
def dBias():String = throw new DMLRuntimeException("dBias is not implemented in super class")
def weight():String = null;
def weightShape():Array[Int];
def bias():String = null;
def biasShape():Array[Int];
def shouldUpdateWeight():Boolean = weight != null
def shouldUpdateBias():Boolean = bias != null
// --------------------------------------------------------------------------------------
// Helper methods to simplify the code of subclasses
def invokeInit(dmlScript:StringBuilder, returnVariables:List[String], arguments:String*):Unit = {
invoke(dmlScript, sourceFileName + "::", returnVariables, "init", arguments.toList)
}
def invokeForward(dmlScript:StringBuilder, returnVariables:List[String], arguments:String*):Unit = {
invoke(dmlScript, sourceFileName + "::", returnVariables, "forward", arguments.toList)
}
// -----------------------------------------------------------------------------------
// All the layers (with the exception of Concat) call one of the below methods in the backward function.
// The preceding layer expects that 'dX(bottomLayerID) + outSuffix' is assigned.
// l1 <--- dX(1) <-----|
// |-- [current layer: dOut3 (computed by backward)] <---- "dOut" + id + outSuffix
// l2 <--- dX(2) <-----|
// The methods below perform two steps:
// 1. Compute the backward pass: either call the layer's DML backward function (via invokeBackward) or just propagate the next layer's errors (via assignDoutToDX)
// 2. Then make sure that all the preceding layers get the errors using:
// bottomLayerIDs.map(bottomLayerID => dmlScript.append( dX(bottomLayerID) + outSuffix + " = " + "dOut" + id + outSuffix + "; "))
// The layers that have a corresponding DML script call this method (invokeBackward).
// Assumption: the first variable of resultVariables is always dX
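// As a sketch of the emitted DML (assuming invoke() renders a standard DML function call): for a ReLU
// layer with id=3, bottom layer id=2, top layer id=4 and outSuffix="_1", invokeBackward emits roughly:
//   [dOut3_1] = relu::backward(dOut4_3, out2); dOut3_2_1 = dOut3_1;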
def invokeBackward(dmlScript:StringBuilder, outSuffix:String, resultVariables:List[String], arguments:String*):Unit = {
invoke(dmlScript, sourceFileName + "::", resultVariables.map(_ + outSuffix), "backward", arguments.toList, false)
val bottomLayerIDs = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l).id)
dmlScript.append("; ")
bottomLayerIDs.foreach(bottomLayerID => dmlScript.append( dX(bottomLayerID) + outSuffix + " = " + resultVariables(0) + outSuffix + "; "))
dmlScript.append("\n")
}
// On-the-fly layers (such as Scale and Elementwise) call this method to propagate the next layer's errors to the previous layers
def assignDoutToDX(dmlScript:StringBuilder, outSuffix:String):Unit = {
dmlScript.append("dOut" + id + outSuffix + " = " + dout)
val bottomLayerIDs = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l).id)
dmlScript.append("; ")
bottomLayerIDs.foreach(bottomLayerID => dmlScript.append( dX(bottomLayerID) + outSuffix + " = " + "dOut" + id + outSuffix + "; "))
dmlScript.append("\n")
}
// --------------------------------------------------------------------------------------
}
trait IsLossLayer extends CaffeLayer {
def computeLoss(dmlScript:StringBuilder, numTabs:Int):Unit
}
trait HasWeight extends CaffeLayer {
override def weight = param.getName + "_weight"
override def dWeight = param.getName + "_dWeight"
}
trait HasBias extends CaffeLayer {
override def bias = param.getName + "_bias"
override def dBias = param.getName + "_dBias"
}
class Data(val param:LayerParameter, val id:Int, val net:CaffeNetwork, val numChannels:String, val height:String, val width:String) extends CaffeLayer {
// -------------------------------------------------
override def sourceFileName = null
override def init(dmlScript:StringBuilder) = {
if(param.hasTransformParam && param.getTransformParam.hasScale) {
dmlScript.append("X_full = X_full * " + param.getTransformParam.getScale + "\n")
}
if(param.hasDataParam && param.getDataParam.hasBatchSize) {
dmlScript.append("BATCH_SIZE = " + param.getDataParam.getBatchSize + "\n")
}
else {
Caffe2DML.LOG.debug("Using the default batch size of 64 as the batch size is not set via DataParam")
dmlScript.append("BATCH_SIZE = 64\n")
}
}
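// For example (illustrative values), "transform_param { scale: 0.00390625 }" together with
// "data_param { batch_size: 64 }" would emit:
//   X_full = X_full * 0.00390625
//   BATCH_SIZE = 64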
var dataOutputShape = ("$num_channels", "$height", "$width")
override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = { }
override def out = "Xb"
override def backward(dmlScript:StringBuilder, outSuffix:String) = { }
override def outputShape = (numChannels, height, width)
override def weightShape():Array[Int] = null
override def biasShape():Array[Int] = null
// -------------------------------------------------
}
// ------------------------------------------------------------------
// weight is ema_mean and bias is ema_var
// Caffe expresses batch normalization as a BatchNorm layer followed by a Scale layer; gamma and beta
// are therefore fetched from that Scale layer (see checkNextLayer).
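// Illustrative prototxt pattern (layer names are hypothetical):
//   layer { name: "bn1"    type: "BatchNorm" bottom: "conv1" top: "bn1" }
//   layer { name: "scale1" type: "Scale"     bottom: "bn1"   top: "scale1" scale_param { bias_term: true } }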
class BatchNorm(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer with HasWeight with HasBias {
override def sourceFileName = "batch_norm2d"
/*
* Initialize the parameters of this layer.
*
* Note: This is just a convenience function, and parameters
* may be initialized manually if needed.
*
* Inputs:
* - C: Number of input channels (dimensionality of input depth).
*
* Outputs:
* - gamma: Scale parameters, of shape (C, 1).
* - beta: Shift parameters, of shape (C, 1).
* - ema_mean: Exponential moving average of the mean, of
* shape (C, 1).
* - ema_var: Exponential moving average of the variance, of
* shape (C, 1).
*/
override def init(dmlScript:StringBuilder) = invokeInit(dmlScript, List[String](gamma, beta, ema_mean, ema_var), numChannels)
var update_mean_var = true
/*
* Computes the forward pass for a 2D (spatial) batch normalization
* layer. The input data has N examples, each represented as a 3D
* volume unrolled into a single vector.
*
* A spatial batch normalization layer uses the per-channel sample
* mean and per-channel uncorrected sample variance during training
* to normalize each channel of the input data. Additionally, it
* introduces learnable parameters (gamma, beta) to control the
* amount of normalization.
*
* `y = ((x-mean) / sqrt(var+eps)) * gamma + beta`
*
* This implementation maintains exponential moving averages of the
* mean and variance during training for use during testing.
*
* Reference:
* - Batch Normalization: Accelerating Deep Network Training by
* Reducing Internal Covariate Shift, S. Ioffe & C. Szegedy, 2015
* - https://arxiv.org/abs/1502.03167
*
* Inputs:
* - X: Inputs, of shape (N, C*Hin*Win).
* - gamma: Scale parameters, of shape (C, 1).
* - beta: Shift parameters, of shape (C, 1).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
* - mode: 'train' or 'test' to indicate if the model is currently
* being trained or tested. During training, the current batch
* mean and variance will be used to normalize the inputs, while
* during testing, the exponential average of the mean and
* variance over all previous batches will be used.
* - ema_mean: Exponential moving average of the mean, of
* shape (C, 1).
* - ema_var: Exponential moving average of the variance, of
* shape (C, 1).
* - mu: Momentum value for moving averages.
* Typical values are in the range of [0.9, 0.999].
* - epsilon: Smoothing term to avoid divide by zero errors.
* Typical values are in the range of [1e-5, 1e-3].
*
* Outputs:
* - out: Outputs, of shape (N, C*Hin*Win).
* - ema_mean_upd: Updated exponential moving average of the mean,
* of shape (C, 1).
* - ema_var_upd: Updated exponential moving average of the variance,
* of shape (C, 1).
* - cache_mean: Cache of the batch mean, of shape (C, 1).
* Note: This is used for performance during training.
* - cache_var: Cache of the batch variance, of shape (C, 1).
* Note: This is used for performance during training.
* - cache_norm: Cache of the normalized inputs, of
* shape (C, N*Hin*Win). Note: This is used for performance
* during training.
*/
def forward(dmlScript: StringBuilder, isPrediction: Boolean): Unit = {
val mode = if(isPrediction) "\"test\"" else "\"train\""
invokeForward(dmlScript, List[String](out, withSuffix(ema_mean), withSuffix(ema_var), withSuffix(cache_mean), withSuffix(cache_var), withSuffix(cache_norm)),
X, gamma, beta, numChannels, Hin, Win, mode, ema_mean, ema_var, ma_fraction, eps)
}
/*
* Computes the backward pass for a 2D (spatial) batch normalization
* layer.
*
* Inputs:
* - dout: Gradient wrt `out` from upstream, of shape (N, C*Hin*Win).
* - out: Outputs from the forward pass, of shape (N, C*Hin*Win).
* - ema_mean_upd: Updated exponential moving average of the mean
* from the forward pass, of shape (C, 1).
* - ema_var_upd: Updated exponential moving average of the variance
* from the forward pass, of shape (C, 1).
* - cache_mean: Cache of the batch mean from the forward pass, of
* shape (C, 1). Note: This is used for performance during
* training.
* - cache_var: Cache of the batch variance from the forward pass,
* of shape (C, 1). Note: This is used for performance during
* training.
* - cache_norm: Cache of the normalized inputs from the forward
* pass, of shape (C, N*Hin*Win). Note: This is used for
* performance during training.
* - X: Input data matrix to the forward pass, of
* shape (N, C*Hin*Win).
* - gamma: Scale parameters, of shape (C, 1).
* - beta: Shift parameters, of shape (C, 1).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
* - mode: 'train' or 'test' to indicate if the model is currently
* being trained or tested. During training, the current batch
* mean and variance will be used to normalize the inputs, while
* during testing, the exponential average of the mean and
* variance over all previous batches will be used.
* - ema_mean: Exponential moving average of the mean, of
* shape (C, 1).
* - ema_var: Exponential moving average of the variance, of
* shape (C, 1).
* - mu: Momentum value for moving averages.
* Typical values are in the range of [0.9, 0.999].
* - epsilon: Smoothing term to avoid divide by zero errors.
* Typical values are in the range of [1e-5, 1e-3].
*
* Outputs:
* - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
* - dgamma: Gradient wrt `gamma`, of shape (C, 1).
* - dbeta: Gradient wrt `beta`, of shape (C, 1).
*
*/
def backward(dmlScript: StringBuilder, outSuffix:String): Unit = {
invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dgamma, dbeta), dout, out, ema_mean, ema_var, cache_mean, cache_var, cache_norm, X, gamma, beta, numChannels,
Hin, Win, "\"train\"", ema_mean, ema_var, ma_fraction, eps)
}
private def withSuffix(str:String):String = if(update_mean_var) str else str + "_ignore"
override def weight = "ema_mean" + id
override def weightShape():Array[Int] = Array(numChannels.toInt, 1)
override def biasShape():Array[Int] = Array(numChannels.toInt, 1)
override def bias = "ema_var" + id
def cache_mean(): String = "cache_mean" + id
def cache_var():String = "cache_var" + id
def cache_norm():String = "cache_norm" + id
var scaleLayer:Scale = null
def gamma():String = { checkNextLayer(); scaleLayer.weight }
def ma_fraction():String = if(param.getBatchNormParam.hasMovingAverageFraction()) param.getBatchNormParam.getMovingAverageFraction.toString else "0.999"
def eps():String = if(param.getBatchNormParam.hasEps()) param.getBatchNormParam.getEps.toString else "1e-5"
def beta():String = { checkNextLayer(); scaleLayer.bias }
def dgamma():String = { checkNextLayer(); scaleLayer.dWeight }
def dbeta():String = { checkNextLayer(); scaleLayer.dBias }
override def shouldUpdateWeight():Boolean = false
override def shouldUpdateBias():Boolean = false
def ema_mean(): String = weight
def ema_var(): String = bias
def checkNextLayer(): Unit = {
if(scaleLayer == null) {
val topLayers = net.getTopLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
if(topLayers.length != 1 || !topLayers(0).isInstanceOf[Scale]) throw new LanguageException("Only one top layer of type Scale allowed for BatchNorm")
scaleLayer = topLayers(0).asInstanceOf[Scale]
}
}
def numChannels = bottomLayerOutputShape._1
def Hin = bottomLayerOutputShape._2
def Win = bottomLayerOutputShape._3
}
// weight is gamma and bias is beta
class Scale(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer with HasWeight with HasBias {
if(!param.getScaleParam.getBiasTerm) throw new LanguageException("Add \"scale_param { bias_term: true }\" to the layer " + param.getName)
override def sourceFileName = null
override def init(dmlScript: StringBuilder): Unit = {}
// TODO: Generalize this !!
def forward(dmlScript: StringBuilder, isPrediction: Boolean): Unit = assign(dmlScript, out, X)
override def backward(dmlScript: StringBuilder, outSuffix:String): Unit = assignDoutToDX(dmlScript, outSuffix)
override def weightShape():Array[Int] = Array(bottomLayerOutputShape._1.toInt, 1)
override def biasShape():Array[Int] = Array(bottomLayerOutputShape._1.toInt, 1)
}
// ------------------------------------------------------------------
class Elementwise(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer {
override def sourceFileName = null
override def init(dmlScript: StringBuilder): Unit = {}
if(param.getEltwiseParam.hasOperation && param.getEltwiseParam.getOperation != EltwiseOp.SUM)
throw new LanguageException("Currently only elementwise sum operation supported")
override def forward(dmlScript: StringBuilder, isPrediction: Boolean): Unit = {
addAndAssign(dmlScript, out, param.getBottomList.map(b => net.getCaffeLayer(b).out).toList)
}
override def backward(dmlScript: StringBuilder, outSuffix:String): Unit = assignDoutToDX(dmlScript, outSuffix)
override def outputShape = {
if(_out == null) _out = net.getCaffeLayer(net.getBottomLayers(param.getName).head).outputShape
_out
}
var _out:(String, String, String) = null
override def weightShape():Array[Int] = null
override def biasShape():Array[Int] = null
}
class Concat(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer {
override def sourceFileName = null
override def init(dmlScript: StringBuilder): Unit = {}
var _childLayers:List[CaffeLayer] = null
// Utility function to create string of format:
// fn(fn(fn(_childLayers(0).out, _childLayers(1).out), _childLayers(2).out), ...)
// This is useful because we do not support multi-input cbind and rbind in DML.
def _getMultiFn(fn:String):String = {
if(_childLayers == null) _childLayers = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
var tmp = fn + "(" + _childLayers(0).out + ", " + _childLayers(1).out + ")"
for(i <- 2 until _childLayers.size) {
tmp = fn + "(" + tmp + ", " + _childLayers(i).out + ")"
}
tmp
}
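// For example, with three bottom layers producing out1, out2 and out3, _getMultiFn("cbind")
// returns the string: cbind(cbind(out1, out2), out3)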
/*
* Computes the forward pass for a concatenation layer.
*
* Inputs:
* - n_i * c_i * h * w for each input blob i from 1 to K.
*
* Outputs:
* - out: Outputs, of shape
* - if axis = 0: (n_1 + n_2 + ... + n_K) * c_1 * h * w, and all input c_i should be the same.
* - if axis = 1: n_1 * (c_1 + c_2 + ... + c_K) * h * w, and all input n_i should be the same.
*/
override def forward(dmlScript: StringBuilder, isPrediction: Boolean): Unit = {
if(param.getConcatParam.getAxis == 0) {
// rbind the inputs
assign(dmlScript, out, _getMultiFn("rbind"))
}
else if(param.getConcatParam.getAxis == 1) {
// cbind the inputs
assign(dmlScript, out, _getMultiFn("cbind"))
}
else {
throw new DMLRuntimeException("Incorrect axis parameter for the layer " + param.getName)
}
}
def startIndex(outSuffix:String):String = "concat_start_index" + outSuffix
def endIndex(outSuffix:String):String = "concat_end_index" + outSuffix
def getConcatIndex(bottomLayerOut:String, outSuffix:String):String =
startIndex(outSuffix) + " = " + endIndex(outSuffix) + " + 1; " +
endIndex(outSuffix) + " = " + startIndex(outSuffix) + " + nrow(" + bottomLayerOut + "); "
/*
* Computes the backward pass for a concatenation layer.
*
* The top gradients are deconcatenated back to the inputs.
*
*/
override def backward(dmlScript: StringBuilder, outSuffix:String): Unit = {
val bottomLayers = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
val dOutVar = "dOut" + id + outSuffix
// concat_end_index = 0
dmlScript.append(dOutVar + " = " + dout + "; concat_end_index" + outSuffix + " = 0; ")
val indexString = "concat_start_index" + outSuffix + " : concat_end_index" + outSuffix
val doutVarAssignment = if(param.getConcatParam.getAxis == 0) " = " + dOutVar + "[" + indexString + ", ]; "
else " = " + dOutVar + "[," + indexString + " ]; "
// concat_start_index = concat_end_index + 1
// concat_end_index = concat_start_index + $$ - 1
val initializeIndexString = "concat_start_index" + outSuffix + " = concat_end_index" + outSuffix + " + 1; concat_end_index" + outSuffix +
" = concat_start_index" + outSuffix + " + $$ - 1; "
if(param.getConcatParam.getAxis == 0) {
bottomLayers.foreach(l => {
// Replace the $$ placeholder literally (replaceAll would interpret $$ as a regex)
dmlScript.append(initializeIndexString.replace("$$", nrow(l.out)))
// X1 = Z[concat_start_index:concat_end_index,]
.append( dX(l.id) + outSuffix + doutVarAssignment)
})
}
else {
bottomLayers.foreach(l => {
dmlScript.append(initializeIndexString.replace("$$", int_mult(l.outputShape._1, l.outputShape._2, l.outputShape._3) ))
// X1 = Z[,concat_start_index:concat_end_index]
.append( dX(l.id) + outSuffix + doutVarAssignment)
})
}
dmlScript.append("\n")
}
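// A sketch of the DML emitted for axis=1 (channel-wise concat) with two bottom layers of ids 3 and 4
// producing (64, 28, 28) and (32, 28, 28) outputs, this layer id=5, top layer id=6 and an empty outSuffix:
//   dOut5 = dOut6_5; concat_end_index = 0;
//   concat_start_index = concat_end_index + 1; concat_end_index = concat_start_index + 50176 - 1;
//   dOut5_3 = dOut5[,concat_start_index : concat_end_index ];
//   concat_start_index = concat_end_index + 1; concat_end_index = concat_start_index + 25088 - 1;
//   dOut5_4 = dOut5[,concat_start_index : concat_end_index ];
// (ids and shapes are hypothetical; 50176 = 64*28*28 and 25088 = 32*28*28)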
def sumChannels():String = {
val channels = _childLayers.map(_.outputShape._1)
try {
channels.reduce((c1, c2) => (c1.toInt + c2.toInt).toString())
}
catch {
case _:Throwable => sum(new StringBuilder, channels).toString
}
}
override def outputShape = {
if(_out == null) {
if(_childLayers == null) _childLayers = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
if(param.getConcatParam.getAxis == 0) {
_out = _childLayers(0).outputShape
}
else if(param.getConcatParam.getAxis == 1) {
_out = (sumChannels(), _childLayers(0).outputShape._2, _childLayers(0).outputShape._3)
}
else {
throw new DMLRuntimeException("Incorrect axis parameter for the layer " + param.getName)
}
}
_out
}
var _out:(String, String, String) = null
override def weightShape():Array[Int] = null
override def biasShape():Array[Int] = null
}
class SoftmaxWithLoss(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer with IsLossLayer {
// -------------------------------------------------
override def sourceFileName = if(!isSegmentationProblem()) "softmax" else "softmax2d"
override def init(dmlScript:StringBuilder) = {}
def isSegmentationProblem():Boolean = {
try {
return outputShape._2.toInt != 1 && outputShape._3.toInt != 1
} catch {
case _:Throwable => throw new RuntimeException("Cannot infer the output dimensions: " + outputShape)
}
}
override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = {
if(!isSegmentationProblem()) {
invokeForward(dmlScript, List[String](out), scores)
}
else {
invokeForward(dmlScript, List[String](out), scores, outputShape._1)
}
}
override def backward(dmlScript:StringBuilder, outSuffix:String) = {
if(!isSegmentationProblem()) {
invoke(dmlScript, "cross_entropy_loss::", List[String]("dProbs" + outSuffix), "backward", false, out, "yb")
dmlScript.append("; ")
invoke(dmlScript, "softmax::", List[String]("dOut" + id + outSuffix), "backward", false, "dProbs", scores)
val bottomLayerIDs = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l).id)
dmlScript.append("; ")
bottomLayerIDs.map(bottomLayerID => dmlScript.append( dX(bottomLayerID) + outSuffix + " = " + "dOut" + id + outSuffix + "; "))
dmlScript.append("\n")
}
else {
throw new RuntimeException("backward for SoftmaxWithLoss is not implemented for segmentation problem")
}
}
override def computeLoss(dmlScript:StringBuilder, numTabs:Int) = {
if(!isSegmentationProblem()) {
val tabBuilder = new StringBuilder
for(i <- 0 until numTabs) tabBuilder.append("\t")
val tabs = tabBuilder.toString
dmlScript.append("tmp_loss = cross_entropy_loss::forward(" + commaSep(out, "yb") + ")\n")
dmlScript.append(tabs).append("loss = loss + tmp_loss\n")
dmlScript.append(tabs).append("true_yb = rowIndexMax(yb)\n")
dmlScript.append(tabs).append("predicted_yb = rowIndexMax(" + out + ")\n")
dmlScript.append(tabs).append("accuracy = mean(predicted_yb == true_yb)*100\n")
}
else {
throw new RuntimeException("Computation of loss for SoftmaxWithLoss is not implemented for segmentation problem")
}
}
def scores():String = {
val ret = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
if(ret.size == 1) return ret(0).out
else if(ret.size == 2) {
val ret1 = if(!ret(0).out.equals("Xb")) ret(0).out else "";
val ret2 = if(!ret(1).out.equals("Xb")) ret(1).out else "";
if(!ret1.equals("") && !ret2.equals("")) throw new LanguageException("At least one of the outputs of the previous layers should be Xb")
else if(!ret1.equals("")) return ret1
else return ret2
}
else
throw new LanguageException("More than 2 bottom layers are not supported")
}
override def weightShape():Array[Int] = null
override def biasShape():Array[Int] = null
// -------------------------------------------------
override def bottomLayerOutputShape:(String, String, String) = {
if(computedBottomLayerOutputShape == null) {
val ret = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l)).filter(l => !l.isInstanceOf[Data]).toList
if(ret.size != 1) throw new LanguageException("Expected exactly 1 bottom non-Data layer for " + param.getName)
computedBottomLayerOutputShape = ret(0).outputShape
}
computedBottomLayerOutputShape
}
}
class ReLU(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer {
// TODO: Leaky ReLU: negative_slope [default 0]: specifies whether to leak the negative part by multiplying it with the slope value rather than setting it to 0.
// -------------------------------------------------
override def sourceFileName = "relu"
override def init(dmlScript:StringBuilder) = { }
/*
* Computes the forward pass for a ReLU nonlinearity layer.
*
* Performs an element-wise evaluation of `f(input) = max(0, input)`.
*
* Inputs:
* - X: Inputs, of shape (any, any).
*
* Outputs:
* - out: Outputs, of same shape as `X`.
*/
override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = invokeForward(dmlScript, List[String](out), X)
/*
* Computes the backward pass for a ReLU nonlinearity layer.
*
* Essentially performs a pass-through of the upstream gradient
* for cells > 0.
*
* Inputs:
* - dout: Gradient wrt `out` from upstream, of same shape as `X`.
* - X: Previous input data matrix, of shape (any, any).
*
* Outputs:
* - dX: Gradient wrt `X`, of same shape as `X`.
*/
override def backward(dmlScript:StringBuilder, outSuffix:String) = invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id), dout, X)
override def weightShape():Array[Int] = null
override def biasShape():Array[Int] = null
// -------------------------------------------------
}
class Softmax(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer {
// -------------------------------------------------
override def sourceFileName = "softmax"
override def init(dmlScript:StringBuilder) = { }
/*
* Computes the forward pass for a softmax classifier. The inputs
* are interpreted as unnormalized, log-probabilities for each of
* N examples, and the softmax function transforms them to normalized
* probabilities.
*
* This can be interpreted as a generalization of the sigmoid
* function to multiple classes.
*
* `probs_ij = e^scores_ij / sum(e^scores_i)`
*
* Inputs:
* - scores: Inputs, of shape (N, D).
*
* Outputs:
* - probs: Outputs, of shape (N, D).
*/
override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = invokeForward(dmlScript, List[String](out), X)
/*
* Computes the backward pass for a softmax classifier.
*
* Note that dscores_ij has multiple source branches:
*
* ```
* dprobs_ij/dscores_ij = probs_ij * (1 - probs_ij)
* dprobs_ik/dscores_ij = -probs_ik * probs_ij, for all k != j
*
* dloss/dscores_ij =
* (dloss/dprobs_ij * dprobs_ij/dscores_ij)
* + sum_{k!=j}(dloss/dprobs_ik * dprobs_ik/dscores_ij)
* ```
*
* Inputs:
* - dprobs: Gradient wrt `probs` from upstream, of shape (N, D).
* - scores: Inputs, of shape (N, D).
*
* Outputs:
* - dscores: Gradient wrt `scores`, of shape (N, D).
*/
override def backward(dmlScript:StringBuilder, outSuffix:String) = invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id), dout, X)
override def weightShape():Array[Int] = null
override def biasShape():Array[Int] = null
// -------------------------------------------------
}
class Threshold(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer {
override def sourceFileName = null
override def init(dmlScript:StringBuilder) = { }
val threshold = if(param.getThresholdParam.hasThreshold) param.getThresholdParam.getThreshold else 0
override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = assign(dmlScript, out, X + " > " + threshold)
override def backward(dmlScript:StringBuilder, outSuffix:String) = throw new DMLRuntimeException("Backward operation for Threshold layer is not supported.")
override def weightShape():Array[Int] = null
override def biasShape():Array[Int] = null
}
class Dropout(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer {
// -------------------------------------------------
override def sourceFileName = "dropout"
override def init(dmlScript:StringBuilder) = { }
/*
* Computes the forward pass for an inverted dropout layer.
*
* Drops the inputs element-wise with probability 1-p, and divides
* by p to maintain the expected values of those inputs (which are
* the outputs of neurons) at test time.
*
* Inputs:
* - X: Inputs, of shape (any, any).
* - p: Probability of keeping a neuron output.
* - seed: [Optional: -1] Random number generator seed to allow for
* deterministic evaluation. Set to -1 for a random seed.
*
* Outputs:
* - out: Outputs, of same shape as `X`.
* - mask: Dropout mask used to compute the output.
*/
override def forward(dmlScript:StringBuilder, isPrediction:Boolean) =
if(!isPrediction)
invokeForward(dmlScript, List[String](out, mask), X, p, seed)
else
assign(dmlScript, out, X) // No forward pass is required during prediction for the Dropout layer
/*
* Computes the backward pass for an inverted dropout layer.
*
* Applies the mask to the upstream gradient, and divides by p to
* maintain the expected values at test time.
*
* Inputs:
* - dout: Gradient wrt `out`, of same shape as `X`.
* - X: Inputs, of shape (any, any).
* - p: Probability of keeping a neuron output.
* - mask: Dropout mask used to compute the output.
*
* Outputs:
* - dX: Gradient wrt `X`, of same shape as `X`.
*/
override def backward(dmlScript:StringBuilder, outSuffix:String) = invokeBackward(dmlScript, outSuffix,
List[String]("dOut" + id), dout, X, p, mask)
// -------------------------------------------------
def mask = "mask" + id
// dropout ratio
def p = if(param.getDropoutParam.hasDropoutRatio()) param.getDropoutParam.getDropoutRatio.toString else "0.5"
def seed = "-1"
override def weightShape():Array[Int] = null
override def biasShape():Array[Int] = null
}
class InnerProduct(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer with HasWeight with HasBias {
// -------------------------------------------------
// TODO: bias_filler [default type: 'constant' value: 0]; bias_term [default true]: specifies whether to learn and apply a set of additive biases to the filter outputs
override def sourceFileName = "affine"
/*
* Initialize the parameters of this layer.
*
* Note: This is just a convenience function, and parameters
* may be initialized manually if needed.
*
* We use the heuristic by He et al., which limits the magnification
* of inputs/gradients during forward/backward passes by scaling
* unit-Gaussian weights by a factor of sqrt(2/n), under the
* assumption of relu neurons.
* - http://arxiv.org/abs/1502.01852
*
* Inputs:
* - D: Dimensionality of the input features (number of features).
* - M: Number of neurons in this layer.
*
* Outputs:
* - W: Weights, of shape (D, M).
* - b: Biases, of shape (1, M).
*/
override def init(dmlScript:StringBuilder) = invokeInit(dmlScript, List[String](weight, bias), numFeatures, numNeurons)
/*
* Computes the forward pass for an affine (fully-connected) layer
* with M neurons. The input data has N examples, each with D
* features.
*
* Inputs:
* - X: Inputs, of shape (N, D).
* - W: Weights, of shape (D, M).
* - b: Biases, of shape (1, M).
*
* Outputs:
* - out: Outputs, of shape (N, M).
*/
override def forward(dmlScript:StringBuilder, isPrediction:Boolean) =
invokeForward(dmlScript, List[String](out), X, weight, bias)
/*
* Computes the backward pass for a fully-connected (affine) layer
* with M neurons.
*
* Inputs:
* - dout: Gradient wrt `out` from upstream, of shape (N, M).
* - X: Inputs, of shape (N, D).
* - W: Weights, of shape (D, M).
* - b: Biases, of shape (1, M).
*
* Outputs:
* - dX: Gradient wrt `X`, of shape (N, D).
* - dW: Gradient wrt `W`, of shape (D, M).
* - db: Gradient wrt `b`, of shape (1, M).
*/
override def backward(dmlScript:StringBuilder, outSuffix:String) =
invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias), dout, X, weight, bias)
// -------------------------------------------------
// num_output (c_o): the number of filters
def numNeurons = param.getInnerProductParam.getNumOutput.toString
def numFeatures = int_mult(bottomLayerOutputShape._1, bottomLayerOutputShape._2, bottomLayerOutputShape._3)
// n * c_o * 1 * 1
override def outputShape = ( param.getInnerProductParam.getNumOutput.toString, "1", "1" )
override def weightShape():Array[Int] = Array(numFeatures.toInt, numNeurons.toInt)
override def biasShape():Array[Int] = Array(1, numNeurons.toInt)
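// For example (hypothetical dimensions): a bottom layer of shape (3, 28, 28) with num_output=512
// gives numFeatures = 3*28*28 = 2352, weightShape = (2352, 512) and biasShape = (1, 512).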
}
class MaxPooling(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer {
// -------------------------------------------------
override def sourceFileName = "max_pool2d_builtin"
override def init(dmlScript:StringBuilder) = {}
/*
* Computes the forward pass for a 2D spatial max pooling layer.
* The input data has N examples, each represented as a 3D volume
* unrolled into a single vector.
*
* This implementation uses a built-in operator for higher
* performance.
*
* Inputs:
* - X: Inputs, of shape (N, C*Hin*Win).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
* - Hf: Filter height.
* - Wf: Filter width.
* - strideh: Stride over height.
* - stridew: Stride over width.
* - padh: Padding for top and bottom sides.
* A typical value is 0.
* - padw: Padding for left and right sides.
* A typical value is 0.
*
* Outputs:
* - out: Outputs, of shape (N, C*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
*/
override def forward(dmlScript:StringBuilder, isPrediction:Boolean) =
invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id),
X, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
/*
* Computes the backward pass for a 2D spatial max pooling layer.
* The input data has N examples, each represented as a 3D volume
* unrolled into a single vector.
*
* Inputs:
* - dout: Gradient wrt `out` from upstream, of
* shape (N, C*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
* - X: Inputs, of shape (N, C*Hin*Win).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
* - Hf: Filter height.
* - Wf: Filter width.
* - strideh: Stride over height.
* - stridew: Stride over width.
* - padh: Padding for top and bottom sides.
* A typical value is 0.
* - padw: Padding for left and right sides.
* A typical value is 0.
*
* Outputs:
* - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
*/
override def backward(dmlScript:StringBuilder, outSuffix:String) =
invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id), dout, Hout, Wout, X, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
// n * c * h_o * w_o, where h_o and w_o are computed in the same way as convolution.
override def outputShape = ( numChannels, Hout, Wout )
// -------------------------------------------------
def Hin = bottomLayerOutputShape._2
def Win = bottomLayerOutputShape._3
def Hout = ConvolutionUtils.getConv2dOutputMap(bottomLayerOutputShape._2, kernel_h, stride_h, pad_h)
def Wout = ConvolutionUtils.getConv2dOutputMap(bottomLayerOutputShape._3, kernel_w, stride_w, pad_w)
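// For example (hypothetical dimensions): Hin=28 with kernel_h=2, stride_h=2, pad_h=0 gives
// Hout = (28 + 2*0 - 2) / 2 + 1 = 14, and Wout follows analogously.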
def poolingParam = param.getPoolingParam
def numChannels = bottomLayerOutputShape._1
// kernel_size (or kernel_h and kernel_w): specifies height and width of each filter
def kernel_h = if(poolingParam.hasKernelH) poolingParam.getKernelH.toString
else poolingParam.getKernelSize.toString
def kernel_w = if(poolingParam.hasKernelW) poolingParam.getKernelW.toString
else poolingParam.getKernelSize.toString
// stride (or stride_h and stride_w) [default 1]: specifies the intervals at which to apply the filters to the input
def stride_h = if(poolingParam.hasStrideH) poolingParam.getStrideH.toString
else if(poolingParam.hasStride) poolingParam.getStride.toString
else "1"
def stride_w = if(poolingParam.hasStrideW) poolingParam.getStrideW.toString
else if(poolingParam.hasStride) poolingParam.getStride.toString
else "1"
// pad (or pad_h and pad_w) [default 0]: specifies the number of pixels to (implicitly) add to each side of the input
def pad_h = if(poolingParam.hasPadH) poolingParam.getPadH.toString
else if(poolingParam.hasPad) poolingParam.getPad.toString
else "0"
def pad_w = if(poolingParam.hasPadW) poolingParam.getPadW.toString
else if(poolingParam.hasPad) poolingParam.getPad.toString
else "0"
override def weightShape():Array[Int] = null
override def biasShape():Array[Int] = null
}
class Convolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer with HasWeight with HasBias {
def isDepthWise():Boolean = {
if(param.getConvolutionParam.hasGroup && param.getConvolutionParam.getGroup != 1 && numChannels.toInt % param.getConvolutionParam.getGroup != 0)
throw new DMLRuntimeException("The number of groups=" + param.getConvolutionParam.getGroup + " is not supported as it is not divisible by number of channels" + numChannels + ".")
param.getConvolutionParam.hasGroup && param.getConvolutionParam.getGroup != 1
}
def depthMultiplier():String = if(isDepthWise) (numChannels.toInt / param.getConvolutionParam.getGroup).toString else throw new DMLRuntimeException("depthMultiplier is only applicable to depthwise convolution")
// -------------------------------------------------
override def sourceFileName = if(isDepthWise) "conv2d_builtin_depthwise" else "conv2d_builtin"
/*
* Initialize the parameters of this layer.
*
* Note: This is just a convenience function, and parameters
* may be initialized manually if needed.
*
* We use the heuristic by He et al., which limits the magnification
* of inputs/gradients during forward/backward passes by scaling
* unit-Gaussian weights by a factor of sqrt(2/n), under the
* assumption of relu neurons.
* - http://arxiv.org/abs/1502.01852
*
* Inputs without depthwise:
* - F: Number of filters.
* - C: Number of input channels (dimensionality of depth).
* - Hf: Filter height.
* - Wf: Filter width.
*
* Inputs with depthwise:
* - C: Number of input channels (dimensionality of depth).
* - M: Number of filters per input channel (i.e. depth multiplier).
* - Hf: Filter height.
* - Wf: Filter width.
*
* Outputs:
* - W: Weights, of shape (F, C*Hf*Wf).
* - b: Biases, of shape (F, 1).
*/
override def init(dmlScript:StringBuilder) = {
if(isDepthWise)
invokeInit(dmlScript, List[String](weight, bias), numChannels, depthMultiplier, kernel_h, kernel_w)
else
invokeInit(dmlScript, List[String](weight, bias), numKernels, numChannels, kernel_h, kernel_w)
}
/*
* Computes the forward pass for a 2D spatial convolutional layer with
* F filters. The input data has N examples, each represented as a 3D
* volume unrolled into a single vector.
*
* This implementation uses a built-in operator for higher
* performance.
*
* Inputs:
* - X: Inputs, of shape (N, C*Hin*Win).
* - W: Weights, of shape (F, C*Hf*Wf).
* - b: Biases, of shape (F, 1).
* - C: Number of input channels (dimensionality of depth).
* - Hin: Input height.
* - Win: Input width.
* (only for depthwise) - M: Number of filters per input channel (i.e. depth multiplier).
* - Hf: Filter height.
* - Wf: Filter width.
* - strideh: Stride over height.
* - stridew: Stride over width.
* - padh: Padding for top and bottom sides.
* For same output height as input, set `padh = (Hf - 1) / 2`,
* assuming `strideh = 1`.
* More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
* preserves the spatial dimensions of the input.
* - padw: Padding for left and right sides.
* For same output width as input, set `padw = (Wf - 1) / 2`,
* assuming `stridew = 1`.
* More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
* preserves the spatial dimensions of the input.
*
* Outputs:
* - out: Outputs, of shape (N, F*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
*/
override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = {
if(isDepthWise)
invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id),
X, weight, bias, numChannels, Hin, Win, depthMultiplier, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
else
invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id),
X, weight, bias, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
}
/*
* Computes the backward pass for a 2D spatial convolutional layer
* with F filters.
*
* Inputs:
* - dout: Gradient wrt `out` from upstream, of
* shape (N, F*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
* - X: Inputs, of shape (N, C*Hin*Win).
* - W: Weights, of shape (F, C*Hf*Wf).
* - b: Biases, of shape (F, 1).
* - C: Number of input channels (dimensionality of depth).
* - Hin: Input height.
* - Win: Input width.
* (only for depthwise) - M: Number of filters per input channel (i.e. depth multiplier).
* - Hf: Filter height.
* - Wf: Filter width.
* - strideh: Stride over height.
* - stridew: Stride over width.
* - padh: Padding for top and bottom sides.
* For same output height as input, set `padh = (Hf - 1) / 2`,
* assuming `strideh = 1`.
* More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
* preserves the spatial dimensions of the input.
* - padw: Padding for left and right sides.
* For same output width as input, set `padw = (Wf - 1) / 2`,
* assuming `stridew = 1`.
* More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
* preserves the spatial dimensions of the input.
*
* Outputs:
* - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
* - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
* - db: Gradient wrt `b`, of shape (F, 1).
*/
override def backward(dmlScript:StringBuilder, outSuffix:String) = {
if(isDepthWise)
invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias), dout, Hout, Wout, X, weight, bias, numChannels, Hin, Win, depthMultiplier, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
else
invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias), dout, Hout, Wout, X, weight, bias, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
}
// if not depthwise, n * c_o * h_o * w_o, where h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1 and w_o likewise.
// else (N, C*M*Hout*Wout)
override def outputShape = {
if(isDepthWise) ( (numChannels.toInt*depthMultiplier.toInt).toString, Hout, Wout )
else ( numKernels, Hout, Wout )
}
// -------------------------------------------------
def numChannels = bottomLayerOutputShape._1
def Hin = bottomLayerOutputShape._2
def Win = bottomLayerOutputShape._3
def Hout = ConvolutionUtils.getConv2dOutputMap(bottomLayerOutputShape._2, kernel_h, stride_h, pad_h)
def Wout = ConvolutionUtils.getConv2dOutputMap(bottomLayerOutputShape._3, kernel_w, stride_w, pad_w)
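// For example (hypothetical dimensions): Hin=32 with kernel_h=3, stride_h=1, pad_h=1 gives
// Hout = (32 + 2*1 - 3) / 1 + 1 = 32, i.e. "same" padding preserves the spatial dimensions.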
// -------------------------------------------------
def convParam = param.getConvolutionParam
// if depthwise (C, M*Hf*Wf) else (F, C*Hf*Wf)
override def weightShape():Array[Int] = {
if(isDepthWise) Array(numChannels.toInt, int_mult(depthMultiplier, kernel_h, kernel_w).toInt)
else Array(numKernels.toInt, int_mult(numChannels, kernel_h, kernel_w).toInt)
}
// if depthwise (C*M, 1) else (F, 1)
override def biasShape():Array[Int] = {
if(isDepthWise) Array(numChannels.toInt*depthMultiplier.toInt, 1)
else Array(numKernels.toInt, 1)
}
// num_output (c_o): the number of filters
def numKernels = convParam.getNumOutput.toString
// kernel_size (or kernel_h and kernel_w): specifies height and width of each filter
def kernel_h = if(convParam.hasKernelH) convParam.getKernelH.toString
else if(convParam.getKernelSizeCount > 0) convParam.getKernelSize(0).toString
else throw new LanguageException("Incorrect kernel parameters")
def kernel_w = if(convParam.hasKernelW) convParam.getKernelW.toString
else if(convParam.getKernelSizeCount > 0) convParam.getKernelSize(0).toString
else throw new LanguageException("Incorrect kernel parameters")
// stride (or stride_h and stride_w) [default 1]: specifies the intervals at which to apply the filters to the input
def stride_h = if(convParam.hasStrideH) convParam.getStrideH.toString
else if(convParam.getStrideCount > 0) convParam.getStride(0).toString
else "1"
def stride_w = if(convParam.hasStrideW) convParam.getStrideW.toString
else if(convParam.getStrideCount > 0) convParam.getStride(0).toString
else "1"
// pad (or pad_h and pad_w) [default 0]: specifies the number of pixels to (implicitly) add to each side of the input
def pad_h = if(convParam.hasPadH) convParam.getPadH.toString
else if(convParam.getPadCount > 0) convParam.getPad(0).toString
else "0"
def pad_w = if(convParam.hasPadW) convParam.getPadW.toString
else if(convParam.getPadCount > 0) convParam.getPad(0).toString
else "0"
}
class DeConvolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer with HasWeight with HasBias {
def isDepthWise():Boolean = {
if(param.getConvolutionParam.hasGroup && param.getConvolutionParam.getGroup != 1 && numChannels.toInt % param.getConvolutionParam.getGroup != 0)
throw new DMLRuntimeException("The number of groups=" + param.getConvolutionParam.getGroup + " is not supported as it is not divisible by number of channels" + numChannels + ".")
param.getConvolutionParam.hasGroup && param.getConvolutionParam.getGroup != 1
}
def depthMultiplier():String = if(isDepthWise) (numChannels.toInt / param.getConvolutionParam.getGroup).toString else throw new DMLRuntimeException("depthMultiplier is only applicable to depthwise convolution")
override def sourceFileName: String = if(isDepthWise) "conv2d_transpose_depthwise" else "conv2d_transpose"
/*
* Utility function to initialize the parameters of this layer.
*
* We use the heuristic by He et al., which limits the magnification
* of inputs/gradients during forward/backward passes by scaling
* unit-Gaussian weights by a factor of sqrt(2/n), under the
* assumption of relu neurons.
* - http://arxiv.org/abs/1502.01852
*
* Inputs without depthwise:
* - F: Number of filters.
* - C: Number of input channels (dimensionality of depth).
* - Hf: Filter height.
* - Wf: Filter width.
*
* Inputs with depthwise:
* - C: Number of input channels (dimensionality of depth).
* - M: Depth of each filter (C must be divisible by M).
* - Hf: Filter height.
* - Wf: Filter width.
*
* Outputs:
* - W: Weights, of shape (C, F*Hf*Wf).
* - b: Biases, of shape (F, 1).
*/
override def init(dmlScript: StringBuilder): Unit = {
if(isDepthWise)
invokeInit(dmlScript, List[String](weight, bias), numChannels, depthMultiplier, kernel_h, kernel_w)
else
invokeInit(dmlScript, List[String](weight, bias), numKernels, numChannels, kernel_h, kernel_w)
}
private def C_DivideBy_M():Int = numChannels.toInt / depthMultiplier.toInt
// if depthwise (C/M, M*Hf*Wf), else (C, F*Hf*Wf)
override def weightShape():Array[Int] = {
if(isDepthWise)
Array(C_DivideBy_M, int_mult(depthMultiplier, kernel_h, kernel_w).toInt)
else
Array(numChannels.toInt, int_mult(numKernels, kernel_h, kernel_w).toInt)
}
// if depthwise (C/M, 1), else (F, 1)
override def biasShape():Array[Int] = {
if(isDepthWise)
Array(C_DivideBy_M, 1)
else
Array(numKernels.toInt, 1)
}
private def numGroups:Int = if(param.getConvolutionParam.hasGroup) param.getConvolutionParam.getGroup else 1
/*
* Computes the forward pass for a 2D spatial transpose convolutional
* layer with F filters. The input data has N examples, each
* represented as a 3D tensor flattened into a single vector.
*
* Inputs:
* - X: Inputs, of shape (N, C*Hin*Win).
* - W: Weights, of shape (C, F*Hf*Wf).
* - b: Biases, of shape (F, 1).
* - C: Number of input channels (dimensionality of depth).
* - Hin: Input height.
* - Win: Input width.
* (only for depthwise): - M: Depth of each filter (C must be divisible by M).
* - Hf: Filter height.
* - Wf: Filter width.
* - strideh: Stride over height.
* - stridew: Stride over width.
* - padh: Padding for top and bottom sides.
* - padw: Padding for left and right sides.
* - out_padh: extra padding for top side. This should
* lie in [0, strideh-1].
* - out_padw: extra padding for right side. This should
* lie in [0, stridew-1].
*
* Outputs:
* - out: Outputs, of shape (N, F*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
*/
override def forward(dmlScript: StringBuilder,isPrediction: Boolean): Unit = {
if(isDepthWise)
invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id),
X, weight, bias, numChannels, Hin, Win, depthMultiplier, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, "0", "0")
else
invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id),
X, weight, bias, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, "0", "0")
}
/*
* Computes the backward pass for a 2D spatial transpose
* convolutional layer with F filters.
*
* Inputs:
* - dout: Gradient wrt `out` from upstream, of
* shape (N, F*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
* - X: Inputs, of shape (N, C*Hin*Win).
* - W: Weights, of shape (C, F*Hf*Wf).
* - b: Biases, of shape (F, 1).
* - C: Number of input channels (dimensionality of depth).
* - Hin: Input height.
* - Win: Input width.
* (only for depthwise): - M: Depth of each filter (C must be divisible by M).
* - Hf: Filter height.
* - Wf: Filter width.
* - strideh: Stride over height.
* - stridew: Stride over width.
* - padh: Padding for top and bottom sides.
* - padw: Padding for left and right sides.
*
* Outputs:
* - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
* - dW: Gradient wrt `W`, of shape (C, F*Hf*Wf).
* - db: Gradient wrt `b`, of shape (F, 1).
*/
override def backward(dmlScript:StringBuilder, outSuffix:String) = {
if(isDepthWise)
invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias),
dout, Hout, Wout, X, weight, bias, numChannels, Hin, Win, depthMultiplier, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
else
invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias),
dout, Hout, Wout, X, weight, bias, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
}
// if not depthwise n * c_o * h_o * w_o, where h_o = stride_h * (h_i - 1) - 2 * pad_h + kernel_h and w_o likewise.
// else (N, C/M*Hout*Wout)
override def outputShape = if(isDepthWise) ( C_DivideBy_M().toString, Hout, Wout ) else ( numChannels, Hout, Wout )
// -------------------------------------------------
def numChannels = bottomLayerOutputShape._1
def Hin = bottomLayerOutputShape._2
def Win = bottomLayerOutputShape._3
// Hout = strideh * (Hin-1) - 2*padh + Hf + out_padh
def Hout:String = try {
(stride_h.toInt * (Hin.toInt-1) - 2*pad_h.toInt + kernel_h.toInt).toString()
}
catch {
case _:Throwable => stride_h + " * " + "(" + Hin + "-1) - 2*" + pad_h + " + " + kernel_h
}
// Wout = stridew * (Win-1) - 2*padw + Wf + out_padw
def Wout:String = try {
(stride_w.toInt * (Win.toInt-1) - 2*pad_w.toInt + kernel_w.toInt).toString()
}
catch {
case _:Throwable => stride_w + " * " + "(" + Win + "-1) - 2*" + pad_w + " + " + kernel_w
}
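// For example (hypothetical dimensions): Hin=16 with stride_h=2, pad_h=1, kernel_h=4 gives
// Hout = 2*(16-1) - 2*1 + 4 = 32, i.e. this transpose convolution doubles the spatial size.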
// -------------------------------------------------
def convParam = param.getConvolutionParam
// num_output (c_o): the number of filters
def numKernels = convParam.getNumOutput.toString
// kernel_size (or kernel_h and kernel_w): specifies height and width of each filter
def kernel_h = if(convParam.hasKernelH) convParam.getKernelH.toString
else if(convParam.getKernelSizeCount > 0) convParam.getKernelSize(0).toString
else throw new LanguageException("Incorrect kernel parameters")
def kernel_w = if(convParam.hasKernelW) convParam.getKernelW.toString
else if(convParam.getKernelSizeCount > 0) convParam.getKernelSize(0).toString
else throw new LanguageException("Incorrect kernel parameters")
// stride (or stride_h and stride_w) [default 1]: specifies the intervals at which to apply the filters to the input
def stride_h = if(convParam.hasStrideH) convParam.getStrideH.toString
else if(convParam.getStrideCount > 0) convParam.getStride(0).toString
else "1"
def stride_w = if(convParam.hasStrideW) convParam.getStrideW.toString
else if(convParam.getStrideCount > 0) convParam.getStride(0).toString
else "1"
// pad (or pad_h and pad_w) [default 0]: specifies the number of pixels to (implicitly) add to each side of the input
def pad_h = if(convParam.hasPadH) convParam.getPadH.toString
else if(convParam.getPadCount > 0) convParam.getPad(0).toString
else "0"
def pad_w = if(convParam.hasPadW) convParam.getPadW.toString
else if(convParam.getPadCount > 0) convParam.getPad(0).toString
else "0"
}