com.intel.analytics.bigdl.nn.VolumetricMaxPooling.scala Maven / Gradle / Ivy
The newest version!
/*
* Copyright 2016 The BigDL Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intel.analytics.bigdl.nn
import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity, TensorModule}
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.utils.serializer._
import com.intel.analytics.bigdl.utils.serializer.converters.DataConverter
import org.codehaus.jackson.map.DeserializationContext
import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule}
import scala.reflect._
import scala.reflect.runtime.universe
/**
* Applies 3D max-pooling operation in kTxkWxkH regions by step size dTxdWxdH.
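* The number of output planes equals the number of input planes; each of the time, height
* and width extents becomes floor((in - k + 2 * pad) / d) + 1 (ceil instead of floor when
* ceil mode is enabled). The input can optionally be padded; padded cells are never read,
* so they are never selected as a maximum. Padding must not exceed half of the kernel size
* (integer division). That is, padT <= kT/2, padW <= kW/2 and padH <= kH/2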
* @param kT The kernel size in the time dimension
* @param kW The kernel width
* @param kH The kernel height
* @param dT The step in the time dimension
* @param dW The step in the width dimension
* @param dH The step in the height dimension
* @param padT The padding in the time dimension
* @param padW The padding in the width dimension
* @param padH The padding in the height dimension
* @tparam T The numeric type of this module's tensors, usually [[Float]] or [[Double]]
*/
@SerialVersionUID(-4330398221120919890L)
class VolumetricMaxPooling[T: ClassTag](
val kT: Int, val kW: Int, val kH: Int,
val dT: Int, val dW: Int, val dH: Int,
val padT: Int = 0, val padW: Int = 0, val padH: Int = 0)
(implicit ev: TensorNumeric[T]) extends TensorModule[T] {
var ceilMode = false
private var indices = Tensor[Float]()
/**
 * Convenience constructor: the stride in every dimension is set to the kernel size of
 * that dimension, and the paddings keep the primary constructor's defaults.
 */
def this(kT: Int, kW: Int, kH: Int)(implicit ev: TensorNumeric[T]) =
  this(kT, kW, kH, kT, kW, kH)
/**
 * Switches the output-size computation to ceil mode (sizes are rounded up).
 *
 * @return this module, so calls can be chained
 */
def ceil(): VolumetricMaxPooling[T] = {
  this.ceilMode = true
  this
}
/**
 * Switches the output-size computation to floor mode (sizes are rounded down).
 *
 * @return this module, so calls can be chained
 */
def floor(): VolumetricMaxPooling[T] = {
  this.ceilMode = false
  this
}
// Constructor-time validation of the pooling geometry.
// Every kernel extent must be strictly positive.
require(Seq(kT, kW, kH).forall(_ > 0),
  s"kernel size should be greater than zero, but got kT: $kT kH: $kH kW: $kW")
// Every stride must be strictly positive.
require(Seq(dT, dW, dH).forall(_ > 0),
  s"stride should be greater than zero, but got dT: $dT dH: $dH dW: $dW")
// Each padding may be at most half of its kernel extent (integer division); note that
// equality is accepted even though the message says "smaller".
require(padT <= kT / 2 && padW <= kW / 2 && padH <= kH / 2,
  "pad should be smaller than half of kernel size, but got " +
    s"kT: $kT kH: $kH kW: $kW, padT: $padT, padW: $padW, padH: $padH")
/**
 * Forward pass: max-pools `input` and stores the result in the `output` field. The packed
 * kernel-relative position of every selected maximum is stored in `indices` for use by
 * `updateGradInput`.
 *
 * @param input contiguous 4D tensor (planes x time x height x width) or 5D tensor
 *              (batch x planes x time x height x width)
 * @return the `output` field, resized to (planes x oT x oH x oW), with a leading batch
 *         dimension when `input` is 5D
 * @throws IllegalArgumentException if `input` is not 4D/5D, is not contiguous, the
 *         computed output size is smaller than 1 in any dimension, or `T` is neither
 *         Float nor Double
 */
override def updateOutput(input: Tensor[T]): Tensor[T] = {
  require(input.dim() == 4 || input.dim() == 5,
    s"4D or 5D (batch mode) tensor expected for input, but got: ${ input.dim() }")
  require(input.isContiguous(), "input is not contiguous")
  val rank = input.dim()
  val nslices = input.size(rank - 3)
  val itime = input.size(rank - 2)
  val iheight = input.size(rank - 1)
  val iwidth = input.size(rank)

  // Output extent along one axis for the given input extent, kernel, stride and padding.
  def outputExtent(in: Int, kernel: Int, stride: Int, pad: Int): Int = {
    val steps = 1.0 * (in - kernel + 2 * pad) / stride
    val extent = (if (ceilMode) math.ceil(steps) else math.floor(steps)).toInt + 1
    // Ensure that the last pooling window starts inside the image. This used to be skipped
    // when all paddings were zero, which let ceil mode with stride > kernel produce an
    // output cell whose window lies entirely outside the input (its stored index then
    // decodes to an out-of-range input position in the backward pass).
    if ((extent - 1) * stride >= in + pad) extent - 1 else extent
  }

  val otime = outputExtent(itime, kT, dT, padT)
  val oheight = outputExtent(iheight, kH, dH, padH)
  val owidth = outputExtent(iwidth, kW, dW, padW)
  require(otime >= 1 && owidth >= 1 && oheight >= 1,
    s"Given input size: (${ nslices }x${ itime }x${ iheight }x${ iwidth })." +
      s" Calculated output size:" +
      s" (${ nslices }x${ otime }x${ oheight }x${ owidth }). Output size is too small")

  val batched = rank == 5
  val nBatch = if (batched) input.size(1) else 1
  if (batched) {
    output.resize(nBatch, nslices, otime, oheight, owidth)
    indices.resize(nBatch, nslices, otime, oheight, owidth)
  } else {
    output.resize(nslices, otime, oheight, owidth)
    indices.resize(nslices, otime, oheight, owidth)
  }

  // Pick the element-type specific kernel once; it pools a single (non-batch) frame.
  val poolFrame: (Tensor[T], Tensor[T], Tensor[Float]) => Unit =
    if (classTag[T] == classTag[Double]) {
      (src, dst, idx) => volumetricMaxPoolingForwardDouble(
        src.asInstanceOf[Tensor[Double]].storage().array(), src.storageOffset() - 1,
        dst.asInstanceOf[Tensor[Double]].storage().array(), dst.storageOffset() - 1,
        idx.storage().array(), idx.storageOffset() - 1,
        nslices, itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH, padT, padW, padH)
    } else if (classTag[T] == classTag[Float]) {
      (src, dst, idx) => volumetricMaxPoolingForwardFloat(
        src.asInstanceOf[Tensor[Float]].storage().array(), src.storageOffset() - 1,
        dst.asInstanceOf[Tensor[Float]].storage().array(), dst.storageOffset() - 1,
        idx.storage().array(), idx.storageOffset() - 1,
        nslices, itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH, padT, padW, padH)
    } else {
      throw new IllegalArgumentException("currently only support type float or double")
    }

  if (batched) {
    // Tensor selection along the first dimension is 1-based.
    var p = 1
    while (p <= nBatch) {
      poolFrame(input(p), output(p), indices(p))
      p += 1
    }
  } else {
    poolFrame(input, output, indices)
  }
  output
}
/**
 * Backward pass: routes every element of `gradOutput` to the input position that was
 * selected as the maximum in the preceding forward pass (read back from `indices`), and
 * accumulates it into the `gradInput` field.
 *
 * @param input      the tensor that was given to `updateOutput`; only its sizes are read
 * @param gradOutput contiguous gradient with respect to the module output
 * @return the `gradInput` field, resized like `input`
 * @throws IllegalArgumentException if `gradOutput` is not contiguous or `T` is neither
 *         Float nor Double
 */
override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
  val rank = input.dim()
  val nslices = input.size(rank - 3)
  val itime = input.size(rank - 2)
  val iheight = input.size(rank - 1)
  val iwidth = input.size(rank)
  val otime = gradOutput.size(rank - 2)
  val oheight = gradOutput.size(rank - 1)
  val owidth = gradOutput.size(rank)

  // The kernels accumulate with +=, so the target has to start out as all zeros.
  gradInput.resizeAs(input).zero()
  require(gradOutput.isContiguous(), "gradOutput is not contiguous")

  // Anything that is not 4D is handled as a batch along the first dimension.
  val batched = rank != 4
  val nBatch = if (batched) input.size(1) else 1

  // Pick the element-type specific kernel once; it scatters a single (non-batch) frame.
  val scatterFrame: (Tensor[T], Tensor[T], Tensor[Float]) => Unit =
    if (classTag[T] == classTag[Double]) {
      (gIn, gOut, idx) => volumetricMaxPoolingBackwardDouble(
        gIn.asInstanceOf[Tensor[Double]].storage().array(), gIn.storageOffset() - 1,
        gOut.asInstanceOf[Tensor[Double]].storage().array(), gOut.storageOffset() - 1,
        idx.storage().array(), idx.storageOffset() - 1,
        nslices, itime, iwidth, iheight, otime, owidth, oheight,
        dT, dW, dH, padT, padW, padH)
    } else if (classTag[T] == classTag[Float]) {
      (gIn, gOut, idx) => volumetricMaxPoolingBackwardFloat(
        gIn.asInstanceOf[Tensor[Float]].storage().array(), gIn.storageOffset() - 1,
        gOut.asInstanceOf[Tensor[Float]].storage().array(), gOut.storageOffset() - 1,
        idx.storage().array(), idx.storageOffset() - 1,
        nslices, itime, iwidth, iheight, otime, owidth, oheight,
        dT, dW, dH, padT, padW, padH)
    } else {
      throw new IllegalArgumentException("currently only support type float or double")
    }

  if (batched) {
    // Tensor selection along the first dimension is 1-based.
    var p = 1
    while (p <= nBatch) {
      scatterFrame(gradInput(p), gradOutput(p), indices(p))
      p += 1
    }
  } else {
    scatterFrame(gradInput, gradOutput, indices)
  }
  gradInput
}
/**
 * Two pooling modules are equal when the parent class considers them equal and they
 * agree on kernel, stride, padding, ceil mode and the cached `indices` tensor.
 */
override def equals(obj: Any): Boolean = {
  super.equals(obj) && (obj match {
    case other: VolumetricMaxPooling[_] =>
      // Same reference: no need to compare field by field.
      this.eq(other) || (
        kT == other.kT &&
          kW == other.kW &&
          kH == other.kH &&
          dT == other.dT &&
          dW == other.dW &&
          dH == other.dH &&
          padT == other.padT &&
          padW == other.padW &&
          padH == other.padH &&
          ceilMode == other.ceilMode &&
          indices == other.indices)
    case _ => false
  })
}
/**
 * Hash over the same fields that `equals` compares, seeded with the parent's hash and
 * combined as `hash * 37 + field.hashCode()` in declaration order.
 */
override def hashCode(): Int = {
  val seed = 37
  val fields = Seq[Any](kT, kW, kH, dT, dW, dH, padT, padW, padH, ceilMode, indices)
  fields.foldLeft(super.hashCode()) { (acc, field) => acc * seed + field.hashCode() }
}
/**
 * Renders the module as `name(kT, kW, kH, dT, dW, dH, padT, padW, padH)`.
 */
override def toString(): String = {
  val geometry = Seq(kT, kW, kH, dT, dW, dH, padT, padW, padH).mkString(", ")
  s"${getPrintName}($geometry)"
}
/**
 * Clears the parent's state and additionally resets the cached `indices` tensor through
 * its no-argument `set()` (presumably releasing its storage -- confirm against Tensor).
 *
 * @return this module
 */
override def clearState(): this.type = {
  super.clearState()
  this.indices.set()
  this
}
/**
 * Max-pools one frame of `nSlices` volumes (each iTime x iHeight x iWidth, row-major with
 * width fastest) from `input` into `output` (each oTime x oHeight x oWidth).
 *
 * For every output cell the kernel-relative position (z, y, x) of the selected maximum is
 * packed as `(z & 0xff) << 24 | (y & 0xff) << 16 | (x & 0xff) << 8` and stored in
 * `indices` as a Float. Offsets are masked to 8 bits, so kernel extents above 256 cannot
 * be represented. NOTE(review): the low byte is left empty, presumably so that the packed
 * value stays exactly representable in a Float -- confirm.
 *
 * Cells of the window that fall into the padding or beyond the input are skipped. The
 * running maximum starts at negative infinity and the recorded position defaults to the
 * first cell of the clipped window, so a window holding only -Infinity yields -Infinity
 * (not `Double.MinValue`) and its index never points into the leading padding.
 *
 * @param input         flat input storage
 * @param inputOffset   0-based offset of the frame inside `input`
 * @param output        flat output storage, written in place
 * @param outputOffset  0-based offset of the frame inside `output`
 * @param indices       flat storage receiving the packed arg-max positions
 * @param indicesOffset 0-based offset of the frame inside `indices`
 */
private def volumetricMaxPoolingForwardDouble(input: Array[Double], inputOffset: Int,
  output: Array[Double], outputOffset: Int,
  indices: Array[Float], indicesOffset: Int,
  nSlices: Int, iTime: Int, iWidth: Int, iHeight: Int, oTime: Int, oWidth: Int, oHeight: Int,
  kT: Int, kW: Int, kH: Int, dT: Int, dW: Int, dH: Int, padT: Int, padW: Int, padH: Int): Unit = {
  val inPlane = iWidth * iHeight
  val outPlane = oWidth * oHeight
  var k = 0
  while (k < nSlices) {
    val inSliceBase = inputOffset + k * iTime * inPlane
    val outSliceBase = k * oTime * outPlane
    var ti = 0
    while (ti < oTime) {
      var i = 0
      while (i < oHeight) {
        var j = 0
        while (j < oWidth) {
          // Window origin in input coordinates; negative values lie in the padding.
          val tOrigin = ti * dT - padT
          val hOrigin = i * dH - padH
          val wOrigin = j * dW - padW
          // Kernel extent that remains after dropping the cells in the leading padding.
          val kernelT = math.min(tOrigin + kT, kT)
          val kernelH = math.min(hOrigin + kH, kH)
          val kernelW = math.min(wOrigin + kW, kW)
          val tstart = math.max(tOrigin, 0)
          val hstart = math.max(hOrigin, 0)
          val wstart = math.max(wOrigin, 0)
          val inputStart = inSliceBase + tstart * inPlane + hstart * iWidth + wstart
          var maxval = Double.NegativeInfinity
          // Default to the first cell of the clipped window, in kernel coordinates.
          var mz = kT - kernelT
          var my = kH - kernelH
          var mx = kW - kernelW
          var z = 0
          while (z < kernelT) {
            var y = 0
            while (y < kernelH) {
              var x = 0
              while (x < kernelW) {
                // Skip cells past the trailing edge of the input.
                if ((tstart + z < iTime) && (hstart + y < iHeight) && (wstart + x < iWidth)) {
                  val value = input(z * inPlane + y * iWidth + x + inputStart)
                  if (value > maxval) {
                    maxval = value
                    // Store the position w.r.t. the unclipped kernel.
                    mz = z + kT - kernelT
                    my = y + kH - kernelH
                    mx = x + kW - kernelW
                  }
                }
                x += 1
              }
              y += 1
            }
            z += 1
          }
          val cell = outSliceBase + ti * outPlane + i * oWidth + j
          output(outputOffset + cell) = maxval
          indices(indicesOffset + cell) =
            (((mz & 0xff) << 24) + ((my & 0xff) << 16) + ((mx & 0xff) << 8)).toFloat
          j += 1
        }
        i += 1
      }
      ti += 1
    }
    k += 1
  }
}
/**
 * Max-pools one frame of `nSlices` volumes (each iTime x iHeight x iWidth, row-major with
 * width fastest) from `input` into `output` (each oTime x oHeight x oWidth).
 *
 * For every output cell the kernel-relative position (z, y, x) of the selected maximum is
 * packed as `(z & 0xff) << 24 | (y & 0xff) << 16 | (x & 0xff) << 8` and stored in
 * `indices` as a Float. Offsets are masked to 8 bits, so kernel extents above 256 cannot
 * be represented. NOTE(review): the low byte is left empty, presumably so that the packed
 * value stays exactly representable in a Float -- confirm.
 *
 * Cells of the window that fall into the padding or beyond the input are skipped. The
 * running maximum starts at negative infinity and the recorded position defaults to the
 * first cell of the clipped window, so a window holding only -Infinity yields -Infinity
 * (not `Float.MinValue`) and its index never points into the leading padding.
 *
 * @param input         flat input storage
 * @param inputOffset   0-based offset of the frame inside `input`
 * @param output        flat output storage, written in place
 * @param outputOffset  0-based offset of the frame inside `output`
 * @param indices       flat storage receiving the packed arg-max positions
 * @param indicesOffset 0-based offset of the frame inside `indices`
 */
private def volumetricMaxPoolingForwardFloat(input: Array[Float], inputOffset: Int,
  output: Array[Float], outputOffset: Int,
  indices: Array[Float], indicesOffset: Int,
  nSlices: Int, iTime: Int, iWidth: Int, iHeight: Int, oTime: Int, oWidth: Int, oHeight: Int,
  kT: Int, kW: Int, kH: Int, dT: Int, dW: Int, dH: Int, padT: Int, padW: Int, padH: Int): Unit = {
  val inPlane = iWidth * iHeight
  val outPlane = oWidth * oHeight
  var k = 0
  while (k < nSlices) {
    val inSliceBase = inputOffset + k * iTime * inPlane
    val outSliceBase = k * oTime * outPlane
    var ti = 0
    while (ti < oTime) {
      var i = 0
      while (i < oHeight) {
        var j = 0
        while (j < oWidth) {
          // Window origin in input coordinates; negative values lie in the padding.
          val tOrigin = ti * dT - padT
          val hOrigin = i * dH - padH
          val wOrigin = j * dW - padW
          // Kernel extent that remains after dropping the cells in the leading padding.
          val kernelT = math.min(tOrigin + kT, kT)
          val kernelH = math.min(hOrigin + kH, kH)
          val kernelW = math.min(wOrigin + kW, kW)
          val tstart = math.max(tOrigin, 0)
          val hstart = math.max(hOrigin, 0)
          val wstart = math.max(wOrigin, 0)
          val inputStart = inSliceBase + tstart * inPlane + hstart * iWidth + wstart
          var maxval = Float.NegativeInfinity
          // Default to the first cell of the clipped window, in kernel coordinates.
          var mz = kT - kernelT
          var my = kH - kernelH
          var mx = kW - kernelW
          var z = 0
          while (z < kernelT) {
            var y = 0
            while (y < kernelH) {
              var x = 0
              while (x < kernelW) {
                // Skip cells past the trailing edge of the input.
                if ((tstart + z < iTime) && (hstart + y < iHeight) && (wstart + x < iWidth)) {
                  val value = input(z * inPlane + y * iWidth + x + inputStart)
                  if (value > maxval) {
                    maxval = value
                    // Store the position w.r.t. the unclipped kernel.
                    mz = z + kT - kernelT
                    my = y + kH - kernelH
                    mx = x + kW - kernelW
                  }
                }
                x += 1
              }
              y += 1
            }
            z += 1
          }
          val cell = outSliceBase + ti * outPlane + i * oWidth + j
          output(outputOffset + cell) = maxval
          indices(indicesOffset + cell) =
            (((mz & 0xff) << 24) + ((my & 0xff) << 16) + ((mx & 0xff) << 8)).toFloat
          j += 1
        }
        i += 1
      }
      ti += 1
    }
    k += 1
  }
}
/**
 * Scatters the gradient of one frame back to the input positions chosen in the forward
 * pass.
 *
 * For every output cell the packed index is read from `indices`, its three 8-bit fields
 * (bits 24-31 time, 16-23 height, 8-15 width) are decoded as kernel-relative offsets, and
 * the window origin (`cell * stride - pad`) is added to obtain the input coordinate. The
 * matching `gradOutput` value is then accumulated (`+=`) into `gradInput`, so the caller
 * has to provide a zeroed `gradInput` if plain gradients are wanted.
 *
 * @param gradInput        flat gradient storage for the input, updated in place
 * @param gradInputOffset  0-based offset of the frame inside `gradInput`
 * @param gradOutput       flat gradient storage for the output
 * @param gradOutputOffset 0-based offset of the frame inside `gradOutput`
 * @param indices          packed arg-max positions written by the forward kernel
 * @param indicesOffset    0-based offset of the frame inside `indices`
 */
private def volumetricMaxPoolingBackwardDouble(gradInput: Array[Double], gradInputOffset: Int,
  gradOutput: Array[Double], gradOutputOffset: Int,
  indices: Array[Float], indicesOffset: Int,
  nslices: Int, itime: Int, iwidth: Int, iheight: Int,
  otime: Int, owidth: Int, oheight: Int,
  dT: Int, dW: Int, dH: Int, padT: Int, padW: Int, padH: Int): Unit = {
  val inPlane = iheight * iwidth
  val outPlane = oheight * owidth
  val inVolume = itime * iwidth * iheight
  val outVolume = otime * owidth * oheight
  var slice = 0
  while (slice < nslices) {
    val gradInBase = gradInputOffset + slice * inVolume
    val gradOutBase = gradOutputOffset + slice * outVolume
    val indexBase = indicesOffset + slice * outVolume
    var t = 0
    while (t < otime) {
      var row = 0
      while (row < oheight) {
        var col = 0
        while (col < owidth) {
          val cell = t * outPlane + row * owidth + col
          val packed = indices(indexBase + cell).toInt
          // Kernel-relative offset + window origin = coordinate in the input volume.
          val srcT = ((packed >> 24) & 0xff) + t * dT - padT
          val srcH = ((packed >> 16) & 0xff) + row * dH - padH
          val srcW = ((packed >> 8) & 0xff) + col * dW - padW
          gradInput(srcT * inPlane + srcH * iwidth + srcW + gradInBase) +=
            gradOutput(cell + gradOutBase)
          col += 1
        }
        row += 1
      }
      t += 1
    }
    slice += 1
  }
}
/**
 * Scatters the gradient of one frame back to the input positions chosen in the forward
 * pass.
 *
 * For every output cell the packed index is read from `indices`, its three 8-bit fields
 * (bits 24-31 time, 16-23 height, 8-15 width) are decoded as kernel-relative offsets, and
 * the window origin (`cell * stride - pad`) is added to obtain the input coordinate. The
 * matching `gradOutput` value is then accumulated (`+=`) into `gradInput`, so the caller
 * has to provide a zeroed `gradInput` if plain gradients are wanted.
 *
 * @param gradInput        flat gradient storage for the input, updated in place
 * @param gradInputOffset  0-based offset of the frame inside `gradInput`
 * @param gradOutput       flat gradient storage for the output
 * @param gradOutputOffset 0-based offset of the frame inside `gradOutput`
 * @param indices          packed arg-max positions written by the forward kernel
 * @param indicesOffset    0-based offset of the frame inside `indices`
 */
private def volumetricMaxPoolingBackwardFloat(gradInput: Array[Float], gradInputOffset: Int,
  gradOutput: Array[Float], gradOutputOffset: Int,
  indices: Array[Float], indicesOffset: Int,
  nslices: Int, itime: Int, iwidth: Int, iheight: Int,
  otime: Int, owidth: Int, oheight: Int,
  dT: Int, dW: Int, dH: Int, padT: Int, padW: Int, padH: Int): Unit = {
  val inPlane = iheight * iwidth
  val outPlane = oheight * owidth
  val inVolume = itime * iwidth * iheight
  val outVolume = otime * owidth * oheight
  var slice = 0
  while (slice < nslices) {
    val gradInBase = gradInputOffset + slice * inVolume
    val gradOutBase = gradOutputOffset + slice * outVolume
    val indexBase = indicesOffset + slice * outVolume
    var t = 0
    while (t < otime) {
      var row = 0
      while (row < oheight) {
        var col = 0
        while (col < owidth) {
          val cell = t * outPlane + row * owidth + col
          val packed = indices(indexBase + cell).toInt
          // Kernel-relative offset + window origin = coordinate in the input volume.
          val srcT = ((packed >> 24) & 0xff) + t * dT - padT
          val srcH = ((packed >> 16) & 0xff) + row * dH - padH
          val srcW = ((packed >> 8) & 0xff) + col * dW - padW
          gradInput(srcT * inPlane + srcH * iwidth + srcW + gradInBase) +=
            gradOutput(cell + gradOutBase)
          col += 1
        }
        row += 1
      }
      t += 1
    }
    slice += 1
  }
}
}
/**
 * Factory methods for [[VolumetricMaxPooling]] plus the custom (de)serialization hooks
 * that persist the two pieces of state not covered by the constructor arguments:
 * `ceilMode` and the cached `indices` tensor.
 */
object VolumetricMaxPooling extends ModuleSerializable {

  /**
   * Creates a pooling layer with explicit kernel, stride and (optional) padding.
   */
  def apply[@specialized(Float, Double) T: ClassTag]
  (kT: Int, kW: Int, kH: Int, dT: Int, dW: Int, dH: Int,
    padT: Int = 0, padW: Int = 0, padH: Int = 0)(implicit ev: TensorNumeric[T])
  : VolumetricMaxPooling[T] =
    new VolumetricMaxPooling[T](kT, kW, kH, dT, dW, dH, padT, padW, padH)

  /**
   * Creates a pooling layer through the three-argument constructor, which uses the
   * kernel size as the stride.
   */
  def apply[@specialized(Float, Double) T: ClassTag]
  (kT: Int, kW: Int, kH: Int)(implicit ev: TensorNumeric[T])
  : VolumetricMaxPooling[T] =
    new VolumetricMaxPooling[T](kT, kW, kH)

  /**
   * Loads the module through the default loader, then restores `ceilMode` and `indices`
   * from the "ceilMode" and "indices" attributes of the serialized module.
   */
  override def doLoadModule[T: ClassTag](context : DeserializeContext)
    (implicit ev: TensorNumeric[T]) : AbstractModule[Activity, Activity, T] = {
    val restored = super.doLoadModule(context).asInstanceOf[VolumetricMaxPooling[T]]
    val attributes = context.bigdlModule.getAttrMap

    // Decodes one named attribute of the serialized module.
    def readAttr(key: String) = DataConverter.getAttributeValue(context, attributes.get(key))

    restored.ceilMode = readAttr("ceilMode").asInstanceOf[Boolean]
    restored.indices = readAttr("indices").asInstanceOf[Tensor[Float]]
    restored
  }

  /**
   * Serializes the module through the default serializer, then appends `ceilMode` and
   * `indices` as the "ceilMode" and "indices" attributes.
   */
  override def doSerializeModule[T: ClassTag](context: SerializeContext[T],
    volumetricMaxBuilder : BigDLModule.Builder)
    (implicit ev: TensorNumeric[T]) : Unit = {
    val pooling = context.moduleData.module.asInstanceOf[VolumetricMaxPooling[T]]
    super.doSerializeModule(context, volumetricMaxBuilder)

    val ceilModeAttr = AttrValue.newBuilder
    DataConverter.setAttributeValue(context, ceilModeAttr,
      pooling.ceilMode, universe.typeOf[Boolean])
    volumetricMaxBuilder.putAttr("ceilMode", ceilModeAttr.build)

    val indicesAttr = AttrValue.newBuilder
    DataConverter.setAttributeValue(context, indicesAttr,
      pooling.indices, ModuleSerializer.tensorType)
    volumetricMaxBuilder.putAttr("indices", indicesAttr.build)
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy