com.intel.analytics.bigdl.nn.SpatialDilatedConvolution.scala

/*
 * Copyright 2016 The BigDL Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.intel.analytics.bigdl.nn

import com.intel.analytics.bigdl.Module
import com.intel.analytics.bigdl.nn.abstractnn.{Initializable, TensorModule}
import com.intel.analytics.bigdl.optim.Regularizer
import com.intel.analytics.bigdl.tensor.{DenseTensorBLAS, DoubleType, FloatType, Tensor}
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.utils.RandomGenerator._
import com.intel.analytics.bigdl.utils.{Shape, T, Table}

import scala.reflect.ClassTag

/**
 * Apply a 2D dilated convolution over an input image.
 *
 * The input tensor is expected to be a 3D or 4D (with batch) tensor.
 *
 * If input is a 3D tensor nInputPlane x height x width,
 * owidth  = floor((width  + 2 * padW - dilationW * (kW - 1) - 1) / dW) + 1
 * oheight = floor((height + 2 * padH - dilationH * (kH - 1) - 1) / dH) + 1
 *
 * Reference Paper: Yu F, Koltun V. Multi-scale context aggregation by dilated convolutions[J].
 * arXiv preprint arXiv:1511.07122, 2015.
 *
 * @param nInputPlane The number of expected input planes in the image given into forward().
 * @param nOutputPlane The number of output planes the convolution layer will produce.
 * @param kW The kernel width of the convolution.
 * @param kH The kernel height of the convolution.
 * @param dW The step of the convolution in the width dimension. Default is 1.
 * @param dH The step of the convolution in the height dimension. Default is 1.
 * @param padW The additional zeros added per width to the input planes. Default is 0.
 * @param padH The additional zeros added per height to the input planes. Default is 0.
 * @param dilationW The spacing (dilation) between kernel elements along the width. Default is 1.
 * @param dilationH The spacing (dilation) between kernel elements along the height. Default is 1.
 * @param wRegularizer: instance of [[Regularizer]]
 *                    (eg. L1 or L2 regularization), applied to the input weights matrices.
 * @param bRegularizer: instance of [[Regularizer]]
 *                    applied to the bias.
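 *
 * A minimal usage sketch (the Float type and the 3 x 32 x 32 input below are only
 * illustrative choices, not part of this layer's contract):
 * {{{
 *   val conv = SpatialDilatedConvolution[Float](3, 16, 3, 3, 1, 1, 1, 1, 2, 2)
 *   val input = Tensor[Float](3, 32, 32).rand()
 *   val output = conv.forward(input)
 *   // owidth = oheight = floor((32 + 2*1 - 2*(3-1) - 1) / 1) + 1 = 30,
 *   // so output has size 16 x 30 x 30
 * }}}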
 */

@SerialVersionUID(- 933818099759912492L)
class SpatialDilatedConvolution[T: ClassTag](
  val nInputPlane: Int,
  val nOutputPlane: Int,
  val kW: Int,
  val kH: Int,
  val dW: Int = 1,
  val dH: Int = 1,
  val padW: Int = 0,
  val padH: Int = 0,
  val dilationW: Int = 1,
  val dilationH: Int = 1,
  var wRegularizer: Regularizer[T] = null,
  var bRegularizer: Regularizer[T] = null
)(implicit ev: TensorNumeric[T]) extends TensorModule[T] with Initializable {

  val weight: Tensor[T] = Tensor[T](nOutputPlane, nInputPlane, kH, kW)
  val gradWeight = Tensor[T](nOutputPlane, nInputPlane, kH, kW)
  val gradBias = Tensor[T](nOutputPlane)

  val bias: Tensor[T] = Tensor[T](nOutputPlane)
  @transient private var fInput: Tensor[T] = null
  @transient private var fGradInput: Tensor[T] = null

  {
    val stdv = 1.0 / math.sqrt(kW * kH * nInputPlane)
    val wInit = RandomUniform(-stdv, stdv)
    val bInit = RandomUniform(-stdv, stdv)

    setInitMethod(wInit, bInit)
  }

  private var im2colTime = 0L
  private var col2imTime = 0L

  def getIm2ColTime(): Double = im2colTime

  def getCol2ImgTime(): Double = col2imTime

  override def reset(): Unit = {
    weightInitMethod.init(weight, VariableFormat.OUT_IN_KW_KH)
    biasInitMethod.init(bias, VariableFormat.ONE_D)
    zeroGradParameters()
  }

  private def shapeCheck(
    input: Tensor[T], gradOutput: Tensor[T],
    weight: Tensor[T], bias: Tensor[T],
    kH: Int, kW: Int, dH: Int, dW: Int, padH: Int, padW: Int,
    dilationH: Int, dilationW: Int): Unit = {

    require(weight.nDimension == 4,
      "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " +
        s"but got: ${weight.nDimension()}")
    require(kW > 0 && kH > 0,
      s"kernel size should be greater than zero, but got kH: $kH kW: $kW")
    require(dW > 0 && dH > 0,
      s"stride should be greater than zero, but got dH: $dH dW: $dW")
    require(weight.nDimension == 2 || weight.nDimension == 4,
      s"2D or 4D weight tensor expected, but got: ${weight.nDimension()}")

    if (null != bias) {
      require(bias.nDimension() == 1 && bias.size(1) == weight.size(1))
    }

    val nDim = input.nDimension
    val dimF = if (nDim == 4) 2 else 1
    val dimH = if (nDim == 4) 3 else 2
    val dimW = if (nDim == 4) 4 else 3

    require(nDim == 3 || nDim == 4,
      "SpatialDilatedConvolution: " + ErrorInfo.constrainInputAs3DOrBatch)

    val inputHeight = input.size(dimH)
    val inputWidth = input.size(dimW)
    val outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1
    val outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1

    require(outputWidth >= 1 && outputHeight >= 1,
      s"Given input size: ($nInputPlane x $inputHeight x $inputWidth). " +
        s"Calculated output size: ($nOutputPlane x $outputHeight x $outputWidth). " +
        s"Output size is too small")

    require(input.dim() == nDim && input.size(dimF) == nInputPlane)

    if (null != gradOutput) {
      require(gradOutput.nDimension() == nDim &&
        gradOutput.size(dimF) == nOutputPlane &&
        gradOutput.size(dimH) == outputHeight &&
        gradOutput.size(dimW) == outputWidth
      )
    }
  }

  override def computeOutputShape(inputShape: Shape): Shape = {
    val input = inputShape.toSingle().toArray
    require(input.length == 4,
      s"AtrousConvolution2D requires 4D input, but got input dim ${input.length}")
    val outputWidth = (input(3) + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1
    val outputHeight = (input(2) + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1
    Shape(input(0), nOutputPlane, outputHeight, outputWidth)
  }

  override def updateOutput(input: Tensor[T]): Tensor[T] = {
    shapeCheck(input, null, weight, bias,
      kH, kW, dH, dW, padH, padW, dilationH, dilationW)
    require(input.isContiguous())

    val isBatch = if (input.nDimension() == 3) {
      // Force batch
      input.resize(1, input.size(1), input.size(2), input.size(3))
      false
    } else {
      true
    }

    val inputWidth = input.size(4)
    val inputHeight = input.size(3)
    val outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1
    val outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1

    // Batch size + input planes
    val batchSize = input.size(1)

    // Resize output
    output.resize(batchSize, nOutputPlane, outputHeight, outputWidth)
    output.zero()

    if (null == fInput) {
      fInput = Tensor[T]()
    }
    // Resize temporary columns
    val columns = fInput
    columns.resize(nInputPlane*kW*kH, outputHeight*outputWidth)

    if (null == fGradInput) {
      fGradInput = Tensor[T]()
    }
    // Define a buffer of ones, for bias accumulation
    val ones = fGradInput
    if (ones.nDimension != 2 || ones.size(1)*ones.size(2) < outputHeight*outputWidth) {
      // Resize plane and fill with ones...
      ones.resize(outputHeight, outputWidth)
      ones.fill(ev.fromType[Int](1))
    }

    // For each element in batch, do:
    var elt = 1
    while (elt <= batchSize) {
      // Matrix multiply per output:
      val input_n = input.select(1, elt)
      val output_n = output.select(1, elt)

      // Do Bias first:
      // M,N,K are dims of matrix A and B
      var m = nOutputPlane
      var n = outputHeight * outputWidth
      var k = 1

      // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
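      // (In effect this sets output_n(o, x) = bias(o) for every spatial position x,
      //  broadcasting the bias over the outputHeight*outputWidth plane.)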
      if (null != bias) {
        DenseTensorBLAS.gemm[T](
          't', 'n',
          n, m, k,
          ev.fromType[Int](1),
          ones.storage().array(), ones.storageOffset() - 1, k,
          bias.storage().array(), bias.storageOffset() - 1, k,
          ev.fromType[Int](0),
          output_n.storage().array(), output_n.storageOffset() - 1, n
        )
      } else {
        output_n.zero()
      }

      // Extract columns:
      val before = System.nanoTime()
      ev.getType() match {
        case DoubleType => NNPrimitive.im2colWithDilationDouble(
          input_n.asInstanceOf[Tensor[Double]], columns.asInstanceOf[Tensor[Double]],
          nInputPlane, inputHeight, inputWidth,
          kH, kW,
          padH, padW,
          dH, dW,
          dilationH, dilationW
        )
        case FloatType => NNPrimitive.im2colWithDilationFloat(
          input_n.asInstanceOf[Tensor[Float]], columns.asInstanceOf[Tensor[Float]],
          nInputPlane, inputHeight, inputWidth,
          kH, kW,
          padH, padW,
          dH, dW,
          dilationH, dilationW
        )
        case t => throw new NotImplementedError(s"$t is not supported")
      }
      im2colTime += System.nanoTime() - before

      // M,N,K are dims of matrix A and B
      m = nOutputPlane
      n = columns.size(2)
      k = nInputPlane*kH*kW

      // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
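      // (Viewing weight as an nOutputPlane x (nInputPlane*kH*kW) matrix, this computes
      //  output_n += weight * columns, i.e. the dilated convolution as one matrix product
      //  over the im2col buffer.)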
      DenseTensorBLAS.gemm[T](
        'n', 'n',
        n, m, k,
        ev.fromType[Int](1),
        columns.storage().array(), columns.storageOffset() - 1, n,
        weight.storage().array(), weight.storageOffset() - 1, k,
        ev.fromType[Int](1),
        output_n.storage().array(), output_n.storageOffset() - 1, n
      )
      elt += 1
    }

    // Resize output
    if (!isBatch) {
      output.resize(nOutputPlane, outputHeight, outputWidth)
      input.resize(nInputPlane, inputHeight, inputWidth)
    }
    output
  }

  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
    shapeCheck(input, gradOutput, weight, null,
      kH, kW, dH, dW, padH, padW, dilationH, dilationW)

    val isBatch = if (input.nDimension() == 3) {
      // Force batch
      input.resize(1, input.size(1), input.size(2), input.size(3))
      gradOutput.resize(1, gradOutput.size(1), gradOutput.size(2), gradOutput.size(3))
      false
    } else {
      true
    }

    val inputWidth = input.size(4)
    val inputHeight = input.size(3)
    val outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1
    val outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1

    // Batch size + input planes
    val batchSize = input.size(1)

    // Resize output
    gradInput.resize(batchSize, nInputPlane, inputHeight, inputWidth).zero()

    // Resize temporary columns
    val gradColumns = fInput
    gradColumns.resize(nInputPlane*kW*kH, outputHeight*outputWidth)
    gradColumns.zero()

    // For each element in batch, do:
    var elt = 1
    while (elt <= batchSize) {
      // Matrix multiply per sample:
      val gradInput_n = gradInput.select(1, elt)
      val gradOutput_n = gradOutput.select(1, elt)

      // M,N,K are dims of matrix A and B
      val m = nInputPlane*kW*kH
      val n = gradColumns.size(2)
      val k = nOutputPlane

      // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
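      // (With 2D views, this computes gradColumns = weight^T * gradOutput_n; the col2im
      //  call below scatter-adds these columns back into gradInput_n.)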
      DenseTensorBLAS.gemm[T](
        'n', 't',
        n, m, k,
        ev.fromType[Int](1),
        gradOutput_n.storage().array(), gradOutput_n.storageOffset() - 1, n,
        weight.storage().array(), weight.storageOffset() - 1, m,
        ev.fromType[Int](0),
        gradColumns.storage().array(), gradColumns.storageOffset() - 1, n
      )

      // Unpack columns back into input:
      val before = System.nanoTime()
      ev.getType() match {
        case DoubleType => NNPrimitive.col2imWithDilationDouble(
          gradColumns.asInstanceOf[Tensor[Double]], gradInput_n.asInstanceOf[Tensor[Double]],
          nInputPlane, inputHeight, inputWidth,
          kH, kW,
          padH, padW,
          dH, dW,
          dilationH, dilationW
        )
        case FloatType => NNPrimitive.col2imWithDilationFloat(
          gradColumns.asInstanceOf[Tensor[Float]], gradInput_n.asInstanceOf[Tensor[Float]],
          nInputPlane, inputHeight, inputWidth,
          kH, kW,
          padH, padW,
          dH, dW,
          dilationH, dilationW
        )
        case t => throw new NotImplementedError(s"$t is not supported")
      }
      col2imTime += System.nanoTime() - before
      elt += 1
    }

    // Resize output
    if (!isBatch) {
      gradOutput.resize(nOutputPlane, outputHeight, outputWidth)
      input.resize(nInputPlane, inputHeight, inputWidth)
      gradInput.resize(nInputPlane, inputHeight, inputWidth)
    }

    gradInput
  }

  override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T]): Unit = {
    shapeCheck(input, gradOutput, gradWeight, gradBias,
      kH, kW, dH, dW, padH, padW, dilationH, dilationW)

    val isBatch = if (input.nDimension() == 3) {
      // Force batch
      input.resize(1, input.size(1), input.size(2), input.size(3))
      gradOutput.resize(1, gradOutput.size(1), gradOutput.size(2), gradOutput.size(3))
      false
    } else {
      true
    }

    val inputWidth = input.size(4)
    val inputHeight = input.size(3)
    val outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1
    val outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1

    // Batch size + input planes
    val batchSize = input.size(1)

    // Define a buffer of ones, for bias accumulation
    val ones = fGradInput
    if (ones.nDimension != 2 || ones.size(1)*ones.size(2) < outputHeight*outputWidth) {
      // Resize plane and fill with ones...
      ones.resize(outputHeight, outputWidth)
      ones.fill(ev.fromType[Int](1))
    }

    // Resize temporary columns
    val columns = fInput
    columns.resize(nInputPlane*kW*kH, outputHeight*outputWidth)

    // For each element in batch, do:
    var elt = 1
    while (elt <= batchSize) {
      // Matrix multiply per output:
      val input_n = input.select(1, elt)
      val gradOutput_n = gradOutput.select(1, elt)

      // Extract columns:
      val before = System.nanoTime()
      ev.getType() match {
        case DoubleType => NNPrimitive.im2colWithDilationDouble(
          input_n.asInstanceOf[Tensor[Double]], columns.asInstanceOf[Tensor[Double]],
          nInputPlane, inputHeight, inputWidth,
          kH, kW,
          padH, padW,
          dH, dW,
          dilationH, dilationW
        )
        case FloatType => NNPrimitive.im2colWithDilationFloat(
          input_n.asInstanceOf[Tensor[Float]], columns.asInstanceOf[Tensor[Float]],
          nInputPlane, inputHeight, inputWidth,
          kH, kW,
          padH, padW,
          dH, dW,
          dilationH, dilationW
        )
        case t => throw new NotImplementedError(s"$t is not supported")
      }
      im2colTime += System.nanoTime() - before

      // M,N,K are dims of matrix A and B
      var m = nOutputPlane
      val n = nInputPlane*kW*kH
      var k = columns.size(2)

      if (scaleW != 0) {
        // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
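        // (With 2D views, this accumulates gradWeight += scaleW * gradOutput_n * columns^T,
        //  where gradWeight is seen as an nOutputPlane x (nInputPlane*kW*kH) matrix.)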
        DenseTensorBLAS.gemm[T](
          't', 'n',
          n, m, k,
          ev.fromType[Double](scaleW),
          columns.storage().array(), columns.storageOffset() - 1, k,
          gradOutput_n.storage().array(), gradOutput_n.storageOffset() - 1, k,
          ev.fromType[Int](1),
          gradWeight.storage().array(), gradWeight.storageOffset() - 1, n
        )
      }


      // Do Bias:
      // M,N,K are dims of matrix A and B
      m = nOutputPlane
      k = outputHeight * outputWidth

      // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
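      // (This accumulates gradBias(o) += scaleB * sum_x gradOutput_n(o, x), i.e. the
      //  gradient summed over all spatial positions, via a product with the ones buffer.)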
      if (null != gradBias && scaleB != 0) {
        ev.gemv(
          't',
          k, m,
          ev.fromType[Double](scaleB),
          gradOutput_n.storage().array(), gradOutput_n.storageOffset() - 1, k,
          ones.storage().array(), ones.storageOffset() - 1, 1,
          ev.fromType[Int](1),
          gradBias.storage().array(), gradBias.storageOffset() - 1, 1
        )
      }
      elt += 1
    }

    // Resize
    if (!isBatch) {
      gradOutput.resize(nOutputPlane, outputHeight, outputWidth)
      input.resize(nInputPlane, inputHeight, inputWidth)
    }

    if (null != wRegularizer) {
      wRegularizer.accRegularization(weight, gradWeight, scaleW)
    }
    if (null != bRegularizer) {
      bRegularizer.accRegularization(bias, gradBias, scaleB)
    }
  }

  override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = {
    (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias))
  }

  override def equals(obj: Any): Boolean = {

    if (!super.equals(obj)) {
      return false
    }

    if (!obj.isInstanceOf[SpatialDilatedConvolution[T]]) {
      return false
    }
    val other = obj.asInstanceOf[SpatialDilatedConvolution[T]]
    if (this.eq(other)) {
      return true
    }

    nInputPlane == other.nInputPlane &&
      nOutputPlane == other.nOutputPlane &&
      kW == other.kW &&
      kH == other.kH &&
      dW == other.dW &&
      dH == other.dH &&
      padW == other.padW &&
      padH == other.padH &&
      dilationW == other.dilationW &&
      dilationH == other.dilationH &&
      weight == other.weight &&
      bias == other.bias &&
      gradWeight == other.gradWeight &&
      gradBias == other.gradBias
  }

  override def hashCode() : Int = {
    val seed = 37
    var hash = super.hashCode()
    hash = hash * seed + nInputPlane.hashCode()
    hash = hash * seed + nOutputPlane.hashCode()
    hash = hash * seed + kW.hashCode()
    hash = hash * seed + kH.hashCode()
    hash = hash * seed + dW.hashCode()
    hash = hash * seed + dH.hashCode()
    hash = hash * seed + padW.hashCode()
    hash = hash * seed + padH.hashCode()
    hash = hash * seed + dilationW.hashCode()
    hash = hash * seed + dilationH.hashCode()
    hash = hash * seed + weight.hashCode()
    hash = hash * seed + bias.hashCode()
    hash = hash * seed + gradWeight.hashCode()
    hash = hash * seed + gradBias.hashCode()

    hash
  }

  override def toString(): String = {
    s"${getPrintName}($nInputPlane -> $nOutputPlane, " +
      s"$kW x $kH, $dW, $dH, $padW, $padH, $dilationH, $dilationW)"
  }
}

object SpatialDilatedConvolution extends quantized.Quantizable {
  def apply[@specialized(Float, Double) T: ClassTag](
      nInputPlane: Int,
      nOutputPlane: Int,
      kW: Int,
      kH: Int,
      dW: Int = 1,
      dH: Int = 1,
      padW: Int = 0,
      padH: Int = 0,
      dilationW: Int = 1,
      dilationH: Int = 1,
      wRegularizer: Regularizer[T] = null,
      bRegularizer: Regularizer[T] = null
  )(implicit ev: TensorNumeric[T]) : SpatialDilatedConvolution[T] = {
    new SpatialDilatedConvolution[T](nInputPlane, nOutputPlane, kW, kH, dW, dH,
      padW, padH, dilationW, dilationH,
      wRegularizer, bRegularizer)
  }
  def quantize[T: ClassTag](module: Module[T])(implicit ev: TensorNumeric[T]): Module[T] = {
    val conv = module.asInstanceOf[SpatialDilatedConvolution[T]]
    quantized.SpatialDilatedConvolution[T](
      conv.nInputPlane, conv.nOutputPlane, conv.kW, conv.kH, conv.dW,
      conv.dH, conv.padW, conv.padH, conv.dilationW, conv.dilationH, initWeight = conv.weight,
      initBias = conv.bias).setName(conv.getName())
  }
}



