// Source file: com.intel.analytics.bigdl.nn.Proposal.scala (from the bigdl-SPARK_3.0 artifact)
/*
 * Copyright 2016 The BigDL Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.intel.analytics.bigdl.nn

import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.transform.vision.image.util.BboxUtil
import com.intel.analytics.bigdl.utils.Table

/**
 * Outputs object detection proposals by applying estimated bounding-box
 * transformations to a set of regular boxes (called "anchors").
 *
 * The forward pass returns one tensor holding R regions of interest, one per row. Each row
 * is a 5-tuple (n, x1, y1, x2, y2): column 1 is the image batch index n, which is always
 * written as 0 because only a single-image batch is accepted, and the remaining columns are
 * the rectangle (x1, y1, x2, y2). Scores are only used internally to rank the proposals;
 * they are not part of the output.
 *
 * @param preNmsTopNTest upper bound on the number of proposals kept before NMS when the
 *                       module is not training; a non-positive value means "no limit"
 * @param postNmsTopNTest upper bound on the number of proposals kept after NMS when the
 *                        module is not training; a non-positive value means "no limit"
 * @param ratios anchor aspect ratios, handed to `Anchor`
 * @param scales anchor scales, handed to `Anchor`
 * @param rpnPreNmsTopNTrain same as `preNmsTopNTest`, used while training
 * @param rpnPostNmsTopNTrain same as `postNmsTopNTest`, used while training
 */
@SerialVersionUID(5313615238114647805L)
class Proposal(preNmsTopNTest: Int, postNmsTopNTest: Int, val ratios: Array[Float],
  val scales: Array[Float], rpnPreNmsTopNTrain: Int, rpnPostNmsTopNTrain: Int)(
  implicit ev: TensorNumeric[Float]) extends AbstractModule[Table, Tensor[Float], Float] {

  private val anchorUtil: Anchor = Anchor(ratios, scales)
  // Scratch buffers. They are transient, so they are (re)created lazily by init() and by the
  // null check on `keep` rather than in the constructor.
  @transient private var nms: Nms = _
  @transient private var bboxDeltas: Tensor[Float] = _
  @transient private var scores: Tensor[Float] = _
  @transient private var keep: Array[Int] = _
  @transient private var sortedScores: Tensor[Float] = _
  @transient private var sortedInds: Tensor[Float] = _
  @transient private var filteredProposals: Tensor[Float] = _
  // Proposal height and width both need to be greater than minSize (at orig image scale)
  private val minSize = 16

  /** Allocates the NMS helper and the scratch tensors the first time they are needed. */
  private def init(): Unit = {
    if (nms == null) {
      nms = new Nms()
      bboxDeltas = Tensor[Float]
      scores = Tensor[Float]
      sortedScores = Tensor[Float]
      sortedInds = Tensor[Float]
      filteredProposals = Tensor[Float]
    }
  }

  /**
   * Algorithm:
   * for each (H, W) location i
   * generate A anchor boxes centered on cell i
   * apply predicted bbox deltas at cell i to each of the A anchors
   * clip predicted boxes to image
   * remove predicted boxes with either height or width < threshold
   * sort all (proposal, score) pairs by score from highest to lowest
   * take top pre_nms_topN proposals before NMS
   * apply NMS with threshold to remaining proposals
   * take after_nms_topN proposals after NMS
   * return the top proposals
   *
   * @param input input(1): cls scores, laid out as (1, 2A, H, W); the last A channels are
   *              the ones ranked here
   *              input(2): bbox pred, laid out as (1, 4A, H, W)
   *              input(3): im_info with a single row; entries 1 and 2 are passed to
   *              `BboxUtil.clipBoxes` as the clipping bounds and entries 3 and 4 scale
   *              `minSize` into the minimum box height and width
   *              (presumably (height, width, scaleH, scaleW) - confirm against callers)
   * @return the module output: a (keepN, 5) tensor whose first column is 0 and whose other
   *         four columns are the coordinates of the selected proposals
   */
  override def updateOutput(input: Table): Tensor[Float] = {
    val inputScore = input[Tensor[Float]](1)
    val imInfo = input[Tensor[Float]](3)
    require(inputScore.size(1) == 1 && imInfo.size(1) == 1, "currently only support single batch")
    init()
    // transpose from (1, 4A, H, W) to (H * W * A, 4)
    transposeAndReshape(input[Tensor[Float]](2), 4, bboxDeltas)

    // select scores for object (while the remaining is the score for background)
    // transpose from (1, 2A, H, W) to (H * W * A)
    val anchorNum = anchorUtil.anchorNum
    val scoresOri = inputScore.narrow(2, anchorNum + 1, anchorNum)
    transposeAndReshape(scoresOri, 1, scores)

    // Generate proposals from bbox deltas and shifted anchors
    // Enumerate all shifts
    val anchors = anchorUtil.generateAnchors(inputScore.size(4), inputScore.size(3))
    // Convert anchors into proposals via bbox transformations
    val proposals = BboxUtil.bboxTransformInv(anchors, bboxDeltas)
    // clip predicted boxes to image (original faster rcnn way), using the
    // minimum box width & height derived from minSize and im_info
    val minBoxH = minSize * imInfo.valueAt(1, 3)
    val minBoxW = minSize * imInfo.valueAt(1, 4)
    // clipBoxes also receives `scores`; its return value is used as the number of
    // proposals that remain eligible (NOTE(review): confirm against BboxUtil.clipBoxes).
    val validNum = BboxUtil.clipBoxes(proposals, imInfo.valueAt(1, 1), imInfo.valueAt(1, 2),
      minBoxH, minBoxW, scores)

    val training = isTraining()
    val preNmsTopN = if (training) rpnPreNmsTopNTrain else preNmsTopNTest
    val postNmsTopN = if (training) rpnPostNmsTopNTrain else postNmsTopNTest
    // A non-positive pre-NMS limit means "keep everything", mirroring the post-NMS limit
    // below; previously it was fed straight into topk as a non-positive count.
    val topNum = if (preNmsTopN > 0) Math.min(preNmsTopN, validNum) else validNum
    scores.topk(topNum, dim = 1, increase = false,
      result = sortedScores, indices = sortedInds)

    // `keep` is reused across calls and only grown when it is too small.
    val sortedNum = sortedInds.nElement()
    if (keep == null || keep.length < sortedNum) {
      keep = new Array[Int](sortedNum)
    }
    // sortedInds holds 1-based positions; store them 0-based in `keep`.
    var k = 1
    while (k <= sortedNum) {
      keep(k - 1) = sortedInds.valueAt(k).toInt - 1
      k += 1
    }
    // Gather the top scoring proposals, in score order, into filteredProposals.
    filteredProposals.resize(topNum, proposals.size(2))
    k = 1
    while (k <= topNum) {
      filteredProposals.update(k, proposals(keep(k - 1) + 1))
      k += 1
    }

    // apply nms with threshold 0.7; `keep` is overwritten with the surviving row indices
    // of filteredProposals and the number of survivors is returned
    val nmsKeepN = nms.nms(sortedScores, filteredProposals, 0.7f, keep, sorted = true)
    // take after_nms_topN (e.g. 300) when a positive limit is configured
    val keepN = if (postNmsTopN > 0) Math.min(nmsKeepN, postNmsTopN) else nmsKeepN

    // return the top proposals as rows of (0, box coordinates...)
    val boxCols = filteredProposals.size(2)
    output.resize(keepN, boxCols + 1)
    var i = 1
    while (i <= keepN) {
      val row = keep(i - 1)
      output.setValue(i, 1, 0)
      var j = 1
      while (j <= boxCols) {
        output.setValue(i, j + 1, filteredProposals.valueAt(row, j))
        j += 1
      }
      i += 1
    }
    output
  }

  /**
   * Transpose and reshape predicted bbox transformations to get them
   * into the same order as the anchors:
   * `mat` is in (1, C, H, W) format,
   * it is transposed to (1, H, W, C) and
   * reshaped to (H * W * C / cols, cols), or to a 1-D tensor of H * W * C elements when
   * `cols` is 1, so that rows are ordered by (h, w, a) in slowest to fastest order.
   *
   * @param mat 4-D source tensor; only its first slice along dimension 1 is read
   * @param cols number of columns of the result; must divide the size of dimension 2
   * @param out destination tensor, resized and overwritten
   * @return `out`
   */
  private def transposeAndReshape(mat: Tensor[Float], cols: Int,
    out: Tensor[Float]): Tensor[Float] = {
    val channels = mat.size(2)
    val height = mat.size(3)
    val width = mat.size(4)
    require(cols > 0 && channels % cols == 0,
      s"channel number $channels should be a multiple of $cols")
    if (cols == 1) {
      out.resize(mat.nElement())
    } else {
      out.resize(mat.nElement() / cols, cols)
    }
    val matArr = mat.storage().array()
    val matOffset = mat.storageOffset() - 1
    val st2 = mat.stride(2)
    val st3 = mat.stride(3)
    // Honour the stride of the last dimension instead of assuming it is 1, so that
    // non-contiguous views are read correctly as well.
    val st4 = mat.stride(4)
    val outArr = out.storage().array()
    var outOffset = out.storageOffset() - 1
    var r = 0
    while (r < height) {
      val rowOffset = matOffset + r * st3
      var c = 0
      while (c < width) {
        val cellOffset = rowOffset + c * st4
        // Channels are written consecutively, so each group of `cols` channels forms
        // one output row.
        var ch = 0
        while (ch < channels) {
          outArr(outOffset) = matArr(cellOffset + ch * st2)
          outOffset += 1
          ch += 1
        }
        c += 1
      }
      r += 1
    }
    out
  }

  /**
   * No gradient is computed by this layer: `gradInput` is reset to null and returned.
   *
   * @param input forward input, unused
   * @param gradOutput gradient w.r.t. the output, unused
   * @return null
   */
  override def updateGradInput(input: Table, gradOutput: Tensor[Float]): Table = {
    gradInput = null
    gradInput
  }
}

/** Companion factory for the [[Proposal]] layer. */
object Proposal {
  /**
   * Builds a [[Proposal]] layer.
   *
   * @param preNmsTopN number of proposals kept before NMS when not training
   * @param postNmsTopN number of proposals kept after NMS when not training
   * @param ratios anchor aspect ratios
   * @param scales anchor scales
   * @param rpnPreNmsTopNTrain number of proposals kept before NMS while training,
   *                           12000 by default
   * @param rpnPostNmsTopNTrain number of proposals kept after NMS while training,
   *                            2000 by default
   * @return a new [[Proposal]] instance
   */
  def apply(
    preNmsTopN: Int,
    postNmsTopN: Int,
    ratios: Array[Float],
    scales: Array[Float],
    rpnPreNmsTopNTrain: Int = 12000,
    rpnPostNmsTopNTrain: Int = 2000)(implicit ev: TensorNumeric[Float]): Proposal = {
    new Proposal(
      preNmsTopN,
      postNmsTopN,
      ratios,
      scales,
      rpnPreNmsTopNTrain,
      rpnPostNmsTopNTrain)(ev)
  }
}