Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
com.intel.analytics.bigdl.transform.vision.image.util.BboxUtil.scala Maven / Gradle / Ivy
/*
* Copyright 2016 The BigDL Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intel.analytics.bigdl.transform.vision.image.util
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
import org.apache.log4j.Logger
object BboxUtil {
val logger = Logger.getLogger(getClass)
/**
 * Turn a flat detection output into a (num, 6) view.
 *
 * The input is returned untouched when it has fewer than 6 elements or is already
 * two-dimensional. Otherwise its first element is read as the row count `num`; the
 * following `num * 6` elements are viewed as a (num, 6) tensor, or an empty tensor is
 * returned when `num` is 0.
 * @param output flat output tensor (or an already decoded one)
 * @return the decoded view, an empty tensor, or `output` itself
 */
def decodeRois(output: Tensor[Float]): Tensor[Float] = {
  val alreadyDecoded = output.nElement() < 6 || output.dim() == 2
  if (alreadyDecoded) {
    output
  } else {
    val roiCount = output.valueAt(1).toInt
    require(roiCount >= 0)
    if (roiCount > 0) output.narrow(1, 2, roiCount * 6).view(roiCount, 6)
    else Tensor[Float]()
  }
}
/**
 * Scale boxes in place: columns 1 and 3 are multiplied by `width`, columns 2 and 4
 * by `height`. A tensor without elements is left untouched.
 * @param bboxes tensor whose second dimension has at least 4 entries
 * @param height factor applied to columns 2 and 4
 * @param width factor applied to columns 1 and 3
 */
def scaleBBox(bboxes: Tensor[Float], height: Float, width: Float): Unit = {
  if (bboxes.nElement() != 0) {
    var col = 1
    while (col <= 4) {
      // odd columns are scaled by width, even columns by height
      val factor = if (col % 2 == 1) width else height
      bboxes.select(2, col).mul(factor)
      col += 1
    }
  }
}
/**
 * Apply regression deltas to reference boxes and return the decoded corner coordinates.
 *
 * The deltas are copied into a freshly allocated tensor which is then overwritten with
 * the result; neither `boxes` nor `deltas` is modified.
 * @param boxes (N, 4), each row read as (x1, y1, x2, y2)
 * @param deltas (N, 4a), `a` groups of (dx, dy, dw, dh) per box
 * @param normalized when false, box width/height are computed with an extra "+ 1"
 * @return a new tensor shaped like `deltas`; `boxes` itself when it has no rows
 */
def bboxTransformInv(boxes: Tensor[Float], deltas: Tensor[Float],
  normalized: Boolean = false): Tensor[Float] = {
  if (boxes.size(1) == 0) {
    return boxes
  }
  // validate before allocating the output copy
  require(boxes.size(2) == 4,
    s"boxes size ${boxes.size().mkString(",")} do not satisfy N*4 size")
  require(deltas.size(2) % 4 == 0,
    s"and deltas size ${deltas.size().mkString(",")} do not satisfy N*4a size")
  val output = Tensor[Float]().resizeAs(deltas).copy(deltas)
  val boxesArr = boxes.storage().array()
  val boxRowStride = boxes.stride(1)
  val numBoxes = boxes.size(1)
  val outArr = output.storage().array()
  val repeat = output.size(2) / 4
  var boxOffset = boxes.storageOffset() - 1
  var outOffset = output.storageOffset() - 1
  var i = 0
  while (i < numBoxes) {
    val x1 = boxesArr(boxOffset)
    val y1 = boxesArr(boxOffset + 1)
    val width =
      if (!normalized) boxesArr(boxOffset + 2) - x1 + 1 else boxesArr(boxOffset + 2) - x1
    val height =
      if (!normalized) boxesArr(boxOffset + 3) - y1 + 1 else boxesArr(boxOffset + 3) - y1
    var j = 0
    while (j < repeat) {
      // dx * width + centerX
      val predCtrX = outArr(outOffset) * width + x1 + width / 2
      // dy * height + centerY
      val predCtrY = outArr(outOffset + 1) * height + y1 + height / 2
      // exp(dw) * width / 2
      val predW = Math.exp(outArr(outOffset + 2)).toFloat * width / 2
      // exp(dh) * height / 2
      val predH = Math.exp(outArr(outOffset + 3)).toFloat * height / 2
      outArr(outOffset) = predCtrX - predW
      outArr(outOffset + 1) = predCtrY - predH
      outArr(outOffset + 2) = predCtrX + predW
      outArr(outOffset + 3) = predCtrY + predH
      // `output` is a fresh row-major copy, so consecutive 4-value groups are always
      // 4 elements apart, independent of how `boxes` is laid out in its own storage
      outOffset += 4
      j += 1
    }
    boxOffset += boxRowStride
    i += 1
  }
  output
}
/**
 * Clip boxes to the image boundaries, in place.
 *
 * Every coordinate is limited to [0, width - 1] (first and third value of each box) or
 * [0, height - 1] (second and fourth value). When `scores` is given, the score of each
 * box whose clipped width is below `minW` or whose clipped height is below `minH` is
 * set to 0, and the remaining boxes are counted.
 * The box data is walked as consecutive groups of 4 values starting at the tensor's
 * storage offset, and scores as consecutive values.
 * @param boxes N * 4a
 * @param height height of image
 * @param width width of image
 * @param minH min height limit
 * @param minW min width limit
 * @param scores scores for boxes, may be null
 * @return the number of boxes whose score was kept; always 0 when `scores` is null
 */
def clipBoxes(boxes: Tensor[Float], height: Float, width: Float, minH: Float = 0,
  minW: Float = 0, scores: Tensor[Float] = null): Int = {
  require(boxes.size(2) % 4 == 0, "boxes should have the shape N*4a")
  val data = boxes.storage().array()
  val hasScores = scores != null
  val scoreData = if (hasScores) scores.storage().array() else null
  var scorePos = if (hasScores) scores.storageOffset() - 1 else -1
  val maxX = width - 1
  val maxY = height - 1
  // one pass over all N * a boxes instead of a nested row/group loop
  val totalBoxes = boxes.size(1) * (boxes.size(2) / 4)
  var pos = boxes.storageOffset() - 1
  var kept = 0
  var k = 0
  while (k < totalBoxes) {
    data(pos) = Math.max(Math.min(data(pos), maxX), 0)
    data(pos + 1) = Math.max(Math.min(data(pos + 1), maxY), 0)
    data(pos + 2) = Math.max(Math.min(data(pos + 2), maxX), 0)
    data(pos + 3) = Math.max(Math.min(data(pos + 3), maxY), 0)
    if (hasScores) {
      val boxW = data(pos + 2) - data(pos) + 1
      val boxH = data(pos + 3) - data(pos + 1) + 1
      if (boxW < minW || boxH < minH) scoreData(scorePos) = 0
      else kept += 1
      scorePos += 1
    }
    pos += 4
    k += 1
  }
  kept
}
/**
 * Regroup flat location predictions into per-image, per-class (numPredsPerClass, 4)
 * tensors.
 *
 * The outer array is the batch; each image holds an array of tensors grouped by class.
 * The source data is read as `numPredsPerClass * numClasses * 4` consecutive values per
 * image. When `shareLocation` is true every class index writes into the last tensor of
 * the image's array.
 * @param loc location predictions, first dimension is the batch
 * @param numPredsPerClass number of predictions per class
 * @param numClasses number of location classes in `loc`
 * @param shareLocation whether all classes share one set of boxes
 * @param locPredsBuf optional pre-allocated result buffer, reused when not null
 * @return the filled buffer (newly allocated when `locPredsBuf` is null)
 */
def getLocPredictions(loc: Tensor[Float], numPredsPerClass: Int, numClasses: Int,
  shareLocation: Boolean, locPredsBuf: Array[Array[Tensor[Float]]] = null)
: Array[Array[Tensor[Float]]] = {
  val batchSize = loc.size(1)
  val locPreds =
    if (locPredsBuf != null) locPredsBuf
    else Array.fill(batchSize, numClasses)(Tensor[Float](numPredsPerClass, 4))
  val src = loc.storage().array()
  val perImage = numPredsPerClass * numClasses * 4
  var imgBase = loc.storageOffset() - 1
  var img = 0
  while (img < batchSize) {
    val perClass = locPreds(img)
    var pred = 0
    while (pred < numPredsPerClass) {
      val predBase = imgBase + pred * numClasses * 4
      var cls = 0
      while (cls < numClasses) {
        val target = perClass(if (shareLocation) perClass.length - 1 else cls)
        val dst = target.storage().array()
        val dstPos = pred * 4 + target.storageOffset() - 1
        val srcPos = predBase + cls * 4
        var k = 0
        while (k < 4) {
          dst(dstPos + k) = src(srcPos + k)
          k += 1
        }
        cls += 1
      }
      pred += 1
    }
    imgBase += perImage
    img += 1
  }
  locPreds
}
/**
 * Regroup flat confidence scores into per-image, per-class tensors of length
 * `numPredsPerClass`.
 *
 * The source data is read as `numPredsPerClass * numClasses` consecutive values per
 * image, with the class index varying fastest.
 * @param conf confidence predictions, first dimension is the batch
 * @param numPredsPerClass number of predictions per class
 * @param numClasses number of classes
 * @param confBuf optional pre-allocated result buffer, reused when not null
 * @return the filled buffer (newly allocated when `confBuf` is null)
 */
def getConfidenceScores(conf: Tensor[Float], numPredsPerClass: Int, numClasses: Int,
  confBuf: Array[Array[Tensor[Float]]] = null)
: Array[Array[Tensor[Float]]] = {
  val batchSize = conf.size(1)
  val confPreds =
    if (confBuf != null) confBuf
    else Array.fill(batchSize, numClasses)(Tensor[Float](numPredsPerClass))
  val src = conf.storage().array()
  val perImage = numPredsPerClass * numClasses
  var imgBase = conf.storageOffset() - 1
  var img = 0
  while (img < batchSize) {
    val classScores = confPreds(img)
    var pred = 0
    while (pred < numPredsPerClass) {
      val predBase = imgBase + pred * numClasses
      var cls = 0
      while (cls < numClasses) {
        // tensor indices are 1-based
        classScores(cls).setValue(pred + 1, src(predBase + cls))
        cls += 1
      }
      pred += 1
    }
    imgBase += perImage
    img += 1
  }
  confPreds
}
/**
 * Split a prior tensor into two (nPriors, 4) tensors built on the same storage.
 *
 * The first tensor starts at the storage offset of `prior`; the second starts
 * `nPriors * 4` elements later.
 * @param prior tensor holding prior boxes followed by prior variances
 * @param nPriors number of priors
 * @return (priorBoxes, priorVariances)
 */
def getPriorBboxes(prior: Tensor[Float], nPriors: Int): (Tensor[Float], Tensor[Float]) = {
  val sharedStorage = prior.storage()
  val boxesStart = prior.storageOffset()
  val variancesStart = boxesStart + nPriors * 4
  val boxes = Tensor(sharedStorage, boxesStart, Array(nPriors, 4))
  val variances = Tensor(sharedStorage, variancesStart, Array(nPriors, 4))
  (boxes, variances)
}
/**
 * Decode the location predictions of every image and class against the prior boxes.
 *
 * Each per-class prediction tensor is passed to `decodeBoxes` both as the input and as
 * the output buffer. The class `bgLabel` is skipped unless `shareLocation` is true.
 * @param allLocPreds per-image, per-class location predictions
 * @param priorBoxes prior boxes
 * @param priorVariances prior variances
 * @param nClasses number of classes to iterate over
 * @param bgLabel background class index
 * @param clipBoxes whether decoded boxes are clipped
 * @param varianceEncodedInTarget whether the variance is already encoded in the target
 * @param shareLocation whether all classes share one set of boxes
 * @param output optional result holder; must have one entry per image
 * @return per-image, per-class decoded boxes
 */
def decodeBboxesAll(allLocPreds: Array[Array[Tensor[Float]]], priorBoxes: Tensor[Float],
  priorVariances: Tensor[Float], nClasses: Int, bgLabel: Int, clipBoxes: Boolean,
  varianceEncodedInTarget: Boolean, shareLocation: Boolean,
  output: Array[Array[Tensor[Float]]] = null)
: Array[Array[Tensor[Float]]] = {
  val batch = allLocPreds.length
  val allDecodeBboxes =
    if (output != null) {
      require(output.length == batch)
      output
    } else {
      Array.fill(batch)(new Array[Tensor[Float]](nClasses))
    }
  for {
    img <- 0 until batch
    c <- 0 until nClasses
    // Ignore background class.
    if shareLocation || c != bgLabel
  } {
    val labelLocPreds = allLocPreds(img)(c)
    // Something bad happened if there are no predictions for current label.
    if (labelLocPreds.nElement() == 0) {
      logger.warn(s"Could not find location predictions for label $c")
    }
    allDecodeBboxes(img)(c) = decodeBoxes(priorBoxes, priorVariances, clipBoxes,
      labelLocPreds, varianceEncodedInTarget, labelLocPreds)
  }
  allDecodeBboxes
}
/**
 * Decode every row of `bboxes` against the matching prior box.
 * @param priorBoxes prior boxes; must have 4 columns when it has any row
 * @param priorVariances prior variances, same number of rows as `priorBoxes`
 * @param isClipBoxes whether decoded boxes are clipped
 * @param bboxes encoded boxes, same number of rows as `priorBoxes`
 * @param varianceEncodedInTarget whether the variance is already encoded in the target
 * @param output optional result tensor; resized like `priorBoxes` when given
 * @return the decoded boxes (`output` when it was supplied)
 */
def decodeBoxes(priorBoxes: Tensor[Float], priorVariances: Tensor[Float],
  isClipBoxes: Boolean, bboxes: Tensor[Float],
  varianceEncodedInTarget: Boolean, output: Tensor[Float] = null): Tensor[Float] = {
  val numBboxes = priorBoxes.size(1)
  require(numBboxes == priorVariances.size(1))
  require(numBboxes == bboxes.size(1))
  if (numBboxes > 0) {
    require(priorBoxes.size(2) == 4)
  }
  val decodedBboxes =
    if (output != null) output.resizeAs(priorBoxes)
    else Tensor[Float](numBboxes, 4)
  // rows are 1-based
  (1 to numBboxes).foreach { row =>
    decodeSingleBbox(row, priorBoxes,
      priorVariances, isClipBoxes, bboxes, varianceEncodedInTarget, decodedBboxes)
  }
  decodedBboxes
}
/**
 * Decode row `i` of `bbox` against row `i` of `priorBox` and write the resulting
 * (xmin, ymin, xmax, ymax) into row `i` of `decodedBoxes`.
 *
 * When `isClipBoxes` is set, only the four values written for row `i` are limited to
 * [0, 1]. Clipping the whole `decodedBoxes` tensor on every call (as was done before)
 * costs O(N) per row and, when `decodedBoxes` is the same tensor as `bbox`, clamps the
 * still-encoded offsets of later rows before they are decoded.
 * @param i 1-based row index
 * @param priorBox prior boxes, rows of (x1, y1, x2, y2) with x2 > x1 and y2 > y1
 * @param priorVariance prior variances, used only when the variance is not encoded
 * @param isClipBoxes whether the decoded row is clipped to [0, 1]
 * @param bbox encoded boxes
 * @param varianceEncodedInTarget whether the variance is already encoded in the target
 * @param decodedBoxes destination tensor, may be the same tensor as `bbox`
 */
private def decodeSingleBbox(i: Int, priorBox: Tensor[Float], priorVariance: Tensor[Float],
  isClipBoxes: Boolean, bbox: Tensor[Float], varianceEncodedInTarget: Boolean,
  decodedBoxes: Tensor[Float]): Unit = {
  val x1 = priorBox.valueAt(i, 1)
  val y1 = priorBox.valueAt(i, 2)
  val x2 = priorBox.valueAt(i, 3)
  val y2 = priorBox.valueAt(i, 4)
  val priorWidth = x2 - x1
  require(priorWidth > 0)
  val priorHeight = y2 - y1
  require(priorHeight > 0)
  val pCenterX = (x1 + x2) / 2
  val pCenterY = (y1 + y2) / 2
  var decodeCenterX = 0f
  var decodeCenterY = 0f
  var decodeWidth = 0f
  var decodedHeight = 0f
  // all four encoded values of row i are read before anything is written, so the
  // destination may alias the encoded tensor
  if (varianceEncodedInTarget) {
    // variance is encoded in target, we simply need to restore the offset
    // predictions.
    decodeCenterX = bbox.valueAt(i, 1) * priorWidth + pCenterX
    decodeCenterY = bbox.valueAt(i, 2) * priorHeight + pCenterY
    decodeWidth = Math.exp(bbox.valueAt(i, 3)).toFloat * priorWidth
    decodedHeight = Math.exp(bbox.valueAt(i, 4)).toFloat * priorHeight
  } else {
    // variance is encoded in bbox, we need to scale the offset accordingly.
    decodeCenterX = priorVariance.valueAt(i, 1) * bbox.valueAt(i, 1) * priorWidth + pCenterX
    decodeCenterY = priorVariance.valueAt(i, 2) * bbox.valueAt(i, 2) * priorHeight + pCenterY
    decodeWidth = Math.exp(priorVariance.valueAt(i, 3) * bbox.valueAt(i, 3)).toFloat * priorWidth
    decodedHeight = Math.exp(priorVariance.valueAt(i, 4) * bbox.valueAt(i, 4))
      .toFloat * priorHeight
  }
  val xmin = decodeCenterX - decodeWidth / 2
  val ymin = decodeCenterY - decodedHeight / 2
  val xmax = decodeCenterX + decodeWidth / 2
  val ymax = decodeCenterY + decodedHeight / 2
  if (isClipBoxes) {
    // same order as the tensor-wide clip: lower bound 0 first, then upper bound 1
    decodedBoxes.setValue(i, 1, Math.min(1f, Math.max(xmin, 0f)))
    decodedBoxes.setValue(i, 2, Math.min(1f, Math.max(ymin, 0f)))
    decodedBoxes.setValue(i, 3, Math.min(1f, Math.max(xmax, 0f)))
    decodedBoxes.setValue(i, 4, Math.min(1f, Math.max(ymax, 0f)))
  } else {
    decodedBoxes.setValue(i, 1, xmin)
    decodedBoxes.setValue(i, 2, ymin)
    decodedBoxes.setValue(i, 3, xmax)
    decodedBoxes.setValue(i, 4, ymax)
  }
}
/**
 * Limit every element of `bboxes` to the range [0, 1]: `cmax(0)` is applied first,
 * then each element is replaced by the minimum of 1 and itself.
 * @param bboxes tensor to clip
 * @return the tensor returned by the final `apply1` call
 */
def clipBoxes(bboxes: Tensor[Float]): Tensor[Float] = {
  val nonNegative = bboxes.cmax(0)
  nonNegative.apply1(value => Math.min(1, value))
}
/**
 * Box voting: replace each box in `bboxNms` by the score-weighted average of all boxes
 * in `bboxAll` whose intersection-over-union with it is at least 0.5.
 *
 * `bboxNms` is updated in place. A row is left unchanged when the accumulated score of
 * its voters is 0 (no overlapping box, or all overlapping boxes have score 0), because
 * dividing by it would write NaN/Infinity coordinates.
 * @param scoresNms N
 * @param bboxNms N * 4
 * @param scoresAll M
 * @param bboxAll M * 4
 * @param areasBuf optional buffer that receives the areas of `bboxAll`
 * @return RoiLabel built from `scoresNms` and the updated `bboxNms`
 */
def bboxVote(scoresNms: Tensor[Float], bboxNms: Tensor[Float],
  scoresAll: Tensor[Float], bboxAll: Tensor[Float],
  areasBuf: Tensor[Float] = null): RoiLabel = {
  val areasAll = if (areasBuf == null) Tensor[Float]() else areasBuf
  getAreas(bboxAll, areasAll)
  // single accumulator reused for every NMS box
  val accBox = Tensor[Float](4)
  var i = 1
  while (i <= scoresNms.size(1)) {
    val box = bboxNms(i)
    // invariant over the inner loop, so compute it once per NMS box
    val boxArea = getArea(box)
    accBox.fill(0f)
    var accScore = 0f
    var m = 1
    while (m <= scoresAll.size(1)) {
      val boxA = bboxAll(m)
      val iw = Math.min(box.valueAt(3), boxA.valueAt(3)) -
        Math.max(box.valueAt(1), boxA.valueAt(1)) + 1
      val ih = Math.min(box.valueAt(4), boxA.valueAt(4)) -
        Math.max(box.valueAt(2), boxA.valueAt(2)) + 1
      if (iw > 0 && ih > 0) {
        val ua = boxArea + areasAll.valueAt(m) - iw * ih
        val ov = iw * ih / ua
        if (ov >= 0.5) {
          val score = scoresAll.valueAt(m)
          accBox.add(score, boxA)
          accScore += score
        }
      }
      m += 1
    }
    if (accScore != 0f) {
      var x = 1
      while (x <= 4) {
        bboxNms.setValue(i, x, accBox.valueAt(x) / accScore)
        x += 1
      }
    }
    i += 1
  }
  RoiLabel(scoresNms, bboxNms)
}
/**
 * Area of a single box stored as (x1, y1, x2, y2) in a one-dimensional tensor, using
 * the "+ 1" convention for both sides.
 */
private def getArea(box: Tensor[Float]): Float = {
  require(box.dim() == 1 && box.nElement() >= 4)
  val boxWidth = box.valueAt(3) - box.valueAt(1) + 1
  val boxHeight = box.valueAt(4) - box.valueAt(2) + 1
  boxWidth * boxHeight
}
/**
 * Compute the area of every box row and store it in `areas`.
 *
 * `areas` is resized to the number of rows of `boxes`. When `boxes` has no elements,
 * `areas` is returned as it is.
 * @param boxes N * k tensor with k >= 4
 * @param areas buffer to store the results
 * @param startInd 1-based column at which (x1, y1, x2, y2) starts within a row
 * @param normalized when false, both sides are measured with an extra "+ 1"
 * @return the `areas` tensor
 */
def getAreas(boxes: Tensor[Float], areas: Tensor[Float], startInd: Int = 1,
  normalized: Boolean = false): Tensor[Float] = {
  if (boxes.nElement() == 0) {
    areas
  } else {
    require(boxes.size(2) >= 4)
    val rowCount = boxes.size(1)
    areas.resize(rowCount)
    val data = boxes.storage().array()
    val rowStride = boxes.stride(1)
    var pos = boxes.storageOffset() - 1 + startInd - 1
    var row = 1
    while (row <= rowCount) {
      val x1 = data(pos)
      val y1 = data(pos + 1)
      val x2 = data(pos + 2)
      val y2 = data(pos + 3)
      val area =
        if (normalized) (x2 - x1) * (y2 - y1)
        else (x2 - x1 + 1) * (y2 - y1 + 1)
      areas.setValue(row, area)
      pos += rowStride
      row += 1
    }
    areas
  }
}
/**
 * Split a ground-truth tensor into one sub-tensor per image id.
 *
 * The row ranges come from `getGroundTruthIndices` and are ordered by image id; the key
 * of each entry is the position of the id in that ordering. A range is dropped when it
 * consists of a single row whose second column is -1, but its position is still
 * consumed.
 * @param result ground-truth rows, first column is the image id
 * @return map from position to the narrowed ground-truth rows
 */
def getGroundTruths(result: Tensor[Float]): Map[Int, Tensor[Float]] = {
  val sortedRanges = getGroundTruthIndices(result).toArray.sortBy(_._1)
  sortedRanges.iterator.zipWithIndex
    .map { case ((_, (start, length)), position) =>
      position -> result.narrow(1, start, length)
    }
    // -1 represent those images without label
    .filter { case (_, gt) => gt.size(1) > 1 || gt.valueAt(1, 2) != -1 }
    .toMap
}
/**
 * Find, for every run of equal values in the first column, the 1-based start row and
 * the run length.
 *
 * A run is recorded under its id converted to Int. Runs that end before the last row
 * are recorded only when their id is >= 0; the run containing the last row is always
 * recorded.
 * @param result tensor whose first column holds the image id
 * @return map from id to (start row, number of rows)
 */
def getGroundTruthIndices(result: Tensor[Float]): Map[Int, (Int, Int)] = {
  if (result.nElement() == 0) {
    Map[Int, (Int, Int)]()
  } else if (result.size(1) == 1) {
    Map(result.valueAt(1, 1).toInt -> ((1, 1)))
  } else {
    val rowCount = result.size(1)
    var ranges = Map[Int, (Int, Int)]()
    var lastId = -1f
    var runStart = 1
    var row = 1
    while (row <= rowCount) {
      val id = result.valueAt(row, 1)
      if (lastId != id) {
        // a new run begins here; close the previous one if it had a valid id
        if (lastId >= 0) {
          ranges += (lastId.toInt -> ((runStart, row - runStart)))
        }
        runStart = row
      }
      lastId = id
      if (row == rowCount) {
        ranges += (lastId.toInt -> ((runStart, row - runStart + 1)))
      }
      row += 1
    }
    ranges
  }
}
/**
 * Decode the encoded deltas of one reference box into corner coordinates.
 *
 * `encodeBox` is read as consecutive groups of 4 values (dx, dy, dw, dh); each group is
 * divided by the matching weight, applied to the reference box `bbox` (x1, y1, x2, y2)
 * and written to the same group position of `decodeBox` as (x1, y1, x2, y2).
 * @param encodeBox 1-D tensor of encoded deltas
 * @param bbox 1-D reference box
 * @param weight divisors for (dx, dy, dw, dh), read from indices 0 to 3
 * @param decodeBox 1-D destination with the same number of elements as `encodeBox`
 */
private def decodeSignalBoxWithWeight(encodeBox: Tensor[Float], bbox: Tensor[Float],
weight: Array[Float], decodeBox: Tensor[Float]): Unit = {
require(bbox.nDimension() == 1 && encodeBox.nDimension() == 1 && decodeBox.dim() == 1,
s"Only support decode single bbox, but " +
s"get ${bbox.nDimension()}, ${encodeBox.nDimension()}, ${decodeBox.dim()}")
require(encodeBox.nElement() == decodeBox.nElement(), s"element number of encode tensor" +
s" and decode tensor should be same, but get ${encodeBox.nElement()} ${decodeBox.nElement()}")
val TO_REMOVE = 1 // refer to pytorch, maybe it will be removed in future
val x1 = bbox.valueAt(1)
val y1 = bbox.valueAt(2)
val x2 = bbox.valueAt(3)
val y2 = bbox.valueAt(4)
// width/height of the reference box, measured with the extra TO_REMOVE pixel
val priorWidth = x2 - x1 + TO_REMOVE
val priorHight = y2 - y1 + TO_REMOVE
val pCenterX = x1 + priorWidth/ 2
val pCenterY = y1 + priorHight / 2
val wx = weight(0)
val wy = weight(1)
val ww = weight(2)
val wh = weight(3)
// temporarily view the flat deltas as (n / 4, 4) so each component is one column
encodeBox.resize(Array(encodeBox.nElement() / 4, 4))
// copy for contiguous
// NOTE(review): presumably contiguous() returns a copy for a strided column, so div()
// leaves encodeBox untouched; with a single group the column may already count as
// contiguous and be divided in place - TODO confirm
val dx = encodeBox.select(2, 1).contiguous().div(wx)
val dy = encodeBox.select(2, 2).contiguous().div(wy)
val dw = encodeBox.select(2, 3).contiguous().div(ww)
val dh = encodeBox.select(2, 4).contiguous().div(wh)
// not change original input
encodeBox.resize(encodeBox.nElement())
// clamp, dw, dh
// NOTE(review): 62.5 equals 1000 / 16; the PyTorch reference appears to clip at
// log(1000 / 16) - confirm which value is intended
val bboxClip = 62.5f
// NOTE(review): a lower bound of 0.0f is passed here; actually enforcing it on
// log-scale deltas would keep boxes from shrinking - confirm only the upper bound is
// meant to apply
clamp(dw, 0.0f, bboxClip)
clamp(dh, 0.0f, bboxClip)
val pred_ctr_x = dx * priorWidth + pCenterX
val pred_ctr_y = dy * priorHight + pCenterY
// half of the predicted width / height
val pred_w = dw.exp().mul(priorWidth).mul(0.5f)
val pred_h = dh.exp().mul(priorHight).mul(0.5f)
// todo: memory optimization
// buffer1..buffer4 hold x1, y1, x2, y2; 1 is subtracted from x2 and y2
val buffer1 = Tensor[Float]().resizeAs(pred_ctr_x).copy(pred_ctr_x).sub(pred_w)
val buffer2 = Tensor[Float]().resizeAs(pred_ctr_y).copy(pred_ctr_y).sub(pred_h)
val buffer3 = Tensor[Float]().resizeAs(pred_ctr_x).copy(pred_ctr_x).add(pred_w).add(-1.0f)
val buffer4 = Tensor[Float]().resizeAs(pred_ctr_y).copy(pred_ctr_y).add(pred_h).add(-1.0f)
val arrBuffer1 = buffer1.storage().array()
val arrBuffer2 = buffer2.storage().array()
val arrBuffer3 = buffer3.storage().array()
val arrBuffer4 = buffer4.storage().array()
val arrBox = decodeBox.storage().array()
val offset = decodeBox.storageOffset() - 1
// interleave the four coordinate buffers into decodeBox, 4 values per group;
// i walks the groups, j the position inside decodeBox
var i = 0
var j = 0
while (i < arrBuffer1.length) {
arrBox(j + offset) = arrBuffer1(i)
arrBox(j + 1 + offset) = arrBuffer2(i)
arrBox(j + 2 + offset) = arrBuffer3(i)
arrBox(j + 3 + offset) = arrBuffer4(i)
i += 1
j += 4
}
}
/**
 * Decode weighted box deltas row by row.
 *
 * Row `i` of `encodeBox` is decoded against row `i` of `bbox` and the result is written
 * into row `i` of `decodeBox`.
 * @param encodeBox encoded deltas, one row per reference box
 * @param bbox reference boxes; must have 4 columns when it has any row
 * @param weight divisors for (dx, dy, dw, dh)
 * @param decodeBox destination, same number of rows as `encodeBox`
 */
def decodeWithWeight(encodeBox: Tensor[Float], bbox: Tensor[Float],
  weight: Array[Float], decodeBox: Tensor[Float]): Unit = {
  val numBboxes = bbox.size(1)
  require(encodeBox.size(1) == numBboxes)
  require(encodeBox.size(1) == decodeBox.size(1))
  if (numBboxes > 0) require(bbox.size(2) == 4)
  // rows are 1-based
  for (row <- 1 to numBboxes) {
    decodeSignalBoxWithWeight(encodeBox.select(1, row), bbox.select(1, row),
      weight, decodeBox.select(1, row))
  }
}
/**
 * Cap the elements of a contiguous tensor at `max`, in place.
 *
 * Only the tensor's own elements are touched: the walk starts at the tensor's storage
 * offset and covers `nElement()` values. Walking the whole backing array would also cap
 * unrelated values when the storage is shared with, or larger than, this tensor.
 *
 * NOTE(review): `min` is kept for signature compatibility but is NOT enforced. The
 * statement that handled it assigned the value to itself, so values below `min` were
 * never altered. The only caller in this file passes `min = 0` for log-scale size
 * deltas, where a real lower bound would keep decoded boxes from shrinking, and the
 * PyTorch decode it mirrors clamps from above only. Enforce `min` only together with a
 * change of that call.
 * @param input contiguous tensor to modify
 * @param min lower bound (currently not applied, see note)
 * @param max upper bound
 */
private def clamp(input: Tensor[Float], min: Float, max: Float): Unit = {
  require(input.isContiguous(), "input for clamp should be contiguous")
  val arr = input.storage().array()
  val start = input.storageOffset() - 1
  val end = start + input.nElement()
  var i = start
  while (i < end) {
    if (arr(i) > max) arr(i) = max
    i += 1
  }
}
}