com.intel.analytics.bigdl.transform.vision.image.MTImageFeatureToBatch.scala Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of bigdl-SPARK_3.0 Show documentation
The newest version!
/*
 * Copyright 2016 The BigDL Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.intel.analytics.bigdl.transform.vision.image

import com.intel.analytics.bigdl.dataset.segmentation.RLEMasks
import java.util.concurrent.atomic.AtomicInteger
import com.intel.analytics.bigdl.dataset.{MiniBatch, Sample, Transformer, Utils}
import com.intel.analytics.bigdl.nn.abstractnn.Activity
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
import com.intel.analytics.bigdl.utils.{Engine, T, Table}

object MTImageFeatureToBatch {
  /**
   * Creates the multi-threaded ImageFeature to mini-batch transformer used for
   * classification data.
   * @param width width of the output images
   * @param height height of the output images
   * @param batchSize batch size
   * @param transformer pipeline for pre-processing, finally outputting ImageFeature
   * @param toRGB if converted to RGB, default format is BGR
   * @return a ClassificationMTImageFeatureToBatch built from the given arguments
   */
  def apply(width: Int, height: Int, batchSize: Int,
            transformer: FeatureTransformer, toRGB: Boolean = false)
  : MTImageFeatureToBatch =
    new ClassificationMTImageFeatureToBatch(width, height, batchSize, transformer, toRGB)

  /**
   * Verifies that the labels are either all present or all absent (null).
   * @param labelData labels to inspect; an empty array is returned unchanged
   * @return the input array when every entry is non-null, null when every entry is null
   * @throws IllegalArgumentException when null and non-null entries are mixed
   */
  private[image] def checkLabels[T](labelData: Array[T]): Array[T] = {
    if (labelData.isEmpty) {
      labelData
    } else {
      // the first entry decides which of the two allowed states the batch is in
      val labeled = labelData.head != null
      labelData.iterator.drop(1).foreach { entry =>
        require((entry != null) == labeled, "The input data must either be all labeled or" +
          " be all unlabeled")
      }
      if (labeled) labelData else null
    }
  }

  /**
   * Returns the first batchSize elements of the array. When the array already has exactly
   * batchSize elements the very same array instance is handed back without copying.
   */
  private[image] def arraySlice[T](array: Array[T], batchSize: Int) = {
    array.length match {
      case `batchSize` => array
      case _ => array.slice(0, batchSize)
    }
  }
}

import MTImageFeatureToBatch._

object RoiImageFeatureToBatch {
  /**
   * Builds a transformer from ImageFeature to mini-batches that also extracts the ROI labels
   * for segmentation when they are set. Images in a batch may have different sizes.
   * @param batchSize global batch size
   * @param transformer pipeline for pre-processing
   * @param toRGB if converted to RGB, default format is BGR
   * @param sizeDivisible when it's greater than 0, height and width should be divisible by
   *                      this size
   * @return a RoiImageFeatureToBatchWithResize built from the given arguments
   */
  def withResize(batchSize: Int, transformer: FeatureTransformer,
    toRGB : Boolean = false, sizeDivisible: Int = -1)
  : MTImageFeatureToBatch = {
    new RoiImageFeatureToBatchWithResize(
      sizeDivisible = sizeDivisible,
      totalBatchSize = batchSize,
      transformer = transformer,
      toRGB = toRGB)
  }


  /**
   * Builds a transformer from ImageFeature to mini-batches that also extracts the ROI labels
   * for segmentation when they are set. All images must have the same size.
   * @param width width of the output images
   * @param height height of the output images
   * @param batchSize global batch size
   * @param transformer pipeline for pre-processing
   * @param toRGB if converted to RGB, default format is BGR
   * @return a RoiImageFeatureToBatch built from the given arguments
   */
  def apply(width: Int, height: Int, batchSize: Int,
    transformer: FeatureTransformer, toRGB: Boolean = false) : MTImageFeatureToBatch =
    new RoiImageFeatureToBatch(width, height, batchSize, transformer, toRGB)
}

/**
 * An abstract class to convert ImageFeature iterator to MiniBatches. This transformer will be run
 * on each image feature. "processImageFeature" will be called to buffer the image features. When
 * there are enough buffered image features to form a batch, "createBatch" will be called.
 * You should override processImageFeature to buffer each image feature, and createBatch
 * to convert the buffered data into a mini-batch
 * @param totalBatchSize global batch size
 * @param transformer pipeline for pre-processing
 */
abstract class MTImageFeatureToBatch private[bigdl](
  totalBatchSize: Int, transformer: FeatureTransformer)
  extends Transformer[ImageFeature, MiniBatch[Float]] {

  // Batch size used by this instance, derived from the global batch size by
  // Utils.getBatchSize (presumably the per-node share; confirm against Utils).
  protected val batchSize: Int = Utils.getBatchSize(totalBatchSize)

  // Number of tasks launched per batch; taken from Engine.coreNumber().
  protected val parallelism: Int = Engine.coreNumber()

  /**
   * Hands out the next free slot of the batch under construction.
   * @param count shared counter of the slots requested so far for the current batch
   * @return the slot index (0-based), or -1 once the counter has reached batchSize
   */
  private def getPosition(count: AtomicInteger): Int = {
    val position = count.getAndIncrement()
    if (position < batchSize) position else -1
  }

  // One "PreFetch -> cloned transformer" chain per task, created lazily on first use.
  private lazy val transformers = (1 to parallelism).map(
    _ => new PreFetch -> transformer.cloneTransformer()
  ).toArray

  /**
   * Buffers one pre-processed image feature into the given slot of the current batch.
   * Called from the tasks started in next(), so several calls may run at the same time,
   * each with a different position.
   * @param img the image feature produced by the pre-processing chain
   * @param position 0-based slot index inside the batch
   */
  protected def processImageFeature(img: ImageFeature, position: Int)

  /**
   * Converts the buffered data into a mini-batch.
   * @param batchSize number of image features that were buffered for this batch
   */
  protected def createBatch(batchSize: Int): MiniBatch[Float]

  /**
   * Wraps the input iterator into an iterator of mini-batches.
   * @param prev source of image features; every chain is applied to this same iterator
   */
  override def apply(prev: Iterator[ImageFeature]): Iterator[MiniBatch[Float]] = {
    val iterators = transformers.map(_.apply(prev))

    new Iterator[MiniBatch[Float]] {
      override def hasNext: Boolean = {
        // map evaluates hasNext on every chain (no short-circuit) before OR-ing the results
        iterators.map(_.hasNext).reduce(_ || _)
      }

      override def next(): MiniBatch[Float] = {
        // fresh slot counter for this batch, shared by all tasks
        val count = new AtomicInteger(0)
        // one task per tid; each returns how many records it buffered, and the sum is the
        // actual size of this batch
        val batch = Engine.default.invokeAndWait((0 until parallelism).map(tid => () => {
          var position = 0
          var record = 0
          // hasNext is checked first, so a slot is only requested when this chain has data;
          // the loop ends when the chain is exhausted or no slot is left (-1)
          while (iterators(tid).hasNext && {
            position = getPosition(count)
            position != -1
          }) {
            val img = iterators(tid).next()
            processImageFeature(img, position)
            record += 1
          }
          record
        })).sum
        createBatch(batch)
      }
    }
  }
}


/**
 * Wraps an iterator so that hasNext pulls one element ahead and keeps it in a buffer until
 * next() is called. An element equal to null is treated as "no more data", which lets the
 * wrapper work with upstream iterators that return null instead of throwing when exhausted.
 */
private class PreFetch extends Transformer[ImageFeature, ImageFeature] {
  override def apply(prev: Iterator[ImageFeature]): Iterator[ImageFeature] = {
    new Iterator[ImageFeature] {
      // the element fetched ahead by hasNext, or null when nothing is buffered
      private var buffer: ImageFeature = null.asInstanceOf[ImageFeature]

      /**
       * @return true when an element is buffered or could be fetched from the upstream
       *         iterator; false when the upstream is exhausted or yielded null
       */
      override def hasNext: Boolean = {
        // Only pull from the upstream when it reports more data: a standard iterator throws
        // NoSuchElementException from next() once it is exhausted. The null check below is
        // kept because the upstream may still yield null (e.g. when another consumer of the
        // same upstream took the last element in between).
        if (buffer == null && prev.hasNext) {
          buffer = prev.next()
        }
        buffer != null
      }

      /**
       * @return the buffered element if there is one (clearing the buffer), otherwise
       *         whatever the upstream iterator's next() returns
       */
      override def next(): ImageFeature = {
        if (buffer == null) {
          prev.next()
        } else {
          val tmp = buffer
          buffer = null.asInstanceOf[ImageFeature]
          tmp
        }
      }
    }
  }
}

/**
 * A transformer pipeline wrapper to create labeled Minibatch in multiple threads for classification
 * @param width final image width
 * @param height final image height
 * @param totalBatchSize global batch size
 * @param transformer pipeline for pre-processing
 * @param toRGB  if converted to RGB, default format is BGR
 */
class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false)
  extends MTImageFeatureToBatch(totalBatchSize, transformer) {

  // number of floats taken by one image: 3 channels of height x width
  private val imageLength = 3 * height * width
  // flat buffers shared by all batches; the tensors below are views onto them
  private val featureData: Array[Float] = new Array[Float](batchSize * imageLength)
  private val labelData: Array[Float] = new Array[Float](batchSize)
  private val featureTensor: Tensor[Float] = Tensor[Float]()
  private val labelTensor: Tensor[Float] = Tensor[Float]()

  /**
   * Copies the image pixels and the first value of its label tensor into the batch buffers.
   * @param img image feature whose label is a Tensor[Float]
   * @param position 0-based slot of the image inside the batch
   */
  override protected def processImageFeature(img: ImageFeature, position: Int): Unit = {
    val offset = position * imageLength
    img.copyTo(featureData, offset, toRGB = toRGB)
    val label = img.getLabel.asInstanceOf[Tensor[Float]]
    labelData(position) = label.valueAt(1)
  }

  /**
   * Exposes the buffered data as a MiniBatch of `batch` samples. The tensors are re-bound to
   * the buffers only when the number of samples differs from the previous batch.
   * @param batch number of buffered samples
   */
  override protected def createBatch(batch: Int): MiniBatch[Float] = {
    val sizeChanged = labelTensor.nElement() != batch
    if (sizeChanged) {
      val featureShape = Array(batch, 3, height, width)
      featureTensor.set(Storage[Float](featureData), storageOffset = 1, sizes = featureShape)
      labelTensor.set(Storage[Float](labelData), storageOffset = 1, sizes = Array(batch))
    }

    MiniBatch(featureTensor, labelTensor)
  }
}


object RoiImageInfo {
  // keys used in the target table
  // entries that come from RoiLabel
  val CLASSES = "classes"
  val BBOXES = "bboxes"
  val MASKS = "masks"
  // ISCROWD and ORIGSIZE are stored in ImageFeature
  val ISCROWD = "is_crowd"
  val ORIGSIZE = "orig_size"
  val SCORES = "scores"
  val IMGINFO = "imginfo"

  // Reads the entry stored under `key` as a Tensor[Float].
  private def tensorAt(tab: Table, key: String): Tensor[Float] = tab[Tensor[Float]](key)

  /**
   * Get the output score tensor from the table.
   *    (1 x N) tensor for N detections
   *
   * @param tab table holding the SCORES entry
   * @return the value stored under SCORES
   */
  def getScores(tab: Table): Tensor[Float] = tensorAt(tab, SCORES)

  /**
   * Get the class label tensor from the table. See RoiLabel.classes
   *    the categories for each detection,
   *    (1 x N), or (2 x N) Tensor[Float]
   *
   * @param tab table holding the CLASSES entry
   * @return the value stored under CLASSES
   */
  def getClasses(tab: Table): Tensor[Float] = tensorAt(tab, CLASSES)

  /**
   * Get the bbox tensor from the table. See RoiLabel.bboxes
   * @param tab table holding the BBOXES entry
   * @return the value stored under BBOXES
   */
  def getBBoxes(tab: Table): Tensor[Float] = tensorAt(tab, BBOXES)

  /**
   * Get the (optional) mask data from the table. See RoiLabel.masks
   * @param tab table holding the MASKS entry
   * @return the value stored under MASKS
   */
  def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)

  /**
   * Get the isCrowd tensor from the table. Should be 1 x N vector (N is the # of detections)
   * @param tab table holding the ISCROWD entry
   * @return the value stored under ISCROWD
   */
  def getIsCrowd(tab: Table): Tensor[Float] = tensorAt(tab, ISCROWD)

  /**
   * Get the size of the image before resizing
   * @param tab table holding the ORIGSIZE entry
   * @return (height, width, channel)
   */
  def getOrigSize(tab: Table): (Int, Int, Int) = tab[(Int, Int, Int)](ORIGSIZE)

  /**
   * Get the image info tensor from the table.
   * @param tab table holding the IMGINFO entry
   * @return the value stored under IMGINFO
   */
  def getImageInfo(tab: Table): Tensor[Float] = tensorAt(tab, IMGINFO)

}
/**
 * A batch of images with flattened RoiLabels
 * the getTarget() returns a Table with key from 1 to batchSize. Each key in the table is mapped to
 * a Table for the annotation of an image in the batch. The annotation table holds the annotation
 * info for one image (assume the image has N detections). The annotation table has
 *
 * Key                Value
 * RoiImageInfo.CLASSES   the categories for each detection (see RoiLabel.classes field)
 *                    (1 x N), or (2 x N) Tensor[Float]
 * RoiImageInfo.BBOXES    the bboxes, (N x 4) Tensor[Float]
 * RoiImageInfo.MASKS     (Optional) the mask data, Array[RLEMasks]. The array has N
 *                    elements, each holding the segmentation data of one detection
 * RoiImageInfo.ISCROWD   Whether each detection is crowd. (1 x N) Tensor[Float].
 *                    -1: unknown, 0: not crowd, 1: is crowd
 * RoiImageInfo.IMGINFO  with shape (batchSize, 4), contains all images info
 *                 (height, width, original height, original width)
 */
class RoiMiniBatch(val input: Tensor[Float], val target: Array[RoiLabel],
  val isCrowd: Array[Tensor[Float]], val imageInfo: Tensor[Float] = null)
  extends MiniBatch[Float] {

  /** Number of samples in the batch, i.e. the size of the first dimension of the input. */
  override def size(): Int = input.size(1)

  /**
   * @return the input tensor alone when there is no image info, otherwise a table holding
   *         the input tensor followed by the image info tensor
   */
  override def getInput(): Activity = {
    if (imageInfo == null) input else T(input, imageInfo)
  }

  /**
   * Builds one annotation table per image from its RoiLabel, adding the ISCROWD entry and,
   * when image info is available, the IMGINFO row of that image.
   * @return the per-image tables combined by T.seq
   * @throws IllegalArgumentException when the batch has no target
   */
  override def getTarget(): Table = {
    require(target != null, "The target should not be null")
    val tables = (target, isCrowd, 1 to isCrowd.length).zipped.map { case (roiLabel, crowd, i) =>
      val ret = roiLabel.toTable
        .update(RoiImageInfo.ISCROWD, crowd)
      if (imageInfo != null) {
        ret.update(RoiImageInfo.IMGINFO, imageInfo.select(1, i))
      }
      ret
    }
    T.seq(tables)
  }

  /**
   * Creates a sub-batch covering `length` samples starting at `offset`.
   * @param offset 1-based index of the first sample
   * @param length number of samples to keep
   * @return a new RoiMiniBatch; target, isCrowd and imageInfo stay null when they are null
   *         in this batch
   */
  override def slice(offset: Int, length: Int): MiniBatch[Float] = {
    // tensors are indexed from 1, arrays from 0
    val from = offset - 1
    val until = from + length
    val subInput = input.narrow(1, offset, length)
    val subTarget = if (target != null) target.slice(from, until) else null
    // isCrowd is null for unlabeled batches, so it needs the same guard as target
    val subIsCrowd = if (isCrowd != null) isCrowd.slice(from, until) else null
    val subSize = if (imageInfo != null) imageInfo.narrow(1, offset, length) else null
    RoiMiniBatch(subInput, subTarget, subIsCrowd, subSize)
  }

  /** Not supported: a RoiMiniBatch cannot be filled from Samples. */
  override def set(samples: Seq[Sample[Float]])(implicit ev: TensorNumeric[Float])
  : RoiMiniBatch.this.type = {
    throw new NotImplementedError("do not use Sample here")
  }
}

object RoiMiniBatch {
  /**
   * Factory for RoiMiniBatch.
   * @param data input tensor of the batch
   * @param target RoiLabel of each image, may be null
   * @param isCrowd isCrowd tensor of each image
   * @param imageInfo optional image info tensor, null by default
   */
  def apply(data: Tensor[Float], target: Array[RoiLabel],
    isCrowd: Array[Tensor[Float]], imageInfo: Tensor[Float] = null): RoiMiniBatch = {
    new RoiMiniBatch(input = data, target = target, isCrowd = isCrowd, imageInfo = imageInfo)
  }
}


/**
 * A transformer pipeline wrapper to create RoiMiniBatch in multiple threads
 * The output "target" is a Table. The keys are from 1 to sizeof(batch). The values are
 * the tables for each RoiLabel. Each Roi label table, contains fields of RoiLabel class.
 * The sizes of the input images should be the same
 * @param width final image width
 * @param height final image height
 * @param totalBatchSize global batch size
 * @param transformer pipeline for pre-processing
 * @param toRGB  if converted to RGB, default format is BGR
 */
class RoiImageFeatureToBatch private[bigdl](width: Int, height: Int,
  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false)
  extends MTImageFeatureToBatch(totalBatchSize, transformer) {

  // number of pixels in one channel of an output image
  private val frameLength = height * width
  // flat buffer for the pixels of a whole batch: batchSize images of 3 channels each
  private val featureData: Array[Float] = new Array[Float](batchSize * frameLength * 3)
  private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
  private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
  // one row per image: (height, width, original height, original width)
  private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4)
  // view onto featureData, re-bound only when the number of samples changes
  private val featureTensor: Tensor[Float] = Tensor[Float]()

  /**
   * Copies the image pixels into the batch buffer and records its label, ISCROWD tensor and
   * size information in the given slot.
   * @param img image feature; its label (if any) is a RoiLabel
   * @param position 0-based slot of the image inside the batch
   * @throws IllegalArgumentException when a label is present but ISCROWD is missing or has
   *                                  a different number of detections, or when ISCROWD is
   *                                  set although the label is missing
   */
  override protected def processImageFeature(img: ImageFeature, position: Int): Unit = {
    img.copyTo(featureData, position * frameLength * 3, toRGB = toRGB)
    val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]]
    val label = img.getLabel.asInstanceOf[RoiLabel]
    if (label != null) {
      require(isCrowd != null && label.bboxes.size(1) == isCrowd.size(1), "The number" +
        " of detections " +
        "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
    } else {
      require(isCrowd == null, "ImageFeature's ISCROWD should be not be set if the label is empty")
    }
    isCrowdData(position) = isCrowd
    labelData(position) = label
    // tensor rows are indexed from 1
    imgInfoData.setValue(position + 1, 1, img.getHeight())
    imgInfoData.setValue(position + 1, 2, img.getWidth())
    imgInfoData.setValue(position + 1, 3, img.getOriginalHeight)
    imgInfoData.setValue(position + 1, 4, img.getOriginalWidth)
  }

  /**
   * Exposes the buffered data as a RoiMiniBatch of curBatchSize images.
   * @param curBatchSize number of buffered images
   * @return the mini-batch; target and isCrowd are null when the images are unlabeled
   */
  override protected def createBatch(curBatchSize: Int): MiniBatch[Float] = {
    // Compare against the element count of a (curBatchSize, 3, height, width) tensor. The
    // previous check compared against curBatchSize alone, which never matched for a
    // non-empty batch and re-wrapped the storage on every call.
    if (featureTensor.nElement() != curBatchSize * frameLength * 3) {
      featureTensor.set(Storage[Float](featureData),
        storageOffset = 1, sizes = Array(curBatchSize, 3, height, width))
    }

    val labels = checkLabels(arraySlice(labelData, curBatchSize))
    val crowd = if (labels != null) arraySlice(isCrowdData, curBatchSize) else null
    RoiMiniBatch(featureTensor, labels, crowd,
      imgInfoData.narrow(1, 1, curBatchSize))
  }
}

/**
 * A transformer pipeline wrapper to create RoiMiniBatch in multiple threads.
 * Image features may have different sizes, so firstly we need to calculate max size in one batch,
 * then padding all features to one batch with max size.
 * @param sizeDivisible when it's greater than 0,
 *                      height and width will be rounded up to a multiple of this size
 * @param totalBatchSize global batch size
 * @param transformer pipeline for pre-processing
 * @param toRGB
 */
class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, totalBatchSize: Int,
  transformer: FeatureTransformer, toRGB: Boolean = false)
  extends MTImageFeatureToBatch(totalBatchSize, transformer) {

  private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
  private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
  // one row per image: (height, width, original height, original width)
  private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4)
  // created on the first call of createBatch and resized for every batch
  private var featureTensor: Tensor[Float] = null
  // per-slot image tensors (3 x height x width), allocated on first use and then reused
  private val imageBuffer = new Array[Tensor[Float]](batchSize)

  /**
   * Computes the frame that fits every buffered image of the batch.
   * @param batchSize number of buffered images to look at
   * @return (height, width) of the largest image, each rounded up to a multiple of
   *         sizeDivisible when sizeDivisible is greater than 0
   */
  private def getFrameSize(batchSize: Int): (Int, Int) = {
    val (tallest, widest) = (0 until batchSize).foldLeft((0, 0)) { case ((h, w), idx) =>
      val image = imageBuffer(idx)
      (math.max(h, image.size(2)), math.max(w, image.size(3)))
    }

    def roundUp(extent: Int): Int = {
      if (sizeDivisible > 0) {
        (math.ceil(extent.toFloat / sizeDivisible) * sizeDivisible).toInt
      } else {
        extent
      }
    }

    (roundUp(tallest), roundUp(widest))
  }

  /**
   * Copies the image into its slot buffer and records its label, ISCROWD tensor and size
   * information.
   * @param img image feature; its label (if any) is a RoiLabel
   * @param position 0-based slot of the image inside the batch
   */
  override protected def processImageFeature(img: ImageFeature, position: Int): Unit = {
    val slot = Option(imageBuffer(position)).getOrElse {
      val fresh = Tensor[Float]()
      imageBuffer(position) = fresh
      fresh
    }
    slot.resize(3, img.getHeight(), img.getWidth())
    // write the pixels straight into the backing array of the slot tensor
    img.copyTo(slot.storage().array(), 0, toRGB = toRGB)

    val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]]
    val label = img.getLabel.asInstanceOf[RoiLabel]
    if (label == null) {
      require(isCrowd == null, "ImageFeature's ISCROWD should be not be set if the label is empty")
    } else {
      require(isCrowd != null && label.bboxes.size(1) == isCrowd.size(1),
        "The number of detections in ImageFeature's ISCROWD should be equal to " +
          "the number of detections in the RoiLabel")
    }
    isCrowdData(position) = isCrowd
    labelData(position) = label

    // tensor rows are indexed from 1
    val row = position + 1
    imgInfoData.setValue(row, 1, img.getHeight())
    imgInfoData.setValue(row, 2, img.getWidth())
    imgInfoData.setValue(row, 3, img.getOriginalHeight)
    imgInfoData.setValue(row, 4, img.getOriginalWidth)
  }

  /**
   * Pads the buffered images into one zero-filled tensor sized to the largest image and
   * wraps it, together with the labels, into a RoiMiniBatch.
   * @param batchSize number of buffered images
   * @return the mini-batch; target and isCrowd are null when the images are unlabeled
   */
  override protected def createBatch(batchSize: Int): MiniBatch[Float] = {
    val (height, wide) = getFrameSize(batchSize)
    if (featureTensor == null) {
      featureTensor = Tensor[Float]()
    }
    featureTensor.resize(batchSize, 3, height, wide).fill(0.0f)

    // place every image in the top-left corner of its frame; the rest stays zero
    (0 until batchSize).foreach { idx =>
      val image = imageBuffer(idx)
      val region = featureTensor.select(1, idx + 1)
        .narrow(2, 1, image.size(2))
        .narrow(3, 1, image.size(3))
      region.copy(image)
    }

    val labels = checkLabels(arraySlice(labelData, batchSize))
    val crowd = if (labels != null) arraySlice(isCrowdData, batchSize) else null
    val info = imgInfoData.narrow(1, 1, batchSize)
    RoiMiniBatch(featureTensor, labels, crowd, info)
  }
}