// com.intel.analytics.bigdl.dataset.segmentation.COCODataset.scala (Maven / Gradle / Ivy)
//
// Show more of this group. Show more artifacts with this name.
// Show all versions of bigdl. Show documentation.
// There is a newer version: 0.13.0
/*
 * Copyright 2016 The BigDL Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.intel.analytics.bigdl.dataset.segmentation

import com.google.gson.{Gson, GsonBuilder, JsonDeserializationContext, JsonDeserializer, JsonElement, TypeAdapter}
import com.google.gson.annotations.SerializedName
import com.google.gson.reflect.TypeToken
import com.google.gson.stream.{JsonReader, JsonWriter}
import com.intel.analytics.bigdl.DataSet
import com.intel.analytics.bigdl.dataset.DataSet
import com.intel.analytics.bigdl.dataset.DataSet.SeqFileFolder
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
import com.intel.analytics.bigdl.transform.vision.image.{ImageFeature, RoiImageInfo}
import com.intel.analytics.bigdl.utils.{T, Table}
import java.io.{BufferedReader, FileReader}
import java.lang.reflect.Type
import java.nio.ByteBuffer
import java.nio.file.{Files, Path, Paths}
import org.apache.spark.SparkContext
import scala.collection.mutable.ArrayBuffer

/**
 * Accumulates primitive values into a growable byte buffer.
 *
 * Numeric values go through a reusable `java.nio.ByteBuffer` with its default
 * (big-endian) byte order. Byte arrays and strings are written as a 4-byte length
 * followed by the raw bytes. Strings are encoded with the platform default charset
 * (`String.getBytes` without arguments).
 */
private[bigdl] class COCOSerializeContext {
  // Reusable scratch areas, one per primitive width
  private val scratch4 = ByteBuffer.allocate(4)
  private val scratch8 = ByteBuffer.allocate(8)
  private val out = new ArrayBuffer[Byte]()

  // Copies the whole backing array of a scratch buffer to the output
  private def flush(scratch: ByteBuffer): Unit = out ++= scratch.array()

  def dump(v: Float): Unit = {
    scratch4.clear()
    scratch4.putFloat(v)
    flush(scratch4)
  }

  def dump(v: Double): Unit = {
    scratch8.clear()
    scratch8.putDouble(v)
    flush(scratch8)
  }

  def dump(v: Int): Unit = {
    scratch4.clear()
    scratch4.putInt(v)
    flush(scratch4)
  }

  def dump(v: Long): Unit = {
    scratch8.clear()
    scratch8.putLong(v)
    flush(scratch8)
  }

  /** Writes a single byte: 1 for true, 0 for false. */
  def dump(v: Boolean): Unit = {
    val flag: Byte = v match {
      case true => 1
      case false => 0
    }
    out += flag
  }

  /** Writes the string's bytes, length-prefixed, via `dump(Array[Byte])`. */
  def dump(v: String): Unit = dump(v.getBytes)

  /** Discards everything written so far. */
  def clear(): Unit = out.clear()

  /** Writes the array length as a 4-byte Int, then the bytes themselves. */
  def dump(v: Array[Byte]): Unit = {
    dump(v.length)
    out ++= v
  }

  /** Returns a copy of all bytes written since the last `clear()`. */
  def toByteArray: Array[Byte] = out.toArray
}


/**
 * Reads values sequentially from the given `ByteBuffer`, in the layout produced by
 * `COCOImage.dumpTo` / `COCOAnotationOD.dumpTo`.
 *
 * Every read advances the buffer's position. Strings are decoded with the platform
 * default charset (`new String(bytes)`).
 */
private[bigdl] class COCODeserializer(buffer: ByteBuffer) {
  private def getFloat: Float = buffer.getFloat
  private def getDouble: Double = buffer.getDouble
  def getInt: Int = buffer.getInt
  private def getLong: Long = buffer.getLong
  // A boolean is stored as one byte; any non-zero value is true
  private def getBoolean: Boolean = buffer.get != 0

  /** Reads a 4-byte length followed by that many bytes and decodes them as a String. */
  def getString: String = {
    val bytes = new Array[Byte](getInt)
    buffer.get(bytes)
    new String(bytes)
  }
  case class SimpleAnnotation(categoryIdx: Int, area: Float, bbox1: Float, bbox2: Float,
    bbox3: Float, bbox4: Float, isCrowd: Boolean, masks: SegmentationMasks)

  /**
   * Reads one image record.
   * @return (image height, image width, all annotations of the image)
   */
  def getAnnotations: (Int, Int, Array[SimpleAnnotation]) = {
    val height = getInt
    val width = getInt
    val nAnnotations = getInt
    val annos = Iterator.fill(nAnnotations)(getAnnotation(height, width)).toArray
    (height, width, annos)
  }

  // Reads a length-prefixed run of Ints (the RLE counts)
  private def readIntArray(): Array[Int] = {
    val n = getInt
    val values = new Array[Int](n)
    var i = 0
    while (i < n) {
      values(i) = getInt
      i += 1
    }
    values
  }

  // Reads a length-prefixed run of Floats (one polygon)
  private def readFloatArray(): Array[Float] = {
    val n = getInt
    val values = new Array[Float](n)
    var i = 0
    while (i < n) {
      values(i) = getFloat
      i += 1
    }
    values
  }

  // Reads a length-prefixed list of polygons
  private def readPolygons(): Array[Array[Float]] = {
    val n = getInt
    val polygons = new Array[Array[Float]](n)
    var i = 0
    while (i < n) {
      polygons(i) = readFloatArray()
      i += 1
    }
    polygons
  }

  private def getAnnotation(height: Int, width: Int): SimpleAnnotation = {
    val categoryIdx = getInt
    val area = getFloat
    val x1 = getFloat
    val y1 = getFloat
    val x2 = getFloat
    val y2 = getFloat
    val crowd = getBoolean
    // Crowd annotations carry RLE counts, all others carry polygons
    val masks: SegmentationMasks =
      if (crowd) RLEMasks(readIntArray(), height, width)
      else PolyMasks(readPolygons(), height, width)
    SimpleAnnotation(categoryIdx, area, x1, y1, x2, y2, crowd, masks)
  }
}

/**
 * In-memory form of a COCO annotation file: dataset info, images, annotations,
 * licenses and categories.
 *
 * Instances returned by `COCODataset.load` have `init` applied, which links every
 * annotation to its image and assigns its category index.
 */
case class COCODataset(info: COCODatasetInfo, images: Array[COCOImage],
  annotations: Array[COCOAnotationOD],
  licenses: Array[COCOLicence], categories: Array[COCOCategory]) {

  // categoryId -> category index. The index is the position in `categories` plus 1,
  // because 0 is reserved for background. Derived from `categories` on first use so the
  // lookup does not depend on init() having been called.
  private lazy val cateId2catIdx: Map[Long, Int] =
    categories.iterator.zipWithIndex.map { case (cate, idx) => (cate.id, idx + 1) }.toMap
  private lazy val imageId2Image = images.toIterator.map(img => (img.id, img)).toMap

  /**
   * Link annotations to their images, assign category indexes, give polygon
   * segmentations the image size, and set the image root path on every image.
   * @param imgRoot the root path of the image files
   */
  private[segmentation] def init(imgRoot: String): Unit = {
    annotations.foreach(anno => {
      require(imageId2Image.contains(anno.imageId), s"Cannot find image_id ${anno.imageId}")
      val img = imageId2Image(anno.imageId)
      // Same exception type as a plain map lookup, but with a message naming the id
      val catIdx: Int = cateId2catIdx.getOrElse(anno.categoryId,
        throw new NoSuchElementException(s"Cannot find category_id ${anno.categoryId}"))
      anno._categoryIdx = catIdx
      anno.image = img
      img.annotations += anno
      anno.segmentation match {
        case poly: COCOPoly =>
          // Rebuild the polygon with the real image size now that the image is known
          anno.segmentation = COCOPoly(poly.poly, img.height, img.width)
        case _ =>
      }
    })
    images.foreach(_.imgRootPath = imgRoot)

  }

  /**
   * Find a COCOImage by the image id
   * @param id image id
   * @return the COCOImage with the given id
   */
  def getImageById(id: Long): COCOImage = imageId2Image(id)

  /**
   * Convert COCO categoryId into category index.
   * COCO dataset's categoryId is not continuous from 1 to number of categories.
   * This function maps every categoryId to a number from 1 to number of categories - The result is
   * called category index. The category index 0 is reserved for "background" class.
   * @param id categoryId
   * @return category index
   */
  def categoryId2Idx(id: Long): Int = cateId2catIdx(id)

  /**
   * Get the category data by the category index
   * @param idx category index (1-based; element `idx - 1` of `categories` is returned)
   * @return category data
   */
  def getCategoryByIdx(idx: Int): COCOCategory = categories(idx - 1)

  /**
   * Convert the images & ground truths into ImageFeatures.
   * The image feature is in the same format of what COCODataset.loadFromSeqFile returns
   * @return an iterator that converts one image per step via `COCOImage.toImageFeature`
   */
  def toImageFeatures: Iterator[ImageFeature] = images.toIterator.map(_.toImageFeature)
}

/**
 * Dataset-level metadata, held in `COCODataset.info`.
 * The fields are plain values; nothing in this class interprets them.
 */
case class COCODatasetInfo(
  year: Int,
  version: String,
  description: String,
  contributor: String,
  url: String
  ) {
  // Mapped to the JSON key "date_created" by Gson
  @SerializedName("date_created") var dateCreated: String = _
}

/**
 * Metadata of one image plus the annotations attached to it.
 *
 * `annotations` and `imgRootPath` are transient. Within this file they are filled by
 * `COCODataset.init`; until then `annotations` is empty and `imgRootPath` is unset.
 */
case class COCOImage(
  id: Long,
  height: Int,
  width : Int,
  license: Int
) {
  @transient lazy val annotations: ArrayBuffer[COCOAnotationOD] = new ArrayBuffer[COCOAnotationOD]
  @transient private[segmentation] var imgRootPath: String = _
  @SerializedName("flickr_url") var flickrUrl: String = _
  @SerializedName("coco_url") var cocoUrl: String = _
  @SerializedName("date_captured") var dateCaptured: String = _
  @SerializedName("file_name") var fileName: String = _

  /**
   * Write height, width, the number of annotations and then every annotation
   * into the given context.
   */
  def dumpTo(context: COCOSerializeContext): Unit = {
    context.dump(height)
    context.dump(width)
    context.dump(annotations.size)
    annotations.foreach(_.dumpTo(context))
  }

  /**
   * Get the path of the image in local file system
   * @return `fileName` resolved against `imgRootPath`
   */
  def path: Path = Paths.get(imgRootPath, fileName)

  /**
   * Read the data from the image file
   * @return the raw bytes of the file at `path`
   */
  def data: Array[Byte] = Files.readAllBytes(path)

  // Category index of every annotation, as a 1-D tensor of length annotations.length
  private def classTensor: Tensor[Float] =
    Tensor(annotations.map(_.categoryIdx.toFloat).toArray, Array(annotations.length))

  // Bounding boxes flattened row by row into an (annotations.length, 4) tensor
  private def bboxTensor: Tensor[Float] =
    Tensor(
      annotations.toIterator.flatMap(ann => {
        val (x1, y1, x2, y2) = ann.bbox
        Iterator(x1, y1, x2, y2)
      }).toArray,
      Array(annotations.length, 4))

  // 1f for crowd annotations and 0f otherwise, as a 1-D tensor
  private def isCrowdTensor: Tensor[Float] =
    Tensor(annotations.map(ann => if (ann.isCrowd) 1f else 0f).toArray,
      Array(annotations.length))

  /**
   * Convert the image's image data and ground truth into an image feature.
   * The image feature is in the same format of what COCODataset.loadFromSeqFile returns
   * @return an ImageFeature with ground truth & image data
   */
  def toImageFeature: ImageFeature = {
    val labelClasses = classTensor
    val bboxes = bboxTensor
    val isCrowd = isCrowdTensor
    val masks = annotations.map(ann => ann.segmentation.asInstanceOf[SegmentationMasks]).toArray

    val rawdata = SeqFileFolder.decodeRawImageToBGR(this.data)
    // The decoded buffer must hold exactly 3 channels per pixel of the declared size
    require(rawdata.length == height * width * 3,
      s"Decoded image $fileName has ${rawdata.length} bytes, " +
        s"expected ${height * width * 3} (height=$height, width=$width, 3 channels)")
    val imf = ImageFeature(rawdata, RoiLabel(labelClasses, bboxes, masks), fileName)
    imf(ImageFeature.originalSize) = (height, width, 3)
    imf(RoiImageInfo.ISCROWD) = isCrowd
    imf
  }

  /**
   * Convert the image's ground truth label & masks into Table for RoiMiniBatch
   * @return a table with ground truth label & masks for the image
   */
  def toTable: Table = {
    T()
      .update(RoiImageInfo.ISCROWD, isCrowdTensor)
      .update(RoiImageInfo.ORIGSIZE, (height, width, 3))
      .update(RoiImageInfo.MASKS,
        annotations.map(ann => ann.segmentation.asInstanceOf[SegmentationMasks].toRLE).toArray)
      .update(RoiImageInfo.BBOXES, bboxTensor)
      .update(RoiImageInfo.CLASSES, classTensor)
  }
}

/**
 * One annotation of an image ("OD" stands for Object Detection).
 * @param id the Id of the annotation
 * @param imageId the Id of the image this annotation belongs to
 * @param categoryId the Id of the category. Note that categoryId is not continuous from 0 to
 *                   the number of categories. Use COCODataset.categoryId2Idx to convert a
 *                   categoryId to a compact category index.
 * @param segmentation the segmentation data
 * @param area  area
 * @param bbox  the bounding box, (xmin, ymin, xmax, ymax)
 * @param isCrowd whether the annotation is a crowd, e.g. a crowd of people. If true,
 *                segmentation is a COCORLE object
 * @param image the reference to the image
 */
case class COCOAnotationOD(id: Long, imageId: Long, categoryId: Long,
  var segmentation: COCOSegmentation, area: Float,
  bbox: (Float, Float, Float, Float), isCrowd: Boolean, @transient var image: COCOImage = null) {

  // Stays -1 until it is assigned from within the segmentation package
  @transient private[segmentation] var _categoryIdx: Long = -1
  def categoryIdx: Long = _categoryIdx

  /**
   * Write category index, area, the four bbox values, the crowd flag and finally the
   * segmentation into the context. Fails if the category index was never assigned.
   */
  def dumpTo(context: COCOSerializeContext): Unit = {
    require(_categoryIdx != -1, "COCOAnotationOD should be initialized")
    val (xMin, yMin, xMax, yMax) = bbox
    context.dump(_categoryIdx.toInt)
    context.dump(area)
    Seq(xMin, yMin, xMax, yMax).foreach(v => context.dump(v))
    context.dump(isCrowd)
    segmentation.dumpTo(context)
  }
}

/**
 * A license entry, held in `COCODataset.licenses`.
 */
case class COCOLicence(
  id: Long, name: String, url: String
)

/**
 * A category entry, held in `COCODataset.categories`.
 * `id` is the categoryId that annotations refer to.
 */
case class COCOCategory(
  id: Long, name: String) {
  // Mapped to the JSON key "supercategory" by Gson
  @SerializedName("supercategory") var superCategory: String = _
}

/**
 * Segmentation data of an annotation that can write itself into a serialize context.
 */
trait COCOSegmentation {
  /** Write this segmentation into the given context. */
  def dumpTo(context: COCOSerializeContext): Unit
}

/**
 * Polygon segmentation: a list of polygons, each a flat array of Floats.
 */
case class COCOPoly(_poly: Array[Array[Float]], _height: Int, _width: Int)
  extends PolyMasks(_poly, _height, _width) with COCOSegmentation {
  /**
   * Writes the number of polygons, then for every polygon its length followed by
   * its values.
   */
  override def dumpTo(context: COCOSerializeContext): Unit = {
    context.dump(poly.length)
    for (polygon <- poly) {
      context.dump(polygon.length)
      for (coord <- polygon) {
        context.dump(coord)
      }
    }
  }
}

 /**
  * Run-length-encoded segmentation, given as an array of counts.
  */
 case class COCORLE(_counts: Array[Int], _height: Int, _width: Int)
   extends RLEMasks(_counts, _height, _width) with COCOSegmentation {
   /** Writes the number of counts followed by every count. */
   override def dumpTo(context: COCOSerializeContext): Unit = {
     val total = counts.length
     context.dump(total)
     var i = 0
     while (i < total) {
       context.dump(counts(i))
       i += 1
     }
   }
 }

/**
 * Loaders for COCO datasets: from a JSON annotation file on the local file system,
 * or from Hadoop sequence files.
 */
object COCODataset {
  private[bigdl] val MAGIC_NUM = 0x1f3d4e5a

  /**
   * Gson deserializer for a single annotation object.
   *
   * The JSON bbox (x, y, width, height) is converted to corner form
   * (x, y, x + width - 1, y + height - 1). When "iscrowd" is 1 the segmentation is read
   * as an object with "counts" and "size"; otherwise it is read as a list of polygons.
   * Polygons get a placeholder size of (-1, -1) here.
   */
  private[segmentation] class AnnotationDeserializer extends
    JsonDeserializer[COCOAnotationOD] {
    // Lazy so that the adapters are only looked up once COCODataset.gson exists
    private lazy val intArrAdapter = COCODataset.gson.getAdapter(classOf[Array[Int]])
    private lazy val polyAdapter = COCODataset.gson.getAdapter(classOf[Array[Array[Float]]])
    override def deserialize(json: JsonElement, ty: Type,
      context: JsonDeserializationContext): COCOAnotationOD = {
      val obj = json.getAsJsonObject
      val id = obj.get("id").getAsLong
      val imageId = obj.get("image_id").getAsLong
      val categoryId = obj.get("category_id").getAsLong
      val area = obj.get("area").getAsFloat
      val rawBbox = obj.get("bbox").getAsJsonArray
      require(rawBbox.size() == 4, "The bbox in the COCO annotation data should have 4 elements")
      val (x1, y1, w, h) = (rawBbox.get(0).getAsFloat, rawBbox.get(1).getAsFloat,
        rawBbox.get(2).getAsFloat, rawBbox.get(3).getAsFloat)
      val bbox = (x1, y1, x1 + w - 1, y1 + h - 1)
      val isCrowd = obj.get("iscrowd").getAsInt == 1
      val seg = if (isCrowd) {
        val segJson = obj.getAsJsonObject("segmentation")
        val cnts = intArrAdapter.fromJsonTree(segJson.get("counts"))
        val size = intArrAdapter.fromJsonTree(segJson.get("size"))
        require(size.length == 2, "The size in the COCO annotation data should have 2 elements")
        COCORLE(cnts, size(0), size(1))
      } else {
        val polys = polyAdapter.fromJsonTree(obj.get("segmentation"))
        COCOPoly(polys, -1, -1)
      }
      COCOAnotationOD(id, imageId, categoryId, seg, area, bbox, isCrowd)
    }
  }

  // Gson instance with the custom annotation deserializer registered
  private lazy val gson = {
    val gsonBuilder = new GsonBuilder()
    val theType = new TypeToken[COCOAnotationOD]() {}.getType
    val deserializer = new AnnotationDeserializer
    gsonBuilder.registerTypeAdapter(theType, deserializer)
    gsonBuilder.create()
  }

  /**
   * Load COCO dataset from local file
   * @param jsonPath the JSON metadata file path
   * @param imageRoot the root path of the image files
   * @return the loaded COCO dataset
   */
  def load(jsonPath: String, imageRoot: String = "."): COCODataset = {
    val reader = new BufferedReader(new FileReader(jsonPath))
    // Always release the file handle, including when parsing fails
    val d = try {
      gson.fromJson(reader, classOf[COCODataset])
    } finally {
      reader.close()
    }
    require(d != null, s"No COCO dataset could be parsed from $jsonPath")
    d.init(imageRoot)
    d
  }

  /**
   * Load COCO dataset from Hadoop sequence files
   * @param url sequence files folder path on HDFS/Local
   * @param sc spark context
   * @param partitionNum partition number, default: Engine.nodeNumber() * Engine.coreNumber()
   *                     (the default is applied by SeqFileFolder.filesToRoiImageFeatures,
   *                     not in this method)
   * @return ImageFeatures for the dataset.
   *         Key in ImageFeature    Value               Type
   *         ImageFeature.bytes     decoded image data  Array[Byte]
   *         ImageFeature.uri       Image file name     String
   *         ImageFeature.label     Label & masks       RoiLabel
   *         RoiImageInfo.ISCROWD   isCrowd             Tensor[Float]
   */
  def loadFromSeqFile(url: String, sc: SparkContext,
    partitionNum: Option[Int] = None): DataSet[ImageFeature] = {
    SeqFileFolder.filesToRoiImageFeatures(url, sc, partitionNum)
  }
}