
com.intel.analytics.bigdl.optim.DistriValidator.scala

/*
 * Copyright 2016 The BigDL Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.intel.analytics.bigdl.optim

import com.intel.analytics.bigdl._
import com.intel.analytics.bigdl.dataset.{DistributedDataSet, MiniBatch}
import com.intel.analytics.bigdl.optim.DistriValidator._
import com.intel.analytics.bigdl.utils.{Engine, MklBlas}
import org.apache.log4j.Logger

object DistriValidator {
  val logger = Logger.getLogger(this.getClass)
}

/**
 * Validate model on a distributed cluster.
 *
 * @param model model to be validated
 * @param dataSet validation dataset
 */
class DistriValidator[T] private[optim](
  model: Module[T],
  dataSet: DistributedDataSet[MiniBatch[T]]
) extends Validator[T, MiniBatch[T]](model, dataSet) {

  /**
   * Applies vMethods to the model on the validation dataset and aggregates
   * the results across all partitions.
   *
   * @param vMethods validation methods to apply, e.g. accuracy or loss
   * @return one aggregated result per validation method, paired with that method
   */
  override def test(vMethods: Array[ValidationMethod[T]])
  : Array[(ValidationResult, ValidationMethod[T])] = {

    val rdd = dataSet.data(train = false)
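    // Broadcast the evaluated model together with the validation methods to the executors.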
    val broadcastModel = rdd.sparkContext.broadcast((model.evaluate(), vMethods))
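    // Number of model replicas to run concurrently on each node: one per core for the MKL-BLAS engine.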
    val _subModelNumber = Engine.getEngineType match {
      case MklBlas => Engine.coreNumber()
      case _ => throw new IllegalArgumentException("DistriValidator only supports the MklBlas engine type")
    }
    val nExecutor = Engine.nodeNumber()
    val executorCores = Engine.coreNumber()
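    // Validate every partition on the executors; the Engine node/core settings are re-applied inside each task.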
    rdd.mapPartitions(dataIter => {
      Engine.setNodeAndCore(nExecutor, executorCores)
      val localModel = broadcastModel.value._1
      val localMethod = broadcastModel.value._2
      logger.info("model thread pool size is " + Engine.model.getPoolSize)
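      // Clone the model once per sub-task so forward passes can run concurrently,
      // and switch the clones to evaluate mode.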
      val workingModels = (1 to _subModelNumber)
        .map(_ => localModel.cloneModule().evaluate()).toArray
      val vMethodsArr = (1 to _subModelNumber).map(_ => localMethod.map(_.clone())).toArray
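      // For every mini-batch: split it into (nearly) equal slices, one per working model,
      // then validate the slices in parallel.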
      dataIter.map(batch => {
        val stackSize = batch.size() / _subModelNumber
        val extraSize = batch.size() % _subModelNumber
        val parallelism = if (stackSize == 0) extraSize else _subModelNumber
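        // Run forward and the validation methods for each slice concurrently,
        // blocking until all sub-tasks finish.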
        Engine.default.invokeAndWait(
          (0 until parallelism).map(b =>
            () => {
              val offset = b * stackSize + math.min(b, extraSize) + 1
              val length = stackSize + (if (b < extraSize) 1 else 0)
              val currentMiniBatch = batch.slice(offset, length)
              val input = currentMiniBatch.getInput()
              val target = currentMiniBatch.getTarget()
              val output = workingModels(b).forward(input)
              val validateMethods = vMethodsArr(b)
              validateMethods.map(validation => {
                validation(output, target)
              })
            }
          )
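        // Merge the per-slice results element-wise, giving one result per validation method for this batch.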
        ).reduce((left, right) => {
          left.zip(right).map { case (l, r) =>
            l + r
          }
        })
      })
    }).reduce((left, right) => {
      left.zip(right).map { case (l, r) =>
        l + r
      }
    }).zip(vMethods)
  }
}
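
For context, a minimal usage sketch follows; it is not part of the original file. It assumes a trained Float model and an already-built DistributedDataSet[MiniBatch[Float]] (shown here as placeholders), and that the instance is obtained through the Validator factory in the same package, since the constructor above is private[optim]. The numeric implicit import and the factory call follow the usual BigDL idiom, so adjust them to your BigDL version if needed.

import com.intel.analytics.bigdl._
import com.intel.analytics.bigdl.dataset.{DistributedDataSet, MiniBatch}
import com.intel.analytics.bigdl.numeric.NumericFloat
import com.intel.analytics.bigdl.optim.{Top1Accuracy, Validator}

// Hypothetical inputs: a trained model and a distributed validation set.
val model: Module[Float] = ???
val valSet: DistributedDataSet[MiniBatch[Float]] = ???

// The Validator factory is expected to return a DistriValidator for a DistributedDataSet.
val validator = Validator(model, valSet)

// Run Top-1 accuracy over the whole validation set and print one result per method.
val results = validator.test(Array(new Top1Accuracy[Float]()))
results.foreach { case (result, method) =>
  println(s"$method is $result")
}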



