All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.intel.analytics.zoo.models.python.PythonZooModel.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2018 Analytics Zoo Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.intel.analytics.zoo.models.python

import java.util.{List => JList, Map => JMap}

import com.intel.analytics.bigdl.{Criterion}
import com.intel.analytics.bigdl.dataset.PaddingParam
import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity}
import com.intel.analytics.bigdl.nn.keras.KerasLayer
import com.intel.analytics.bigdl.optim.{OptimMethod, ValidationMethod, ValidationResult}
import com.intel.analytics.bigdl.python.api.{EvaluatedResult, JTensor, Sample}
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.transform.vision.image.ImageFeature
import com.intel.analytics.bigdl.utils.{Shape, Table}
import com.intel.analytics.zoo.common.PythonZoo
import com.intel.analytics.zoo.feature.common.Preprocessing
import com.intel.analytics.zoo.feature.image._
import com.intel.analytics.zoo.feature.text.TextSet
import com.intel.analytics.zoo.models.anomalydetection.{AnomalyDetector, FeatureLabelIndex}
import com.intel.analytics.zoo.models.common.{KerasZooModel, Ranker, ZooModel}
import com.intel.analytics.zoo.models.image.common.{ImageConfigure, ImageModel}
import com.intel.analytics.zoo.models.image.objectdetection._
import com.intel.analytics.zoo.models.image.imageclassification.{ImageClassifier, LabelReader => IMCLabelReader}
import com.intel.analytics.zoo.models.recommendation.{NeuralCF, Recommender, UserItemFeature, UserItemPrediction}
import com.intel.analytics.zoo.models.recommendation._
import com.intel.analytics.zoo.models.seq2seq.{RNNDecoder, RNNEncoder, Seq2seq}
import com.intel.analytics.zoo.models.textclassification.TextClassifier
import com.intel.analytics.zoo.models.textmatching.KNRM
import com.intel.analytics.zoo.pipeline.api.keras.layers.{Embedding, WordEmbedding}
import com.intel.analytics.zoo.pipeline.api.keras.models.KerasNet
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame}

import scala.reflect.ClassTag
import scala.collection.JavaConverters._

/**
 * Factory entry points for [[PythonZooModel]], one per supported numeric type.
 */
object PythonZooModel {

  /** Creates a [[PythonZooModel]] whose numeric type parameter is `Float`. */
  def ofFloat(): PythonZooModel[Float] = new PythonZooModel[Float]

  /** Creates a [[PythonZooModel]] whose numeric type parameter is `Double`. */
  def ofDouble(): PythonZooModel[Double] = new PythonZooModel[Double]
}

/**
 * Bridge class exposing the built-in Analytics Zoo models (text classification, anomaly
 * detection, image models, recommendation, text matching and seq2seq) through signatures that
 * use Java collection types (`JList`, `JMap`, `JavaRDD`).
 *
 * Most methods are thin wrappers: they convert Java collections to their Scala counterparts
 * and delegate to the corresponding model API.
 *
 * @tparam T numeric type of the underlying tensors
 */
class PythonZooModel[T: ClassTag](implicit ev: TensorNumeric[T]) extends PythonZoo[T] {

  /**
   * Saves a model by delegating to `model.saveModel(path, weightPath, overWrite)`.
   *
   * @return whatever `saveModel` returns
   */
  def saveZooModel(
      model: ZooModel[Activity, Activity, T],
      path: String,
      weightPath: String = null,
      overWrite: Boolean = false): ZooModel[Activity, Activity, T] = {
    model.saveModel(path, weightPath, overWrite)
  }

  /** Builds a [[TextClassifier]] by forwarding every argument to `TextClassifier[T](...)`. */
  def createZooTextClassifier(
      classNum: Int,
      embedding: Embedding[T],
      sequenceLength: Int = 500,
      encoder: String = "cnn",
      encoderOutputDim: Int = 256,
      model: AbstractModule[Activity, Activity, T]): TextClassifier[T] = {
    TextClassifier[T](classNum, embedding, sequenceLength, encoder, encoderOutputDim, model)
  }

  /** Loads a [[TextClassifier]] via `TextClassifier.loadModel(path, weightPath)`. */
  def loadTextClassifier(
      path: String,
      weightPath: String = null): TextClassifier[T] = {
    TextClassifier.loadModel(path, weightPath)
  }

  /**
   * Compiles a [[TextClassifier]] with the given optimizer, loss and optional metrics.
   *
   * @param metrics validation methods; `null` is forwarded unchanged as `null`
   */
  def textClassifierCompile(
      model: TextClassifier[T],
      optimizer: OptimMethod[T],
      loss: Criterion[T],
      metrics: JList[ValidationMethod[T]] = null): Unit = {
    val scalaMetrics = if (metrics == null) null else metrics.asScala.toList
    model.compile(optimizer, loss, scalaMetrics)
  }

  /** Trains a [[TextClassifier]] by delegating to `model.fit`. */
  def textClassifierFit(
      model: TextClassifier[T],
      x: TextSet,
      batchSize: Int,
      nbEpoch: Int,
      validationData: TextSet): Unit = {
    model.fit(x, batchSize, nbEpoch, validationData)
  }

  /** Runs `model.predict(x, batchPerThread)` and returns the resulting [[TextSet]]. */
  def textClassifierPredict(
      model: TextClassifier[T],
      x: TextSet,
      batchPerThread: Int): TextSet = {
    model.predict(x, batchPerThread)
  }

  /**
   * Evaluates a [[TextClassifier]] on `x` and converts the outcome into a Java list of
   * `EvaluatedResult`.
   */
  def textClassifierEvaluate(
      model: TextClassifier[T],
      x: TextSet,
      batchSize: Int): JList[EvaluatedResult] = {
    val resultArray = model.evaluate(x, batchSize)
    processEvaluateResult(resultArray)
  }

  /**
   * Converts `(ValidationResult, ValidationMethod)` pairs into `EvaluatedResult`s built from
   * the two components of `ValidationResult.result()` and the method's `toString()`.
   */
  private def processEvaluateResult(
    resultArray: Array[(ValidationResult, ValidationMethod[T])]): JList[EvaluatedResult] = {
    resultArray.map { case (validationResult, method) =>
      // Read the result tuple once instead of calling result() for each component.
      val outcome = validationResult.result()
      EvaluatedResult(outcome._1, outcome._2, method.toString())
    }.toList.asJava
  }

  /** Delegates to `model.setCheckpoint(path, overWrite)`. */
  def textClassifierSetCheckpoint(
      model: TextClassifier[T],
      path: String,
      overWrite: Boolean = true): Unit = {
    model.setCheckpoint(path, overWrite)
  }

  /** Delegates to `model.setTensorBoard(logDir, appName)`. */
  def textClassifierSetTensorBoard(
      model: TextClassifier[T],
      logDir: String,
      appName: String): Unit = {
    model.setTensorBoard(logDir, appName)
  }

  /**
   * Builds an [[AnomalyDetector]] from Java-list hyper-parameters and attaches an existing
   * module to it through `addModel`.
   *
   * @param model module to attach; it is cast to a `Tensor`-to-`Tensor` module
   */
  def createZooAnomalyDetector(
      featureShape: JList[Int],
      hiddenLayers: JList[Int],
      dropouts: JList[Double],
      model: AbstractModule[Activity, Activity, T]): AnomalyDetector[T] = {
    new AnomalyDetector[T](Shape(featureShape.asScala.toArray),
      hiddenLayers.asScala.toArray, dropouts.asScala.toArray)
      .addModel(model.asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]])
  }

  /** Loads an [[AnomalyDetector]] via `AnomalyDetector.loadModel(path, weightPath)`. */
  def loadAnomalyDetector(
      path: String,
      weightPath: String = null): AnomalyDetector[T] = {
    AnomalyDetector.loadModel(path, weightPath)
  }

  /** Applies the anomaly-detection `Utils.standardScale` to every column of `df`. */
  def standardScaleDF(df: DataFrame): DataFrame = {
    val fields = df.columns
    com.intel.analytics.zoo.models.anomalydetection.Utils.standardScale(df, fields)
  }

  /**
   * Unrolls a series with `AnomalyDetector.unroll` and returns each unrolled record as a list
   * of strings (see [[toUnrolledJavaRdd]]).
   *
   * Input values are narrowed from `Double` to `Float` before unrolling.
   */
  def unroll(dataRdd: JavaRDD[JList[Double]],
             unrollLength: Int,
             predictStep: Int = 1): JavaRDD[JList[String]] = {
    val rdd: RDD[Array[Float]] = dataRdd.rdd.map(row => row.asScala.toArray.map(_.toFloat))
    val unrolled = AnomalyDetector.unroll[Float](rdd, unrollLength, predictStep)
    toUnrolledJavaRdd(unrolled)
  }

  /**
   * Encodes each record as three strings: the feature (inner elements joined by `|`, rows
   * joined by `,`), the label and the index.
   */
  private def toUnrolledJavaRdd(features: RDD[FeatureLabelIndex[Float]]): JavaRDD[JList[String]] = {
    features.map { record =>
      val encodedFeature = record.feature.map(row => row.mkString("|")).mkString(",")
      List(encodedFeature, record.label.toString, record.index.toString).asJava
    }.toJavaRDD()
  }

  /** Converts each `(Double, Double, Any)` triple into a three-element Java list. */
  private def toAnomaliesJavaRdd(anomaliesRdd: RDD[(Double, Double, Any)]): JavaRDD[JList[Any]] = {
    anomaliesRdd.map(x => List[Any](x._1, x._2, x._3).asJava).toJavaRDD()
  }

  /**
   * Runs `AnomalyDetector.detectAnomalies` on the given truth/prediction RDDs.
   *
   * Elements arrive as boxed objects. Any `java.lang.Number` (not only `java.lang.Double`) is
   * widened with `doubleValue()`; every other value falls back to the plain cast to `Double`.
   */
  def detectAnomalies(
      yTruth: JavaRDD[Object],
      yPredict: JavaRDD[Object],
      anomalySize: Int = 5): JavaRDD[JList[Any]] = {
    // Local function value (not a method) so the Spark closure does not capture `this`.
    val toDouble: Object => Double = {
      case number: java.lang.Number => number.doubleValue()
      case other => other.asInstanceOf[Double]
    }
    val out: RDD[(Double, Double, Any)] = AnomalyDetector.detectAnomalies[Double](
      yTruth.rdd.map(toDouble), yPredict.rdd.map(toDouble), anomalySize)
    toAnomaliesJavaRdd(out)
  }

  /** Delegates to `model.setEvaluateStatus()` and returns its result. */
  def zooModelSetEvaluateStatus(
    model: ZooModel[Activity, Activity, T]): ZooModel[Activity, Activity, T] = {
    model.setEvaluateStatus()
  }

  /** Loads an [[ObjectDetector]] via `ObjectDetector.loadModel(path, weightPath)`. */
  def loadObjectDetector(path: String, weightPath: String = null): ObjectDetector[T] = {
    ObjectDetector.loadModel(path, weightPath)
  }

  /** Loads an [[ImageClassifier]] via `ImageClassifier.loadModel(path, weightPath)`. */
  def loadImageClassifier(path: String, weightPath: String = null): ImageClassifier[T] = {
    ImageClassifier.loadModel(path, weightPath)
  }

  /** Returns `LabelReader.readPascalLabelMap()` as a Java map. */
  def readPascalLabelMap(): JMap[Int, String] = {
    LabelReader.readPascalLabelMap().asJava
  }

  /** Returns `LabelReader.readCocoLabelMap()` as a Java map. */
  def readCocoLabelMap(): JMap[Int, String] = {
    LabelReader.readCocoLabelMap().asJava
  }

  /** Returns the image-classification label reader's ImageNet label map as a Java map. */
  def readImagenetLabelMap(): JMap[Int, String] = {
    IMCLabelReader.readImagenetlLabelMap().asJava
  }

  /** Delegates to `model.predictImageSet(image, config)`. */
  def imageModelPredict(model: ImageModel[T],
    image: ImageSet,
    config: ImageConfigure[T] = null): ImageSet = {
    model.predictImageSet(image, config)
  }

  /** Returns `model.getConfig`. */
  def getImageConfig(model: ImageModel[T]): ImageConfigure[T] = {
    model.getConfig
  }

  /**
   * Builds an [[ImageConfigure]].
   *
   * @param labelMap     converted to an immutable Scala map; `null` stays `null`
   * @param paddingParam wrapped with `Option(...)`, so `null` becomes `None`
   */
  def createImageConfigure(
      preProcessor: Preprocessing[ImageFeature, ImageFeature],
      postProcessor: Preprocessing[ImageFeature, ImageFeature],
      batchPerPartition: Int,
      labelMap: JMap[Int, String],
      paddingParam: PaddingParam[T]): ImageConfigure[T] = {
    val map = if (labelMap == null) null else labelMap.asScala.toMap
    ImageConfigure(preProcessor, postProcessor, batchPerPartition, map, Option(paddingParam))
  }

  /**
   * Chains `Visualizer` -> `ImageBytesToMat` -> `ImageMatToFloats`, all keyed on
   * `Visualizer.visualized`.
   *
   * @param labelMap must be non-null: it is converted unconditionally
   */
  def createVisualizer(labelMap: JMap[Int, String], thresh: Float = 0.3f,
                       encoding: String): Preprocessing[ImageFeature, ImageFeature] = {
    Visualizer(labelMap.asScala.toMap, thresh, encoding, Visualizer.visualized) ->
      ImageBytesToMat(Visualizer.visualized) -> ImageMatToFloats(shareBuffer = false)
  }

  /** Returns the configure's label map as a Java map, or `null` when it has none. */
  def getLabelMap(imageConfigure: ImageConfigure[T]): JMap[Int, String] = {
    if (imageConfigure.labelMap == null) null else imageConfigure.labelMap.asJava
  }

  /** Returns `ImInfo()`. */
  def createImInfo(): ImInfo = {
    ImInfo()
  }

  /** Returns `DecodeOutput()`. */
  def createDecodeOutput(): DecodeOutput = {
    DecodeOutput()
  }

  /** Returns `ScaleDetection()`. */
  def createScaleDetection(): ScaleDetection = {
    ScaleDetection()
  }

  /** Returns `PaddingParam()` built with its default arguments. */
  def createPaddingParam(): PaddingParam[T] = {
    PaddingParam()
  }

  /**
   * Builds a [[NeuralCF]] from the given hyper-parameters and attaches an existing module to
   * it through `addModel`.
   */
  def createZooNeuralCF(
      userCount: Int,
      itemCount: Int,
      numClasses: Int,
      userEmbed: Int = 20,
      itemEmbed: Int = 20,
      hiddenLayers: JList[Int],
      includeMF: Boolean = true,
      mfEmbed: Int = 20,
      model: AbstractModule[Activity, Activity, T]): NeuralCF[T] = {
    new NeuralCF[T](userCount, itemCount, numClasses, userEmbed, itemEmbed,
      hiddenLayers.asScala.toArray, includeMF, mfEmbed)
      .addModel(model.asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]])
  }

  /** Loads a [[NeuralCF]] via `NeuralCF.loadModel(path, weightPath)`. */
  def loadNeuralCF(
      path: String,
      weightPath: String = null): NeuralCF[T] = {
    NeuralCF.loadModel(path, weightPath)
  }

  /**
   * Builds a [[WideAndDeep]] from Java-list column descriptions and attaches an existing
   * module to it through `addModel`.
   *
   * Note that `hiddenLayers` is the third parameter here but is passed last to the
   * `WideAndDeep` constructor.
   */
  def createZooWideAndDeep(
      modelType: String = "wide_n_deep",
      numClasses: Int,
      hiddenLayers: JList[Int],
      wideBaseDims: JList[Int],
      wideCrossDims: JList[Int],
      indicatorDims: JList[Int],
      embedInDims: JList[Int],
      embedOutDims: JList[Int],
      continuousCols: JList[String],
      model: AbstractModule[Activity, Activity, T]): WideAndDeep[T] = {
    new WideAndDeep[T](modelType,
      numClasses,
      wideBaseDims.asScala.toArray,
      wideCrossDims.asScala.toArray,
      indicatorDims.asScala.toArray,
      embedInDims.asScala.toArray,
      embedOutDims.asScala.toArray,
      continuousCols.asScala.toArray,
      hiddenLayers.asScala.toArray)
        .addModel(model.asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]])
  }

  /** Loads a [[WideAndDeep]] via `WideAndDeep.loadModel(path, weightPath)`. */
  def loadWideAndDeep(
      path: String,
      weightPath: String = null): WideAndDeep[T] = {
    WideAndDeep.loadModel(path, weightPath)
  }

  /**
   * Builds a [[SessionRecommender]] from the given hyper-parameters and attaches an existing
   * module to it through `addModel`.
   */
  def createZooSessionRecommender(
      itemCount: Int,
      itemEmbed: Int,
      rnnHiddenLayers: JList[Int],
      sessionLength: Int,
      includeHistory: Boolean,
      mlpHiddenLayers: JList[Int],
      historyLength: Int,
      model: AbstractModule[Activity, Activity, T]): SessionRecommender[T] = {
    new SessionRecommender[T](itemCount, itemEmbed, rnnHiddenLayers.asScala.toArray, sessionLength,
      includeHistory, mlpHiddenLayers.asScala.toArray, historyLength)
      .addModel(model.asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]])
  }

  /** Loads a [[SessionRecommender]] via `SessionRecommender.loadModel(path, weightPath)`. */
  def loadSessionRecommender(
      path: String,
      weightPath: String = null): SessionRecommender[T] = {
    SessionRecommender.loadModel(path, weightPath)
  }

  /**
   * Converts an RDD of three-element arrays `(userId, itemId, sample)` into
   * `UserItemFeature`s.
   *
   * Every element is first checked to have exactly three entries; a violation fails the
   * `require` with "UserItemFeature should consist of userId, itemId and sample".
   */
  def toUserItemFeatureRdd(featureRdd: JavaRDD[Array[Object]]): RDD[UserItemFeature[T]] = {
    // NOTE: `foreach` is a Spark action, so this validation runs a job over `featureRdd`
    // eagerly, before the lazily evaluated mapping below is returned.
    featureRdd.rdd.foreach(x =>
      require(x.length == 3, "UserItemFeature should consist of userId, itemId and sample"))
    featureRdd.rdd.map(x =>
      UserItemFeature(x(0).asInstanceOf[Int], x(1).asInstanceOf[Int],
        toJSample(x(2).asInstanceOf[Sample])))
  }

  /**
   * Flattens each prediction into a four-element list of doubles:
   * `userId`, `itemId`, `prediction`, `probability`.
   */
  def toPredictionJavaRdd(predictionRdd: RDD[UserItemPrediction]): JavaRDD[JList[Double]] = {
    predictionRdd.map(x =>
      List(x.userId.toDouble, x.itemId.toDouble, x.prediction.toDouble, x.probability)
        .asJava).toJavaRDD()
  }

  /** Runs `model.predictUserItemPair` on the converted features and flattens the output. */
  def predictUserItemPair(
      model: Recommender[T],
      featureRdd: JavaRDD[Array[Object]]): JavaRDD[JList[Double]] = {
    val predictionRdd = model.predictUserItemPair(toUserItemFeatureRdd(featureRdd))
    toPredictionJavaRdd(predictionRdd)
  }

  /** Runs `model.recommendForUser` on the converted features and flattens the output. */
  def recommendForUser(
      model: Recommender[T],
      featureRdd: JavaRDD[Array[Object]],
      maxItems: Int): JavaRDD[JList[Double]] = {
    val predictionRdd = model.recommendForUser(toUserItemFeatureRdd(featureRdd), maxItems)
    toPredictionJavaRdd(predictionRdd)
  }

  /** Runs `model.recommendForItem` on the converted features and flattens the output. */
  def recommendForItem(
      model: Recommender[T],
      featureRdd: JavaRDD[Array[Object]],
      maxUsers: Int): JavaRDD[JList[Double]] = {
    val predictionRdd = model.recommendForItem(toUserItemFeatureRdd(featureRdd), maxUsers)
    toPredictionJavaRdd(predictionRdd)
  }

  /**
   * Runs `model.recommendForSession` and converts every `(Int, Float)` pair of each result
   * into a two-element list of floats (the `Int` component is converted with `toFloat`).
   */
  def recommendForSession(
      model: SessionRecommender[T],
      featureRdd: JavaRDD[Sample],
      maxItems: Int,
      zeroBasedLabel: Boolean): JavaRDD[JList[JList[Float]]] = {
    val predictionRdd: RDD[Array[(Int, Float)]] = model
      .recommendForSession(toJSample(featureRdd), maxItems, zeroBasedLabel)

    predictionRdd.map { pairs =>
      pairs.toList.map { case (id, value) => List(id.toFloat, value).asJava }.asJava
    }.toJavaRDD()
  }

  /** Delegates to `Utils.getNegativeSamples(indexed)`. */
  def getNegativeSamples(indexed: DataFrame): DataFrame = {
    Utils.getNegativeSamples(indexed)
  }

  /** Delegates to `model.summary()`. */
  def zooModelSummary(model: ZooModel[Activity, Activity, T]): Unit = {
    model.summary()
  }

  /** Delegates to `module.predictClasses` on the converted samples. */
  def zooModelPredictClasses(
      module: ZooModel[Activity, Activity, T],
      x: JavaRDD[Sample],
      batchSize: Int = 32,
      zeroBasedLabel: Boolean = true): JavaRDD[Int] = {
    module.predictClasses(toJSample(x), batchSize, zeroBasedLabel).toJavaRDD()
  }

  /**
   * Builds a [[KNRM]] by forwarding every argument to `KNRM[T](...)`; `embedWeights` is passed
   * through `toTensor` first.
   */
  def createZooKNRM(
      text1Length: Int,
      text2Length: Int,
      vocabSize: Int,
      embedSize: Int,
      embedWeights: JTensor = null,
      trainEmbed: Boolean = true,
      kernelNum: Int = 21,
      sigma: Double = 0.1,
      exactSigma: Double = 0.001,
      targetMode: String = "ranking",
      model: AbstractModule[Activity, Activity, T]): KNRM[T] = {
    KNRM[T](text1Length, text2Length, vocabSize, embedSize, toTensor(embedWeights),
      trainEmbed, kernelNum, sigma, exactSigma, targetMode, model)
  }

  /** Loads a [[KNRM]] via `KNRM.loadModel(path, weightPath)`. */
  def loadKNRM(
      path: String,
      weightPath: String = null): KNRM[T] = {
    KNRM.loadModel(path, weightPath)
  }

  /**
   * Calls `WordEmbedding.prepareEmbedding` and returns the third component of its result
   * (the embedding weights) as a `JTensor`.
   *
   * @param wordIndex converted to an immutable Scala map; `null` stays `null`
   */
  def prepareEmbedding(
      embeddingFile: String,
      wordIndex: JMap[String, Int] = null,
      randomizeUnknown: Boolean = false,
      normalize: Boolean = false): JTensor = {
    val scalaWordIndex = if (wordIndex != null) wordIndex.asScala.toMap else null
    val (_, _, embedWeights) = WordEmbedding.prepareEmbedding[T](
      embeddingFile, scalaWordIndex, randomizeUnknown, normalize)
    toJTensor(embedWeights)
  }

  /**
   * Builds a [[Seq2seq]] by forwarding the arguments to `Seq2seq(...)`; both shapes are
   * converted with `toScalaShape`.
   */
  def createZooSeq2seq(encoder: RNNEncoder[T],
    decoder: RNNDecoder[T],
    inputShape: JList[Int],
    outputShape: JList[Int],
    bridge: KerasLayer[Activity, Activity, T] = null,
    generator: KerasLayer[Activity, Activity, T] = null,
    model: AbstractModule[Table, Tensor[T], T]): Seq2seq[T] = {
    Seq2seq(encoder, decoder, toScalaShape(inputShape),
      toScalaShape(outputShape), bridge, generator, model)
  }

  /** Delegates to `ranker.evaluateNDCG(x, k, threshold)`. */
  def evaluateNDCG(
      ranker: Ranker[T],
      x: TextSet,
      k: Int,
      threshold: Double): Double = {
    ranker.evaluateNDCG(x, k, threshold)
  }

  /** Delegates to `ranker.evaluateMAP(x, threshold)`. */
  def evaluateMAP(
      ranker: Ranker[T],
      x: TextSet,
      threshold: Double): Double = {
    ranker.evaluateMAP(x, threshold)
  }

  /** Delegates to `model.setCheckpoint(path, overWrite)`. */
  def seq2seqSetCheckpoint(model: Seq2seq[T],
    path: String,
    overWrite: Boolean = true): Unit = {
    model.setCheckpoint(path, overWrite)
  }

  /** Loads a [[Seq2seq]] via `Seq2seq.loadModel(path, weightPath)`. */
  def loadSeq2seq(path: String,
    weightPath: String = null): Seq2seq[T] = {
    Seq2seq.loadModel(path, weightPath)
  }

  /**
   * Compiles a [[Seq2seq]] with the given optimizer, loss and optional metrics.
   *
   * @param metrics validation methods; `null` is forwarded unchanged as `null`
   */
  def seq2seqCompile(
    model: Seq2seq[T],
    optimizer: OptimMethod[T],
    loss: Criterion[T],
    metrics: JList[ValidationMethod[T]] = null): Unit = {
    val scalaMetrics = if (metrics == null) null else metrics.asScala.toList
    model.compile(optimizer, loss, scalaMetrics)
  }

  /**
   * Trains a [[Seq2seq]] by delegating to `model.fit`.
   *
   * @param validationData optional validation samples. The declared default is `null`, so it
   *                       is only converted when present and is otherwise forwarded as `null`.
   */
  def seq2seqFit(model: Seq2seq[T],
    x: JavaRDD[Sample],
    batchSize: Int,
    nbEpoch: Int,
    validationData: JavaRDD[Sample] = null): Unit = {
    // Do not push the null default through the RDD conversion.
    // NOTE(review): assumes `model.fit` treats a null validation set as "no validation",
    // in line with the default declared on this method.
    model.fit(toJSample(x), batchSize, nbEpoch,
      if (validationData == null) null else toJSample(validationData))
  }

  /**
   * Runs `model.infer` on tensors converted from the given `JTensor`s and returns the result
   * as a `JTensor`.
   */
  def seq2seqInfer(model: Seq2seq[T],
    input: JTensor,
    startSign: JTensor,
    maxSeqLen: Int = 30,
    stopSign: JTensor = null,
    buildOutput: KerasLayer[Tensor[T], Tensor[T], T]): JTensor = {
    val result =
      model.infer(toTensor(input), toTensor(startSign), maxSeqLen,
        toTensor(stopSign), buildOutput)
    toJTensor(result)
  }

  /** Returns the model's underlying `model` field cast to [[KerasNet]]. */
  def getModule(model: KerasZooModel[Activity, Activity, T]): KerasNet[T] = {
    model.model.asInstanceOf[KerasNet[T]]
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy