All Downloads are FREE. Search and download functionalities are using the official Maven repository.

streaming.core.datasource.impl.MLSQLImage.scala Maven / Gradle / Ivy

The newest version!
package streaming.core.datasource.impl

import org.apache.spark.ml.param.Param
import org.apache.spark.sql.mlsql.session.MLSQLException
import org.apache.spark.sql.{DataFrameWriter, Row, SaveMode, functions => F}
import streaming.common.HDFSOperator
import streaming.core.datasource._
import streaming.dsl.ScriptSQLExec
import streaming.dsl.mmlib.algs.param.{BaseParams, WowParams}

/**
  * 2019-03-20 WilliamZhu([email protected])
  *
  * File-based data source registered under the short name `image`. Its
  * `save` implementation takes a DataFrame that holds a file-name column and
  * a binary column and hands every row to `HDFSOperator.saveBytesFile`, so
  * that each row ends up as one file below the resolved target directory.
  *
  * Required sink options:
  *  - `imageColumn`: name of the column holding the bytes to write
  *  - `fileName`: name of the column holding the target file name
  */
class MLSQLImage(override val uid: String) extends MLSQLBaseFileSource with WowParams {
  def this() = this(BaseParams.randomUID())

  /** Name of the DataFrame column that carries the image bytes (save only). */
  final val imageColumn: Param[String] = new Param[String](this, "imageColumn", "for save mode")

  /** Name of the DataFrame column that carries the target file name (save only). */
  final val fileName: Param[String] = new Param[String](this, "fileName", "for save mode")

  /**
    * Saves every row of `config.df` as a file below the path resolved from
    * `config.path`.
    *
    * Behaviour per save mode when the target directory already exists:
    *  - `Overwrite`: the directory is deleted first
    *  - `ErrorIfExists`: an [[MLSQLException]] is thrown
    *  - `Ignore`: nothing is written
    *  - otherwise: files are written into the existing directory
    *
    * @param writer unused by this implementation; rows are written through a UDF
    * @param config sink configuration (path, mode, options and the DataFrame)
    * @throws MLSQLException if a required option or the DataFrame is missing,
    *                        or the target exists under `ErrorIfExists`
    */
  override def save(writer: DataFrameWriter[Row], config: DataSinkConfig): Unit = {
    // Validate all inputs BEFORE touching storage: with Overwrite the existing
    // directory is deleted, and that must not happen for a request that is
    // going to be rejected anyway.
    val imageColumnName = requiredOption(config, imageColumn)
    val fileNameColumnName = requiredOption(config, fileName)
    val source = config.df.getOrElse {
      throw new MLSQLException("a DataFrame is required to save images")
    }

    val context = ScriptSQLExec.contextGetOrForTest()
    val baseDir = resourceRealPath(context.execListener, Option(context.owner), config.path)

    val targetExists = HDFSOperator.fileExists(baseDir)
    if (targetExists && config.mode == SaveMode.ErrorIfExists) {
      throw new MLSQLException(s"${baseDir} is exists")
    }

    // SaveMode.Ignore: existing data is left untouched and nothing is written.
    val skipWrite = targetExists && config.mode == SaveMode.Ignore
    if (!skipWrite) {
      if (targetExists && config.mode == SaveMode.Overwrite) {
        HDFSOperator.deleteDir(baseDir)
      }

      // Only local values are referenced inside the function, so the closure
      // shipped to executors does not need to capture this data source.
      val saveImage = (name: String, bytes: Array[Byte]) => {
        HDFSOperator.saveBytesFile(baseDir, name, bytes)
        baseDir + "/" + name
      }
      val saveImageUdf = F.udf(saveImage)

      // Force evaluation through the RDD. A plain Dataset.count() does not
      // reference the UDF's output column, which leaves the optimizer free to
      // prune the projection and skip the UDF (and therefore the writes).
      source
        .select(saveImageUdf(F.col(fileNameColumnName), F.col(imageColumnName)))
        .rdd
        .count()
    }
  }

  override def fullFormat: String = "streaming.dsl.mmlib.algs.processing.image"

  override def shortFormat: String = "image"

  /**
    * Reads a mandatory sink option, records it on the matching param (as the
    * previous implementation did) and returns it.
    *
    * @throws MLSQLException if the option is absent from `config.config`
    */
  private def requiredOption(config: DataSinkConfig, param: Param[String]): String = {
    val value = config.config.getOrElse(
      param.name,
      throw new MLSQLException(s"${param.name} is required")
    )
    set(param, value)
    value
  }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy