All Downloads are FREE. Search and download functionalities are using the official Maven repository.

streaming.udf.JavaRuntimeCompileUDF.scala Maven / Gradle / Ivy

The newest version!
package streaming.udf

import org.apache.spark.sql.MLSQLUtils
import org.apache.spark.sql.types.DataType
import streaming.common.{Md5, SourceCodeCompiler}
import streaming.dsl.mmlib.algs.ScriptUDFCacheKey
import streaming.log.Logging

/**
 * Created by fchen on 2018/11/15.
 */
object JavaRuntimeCompileUDF extends RuntimeCompileUDF with Logging {
  /**
   * udf reture DataType
   * due to java type erasure, it's not good idea get function return type by `method.getReturnType`,
   * a batter idea is find return type form source code.
   */
  override def returnType(scriptCacheKey: ScriptUDFCacheKey): Option[DataType] = {
    val clazz = driverExecute(scriptCacheKey).asInstanceOf[Class[_]]
    val method = SourceCodeCompiler.getMethod(clazz, scriptCacheKey.methodName)
    Option(MLSQLUtils.getJavaDataType(method.getGenericReturnType)._1)
  }

  /**
   * reture udf input argument number
   */
  override def argumentNum(scriptCacheKey: ScriptUDFCacheKey): Int = {
    val clazz = driverExecute(scriptCacheKey).asInstanceOf[Class[_]]
    val method = SourceCodeCompiler.getMethod(clazz, scriptCacheKey.methodName)
    method.getParameterCount
  }

  /**
   * wrap original source code.
   * e.g. in [[ScalaRuntimCompileUDAF]], user pass function code, we should wrap code as a class.
   * so the runtime compiler will compile source code as runtime instance.
   */
  override def wrapCode(scriptCacheKey: ScriptUDFCacheKey): ScriptUDFCacheKey = {

    val className = if (scriptCacheKey.className == null || scriptCacheKey.className.isEmpty) {
      "UDF"
    } else {
      scriptCacheKey.className
    }
    val codeMd5 = Md5.md5Hash(scriptCacheKey.originalCode)
    val packageName = s"streaming.udf.java.sun${codeMd5}"
    val newfun =
      s"""
         |package ${packageName};
         |
         |${scriptCacheKey.originalCode}
         |
            """.stripMargin
    val fullClassName = packageName + "." + className

    scriptCacheKey.copy(wrappedCode = newfun, className = fullClassName)
  }

  override def invokeFunctionFromInstance(scriptCacheKey: ScriptUDFCacheKey)
  : (Seq[Object]) => AnyRef = {
    lazy val clz = driverExecute(scriptCacheKey).asInstanceOf[Class[_]]
    lazy val instance = SourceCodeCompiler.newInstance(clz)
    lazy val method = SourceCodeCompiler.getMethod(clz, scriptCacheKey.methodName)

    val func: (Seq[Object]) => AnyRef = {
      (args: Seq[Object]) => method.invoke(instance, args: _*)
    }
    func
  }

  /**
   * validate the source code
   */
  override def check(sourceCode: String): Boolean = {
    true
  }

  /**
   * how to compile the language source code with jvm.
   *
   * @param scriptCacheKey
   * @return
   */
  override def compile(scriptCacheKey: ScriptUDFCacheKey): AnyRef = {
    logInfo("compile java source code: \n" + scriptCacheKey.wrappedCode)
    SourceCodeCompiler.compileJava(scriptCacheKey.wrappedCode, scriptCacheKey.className)
  }

  override def lang: String = "java"
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy