All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.sourced.gitbase.spark.udf.Language.scala Maven / Gradle / Ivy

The newest version!
package tech.sourced.gitbase.spark.udf

import org.apache.spark.internal.Logging
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions.udf
import scala.util.control.NonFatal
import tech.sourced.enry.Enry

/**
  * Custom UDF named `language` that guesses the language a file is written in
  * by delegating to `Enry.getLanguage`.
  */
object Language extends CustomUDF with Logging {
  /** Name of the function. */
  override val name: String = "language"

  /** Function to execute when this function is called; wraps [[get]]. */
  override val function: UserDefinedFunction = udf(get _)

  /**
    * Get the language a file is written in for the given path and content.
    *
    * Detection is best-effort: any non-fatal failure raised while detecting
    * the language is logged and reported as `None` instead of being rethrown.
    * Fatal throwables (as classified by `scala.util.control.NonFatal`, e.g.
    * `InterruptedException` or `VirtualMachineError`) are left to propagate.
    *
    * @param path file's path
    * @param content file's content
    * @return `None` if no language could be guessed (empty or `null` result,
    *         or a non-fatal failure), `Some(language)` otherwise.
    */
  def get(path: String, content: Array[Byte]): Option[String] =
    try {
      // Option(...) turns a null result into None, so a missing answer is not
      // reported as a NullPointerException; an empty string also means
      // "no language guessed".
      Option(Enry.getLanguage(path, content)).filter(_.nonEmpty)
    } catch {
      // NonFatal instead of a blanket Exception match: interruption and JVM
      // errors must not be swallowed by a best-effort lookup.
      case NonFatal(e) =>
        log.error(s"get language for file $path failed", e)
        None
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy