All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.sourced.gitbase.spark.udf.UastExtract.scala Maven / Gradle / Ivy

The newest version!
package tech.sourced.gitbase.spark.udf

import gopkg.in.bblfsh.sdk.v1.uast.generated.Position
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions.udf
import com.fasterxml.jackson.databind.ObjectMapper

import scala.collection.JavaConverters.asJavaIterableConverter

/**
  * Custom UDF `uast_extract`: pulls one kind of information out of every node
  * in a marshaled list of UAST nodes and returns it as a serialized list of
  * strings.
  */
object UastExtract extends CustomUDF {
  // Jackson mapper used to serialize the extracted strings.
  private val mapper = new ObjectMapper()

  /** Name of the function. */
  override def name: String = "uast_extract"

  /** Function to execute when this function is called. */
  override def function: UserDefinedFunction = udf(extract _)

  /**
    * Extracts the values selected by `key` from each of the given nodes.
    *
    * Supported keys:
    *  - `@type`: the node's internal type
    *  - `@token`: the node's token
    *  - `@role`: every role of the node, rendered with `toString`
    *  - `@startpos`: the node's start position in proto text form
    *  - `@endpos`: the node's end position in proto text form
    *
    * Any other key contributes nothing. Empty strings are dropped from the
    * result.
    *
    * @param marshaledNodes nodes as accepted by `BblfshUtils.unmarshalNodes`
    * @param key            selector naming what to extract from every node
    * @return `None` when `key` or `marshaledNodes` is null or empty; otherwise
    *         the extracted strings written out by the Jackson `ObjectMapper`
    *         (a possibly empty list)
    */
  def extract(marshaledNodes: Array[Byte], key: String): Option[Array[Byte]] = {
    // Spark may hand over nulls for either column, hence the Option wrapping.
    if (Option(key).getOrElse("") == "" ||
      Option(marshaledNodes).getOrElse(Array.emptyByteArray).length == 0) {
      None
    } else {
      // Nodes that cannot be unmarshaled are treated as an empty node list.
      val nodes = BblfshUtils.unmarshalNodes(marshaledNodes).getOrElse(Seq.empty)
      val stringSeq = nodes.flatMap(node => {
        key match {
          case "@type" => Seq(node.internalType)
          case "@token" => Seq(node.token)
          case "@role" => node.roles.map(_.toString)
          // A missing position falls back to a default Position.
          // NOTE(review): a default Position presumably renders as empty text
          // and is then removed by the nonEmpty filter below - confirm.
          case "@startpos" => Seq(node.startPosition.getOrElse(Position()).toProtoString)
          // Must read endPosition; this branch previously returned the start
          // position by mistake.
          case "@endpos" => Seq(node.endPosition.getOrElse(Position()).toProtoString)
          case _ => Seq.empty
        }
      }).filter(_.nonEmpty)

      Some(mapper.writeValueAsBytes(stringSeq.asJava))
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy