org.apache.spark.sql.prophecy.package.scala Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of prophecy-libs_2.12 Show documentation

Prophecy Spark Libraries

There is a newer version: 6.3.0-3.3.0

package org.apache.spark.sql

import org.apache.spark.annotation.Py4JWhitelist

import java.io.{BufferedReader, ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader}
import java.util.Base64
import java.util.zip.{GZIPInputStream, GZIPOutputStream}
import scala.collection.JavaConverters._

package object prophecy {
  /**
    * Gzip-compresses a string and returns the compressed bytes as Base64 text.
    *
    * The string is always encoded as UTF-8 before compression, so the output
    * does not depend on the JVM's default charset and can be decoded by
    * [[decompress]] on any machine.
    *
    * @param s text to compress
    * @return Base64 encoding (standard alphabet) of the gzip stream
    */
  def compress(s: String): String = {
    val baos = new ByteArrayOutputStream()
    val gz   = new GZIPOutputStream(baos)
    // close() writes the gzip trailer, so it has to run before baos is read;
    // the finally also guarantees the stream is closed if write() throws.
    try gz.write(s.getBytes(java.nio.charset.StandardCharsets.UTF_8))
    finally gz.close()
    Base64.getEncoder.encodeToString(baos.toByteArray)
  }

  /**
    * Reverses [[compress]]: Base64-decodes the input, gunzips it and decodes
    * the bytes as UTF-8 text.
    *
    * The text is read line by line and re-joined with "\n". As a consequence
    * "\r\n" / "\r" terminators come back as "\n" and a trailing line terminator
    * is dropped, so the round trip is exact only for text that already uses
    * "\n" separators and has no trailing newline.
    *
    * @param s Base64 text wrapping a gzip stream
    * @return the decompressed text with lines joined by "\n"
    * @throws IllegalArgumentException if `s` is not valid Base64
    * @throws java.io.IOException if the decoded bytes are not a valid gzip stream
    */
  def decompress(s: String): String = {
    val reader = new BufferedReader(
      new InputStreamReader(
        new GZIPInputStream(new ByteArrayInputStream(Base64.getDecoder.decode(s))),
        java.nio.charset.StandardCharsets.UTF_8
      )
    )
    // Closing the reader closes the wrapped GZIPInputStream as well.
    try reader.lines().iterator().asScala.mkString("\n")
    finally reader.close()
  }

  /**
    * Returns a fresh session derived from a Hive-enabled "Prophecy Pipeline"
    * SparkSession obtained through `SparkSession.builder().getOrCreate()`.
    *
    * NOTE(review): `existingSession` is not referenced anywhere in the body;
    * the result is always derived from the builder's `getOrCreate()` session.
    * Confirm with callers whether the parameter is kept only for signature
    * compatibility.
    *
    * @param existingSession currently unused
    * @return the result of calling `newSession()` on the builder's session
    */
  @Py4JWhitelist
  def createSparkSession(existingSession: SparkSession): SparkSession = {
    // Step 1: describe the session we want.
    val sessionBuilder =
      SparkSession
        .builder()
        .appName("Prophecy Pipeline")
        .config("spark.sql.legacy.allowUntypedScalaUDF", "true")
        .config("spark.databricks.acl.skipCheckingPlans", "org.apache.spark.sql.Interim")
        .enableHiveSupport()

    // Step 2: obtain the base session from the builder.
    val baseSession = sessionBuilder.getOrCreate()

    // Step 3: hand back a new session spawned from the base one.
    baseSession.newSession()
  }
}