// org/apache/spark/sql/prophecy/package.scala
package org.apache.spark.sql

import org.apache.spark.annotation.Py4JWhitelist

import java.io.{BufferedReader, ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader}
import java.nio.charset.StandardCharsets
import java.util.Base64
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

import scala.collection.JavaConverters._
package object prophecy {

  /** Gzip-compresses `s` and returns the result as a Base64 string. */
  def compress(s: String): String = {
    val baos = new ByteArrayOutputStream()
    val gz   = new GZIPOutputStream(baos)
    gz.write(s.getBytes(StandardCharsets.UTF_8)) // pin UTF-8; the platform default charset is not portable
    gz.close()
    Base64.getEncoder.encodeToString(baos.toByteArray)
  }

  /**
   * Reverses [[compress]]: Base64-decodes, then gunzips. Lines are re-joined
   * with "\n", so line endings are normalized and a trailing newline is dropped.
   */
  def decompress(s: String): String =
    new BufferedReader(
      new InputStreamReader(
        new GZIPInputStream(new ByteArrayInputStream(Base64.getDecoder.decode(s))),
        StandardCharsets.UTF_8
      )
    ).lines().iterator().asScala.mkString("\n")
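
  // A minimal round-trip sketch (hypothetical usage; the values below are
  // illustrative, not part of this file):
  //   val payload  = """{"pipeline": "demo"}"""
  //   val packed   = compress(payload)   // gzip + Base64, safe to embed in configs or JSON
  //   assert(decompress(packed) == payload)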
  /**
   * Builds a fresh, isolated [[SparkSession]] for a Prophecy pipeline run.
   * Note that `getOrCreate()` reuses any active session, so `existingSession`
   * is part of the Py4J-facing signature but is not read directly here.
   */
  @Py4JWhitelist
  def createSparkSession(existingSession: SparkSession): SparkSession = {
    SparkSession
      .builder()
      .appName("Prophecy Pipeline")
      .config("spark.sql.legacy.allowUntypedScalaUDF", "true")
      .config("spark.databricks.acl.skipCheckingPlans", "org.apache.spark.sql.Interim")
      .enableHiveSupport()
      .getOrCreate()
      .newSession() // isolated SQL conf and temp views; the underlying SparkContext is shared
  }
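
  // Hedged usage sketch (assumes an active session already exists;
  // `SparkSession.active` is standard Spark API, the rest is illustrative):
  //   val session = prophecy.createSparkSession(SparkSession.active)
  //   session.conf.set("spark.sql.shuffle.partitions", "8") // affects only this new session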
}