All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.simiacryptus.sparkbook.repl.SparkSessionProvider.scala Maven / Gradle / Ivy

/*
 * Copyright (c) 2019 by Andrew Charneski.
 *
 * The author licenses this file to you under the
 * Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance
 * with the License.  You may obtain a copy
 * of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.simiacryptus.sparkbook.repl

import com.esotericsoftware.kryo.Kryo
import com.simiacryptus.sparkbook.util.Logging
import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.sql.SparkSession

import scala.concurrent.duration.FiniteDuration

trait SparkSessionProvider extends Logging {
  @transient lazy val spark: SparkSession = SparkSession.getActiveSession.getOrElse({
    val session = SparkSession.builder().getOrCreate()
    SparkSession.setActiveSession(session)
    session
  })

  def await(duration: FiniteDuration)(test: => Boolean): Unit = {
    def epoch = System.currentTimeMillis()

    val timeoutEpoch = epoch + duration.toMillis
    while (test && epoch < timeoutEpoch) Thread.sleep(1000)
  }

  def workerMemory: String = Option(System.getenv("SPARK_WORKER_MEMORY")).getOrElse("60g")

  def hiveRoot: Option[String] = Option(s3bucket).map(bucket => s"s3a://$bucket/data/")

  protected def s3bucket: String = null

  def sc = spark.sparkContext

}

class _KryoRegistrator extends KryoRegistrator {

  override def registerClasses(kryo: Kryo) {

    kryo.register(classOf[(_, _)])
    kryo.register(classOf[::[_]])
    kryo.register(classOf[Array[Double]])
    kryo.register(classOf[Array[Float]])
    kryo.register(classOf[Array[Int]])
    kryo.register(classOf[Array[String]])
    kryo.register(classOf[Array[scala.collection.Seq[Any]]])
    kryo.register(classOf[Array[scala.Tuple2[Any, Any]]])
    kryo.register(classOf[Array[scala.Tuple3[Any, Any, Any]]])
    kryo.register(classOf[java.lang.Class[Any]])
    kryo.register(classOf[java.util.TreeMap[Any, Any]])
    kryo.register(classOf[java.util.HashMap[Any, Any]])
    kryo.register(classOf[java.util.HashSet[Any]])
    kryo.register(classOf[java.util.LinkedHashMap[Any, Any]])
    kryo.register(classOf[java.util.LinkedHashSet[Any]])
    kryo.register(classOf[org.apache.hadoop.io.BytesWritable])
    kryo.register(classOf[Array[org.apache.hadoop.io.BytesWritable]])
    kryo.register(classOf[org.codehaus.jackson.node.BooleanNode])
    kryo.register(classOf[org.codehaus.jackson.node.IntNode])
    kryo.register(classOf[scala.collection.mutable.WrappedArray.ofInt])
    kryo.register(classOf[scala.collection.mutable.WrappedArray.ofRef[_]])
    kryo.register(classOf[scala.collection.Seq[Any]])
    kryo.register(classOf[scala.Tuple3[Any, Any, Any]])
    kryo.register(scala.collection.immutable.Nil.getClass)
    kryo.register(scala.math.Ordering.Double.getClass)
    kryo.register(scala.math.Ordering.Float.getClass)
    kryo.register(scala.math.Ordering.Int.getClass)
    kryo.register(scala.math.Ordering.Long.getClass)
    kryo.register(scala.None.getClass)
    kryo.register(classOf[Array[org.apache.spark.sql.types.StructField]])
    kryo.register(classOf[Array[org.apache.spark.sql.types.StructType]])
    kryo.register(classOf[Array[org.apache.spark.sql.types.StringType]])
    kryo.register(classOf[Array[org.apache.spark.sql.types.IntegerType]])
    kryo.register(classOf[Array[org.apache.spark.sql.types.DoubleType]])
    kryo.register(classOf[Array[org.apache.spark.sql.types.ArrayType]])
    kryo.register(classOf[org.apache.spark.sql.types.StructField])
    kryo.register(classOf[org.apache.spark.sql.types.StructType])
    kryo.register(classOf[org.apache.spark.sql.types.StringType])
    kryo.register(classOf[org.apache.spark.sql.types.IntegerType])
    kryo.register(classOf[org.apache.spark.sql.types.DoubleType])
    kryo.register(classOf[org.apache.spark.sql.types.ArrayType])
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy