package org.apache.spark.sql

import scala.language.implicitConversions

import com.datastax.spark.connector.util.{ConfigParameter, DeprecatedConfigParameter}
import org.apache.spark.sql.streaming.DataStreamWriter

package object cassandra {

  /** The DataFrame format used to access Cassandra through the Connector. */
  val CassandraFormat = "org.apache.spark.sql.cassandra"

  /** Returns a map of options that configures the path to a Cassandra table
    * as well as whether predicate pushdown is enabled. */
  def cassandraOptions(
      table: String,
      keyspace: String,
      cluster: String = CassandraSourceRelation.defaultClusterName,
      pushdownEnable: Boolean = true): Map[String, String] =
    Map(
      DefaultSource.CassandraDataSourceClusterNameProperty -> cluster,
      DefaultSource.CassandraDataSourceKeyspaceNameProperty -> keyspace,
      DefaultSource.CassandraDataSourceTableNameProperty -> table,
      DefaultSource.CassandraDataSourcePushdownEnableProperty -> pushdownEnable.toString)
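
  // A minimal usage sketch for these options (hypothetical "test" keyspace and "kv" table;
  // assumes a SparkSession named `spark` already configured to reach Cassandra):
  //   val df = spark.read
  //     .format(CassandraFormat)
  //     .options(cassandraOptions(table = "kv", keyspace = "test"))
  //     .load()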

  implicit class DataFrameReaderWrapper(val dfReader: DataFrameReader) extends AnyVal {

    /** Sets the format used to access Cassandra through the Connector. */
    def cassandraFormat: DataFrameReader = {
      dfReader.format(CassandraFormat)
    }

    /** Sets the format used to access Cassandra through the Connector and configures the path to a Cassandra table. */
    def cassandraFormat(
        table: String,
        keyspace: String,
        cluster: String = CassandraSourceRelation.defaultClusterName,
        pushdownEnable: Boolean = true): DataFrameReader = {
      cassandraFormat.options(cassandraOptions(table, keyspace, cluster, pushdownEnable))
    }
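
    // Usage sketch (hypothetical keyspace/table names; assumes an import of
    // org.apache.spark.sql.cassandra._ and a configured SparkSession `spark`):
    //   val words = spark.read.cassandraFormat("words", "test").load()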
  }

  implicit class DataFrameWriterWrapper[T](val dfWriter: DataFrameWriter[T]) extends AnyVal {

    /** Sets the format used to access Cassandra through the Connector. */
    def cassandraFormat: DataFrameWriter[T] = {
      dfWriter.format(CassandraFormat)
    }

    /** Sets the format used to access Cassandra through the Connector and configures the path to a Cassandra table. */
    def cassandraFormat(
        table: String,
        keyspace: String,
        cluster: String = CassandraSourceRelation.defaultClusterName,
        pushdownEnable: Boolean = true): DataFrameWriter[T] = {
      cassandraFormat.options(cassandraOptions(table, keyspace, cluster, pushdownEnable))
    }
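
    // Usage sketch (hypothetical keyspace/table names; assumes the table already exists):
    //   df.write.cassandraFormat("words", "test").mode("append").save()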

    // Reads the private "source" field of DataFrameWriter via reflection to find out
    // which format this writer was configured with.
    private def getSource(): String = {
      val dfSourceField = classOf[DataFrameWriter[_]].getDeclaredField("source")
      dfSourceField.setAccessible(true)
      dfSourceField.get(dfWriter).asInstanceOf[String]
    }

    /** Writes every row with the given constant TTL (in seconds). */
    def withTTL(constant: Int): DataFrameWriter[T] = {
      withTTL(constant.toString)
    }

    /** Writes each row with the TTL taken from the given column. */
    def withTTL(column: String): DataFrameWriter[T] = {
      val source: String = getSource()
      if (source != CassandraFormat) throw new IllegalArgumentException(
        s"Write destination must be $CassandraFormat for setting TTL. Destination was $source")
      dfWriter.option(CassandraSourceRelation.TTLParam.name, column)
    }

    /** Writes every row with the given constant write timestamp. */
    def withWriteTime(constant: Long): DataFrameWriter[T] = {
      withWriteTime(constant.toString)
    }

    /** Writes each row with the write timestamp taken from the given column. */
    def withWriteTime(column: String): DataFrameWriter[T] = {
      val source: String = getSource()
      if (source != CassandraFormat) throw new IllegalArgumentException(
        s"Write destination must be $CassandraFormat for setting WriteTime. Destination was $source")
      dfWriter.option(CassandraSourceRelation.WriteTimeParam.name, column)
    }
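
    // Usage sketch for per-row TTL / write time (hypothetical column names "ttl_col" and
    // "writetime_col" that must exist in the DataFrame being written):
    //   df.write.cassandraFormat("words", "test").withTTL("ttl_col").save()
    //   df.write.cassandraFormat("words", "test").withWriteTime("writetime_col").save()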
  }

  implicit class DataStreamWriterWrapper[T](val dsWriter: DataStreamWriter[T]) extends AnyVal {

    /** Sets the format used to access Cassandra through the Connector. */
    def cassandraFormat: DataStreamWriter[T] = {
      dsWriter.format(CassandraFormat)
    }

    /** Sets the format used to access Cassandra through the Connector and configures the path to a Cassandra table. */
    def cassandraFormat(
        table: String,
        keyspace: String,
        cluster: String = CassandraSourceRelation.defaultClusterName,
        pushdownEnable: Boolean = true): DataStreamWriter[T] = {
      cassandraFormat.options(cassandraOptions(table, keyspace, cluster, pushdownEnable))
    }
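
    // Usage sketch for a streaming write (hypothetical keyspace/table and checkpoint path;
    // assumes `streamingDf` is a streaming Dataset):
    //   streamingDf.writeStream
    //     .cassandraFormat("words", "test")
    //     .option("checkpointLocation", "/tmp/checkpoint")
    //     .start()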

    // Reads the private "source" field of DataStreamWriter via reflection to find out
    // which format this writer was configured with.
    private def getSource(): String = {
      val dfSourceField = classOf[DataStreamWriter[_]].getDeclaredField("source")
      dfSourceField.setAccessible(true)
      dfSourceField.get(dsWriter).asInstanceOf[String]
    }

    /** Writes every row with the given constant TTL (in seconds). */
    def withTTL(constant: Int): DataStreamWriter[T] = {
      withTTL(constant.toString)
    }

    /** Writes each row with the TTL taken from the given column. */
    def withTTL(column: String): DataStreamWriter[T] = {
      val source = getSource()
      if (source != CassandraFormat) throw new IllegalArgumentException(
        s"Write destination must be $CassandraFormat for setting TTL. Destination was $source")
      dsWriter.option(CassandraSourceRelation.TTLParam.name, column)
    }

    /** Writes every row with the given constant write timestamp. */
    def withWriteTime(constant: Long): DataStreamWriter[T] = {
      withWriteTime(constant.toString)
    }

    /** Writes each row with the write timestamp taken from the given column. */
    def withWriteTime(column: String): DataStreamWriter[T] = {
      val source = getSource()
      if (source != CassandraFormat) throw new IllegalArgumentException(
        s"Write destination must be $CassandraFormat for setting WriteTime. Destination was $source")
      dsWriter.option(CassandraSourceRelation.WriteTimeParam.name, column)
    }
  }

  @deprecated("Use SparkSession instead of SQLContext", "2.0.0")
  implicit class CassandraSQLContextFunctions(val sqlContext: SQLContext) extends AnyVal {

    import org.apache.spark.sql.cassandra.CassandraSQLContextParams._

    /** Sets the name of the cluster currently in use. */
    @deprecated("Use SparkSession instead of SQLContext", "2.0.0")
    def setCluster(cluster: String): SQLContext = {
      sqlContext.setConf(SqlClusterParam.name, cluster)
      sqlContext
    }

    /** Gets the name of the cluster currently in use. */
    @deprecated("Use SparkSession instead of SQLContext", "2.0.0")
    def getCluster: String = sqlContext.getConf(SqlClusterParam.name, SqlClusterParam.default)

    /** Sets the Spark Cassandra Connector configuration parameters. */
    @deprecated("Use SparkSession instead of SQLContext", "2.0.0")
    def setCassandraConf(options: Map[String, String]): SQLContext = {
      //noinspection ScalaDeprecation
      setCassandraConf(SqlClusterParam.default, options)
      sqlContext
    }

    /** Sets the Spark Cassandra Connector configuration parameters which will be used when
      * accessing a given cluster. */
    @deprecated("Use SparkSession instead of SQLContext", "2.0.0")
    def setCassandraConf(
        cluster: String,
        options: Map[String, String]): SQLContext = {
      checkOptions(options)
      for ((k, v) <- options) sqlContext.setConf(s"$cluster/$k", v)
      sqlContext
    }

    /** Sets the Spark Cassandra Connector configuration parameters which will be used when
      * accessing a given keyspace in a given cluster. */
    @deprecated("Use SparkSession instead of SQLContext", "2.0.0")
    def setCassandraConf(
        cluster: String,
        keyspace: String,
        options: Map[String, String]): SQLContext = {
      checkOptions(options)
      for ((k, v) <- options) sqlContext.setConf(s"$cluster:$keyspace/$k", v)
      sqlContext
    }
  }

  /** Returns a [[Column]] holding the Cassandra TTL of the given column. */
  def ttl(column: Column): Column = {
    Column(CassandraTTL(column.expr))
  }

  /** Returns a [[Column]] holding the Cassandra TTL of the named column. */
  def ttl(column: String): Column = {
    ttl(Column(column))
  }

  /** Returns a [[Column]] holding the Cassandra write time of the given column. */
  def writeTime(column: Column): Column = {
    Column(CassandraWriteTime(column.expr))
  }

  /** Returns a [[Column]] holding the Cassandra write time of the named column. */
  def writeTime(column: String): Column = {
    writeTime(Column(column))
  }
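
  // Usage sketch (hypothetical "value" column in a Cassandra-backed DataFrame `df`):
  //   df.select(ttl("value"), writeTime("value")).show()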

  implicit class CassandraSparkSessionFunctions(val sparkSession: SparkSession) extends AnyVal {

    import org.apache.spark.sql.cassandra.CassandraSQLContextParams._

    /** Sets the name of the cluster currently in use. */
    def setCluster(cluster: String): SparkSession = {
      sparkSession.conf.set(SqlClusterParam.name, cluster)
      sparkSession
    }

    /** Gets the name of the cluster currently in use. */
    def getCluster: String = sparkSession.conf.get(SqlClusterParam.name, SqlClusterParam.default)

    /** Sets the Spark Cassandra Connector configuration parameters. */
    def setCassandraConf(options: Map[String, String]): SparkSession = {
      setCassandraConf(SqlClusterParam.default, options)
      sparkSession
    }

    /** Sets the Spark Cassandra Connector configuration parameters which will be used when
      * accessing a given cluster. */
    def setCassandraConf(
        cluster: String,
        options: Map[String, String]): SparkSession = {
      checkOptions(options)
      for ((k, v) <- options) sparkSession.conf.set(s"$cluster/$k", v)
      sparkSession
    }

    /** Sets the Spark Cassandra Connector configuration parameters which will be used when
      * accessing a given keyspace in a given cluster. */
    def setCassandraConf(
        cluster: String,
        keyspace: String,
        options: Map[String, String]): SparkSession = {
      checkOptions(options)
      for ((k, v) <- options) sparkSession.conf.set(s"$cluster:$keyspace/$k", v)
      sparkSession
    }
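
    // Usage sketch (hypothetical cluster name and host; the key must be a valid Connector
    // parameter such as spark.cassandra.connection.host, or checkOptions will reject it):
    //   spark.setCassandraConf("ClusterOne", Map("spark.cassandra.connection.host" -> "127.0.0.1"))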
  }

  object CassandraSQLContextParams {
    // Should we use the more general notion of a database rather than the Cassandra-specific
    // keyspace? Tables from other sources have no concept of a keyspace, so we should make an
    // effort to make CassandraSQLContext behave more like a database when joining with tables
    // from other sources. A keyspace is the equivalent of a database in the SQL world.
    val ReferenceSection = "Cassandra SQL Context Options"

    val SqlClusterParam = ConfigParameter[String](
      name = "spark.cassandra.sql.cluster",
      section = ReferenceSection,
      default = "default",
      description = "Sets the default Cluster to inherit configuration from")

    private[cassandra] def checkOptions(options: Map[String, String]): Unit = {
      val AllValidOptions = DeprecatedConfigParameter.names ++ ConfigParameter.names
      options.keySet.foreach { name =>
        require(AllValidOptions.contains(name),
          s"Unrelated parameter. You can only set the following parameters: ${AllValidOptions.mkString(", ")}")
      }
    }
  }
}