All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.tuplejump.calliope.sql.CassandraAwareSQLContextFunctions.scala Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to Tuplejump Software Pvt. Ltd. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Tuplejump Software Pvt. Ltd. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package com.tuplejump.calliope.sql

import org.apache.spark.sql.{CassandraRelation, SchemaRDD, SQLContext}
import scala.collection.JavaConversions._


/**
 * Cassandra helpers intended to be mixed into a [[SQLContext]].
 *
 * Provides a family of `cassandraTable` overloads that wrap a Cassandra table in a
 * [[SchemaRDD]], plus `allCassandraTables`, which registers the tables of the
 * non-system keyspaces as temporary tables.
 *
 * Connection defaults (host, ports, credentials, auto-load flag) are read from
 * `cassandraProperties`, which is built from this context's `sparkContext`.
 *
 * Note: the last statement of this trait runs during initialization of the mixing
 * class and calls `allCassandraTables()` when `loadCassandraTables` is set.
 */
trait CassandraAwareSQLContextFunctions {
  self: SQLContext =>

  /** Cassandra connection settings derived from this context's SparkContext. */
  val cassandraProperties = CassandraProperties(sparkContext)

  import cassandraProperties._

  /**
   * Create a SchemaRDD for the given Cassandra table using the configured host and
   * native port. Stargate is not used for predicates.
   *
   * @param keyspace Keyspace to connect to
   * @param table Table to connect to
   * @return SchemaRDD backed by the Cassandra table
   */
  def cassandraTable(keyspace: String, table: String): SchemaRDD =
    cassandraTable(cassandraHost, cassandraNativePort, keyspace, table, mayUseStargate = false)

  /**
   * Create a SchemaRDD for the given Cassandra table using the configured host and
   * native port.
   *
   * @param keyspace Keyspace to connect to
   * @param table Table to connect to
   * @param mayUseStargate Should this SchemaRDD use Stargate for applying predicates
   * @return SchemaRDD backed by the Cassandra table
   */
  def cassandraTable(keyspace: String, table: String, mayUseStargate: Boolean): SchemaRDD =
    cassandraTable(cassandraHost, cassandraNativePort, keyspace, table, mayUseStargate)

  /**
   * Create a SchemaRDD for the given Cassandra table using the configured credentials.
   *
   * @param host Initial node in the Cassandra cluster to connect to
   * @param port The Cassandra native transport port
   * @param keyspace Keyspace to connect to
   * @param table Table to connect to
   * @param mayUseStargate Should this SchemaRDD use Stargate for applying predicates
   * @return SchemaRDD backed by the Cassandra table
   */
  def cassandraTable(host: String, port: String, keyspace: String, table: String,
                     mayUseStargate: Boolean): SchemaRDD =
    cassandraTable(host, port, keyspace, table, cassandraUsername, cassandraPassword, mayUseStargate)

  /**
   * Create a SchemaRDD for the given Cassandra table with explicit credentials.
   *
   * A `null` username or password is treated as "not provided" (`None`) rather than
   * being wrapped as `Some(null)`.
   *
   * @param host Initial node in the Cassandra cluster to connect to
   * @param port The Cassandra native transport port
   * @param keyspace Keyspace to connect to
   * @param table Table to connect to
   * @param username Username of the user with access to the Cassandra cluster
   * @param password Password of the user to connect to Cassandra
   * @param mayUseStargate Should this SchemaRDD use Stargate for applying predicates
   * @return SchemaRDD backed by the Cassandra table
   */
  def cassandraTable(host: String, port: String, keyspace: String, table: String,
                     username: String, password: String,
                     mayUseStargate: Boolean): SchemaRDD =
    // Option(...) maps null to None, so a missing credential never becomes Some(null).
    cassandraTable(host, port, keyspace, table, Option(username), Option(password), mayUseStargate)

  /**
   * Create a SchemaRDD for the given Cassandra table with optional credentials.
   * All other `cassandraTable` overloads end up here.
   *
   * @param host Initial node in the Cassandra cluster to connect to
   * @param port The Cassandra native transport port
   * @param keyspace Keyspace to connect to
   * @param table Table to connect to
   * @param username Optional username of the user with access to the Cassandra cluster
   * @param password Optional password of the user to connect to Cassandra
   * @param mayUseStargate Should this SchemaRDD use Stargate for applying predicates
   * @return SchemaRDD backed by a CassandraRelation for the table
   */
  def cassandraTable(host: String, port: String, keyspace: String, table: String,
                     username: Option[String], password: Option[String],
                     mayUseStargate: Boolean): SchemaRDD = {
    // The configured RPC (Thrift) port is passed along, but it is not used in this case.
    val relation = CassandraRelation(
      host,
      port,
      cassandraRpcPort,
      keyspace,
      table,
      self,
      username,
      password,
      mayUseStargate,
      Some(sparkContext.hadoopConfiguration))

    new SchemaRDD(self, relation)
  }

  /**
   * Register the tables of every Cassandra keyspace whose name does not start with
   * "system" as temporary tables named `keyspace.table`.
   *
   * Any existing registration under the same name is removed first, so the method
   * can be called repeatedly to refresh the catalog.
   *
   * @param host Host to initiate connection with
   * @param port Native Cassandra transport port
   * @param username Username of the user with access to the Cassandra cluster
   * @param password Password of the user to connect to Cassandra
   * @param mayUseStargate Should we be using the Stargate index for data filtering
   */
  def allCassandraTables(host: String = cassandraHost, port: String = cassandraNativePort,
                         username: Option[String] = cassandraUsername, password: Option[String] = cassandraPassword,
                         mayUseStargate: Boolean = false): Unit = {

    val meta = CassandraSchemaHelper.getCassandraMetadata(host, port, username, password)

    // NOTE: the prefix check skips Cassandra's own keyspaces, but it also skips any
    // user keyspace whose name happens to begin with "system".
    for {
      keyspace <- meta.getKeyspaces
      if !keyspace.getName.startsWith("system")
      table <- keyspace.getTables
    } {
      val ksName: String = keyspace.getName
      val tableName: String = table.getName
      val qualifiedName = s"$ksName.$tableName"
      val casRdd = cassandraTable(host, port, ksName, tableName, username, password, mayUseStargate)

      // Drop any stale registration before registering the fresh SchemaRDD.
      self.catalog.unregisterTable(None, qualifiedName)
      casRdd.registerTempTable(qualifiedName)

      logInfo(s"Registered C* table: $qualifiedName")
    }
  }

  // Runs as part of trait initialization: eagerly register all tables when configured to.
  if (loadCassandraTables) {
    allCassandraTables()
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy