// Source: com.datastax.spark.connector.japi.SparkContextJavaFunctions
// Artifact: spark-cassandra-connector_2.11
// A library that exposes YugaByte DB tables as Spark RDDs, writes Spark RDDs to YugaByte DB
// tables, and executes CQL queries in Spark applications using YugaByte DB's
// Cassandra-compatible API.
package com.datastax.spark.connector.japi;
import scala.Tuple2;
import org.apache.spark.SparkContext;
import com.datastax.spark.connector.japi.rdd.CassandraJavaPairRDD;
import com.datastax.spark.connector.japi.rdd.CassandraJavaRDD;
import com.datastax.spark.connector.japi.rdd.CassandraTableScanJavaRDD;
import com.datastax.spark.connector.rdd.CassandraRDD;
import com.datastax.spark.connector.rdd.CassandraTableScanRDD;
import com.datastax.spark.connector.rdd.CassandraTableScanRDD$;
import com.datastax.spark.connector.rdd.reader.RowReaderFactory;
import static com.datastax.spark.connector.util.JavaApiHelper.getClassTag;
/**
* Java API wrapper over {@link org.apache.spark.SparkContext} to provide Spark Cassandra Connector functionality.
*
* To obtain an instance of this wrapper, use one of the factory methods in {@link
* com.datastax.spark.connector.japi.CassandraJavaUtil} class.
*/
@SuppressWarnings("UnusedDeclaration")
public class SparkContextJavaFunctions {
public final SparkContext sparkContext;
SparkContextJavaFunctions(SparkContext sparkContext) {
this.sparkContext = sparkContext;
}
/**
* Converts {@code CassandraRDD} into {@code CassandraJavaRDD}.
*/
public CassandraJavaRDD toJavaRDD(CassandraRDD rdd, Class targetClass) {
return new CassandraJavaRDD<>(rdd, getClassTag(targetClass));
}
/**
* Converts {@code CassandraRDD} of {@code Tuple2} into {@code CassandraJavaPairRDD}.
*/
public CassandraJavaPairRDD toJavaPairRDD(CassandraRDD> rdd, Class keyClass, Class valueClass) {
return new CassandraJavaPairRDD<>(rdd, getClassTag(keyClass), getClassTag(valueClass));
}
/**
* Returns a view of a Cassandra table as a {@link com.datastax.spark.connector.japi.rdd.CassandraJavaRDD}.
*
* With this method, each row is converted to a {@code CassandraRow} object.
*
* Example:
*
* CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
* CREATE TABLE test.words (word text PRIMARY KEY, count int);
* INSERT INTO test.words (word, count) VALUES ('foo', 20);
* INSERT INTO test.words (word, count) VALUES ('bar', 20);
* ...
*
* // Obtaining RDD of CassandraRow objects:
* CassandraJavaRDD rdd = CassandraJavaUtil.javaFunctions(jsc).cassandraTable("test", "words");
* rdd.first().getString("word"); // foo
* rdd.first().getInt("count"); // 20
*
*
*
* @since 1.0.0
*/
public CassandraTableScanJavaRDD cassandraTable(String keyspace, String table) {
RowReaderFactory rtf = GenericJavaRowReaderFactory.instance;
return cassandraTable(keyspace, table, rtf);
}
/**
* Returns a view of a Cassandra table as a {@link com.datastax.spark.connector.japi.rdd.CassandraJavaRDD}.
*
* With this method, each row is converted to a object of type {@code T} by a specified row reader factory
* {@code rrf}. Row reader factories can be easily obtained with one of utility methods in {@link
* com.datastax.spark.connector.japi.CassandraJavaUtil}.
*
* @param keyspace the name of the keyspace which contains the accessed table
* @param table the accessed Cassandra table name
* @param rrf a row reader factory to convert rows into target values
* @param target value type
*
* @return {@link com.datastax.spark.connector.japi.rdd.CassandraJavaRDD} of type {@code T}
*
* @since 1.1.0
*/
public CassandraTableScanJavaRDD cassandraTable(
String keyspace,
String table,
RowReaderFactory rrf) {
CassandraTableScanRDD rdd = CassandraTableScanRDD$.MODULE$
.apply(sparkContext, keyspace, table, getClassTag(rrf.targetClass()), rrf);
return new CassandraTableScanJavaRDD<>(rdd, getClassTag(rrf.targetClass()));
}
}