// com.datastax.spark.connector.japi.RDDJavaFunctions — Maven / Gradle / Ivy artifact page header
package com.datastax.spark.connector.japi;
import com.datastax.spark.connector.rdd.reader.RowReader;
import com.datastax.spark.connector.writer.RowWriter;
import scala.Option;
import scala.Tuple2;
import scala.reflect.ClassTag;
import static com.datastax.spark.connector.japi.CassandraJavaUtil.classTag;
import static com.datastax.spark.connector.util.JavaApiHelper.toScalaFunction1;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.rdd.RDD;
import com.datastax.spark.connector.ColumnSelector;
import com.datastax.spark.connector.RDDFunctions;
import com.datastax.spark.connector.cql.CassandraConnector;
import com.datastax.spark.connector.cql.CassandraConnector$;
import com.datastax.spark.connector.japi.rdd.CassandraJavaPairRDD;
import com.datastax.spark.connector.rdd.*;
import com.datastax.spark.connector.rdd.partitioner.CassandraPartitionedRDD;
import com.datastax.spark.connector.rdd.reader.RowReaderFactory;
import com.datastax.spark.connector.util.JavaApiHelper;
import com.datastax.spark.connector.writer.RowWriterFactory;
import com.datastax.spark.connector.writer.WriteConf;
/**
* A Java API wrapper over {@link RDD} to provide Spark Cassandra Connector functionality.
*
* To obtain an instance of this wrapper, use one of the factory methods in {@link CassandraJavaUtil}
* class.
*/
public class RDDJavaFunctions extends RDDAndDStreamCommonJavaFunctions {
public final RDD rdd;
public final RDDFunctions rddFunctions;
/**
 * Wraps the given RDD so that Cassandra-specific operations become available on it.
 *
 * @param targetRDD the RDD to enrich with connector functionality
 */
public RDDJavaFunctions(RDD targetRDD) {
    this.rdd = targetRDD;
    this.rddFunctions = new RDDFunctions<>(targetRDD);
}
/**
 * Builds the default {@link CassandraConnector}, derived from the wrapped RDD's
 * own Spark configuration.
 *
 * @return a connector configured from {@code rdd.conf()}
 */
public CassandraConnector defaultConnector() {
    final SparkConf sparkConf = rdd.conf();
    return CassandraConnector$.MODULE$.apply(sparkConf);
}
/**
 * Returns the Spark configuration associated with the wrapped RDD.
 *
 * @return the {@link SparkConf} of the underlying RDD
 */
public SparkConf getConf() {
    final SparkConf sparkConf = rdd.conf();
    return sparkConf;
}
/**
 * Saves the data from the wrapped RDD to the given Cassandra table.
 *
 * @param keyspace         keyspace containing the target table
 * @param table            name of the target table
 * @param rowWriterFactory factory producing the writer that maps RDD elements to rows
 * @param columnNames      selection of columns to write
 * @param conf             write-tuning options (batch size, consistency, etc.)
 * @param connector        connector used to reach the Cassandra cluster
 */
public void saveToCassandra(
String keyspace,
String table,
RowWriterFactory rowWriterFactory,
ColumnSelector columnNames,
WriteConf conf,
CassandraConnector connector
) {
// Delegates to the Scala API; note the argument order differs there —
// rowWriterFactory moves to the last (implicit) position.
rddFunctions.saveToCassandra(keyspace, table, columnNames, conf, connector, rowWriterFactory);
}
/**
* Applies a function to each item, and groups consecutive items having the same value together.
* Contrary to {@code groupBy}, items from the same group must be already next to each other in the
* original collection. Works locally on each partition, so items from different partitions will
* never be placed in the same group.
*/
public JavaPairRDD> spanBy(final Function f, ClassTag keyClassTag) {
ClassTag>> tupleClassTag = classTag(Tuple2.class);
ClassTag> iterableClassTag = CassandraJavaUtil.classTag(Iterable.class);
RDD>> newRDD = rddFunctions.spanBy(toScalaFunction1(f))
.map(JavaApiHelper.>valuesAsJavaIterable(), tupleClassTag);
return new JavaPairRDD<>(newRDD, keyClassTag, iterableClassTag);
}
/**
* Uses the data from {@code RDD} to join with a Cassandra table without retrieving the entire table.
* Any RDD which can be used to saveToCassandra can be used to joinWithCassandra as well as any RDD
* which only specifies the partition Key of a Cassandra Table. This method executes single partition
* requests against the Cassandra Table and accepts the functional modifiers that a normal
* {@link CassandraTableScanRDD} takes.
*
* By default this method only uses the Partition Key for joining but any combination of columns
* which are acceptable to C* can be used in the join. Specify columns using joinColumns as a
* parameter or the {@code on()} method.
*/
public CassandraJavaPairRDD joinWithCassandraTable(
String keyspaceName,
String tableName,
ColumnSelector selectedColumns,
ColumnSelector joinColumns,
RowReaderFactory rowReaderFactory,
RowWriterFactory rowWriterFactory
) {
ClassTag classTagT = rdd.toJavaRDD().classTag();
ClassTag classTagR = JavaApiHelper.getClassTag(rowReaderFactory.targetClass());
CassandraConnector connector = defaultConnector();
Option clusteringOrder = Option.empty();
Option
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (artifact page footer)