All downloads are free. Search and download functionality uses the official Maven repository.

com.datastax.spark.connector.japi.RDDAndDStreamCommonJavaFunctions Maven / Gradle / Ivy

package com.datastax.spark.connector.japi;

import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.spark.connector.BatchSize;
import com.datastax.spark.connector.ColumnSelector;
import com.datastax.spark.connector.cql.CassandraConnector;
import com.datastax.spark.connector.writer.*;
import org.apache.spark.SparkConf;
import org.joda.time.DateTime;
import org.joda.time.Duration;

import java.io.Serializable;
import java.util.Date;
import java.util.Objects;

import static com.datastax.spark.connector.japi.CassandraJavaUtil.allColumns;

/**
 * Java API wrapper over either {@code RDD} or {@code DStream} to provide Spark Cassandra Connector functionality.
 *
 * <p>To obtain an instance of the specific wrapper, use one of the factory methods in
 * {@link com.datastax.spark.connector.japi.CassandraJavaUtil} class.</p>
 */
@SuppressWarnings("UnusedDeclaration")
public abstract class RDDAndDStreamCommonJavaFunctions {

    /**
     * Returns the default Cassandra connector instance for the wrapped RDD or DStream.
     *
     * @return an instance of {@link com.datastax.spark.connector.cql.CassandraConnector}
     */
    public abstract CassandraConnector defaultConnector();

    /**
     * Returns the Spark configuration from which {@link #defaultWriteConf()} derives the default write
     * configuration.
     *
     * @return the Spark configuration of the wrapped RDD or DStream
     */
    protected abstract SparkConf getConf();

    /**
     * Returns the default write configuration instance for the wrapped RDD or DStream.
     *
     * @return an instance of {@link com.datastax.spark.connector.writer.WriteConf}
     */
    public WriteConf defaultWriteConf() {
        return WriteConf.fromSparkConf(getConf());
    }

    /**
     * Performs the actual save of the wrapped RDD or DStream. This is the operation which
     * {@link WriterBuilder#saveToCassandra()} delegates to, passing all the settings collected by the builder.
     *
     * @param keyspace         the target keyspace name
     * @param table            the target table name
     * @param rowWriterFactory a row writer factory to be used to save objects from RDD or DStream
     * @param columnNames      a column selector which defines which columns are to be saved
     * @param conf             the write configuration to be used
     * @param connector        the Cassandra connector to be used
     */
    protected abstract void saveToCassandra(String keyspace, String table, RowWriterFactory rowWriterFactory,
                                            ColumnSelector columnNames, WriteConf conf,
                                            CassandraConnector connector);

    /**
     * Saves the data using the default connector and the default write configuration.
     *
     * @deprecated this method will be removed in future release, please use {@link #writerBuilder(String, String,
     * com.datastax.spark.connector.writer.RowWriterFactory)}
     */
    @Deprecated
    public void saveToCassandra(String keyspace, String table, RowWriterFactory rowWriterFactory,
                                ColumnSelector columnNames) {
        new WriterBuilder(keyspace, table, rowWriterFactory, columnNames, defaultConnector(), defaultWriteConf())
                .saveToCassandra();
    }

    /**
     * Creates a writer builder object with specified parameters.
     *
     * <p>Writer builder is used to configure parameters of the write operation and eventually save the data to
     * Cassandra. By default the builder is configured to save all the columns with default connector and Spark
     * Cassandra Connector default parameters.</p>
     *
     * <p>To obtain an instance of {@link com.datastax.spark.connector.writer.RowWriterFactory} use one of utility
     * methods in {@link com.datastax.spark.connector.japi.CassandraJavaUtil}.</p>
     *
     * @param keyspaceName     the target keyspace name
     * @param tableName        the target table name
     * @param rowWriterFactory a row writer factory to be used to save objects from RDD or DStream
     *
     * @return an instance of {@link com.datastax.spark.connector.japi.RDDAndDStreamCommonJavaFunctions.WriterBuilder}
     *
     * @see com.datastax.spark.connector.japi.CassandraJavaUtil#mapToRow(com.datastax.spark.connector.mapper.ColumnMapper)
     * @see com.datastax.spark.connector.japi.CassandraJavaUtil#mapToRow(Class, java.util.Map)
     * @see com.datastax.spark.connector.japi.CassandraJavaUtil#mapToRow(Class, org.apache.commons.lang3.tuple.Pair[])
     */
    public WriterBuilder writerBuilder(String keyspaceName, String tableName, RowWriterFactory rowWriterFactory) {
        return new WriterBuilder(keyspaceName, tableName, rowWriterFactory, allColumns, defaultConnector(),
                defaultWriteConf());
    }

    /**
     * Write builder is a container for various parameters which determine how the objects from RDD or DStream are to
     * be saved to Cassandra.
     *
     * <p>All fields are final; every {@code with...} method returns either this instance (when nothing would
     * change) or a new builder. This is an inner class because {@link #saveToCassandra()} delegates to the
     * enclosing wrapper instance.</p>
     */
    public class WriterBuilder implements Serializable {
        /** Target keyspace name, where the data are to be saved. */
        public final String keyspaceName;

        /** Target table name, where the data are to be saved. */
        public final String tableName;

        /** A factory for row writer which will be used to map objects from the RDD or DStream into Cassandra rows. */
        public final RowWriterFactory rowWriterFactory;

        /** A column selector which defines which columns are to be saved. */
        public final ColumnSelector columnSelector;

        /** A Cassandra connector which defines a connection to Cassandra server. */
        public final CassandraConnector connector;

        /** A set of write operation parameters which are related mainly to performance and failure tolerance. */
        public final WriteConf writeConf;

        /**
         * Refer to particular fields description for more information.
         */
        public WriterBuilder(String keyspaceName, String tableName, RowWriterFactory rowWriterFactory,
                             ColumnSelector columnSelector, CassandraConnector connector, WriteConf writeConf) {
            this.keyspaceName = keyspaceName;
            this.tableName = tableName;
            this.rowWriterFactory = rowWriterFactory;
            this.columnSelector = columnSelector;
            this.connector = connector;
            this.writeConf = writeConf;
        }

        /**
         * Returns a copy of this builder with {@link #connector} changed to a specified one.
         *
         * <p>If the same instance is passed as the one which is currently set, no copy of this builder is
         * created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withConnector(CassandraConnector connector) {
            if (connector != this.connector) {
                return new WriterBuilder(keyspaceName, tableName, rowWriterFactory, columnSelector, connector,
                        writeConf);
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with {@link #writeConf} changed to a specified one.
         *
         * <p>If the same instance is passed as the one which is currently set, no copy of this builder is
         * created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withWriteConf(WriteConf writeConf) {
            if (writeConf != this.writeConf) {
                return new WriterBuilder(keyspaceName, tableName, rowWriterFactory, columnSelector, connector,
                        writeConf);
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with {@link #rowWriterFactory} changed to a specified one.
         *
         * <p>If the same instance is passed as the one which is currently set, no copy of this builder is
         * created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withRowWriterFactory(RowWriterFactory factory) {
            // Identity check mirrors withConnector/withWriteConf and honours the documented contract.
            if (factory != this.rowWriterFactory) {
                return new WriterBuilder(keyspaceName, tableName, factory, columnSelector, connector, writeConf);
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with {@link #columnSelector} changed to a specified one.
         *
         * <p>If the same instance is passed as the one which is currently set, no copy of this builder is
         * created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withColumnSelector(ColumnSelector columnSelector) {
            // Identity check mirrors withConnector/withWriteConf and honours the documented contract.
            if (columnSelector != this.columnSelector) {
                return new WriterBuilder(keyspaceName, tableName, rowWriterFactory, columnSelector, connector,
                        writeConf);
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with the new write configuration which has batch size changed to a specified
         * one.
         *
         * <p>If the given value is equal to the one which is currently set, no copy of this builder is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withBatchSize(BatchSize batchSize) {
            if (!Objects.equals(writeConf.batchSize(), batchSize)) {
                return withWriteConf(
                        new WriteConf(batchSize, writeConf.batchGroupingBufferSize(), writeConf.batchGroupingKey(),
                                writeConf.consistencyLevel(), writeConf.ifNotExists(), writeConf.ignoreNulls(),
                                writeConf.parallelismLevel(), writeConf.throughputMiBPS(), writeConf.ttl(),
                                writeConf.timestamp(), writeConf.taskMetricsEnabled()));
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with the new write configuration which has batch buffer size changed to a
         * specified one.
         *
         * <p>If the given value is equal to the one which is currently set, no copy of this builder is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withBatchGroupingBufferSize(int batchGroupingBufferSize) {
            if (writeConf.batchGroupingBufferSize() != batchGroupingBufferSize) {
                return withWriteConf(
                        new WriteConf(writeConf.batchSize(), batchGroupingBufferSize, writeConf.batchGroupingKey(),
                                writeConf.consistencyLevel(), writeConf.ifNotExists(), writeConf.ignoreNulls(),
                                writeConf.parallelismLevel(), writeConf.throughputMiBPS(), writeConf.ttl(),
                                writeConf.timestamp(), writeConf.taskMetricsEnabled()));
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with the new write configuration which has batch level changed to a specified
         * one.
         *
         * <p>If the given value is equal to the one which is currently set, no copy of this builder is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withBatchGroupingKey(BatchGroupingKey batchGroupingKey) {
            if (!Objects.equals(writeConf.batchGroupingKey(), batchGroupingKey)) {
                return withWriteConf(
                        new WriteConf(writeConf.batchSize(), writeConf.batchGroupingBufferSize(), batchGroupingKey,
                                writeConf.consistencyLevel(), writeConf.ifNotExists(), writeConf.ignoreNulls(),
                                writeConf.parallelismLevel(), writeConf.throughputMiBPS(), writeConf.ttl(),
                                writeConf.timestamp(), writeConf.taskMetricsEnabled()));
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with the new write configuration which has consistency level changed to a
         * specified one.
         *
         * <p>If the given value is the same as the one which is currently set, no copy of this builder is
         * created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withConsistencyLevel(ConsistencyLevel consistencyLevel) {
            if (writeConf.consistencyLevel() != consistencyLevel) {
                return withWriteConf(
                        new WriteConf(writeConf.batchSize(), writeConf.batchGroupingBufferSize(),
                                writeConf.batchGroupingKey(), consistencyLevel, writeConf.ifNotExists(),
                                writeConf.ignoreNulls(), writeConf.parallelismLevel(), writeConf.throughputMiBPS(),
                                writeConf.ttl(), writeConf.timestamp(), writeConf.taskMetricsEnabled()));
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with the new write configuration which has parallelism level changed to a
         * specified one.
         *
         * <p>If the given value is equal to the one which is currently set, no copy of this builder is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withParallelismLevel(int parallelismLevel) {
            if (writeConf.parallelismLevel() != parallelismLevel) {
                return withWriteConf(
                        new WriteConf(writeConf.batchSize(), writeConf.batchGroupingBufferSize(),
                                writeConf.batchGroupingKey(), writeConf.consistencyLevel(), writeConf.ifNotExists(),
                                writeConf.ignoreNulls(), parallelismLevel, writeConf.throughputMiBPS(),
                                writeConf.ttl(), writeConf.timestamp(), writeConf.taskMetricsEnabled()));
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with the new write configuration which has write throughput value changed to
         * a specified one.
         *
         * <p>If the given value is equal to the one which is currently set, no copy of this builder is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withThroughputMBPS(int throughputMBPS) {
            if (writeConf.throughputMiBPS() != throughputMBPS) {
                return withWriteConf(
                        new WriteConf(writeConf.batchSize(), writeConf.batchGroupingBufferSize(),
                                writeConf.batchGroupingKey(), writeConf.consistencyLevel(), writeConf.ifNotExists(),
                                writeConf.ignoreNulls(), writeConf.parallelismLevel(), throughputMBPS,
                                writeConf.ttl(), writeConf.timestamp(), writeConf.taskMetricsEnabled()));
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with the new write configuration which has task metrics set to enabled or
         * disabled as specified.
         *
         * <p>If the given value is equal to the one which is currently set, no copy of this builder is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withTaskMetricsEnabled(boolean taskMetricsEnabled) {
            if (writeConf.taskMetricsEnabled() != taskMetricsEnabled) {
                return withWriteConf(
                        new WriteConf(writeConf.batchSize(), writeConf.batchGroupingBufferSize(),
                                writeConf.batchGroupingKey(), writeConf.consistencyLevel(), writeConf.ifNotExists(),
                                writeConf.ignoreNulls(), writeConf.parallelismLevel(), writeConf.throughputMiBPS(),
                                writeConf.ttl(), writeConf.timestamp(), taskMetricsEnabled));
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with the new write configuration which has ifNotExists set to enabled or
         * disabled as specified.
         *
         * <p>If the given value is equal to the one which is currently set, no copy of this builder is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withIfNotExists(boolean ifNotExists) {
            if (writeConf.ifNotExists() != ifNotExists) {
                return withWriteConf(
                        new WriteConf(writeConf.batchSize(), writeConf.batchGroupingBufferSize(),
                                writeConf.batchGroupingKey(), writeConf.consistencyLevel(), ifNotExists,
                                writeConf.ignoreNulls(), writeConf.parallelismLevel(), writeConf.throughputMiBPS(),
                                writeConf.ttl(), writeConf.timestamp(), writeConf.taskMetricsEnabled()));
            } else {
                return this;
            }
        }

        /**
         * Returns a copy of this builder with the new write configuration which has ignoreNulls set to enabled or
         * disabled as specified.
         *
         * <p>If the given value is equal to the one which is currently set, no copy of this builder is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withIgnoreNulls(boolean ignoreNulls) {
            if (writeConf.ignoreNulls() != ignoreNulls) {
                return withWriteConf(
                        new WriteConf(writeConf.batchSize(), writeConf.batchGroupingBufferSize(),
                                writeConf.batchGroupingKey(), writeConf.consistencyLevel(), writeConf.ifNotExists(),
                                ignoreNulls, writeConf.parallelismLevel(), writeConf.throughputMiBPS(),
                                writeConf.ttl(), writeConf.timestamp(), writeConf.taskMetricsEnabled()));
            } else {
                return this;
            }
        }

        /**
         * Shared implementation of all the timestamp-related methods: replaces the timestamp option of the write
         * configuration, or returns this builder when the option is equal to the current one.
         */
        private WriterBuilder withTimestamp(TimestampOption timestamp) {
            return Objects.equals(writeConf.timestamp(), timestamp)
                    ? this
                    : withWriteConf(new WriteConf(
                            writeConf.batchSize(), writeConf.batchGroupingBufferSize(), writeConf.batchGroupingKey(),
                            writeConf.consistencyLevel(), writeConf.ifNotExists(), writeConf.ignoreNulls(),
                            writeConf.parallelismLevel(), writeConf.throughputMiBPS(), writeConf.ttl(),
                            timestamp, writeConf.taskMetricsEnabled()));
        }

        /**
         * Returns a copy of this builder with the new write configuration which has custom write timestamp
         * changed to a value specified in microseconds.
         *
         * <p>If the resulting timestamp option is equal to the one which is currently set, no copy of this builder
         * is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withConstantTimestamp(long timeInMicroseconds) {
            return withTimestamp(TimestampOption$.MODULE$.constant(timeInMicroseconds));
        }

        /**
         * Returns a copy of this builder with the new write configuration which has custom write timestamp
         * changed to a specified value.
         *
         * <p>If the resulting timestamp option is equal to the one which is currently set, no copy of this builder
         * is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withConstantTimestamp(Date timestamp) {
            // Date carries milliseconds; the long overload expects microseconds.
            long timeInMicroseconds = timestamp.getTime() * 1000L;
            return withConstantTimestamp(timeInMicroseconds);
        }

        /**
         * Returns a copy of this builder with the new write configuration which has custom write timestamp
         * changed to a specified value.
         *
         * <p>If the resulting timestamp option is equal to the one which is currently set, no copy of this builder
         * is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withConstantTimestamp(DateTime timestamp) {
            // DateTime carries milliseconds; the long overload expects microseconds.
            long timeInMicroseconds = timestamp.getMillis() * 1000L;
            return withConstantTimestamp(timeInMicroseconds);
        }

        /**
         * Returns a copy of this builder with the new write configuration which has write timestamp set
         * to use automatic value set by Cassandra.
         *
         * <p>If the resulting timestamp option is equal to the one which is currently set, no copy of this builder
         * is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withAutoTimestamp() {
            return withTimestamp(TimestampOption.defaultValue());
        }

        /**
         * Returns a copy of this builder with the new write configuration which has write timestamp set
         * to a placeholder which will be filled-in by mapper.
         *
         * <p>If the resulting timestamp option is equal to the one which is currently set, no copy of this builder
         * is created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withPerRowTimestamp(String placeholder) {
            return withTimestamp(TimestampOption$.MODULE$.perRow(placeholder));
        }

        /**
         * Shared implementation of all the TTL-related methods: replaces the TTL option of the write
         * configuration, or returns this builder when the option is equal to the current one.
         */
        private WriterBuilder withTTL(TTLOption ttl) {
            return Objects.equals(writeConf.ttl(), ttl)
                    ? this
                    : withWriteConf(new WriteConf(
                            writeConf.batchSize(), writeConf.batchGroupingBufferSize(), writeConf.batchGroupingKey(),
                            writeConf.consistencyLevel(), writeConf.ifNotExists(), writeConf.ignoreNulls(),
                            writeConf.parallelismLevel(), writeConf.throughputMiBPS(), ttl,
                            writeConf.timestamp(), writeConf.taskMetricsEnabled()));
        }

        /**
         * Returns a copy of this builder with the new write configuration which has TTL set to a given
         * number of seconds.
         *
         * <p>If the resulting TTL option is equal to the one which is currently set, no copy of this builder is
         * created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withConstantTTL(int ttlInSeconds) {
            return withTTL(TTLOption$.MODULE$.constant(ttlInSeconds));
        }

        /**
         * Returns a copy of this builder with the new write configuration which has TTL set to a given
         * duration.
         *
         * <p>If the resulting TTL option is equal to the one which is currently set, no copy of this builder is
         * created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         *
         * @throws IllegalArgumentException if the duration, expressed in whole seconds, does not fit into an
         *                                  {@code int}
         */
        public WriterBuilder withConstantTTL(Duration ttl) {
            long secs = ttl.getStandardSeconds();
            // A plain (int) cast would silently wrap around and produce an unrelated TTL value.
            if (secs < Integer.MIN_VALUE || secs > Integer.MAX_VALUE) {
                throw new IllegalArgumentException(
                        "TTL of " + secs + " seconds does not fit into a 32-bit integer");
            }
            return withConstantTTL((int) secs);
        }

        /**
         * Returns a copy of this builder with the new write configuration which has write TTL set
         * to use automatic value set by Cassandra.
         *
         * <p>If the resulting TTL option is equal to the one which is currently set, no copy of this builder is
         * created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withAutoTTL() {
            return withTTL(TTLOption.defaultValue());
        }

        /**
         * Returns a copy of this builder with the new write configuration which has TTL set to a placeholder
         * which will be filled-in by mapper.
         *
         * <p>If the resulting TTL option is equal to the one which is currently set, no copy of this builder is
         * created.</p>
         *
         * @return this instance or copy to allow method invocation chaining
         */
        public WriterBuilder withPerRowTTL(String placeholder) {
            return withTTL(TTLOption$.MODULE$.perRow(placeholder));
        }

        /**
         * Sets the output operator for the RDD or DStream.
         *
         * <p>Calling this method causes the data to be written to the database using all the configuration
         * settings.</p>
         */
        public void saveToCassandra() {
            RDDAndDStreamCommonJavaFunctions.this.saveToCassandra(keyspaceName, tableName, rowWriterFactory,
                    columnSelector, writeConf, connector);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy