All downloads are free. Search and download functionality uses the official Maven repository.

com.datamountaineer.streamreactor.connect.cassandra.sink.CassandraJsonWriter.scala Maven / Gradle / Ivy

/*
 * Copyright 2017 Datamountaineer.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.datamountaineer.streamreactor.connect.cassandra.sink

import java.util.concurrent.Executors

import com.datamountaineer.kcql.Kcql
import com.datamountaineer.streamreactor.connect.cassandra.CassandraConnection
import com.datamountaineer.streamreactor.connect.cassandra.config.CassandraSinkSetting
import com.datamountaineer.streamreactor.connect.cassandra.utils.CassandraUtils
import com.datamountaineer.streamreactor.connect.concurrent.ExecutorExtension._
import com.datamountaineer.streamreactor.connect.concurrent.FutureAwaitWithFailFastFn
import com.datamountaineer.streamreactor.connect.converters.{FieldConverter, Transform}
import com.datamountaineer.streamreactor.connect.errors.ErrorHandler
import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil
import com.datastax.driver.core.exceptions.SyntaxError
import com.datastax.driver.core.{PreparedStatement, Session}
import com.typesafe.scalalogging.slf4j.StrictLogging
import org.apache.kafka.connect.sink.SinkRecord

import scala.collection.JavaConversions._
import scala.concurrent.duration._
import scala.util.{Failure, Success, Try}

/**
  * Cassandra JSON writer for Kafka Connect.
  *
  * Writes a list of Kafka Connect [[SinkRecord]]s to Cassandra using the
  * `INSERT ... JSON` support, one prepared statement per (topic, table) pair
  * as described by the configured KCQL mappings.
  *
  * @param connection the Cassandra cluster connection used to obtain sessions
  * @param settings   sink settings (keyspace, KCQL mappings, error policy,
  *                   retries, consistency level, thread pool size)
  */
class CassandraJsonWriter(connection: CassandraConnection, settings: CassandraSinkSetting)
  extends StrictLogging with ConverterUtil with ErrorHandler {

  logger.info("Initialising Cassandra writer.")

  //initialize error tracker
  initialize(settings.taskRetries, settings.errorPolicy)

  // var: re-assigned on reconnect in write() when the session is found closed
  private var session: Session = getSession.get

  CassandraUtils.checkCassandraTables(session.getCluster, settings.kcqls, session.getLoggedKeyspace)

  // var: rebuilt on reconnect, since prepared statements are bound to a session
  private var preparedCache: Map[String, Map[String, (PreparedStatement, Kcql)]] = cachePreparedStatements

  /**
    * Get a connection to Cassandra based on the config.
    *
    * @return the session wrapped by the error handler; None if the error
    *         policy swallowed the connection failure
    **/
  private def getSession: Option[Session] = {
    val t = Try(connection.cluster.connect(settings.keySpace))
    handleTry[Session](t)
  }

  /**
    * Cache the preparedStatements per topic rather than create them every time.
    * Each one is an insert statement aligned to topics.
    *
    * @return A Map of topic->(target -> (preparedStatement, kcql)).
    **/
  private def cachePreparedStatements = {
    settings.kcqls
      .groupBy(_.getSource)
      .map { case (topic, kcqls) =>
        val innerMap = kcqls.foldLeft(Map.empty[String, (PreparedStatement, Kcql)]) { case (map, k) =>
          val table = k.getTarget
          val ttl = k.getTTL
          logger.info(s"Preparing statements for $topic->$table")
          map + (table -> (getPreparedStatement(table, ttl).get, k))
        }
        topic -> innerMap
      }
  }

  /**
    * Build a preparedStatement for the given topic.
    *
    * @param table The table name to prepare the statement for.
    * @param ttl   Time-to-live for inserted rows; 0 means no TTL clause.
    * @return A prepared statement for the given topic.
    **/
  private def getPreparedStatement(table: String, ttl: Long): Option[PreparedStatement] = {
    val t: Try[PreparedStatement] = Try {
      // BUG FIX: the original tested `ttl.equals(0)`, which boxes the Long and
      // the Int literal; java.lang.Long.equals(Integer) is always false, so the
      // "USING TTL" branch was always taken. `ttl == 0L` performs the intended
      // numeric comparison. (Cassandra treats "USING TTL 0" as no TTL, so the
      // emitted rows behave the same, but the no-TTL statement is now used.)
      val statement = if (ttl == 0L) {
        session.prepare(s"INSERT INTO ${session.getLoggedKeyspace}.$table JSON ?")
      } else {
        session.prepare(s"INSERT INTO ${session.getLoggedKeyspace}.$table JSON ? USING TTL $ttl")
      }
      settings.consistencyLevel.foreach(statement.setConsistencyLevel)
      statement
    }
    handleTry[PreparedStatement](t)
  }

  /**
    * Write SinkRecords to Cassandra (aSync) in Json.
    *
    * Reconnects (and rebuilds the prepared-statement cache) if the session
    * has been closed since the last call.
    *
    * @param records A list of SinkRecords from Kafka Connect to write.
    **/
  def write(records: Seq[SinkRecord]): Unit = {
    if (records.isEmpty) {
      logger.debug("No records received.")
    } else {
      logger.debug(s"Received ${records.size} records.")

      //is the connection still alive
      if (session.isClosed) {
        logger.error(s"Session is closed attempting to reconnect to keySpace ${settings.keySpace}")
        session = getSession.get
        preparedCache = cachePreparedStatements
      }
      insert(records)
    }
  }

  /**
    * Write SinkRecords to Cassandra (aSync) in Json.
    *
    * @param records A list of SinkRecords from Kafka Connect to write.
    * @return boolean indication successful write.
    **/
  private def insert(records: Seq[SinkRecord]) = {
    val executor = Executors.newFixedThreadPool(settings.threadPoolSize)
    try {
      //This is a conscious decision to use a thread pool here in order to have more control. As we create multiple
      //futures to insert a record in Cassandra we want to fail immediately rather than waiting on all to finish.
      //If the error occurs it would be down to the error handler to do its thing.
      //NOOP should never be used!! otherwise data could be lost
      val futures = records.flatMap { record =>
        val tables = preparedCache.getOrElse(record.topic(),
          throw new IllegalArgumentException(s"Topic ${record.topic()} doesn't have a KSQL setup"))
        tables.map { case (_, (statement, kcql)) =>
          executor.submit {
            // project the record's value to JSON per the KCQL field selections
            val json = Transform(
              kcql.getFields.map(FieldConverter.apply),
              kcql.getIgnoredFields.map(FieldConverter.apply),
              record.valueSchema(),
              record.value(),
              kcql.hasRetainStructure())
            try {
              val bound = statement.bind(json)
              session.execute(bound)
              //we don't care about the ResultSet here
              ()
            } catch {
              case e: SyntaxError =>
                logger.error(s"Syntax error inserting <$json>", e)
                throw e
            }
          }
        }
      }

      //when the call returns the pool is shutdown
      FutureAwaitWithFailFastFn(executor, futures, 1.hours)
      handleTry(Success(()))
      logger.debug(s"Processed ${futures.size} records.")
    } catch {
      // NOTE(review): Throwable (not NonFatal) is caught deliberately so every
      // failure is routed through the configured ErrorPolicy (retry/throw/noop).
      // Narrowing this would change retry behaviour for fatal errors — confirm
      // before tightening.
      case t: Throwable =>
        logger.error(s"There was an error inserting the records ${t.getMessage}", t)
        handleTry(Failure(t))
    }
  }

  /**
    * Closed down the driver session and cluster.
    **/
  def close(): Unit = {
    logger.info("Shutting down Cassandra driver session and cluster.")
    session.close()
    session.getCluster.close()
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy