com.datamountaineer.streamreactor.connect.cassandra.source.CassandraSourceTask.scala Maven / Gradle / Ivy
/*
* Copyright 2017 Datamountaineer.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datamountaineer.streamreactor.connect.cassandra.source
import java.util
import java.util.concurrent.LinkedBlockingQueue
import com.datamountaineer.streamreactor.connect.cassandra.CassandraConnection
import com.datamountaineer.streamreactor.connect.cassandra.config.{CassandraConfigConstants, CassandraConfigSource, CassandraSettings, CassandraSourceSetting}
import com.datamountaineer.streamreactor.connect.queues.QueueHelpers
import com.typesafe.scalalogging.slf4j.StrictLogging
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.util.{Failure, Success, Try}
/**
* Created by [email protected] on 14/04/16.
* stream-reactor
*/
class CassandraSourceTask extends SourceTask with StrictLogging {
private var queues = mutable.Map.empty[String, LinkedBlockingQueue[SourceRecord]]
private val readers = mutable.Map.empty[String, CassandraTableReader]
private var taskConfig: Option[CassandraConfigSource] = None
private var connection: Option[CassandraConnection] = None
private var settings: Seq[CassandraSourceSetting] = _
private var bufferSize: Option[Int] = None
private var batchSize: Option[Int] = None
private var tracker: Long = 0
private var pollInterval: Long = CassandraConfigConstants.DEFAULT_POLL_INTERVAL
/**
* Starts the Cassandra source, parsing the options and setting up the reader.
*
* @param props A map of supplied properties.
*/
override def start(props: util.Map[String, String]): Unit = {
//get configuration for this task
taskConfig = Try(new CassandraConfigSource(props)) match {
case Failure(f) => throw new ConnectException("Couldn't start CassandraSource due to configuration error.", f)
case Success(s) => Some(s)
}
logger.info(scala.io.Source.fromInputStream(getClass.getResourceAsStream("/cass-source-ascii.txt")).mkString + s" v $version")
//get the list of assigned tables this sink
val assigned = taskConfig.get.getString(CassandraConfigConstants.ASSIGNED_TABLES).split(",").toList
bufferSize = Some(taskConfig.get.getInt(CassandraConfigConstants.READER_BUFFER_SIZE))
batchSize = Some(taskConfig.get.getInt(CassandraConfigConstants.BATCH_SIZE))
///get cassandra connection
connection = Try(CassandraConnection(taskConfig.get)) match {
case Success(s) =>
logger.info("Connection to Cassandra established.")
Some(s)
case Failure(f) => throw new ConnectException(s"Couldn't connect to Cassandra.", f)
}
//Setup queues for readers to put records into
assigned.map(table => queues += table -> new LinkedBlockingQueue[SourceRecord](bufferSize.get))
settings = CassandraSettings.configureSource(taskConfig.get)
//set up readers
assigned.map(table => {
//get settings
val setting = settings.filter(s => s.kcql.getSource.equals(table)).head
val session = connection.get.session
val queue = queues(table)
readers += table -> CassandraTableReader(session = session, setting = setting, context = context, queue = queue)
})
pollInterval = settings.head.pollInterval
}
/**
* Called by the Framework
*
* Map over the queues, draining each and returning SourceRecords.
*
* @return A util.List of SourceRecords.
*/
override def poll(): util.List[SourceRecord] = {
val now = System.currentTimeMillis()
if (tracker + pollInterval <= now) {
tracker = now
settings
.map(s => s.kcql)
.flatten(r => process(r.getSource))
.toList
} else {
logger.debug(s"Waiting for poll interval to pass")
Thread.sleep(pollInterval)
List[SourceRecord]()
}
}
/**
* Process the table
*
* Get the reader can call read, if querying it will return then drain it's queue
* else if will start a new query then drain.
*
* @param table The table to query and drain
* @return A list of Source records
*/
def process(table: String): List[SourceRecord] = {
val reader = readers(table)
if (!reader.isQuerying) {
//query
reader.read()
val queue = queues(table)
if (!queue.isEmpty) {
logger.debug(s"Attempting to draining $batchSize from the queue for table $table")
val records = QueueHelpers.drainQueue(queues(table), batchSize.get).toList
records
} else {
List[SourceRecord]()
}
} else {
logger.info(s"Reader for table $table is still querying")
List[SourceRecord]()
}
}
/**
* Stop the task and close readers.
*/
override def stop(): Unit = {
logger.info("Stopping Cassandra source.")
readers.foreach({ case (_, v) => v.close() })
val cluster = connection.get.session.getCluster
logger.info("Shutting down Cassandra driver connections.")
connection.get.session.close()
cluster.close()
}
/**
* Gets the version of this Source.
*
* @return
*/
override def version: String = Option(getClass.getPackage.getImplementationVersion).getOrElse("")
/**
* Check if the reader for a table is querying.
*
* @param table The table to check if a query is active against.
* @return A boolean indicating if querying is in progress.
*/
private[datamountaineer] def isReaderQuerying(table: String): Boolean = {
readers(table).isQuerying
}
/**
* Check the queue size of a table.
*
* @param table The table to check the queue size for.
* @return The size of the queue.
*/
private[datamountaineer] def queueSize(table: String): Int = {
queues(table).size()
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy