All downloads are free. The search and download functionality uses the official Maven repository.

com.landoop.streamreactor.connect.hive.source.HiveSourceTask.scala Maven / Gradle / Ivy

The newest version!
package com.landoop.streamreactor.connect.hive.source

import java.util

import com.datamountaineer.streamreactor.connect.utils.JarManifest
import com.landoop.streamreactor.connect.hive.sink.config.HiveSinkConfigConstants
import com.landoop.streamreactor.connect.hive.source.config.HiveSourceConfig
import com.landoop.streamreactor.connect.hive.source.offset.HiveSourceOffsetStorageReader
import com.typesafe.scalalogging.slf4j.StrictLogging
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

import scala.collection.JavaConverters._

/**
 * Kafka Connect source task that emits records read through [[HiveSource]] instances.
 *
 * On `start` it builds one [[HiveSource]] per entry in the configured table options and chains
 * them into a single record iterator; every `poll` drains up to `config.pollSize` records from
 * that iterator.
 *
 * The no-argument constructor leaves the metastore client and the file system unset, in which
 * case `start` creates them from the connector properties. The auxiliary constructor lets the
 * caller supply both up front.
 */
class HiveSourceTask extends SourceTask with StrictLogging {

  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  // Left as null by the primary constructor; `start` fills them in when they were not injected.
  private var client: HiveMetaStoreClient = _
  private var fs: FileSystem = _
  private var config: HiveSourceConfig = _

  private var sources: Set[HiveSource] = Set.empty
  private var iterator: Iterator[SourceRecord] = Iterator.empty

  /**
   * Creates a task that uses the supplied file system and metastore client instead of building
   * them from the properties passed to `start`.
   *
   * @param fs     file system handed to every [[HiveSource]]
   * @param client metastore client handed to every [[HiveSource]]
   */
  def this(fs: FileSystem, client: HiveMetaStoreClient) = {
    this()
    this.client = client
    this.fs = fs
  }

  /**
   * Initialises the task: creates the metastore client and file system if they were not
   * injected, parses the configuration and builds one [[HiveSource]] per table option.
   *
   * @param props connector properties handed over by the Connect framework
   */
  override def start(props: util.Map[String, String]): Unit = {

    if (client == null) {
      val hiveConf = new HiveConf()
      hiveConf.set("hive.metastore", props.get(HiveSinkConfigConstants.MetastoreTypeKey))
      hiveConf.set("hive.metastore.uris", props.get(HiveSinkConfigConstants.MetastoreUrisKey))
      client = new HiveMetaStoreClient(hiveConf)
    }

    if (fs == null) {
      val conf = new Configuration()
      conf.set("fs.defaultFS", props.get(HiveSinkConfigConstants.FsDefaultKey))
      fs = FileSystem.get(conf)
    }

    config = HiveSourceConfig.fromProps(props.asScala.toMap)

    sources = config.tableOptions.map { options =>
      new HiveSource(
        config.dbName,
        options.tableName,
        options.topic,
        new HiveSourceOffsetStorageReader(context.offsetStorageReader),
        config
      )(client, fs)
    }

    // Fold from an empty iterator rather than `reduce`, which throws on an empty collection
    // (i.e. when no table options are configured).
    iterator = sources.foldLeft[Iterator[SourceRecord]](Iterator.empty) { (chained, source) =>
      chained ++ source
    }
  }

  /**
   * Returns the next batch of records.
   *
   * @return at most `config.pollSize` records; an empty list when the iterator is exhausted
   */
  override def poll(): util.List[SourceRecord] = {
    // Only `hasNext`/`next` are used on the long-lived iterator: it is reused across polls, and
    // an iterator is not meant to be reused after calling a transforming method such as `take`.
    val batch = new util.ArrayList[SourceRecord]()
    while (batch.size < config.pollSize && iterator.hasNext) {
      batch.add(iterator.next())
    }
    batch
  }

  /**
   * Closes every source created by `start`. A failure while closing one source is logged and
   * does not prevent the remaining sources from being closed.
   */
  override def stop(): Unit =
    sources.foreach { source =>
      try source.close
      catch {
        case scala.util.control.NonFatal(e) =>
          logger.warn("Failed to close Hive source", e)
      }
    }

  /** @return the version reported by the jar manifest this class was loaded from */
  override def version(): String = manifest.version()
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy