/*
* Copyright 2017 Datamountaineer.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datamountaineer.streamreactor.connect.elastic5

import java.util

import com.datamountaineer.kcql.{Kcql, WriteModeEnum}
import com.datamountaineer.streamreactor.connect.converters.FieldConverter
import com.datamountaineer.streamreactor.connect.elastic5.config.ElasticSettings
import com.datamountaineer.streamreactor.connect.elastic5.indexname.CreateIndex
import com.datamountaineer.streamreactor.connect.errors.ErrorHandler
import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil
import com.fasterxml.jackson.databind.JsonNode
import com.landoop.json.sql.Field
import com.sksamuel.elastic4s.ElasticDsl._
import com.sksamuel.elastic4s.Indexable
import com.typesafe.scalalogging.slf4j.StrictLogging
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.sink.SinkRecord
import org.elasticsearch.action.support.WriteRequest.RefreshPolicy
import scala.collection.JavaConversions._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.concurrent.{Await, Future}
import scala.util.Try

class ElasticJsonWriter(client: KElasticClient, settings: ElasticSettings)
  extends ErrorHandler with StrictLogging with ConverterUtil {

  logger.info("Initialising Elastic Json writer")

  //initialize error tracker
  initialize(settings.taskRetries, settings.errorPolicy)

  //create the index automatically if it was set to do so
  settings.kcqls.filter(_.isAutoCreate).foreach(client.index)
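
  // UPSERT uses the primary key value as the Elasticsearch document id, so each
  // upsert mapping must declare exactly one PK; fail fast at startup otherwise.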
  settings.kcqls.filter(_.getWriteMode == WriteModeEnum.UPSERT).foreach { kcql =>
    if (kcql.getPrimaryKeys.size() != 1) {
      throw new ConfigException(s"UPSERT into ${kcql.getTarget} requires exactly one primary key.")
    }
  }
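
  // Group the KCQL mappings by source topic so incoming records can be routed,
  // and pre-compute the converted field/PK descriptors once per KCQL (an
  // IdentityHashMap is used because Kcql instances are compared by reference).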
  private val topicKcqlMap = settings.kcqls.groupBy(_.getSource)

  private val kcqlMap = new util.IdentityHashMap[Kcql, KcqlValues]()
  settings.kcqls.foreach { kcql =>
    kcqlMap.put(kcql,
      KcqlValues(
        kcql.getFields.map(FieldConverter.apply),
        kcql.getIgnoredFields.map(FieldConverter.apply),
        kcql.getPrimaryKeys.map { pk =>
          val path = Option(pk.getParentFields).map(_.toVector).getOrElse(Vector.empty)
          path :+ pk.getName
        }
      ))
  }
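
  // elastic4s serialises documents via the Indexable type class; this instance
  // lets a SinkRecord be passed straight to an index request by converting its
  // value (and schema) to JSON.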
  implicit object SinkRecordIndexable extends Indexable[SinkRecord] {
    override def json(t: SinkRecord): String = convertValueToJson(t).toString
  }
  /**
    * Close the elastic4s client
    **/
  def close(): Unit = client.close()

  /**
    * Write SinkRecords to Elasticsearch if the list is not empty
    *
    * @param records A list of SinkRecords
    **/
  def write(records: Vector[SinkRecord]): Unit = {
    if (records.isEmpty) {
      logger.debug("No records received.")
    } else {
      logger.debug(s"Received ${records.size} records.")
      val grouped = records.groupBy(_.topic())
      insert(grouped)
    }
  }
  /**
    * Create bulk index statements and execute them against the elastic4s client
    *
    * @param records SinkRecords grouped by source topic
    **/
  def insert(records: Map[String, Vector[SinkRecord]]): Unit = {
    val fut = records.flatMap {
      case (topic, sinkRecords) =>
        val kcqls = topicKcqlMap.getOrElse(topic,
          throw new IllegalArgumentException(s"$topic hasn't been configured in KCQL"))
        //we might have multiple inserts from the same Kafka message
        kcqls.flatMap { kcql =>
          val i = CreateIndex.getIndexName(kcql)
          val documentType = Option(kcql.getDocType).getOrElse(i)
          val kcqlValue = kcqlMap(kcql)
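          // Split the topic's records into batches of settings.batchSize; each
          // batch becomes one bulk request, executed asynchronously below.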
          sinkRecords.grouped(settings.batchSize)
            .map { batch =>
              val indexes = batch.map { r =>
                kcql.getWriteMode match {
                  case WriteModeEnum.INSERT =>
                    val json = Transform(
                      kcqlValue.fields,
                      kcqlValue.ignoredFields,
                      r.valueSchema(),
                      r.value(),
                      kcql.hasRetainStructure
                    )
                    indexInto(i / documentType).source(json.toString)

                  case WriteModeEnum.UPSERT =>
                    val (json, pks) = TransformAndExtractPK(
                      kcqlValue.fields,
                      kcqlValue.ignoredFields,
                      kcqlValue.primaryKeysPath,
                      r.valueSchema(),
                      r.value(),
                      kcql.hasRetainStructure
                    )
                    require(pks.nonEmpty, "Error extracting primary keys")
                    update(pks.head).in(i / documentType).docAsUpsert(json)(IndexableJsonNode)
                }
              }
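              // Execute the whole batch as one bulk request; RefreshPolicy.IMMEDIATE
              // makes the documents searchable as soon as the request completes.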
              client.execute(bulk(indexes).refresh(RefreshPolicy.IMMEDIATE))
            }
        }
    }
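
    // Block until every bulk future completes (bounded by the configured write
    // timeout) and route any failure through the connector's error policy.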
    handleTry(
      Try(
        Await.result(Future.sequence(fut), settings.writeTimeout.seconds)
      )
    )
  }

  private case class KcqlValues(fields: Seq[Field],
                                ignoredFields: Seq[Field],
                                primaryKeysPath: Seq[Vector[String]])
}
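
// Indexable instance used by the UPSERT path above to serialise the Jackson
// JsonNode produced by TransformAndExtractPK into the bulk request body.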
case object IndexableJsonNode extends Indexable[JsonNode] {
  override def json(t: JsonNode): String = t.toString
}
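
// Example usage (a minimal sketch; obtaining the KElasticClient and the
// ElasticSettings is connector-specific and is assumed to happen elsewhere,
// e.g. in the sink task's start() method):
//
//   val writer = new ElasticJsonWriter(client, settings)
//   writer.write(records.toVector)   // records: incoming SinkRecords
//   writer.close()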