Downloading a project consumes significant server resources. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download or modify it as often as you want.
/*
*
* * Licensed to the Apache Software Foundation (ASF) under one or more
* * contributor license agreements. See the NOTICE file distributed with
* * this work for additional information regarding copyright ownership.
* * The ASF licenses this file to You under the Apache License, Version 2.0
* * (the "License"); you may not use this file except in compliance with
* * the License. You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*
*/
package com.ebiznext.comet.schema.model
import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo, JsonTypeName}
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize}
import com.fasterxml.jackson.databind.ser.std.ToStringSerializer
import com.fasterxml.jackson.databind.{DeserializationContext, JsonDeserializer}
/** Kind of sink a dataset can be written to.
  *
  * Jackson serializes a value through its `toString` (see the `ToStringSerializer`
  * annotation) and reads it back with `SinkTypeDeserializer`.
  *
  * @param value textual name of the sink type; the instances declared in the companion
  *              object use "None", "FS", "BQ", "ES" and "JDBC"
  */
@JsonSerialize(using = classOf[ToStringSerializer])
@JsonDeserialize(using = classOf[SinkTypeDeserializer])
sealed case class SinkType(value: String) {

  /** Renders the bare name rather than the generated `SinkType(...)` form. */
  override def toString: String = {
    value
  }
}
/** Known sink types and the parser that resolves them from their textual name. */
object SinkType {

  /** Resolves a sink type from its textual name.
    *
    * Matching is case-insensitive and accepts both the short name (e.g. "BQ") and the
    * sink-class style alias (e.g. "BigQuerySink").
    *
    * @param value name of the sink type, e.g. "None", "fs" or "JdbcSink"
    * @return the matching instance declared in this object
    * @throws IllegalArgumentException if `value` does not name a known sink type
    */
  def fromString(value: String): SinkType = {
    // Upper-case with Locale.ROOT: the default-locale variant turns "i" into a dotted
    // capital under e.g. a Turkish locale, so "jdbcsink" would never match "JDBCSINK".
    value.toUpperCase(java.util.Locale.ROOT) match {
      case "NONE" | "NONESINK"   => SinkType.None
      case "FS" | "FSSINK"       => SinkType.FS
      case "JDBC" | "JDBCSINK"   => SinkType.JDBC
      case "BQ" | "BIGQUERYSINK" => SinkType.BQ
      case "ES" | "ESSINK"       => SinkType.ES
      case _ =>
        // Fail with a descriptive message instead of an opaque scala.MatchError.
        throw new IllegalArgumentException(
          s"Unsupported sink type '$value'. Expected one of: None, FS, JDBC, BQ, ES"
        )
    }
  }

  /** No sink. Shadows `scala.None` inside this object. */
  object None extends SinkType("None")

  /** Sink type named "FS". */
  object FS extends SinkType("FS")

  /** Sink type named "BQ". */
  object BQ extends SinkType("BQ")

  /** Sink type named "ES". */
  object ES extends SinkType("ES")

  /** Sink type named "JDBC". */
  object JDBC extends SinkType("JDBC")

  /** Every sink type declared in this object. */
  val sinks: Set[SinkType] = Set(None, FS, BQ, ES, JDBC)
}
/** Jackson deserializer producing a `SinkType` from its textual name. */
class SinkTypeDeserializer extends JsonDeserializer[SinkType] {

  /** Reads the current value as a `String` and resolves it with `SinkType.fromString`.
    *
    * @param jp  parser positioned on the value to read
    * @param ctx deserialization context (not used here)
    * @return the sink type named by the value
    */
  override def deserialize(jp: JsonParser, ctx: DeserializationContext): SinkType =
    SinkType.fromString(jp.readValueAs(classOf[String]))
}
/** Base class of the sink configurations a dataset can be written to once ingested.
  *
  * The concrete subtype is selected by Jackson from the `type` property, using the
  * names registered below:
  *   - "None": no sink (`NoneSink`)
  *   - "BQ"  : BigQuery (`BigQuerySink`)
  *   - "ES"  : Elasticsearch (`EsSink`)
  *   - "JDBC": JDBC database (`JdbcSink`)
  *
  * NOTE(review): `SinkType.FS` exists, but no subtype is registered for it in the
  * annotations visible here — confirm whether that is intended.
  *
  * @param `type` kind of sink represented by the concrete subclass
  * @param name   optional name, absent by default. Per the original documentation it is
  *               used when the sink configuration lives in the application.conf file
  *               instead of inline in the YAML file, which helps when several datasets
  *               share the same sink parameters.
  */
@JsonTypeInfo(
  use = JsonTypeInfo.Id.NAME,
  include = JsonTypeInfo.As.PROPERTY,
  property = "type"
)
@JsonSubTypes(
  Array(
    new JsonSubTypes.Type(value = classOf[NoneSink], name = "None"),
    new JsonSubTypes.Type(value = classOf[BigQuerySink], name = "BQ"),
    new JsonSubTypes.Type(value = classOf[EsSink], name = "ES"),
    new JsonSubTypes.Type(value = classOf[JdbcSink], name = "JDBC")
  )
)
sealed abstract class Sink(
  val `type`: SinkType,
  val name: Option[String] = None
)
/** Options of a BigQuery sink, selected when the sink `type` property is "BQ".
  *
  * Every option is optional and absent by default.
  *
  * @param location               database location (EU, US, ...)
  * @param timestamp              timestamp column used for table partitioning, if any;
  *                               no partitioning by default
  * @param clustering             ordered list of columns used for table clustering
  * @param days                   number of days before the table is considered expired
  *                               and deleted; never by default
  * @param requirePartitionFilter whether a partition filter is required on every
  *                               request; not required by default
  */
@JsonTypeName("BQ")
final case class BigQuerySink(
  location: Option[String] = None,
  timestamp: Option[String] = None,
  clustering: Option[Seq[String]] = None,
  days: Option[Int] = None,
  requirePartitionFilter: Option[Boolean] = None
) extends Sink(SinkType.BQ)
/** Options of an Elasticsearch sink, selected when the sink `type` property is "ES".
  *
  * Elasticsearch connection options are specified in the application.conf file.
  *
  * @param id        attribute to use as the document id; generated by Elasticsearch
  *                  when not specified
  * @param timestamp timestamp field format as expected by Elasticsearch
  *                  ("{beginTs|yyyy.MM.dd}" for example)
  */
@JsonTypeName("ES")
final case class EsSink(
  id: Option[String] = None,
  timestamp: Option[String] = None
) extends Sink(SinkType.ES)
/** Sink configuration carrying no options, selected when the sink `type` property is
  * "None".
  */
@JsonTypeName("None")
final case class NoneSink()
    extends Sink(SinkType.None)
/** Options of a JDBC sink, selected when the sink `type` property is "JDBC".
  *
  * @param connection connection string (required)
  * @param partitions number of Spark partitions; absent by default
  * @param batchsize  batch size of each JDBC bulk insert; absent by default
  */
@JsonTypeName("JDBC")
final case class JdbcSink(
  connection: String,
  partitions: Option[Int] = None,
  batchsize: Option[Int] = None
) extends Sink(SinkType.JDBC)
/** Factory helpers for [[Sink]]. */
object Sink {

  /** Builds a sink with default options from the textual name of its type.
    *
    * The name is resolved with `SinkType.fromString`; names it does not recognise fail
    * there. Only the None, BQ and ES types can be created this way.
    *
    * @param sinkTypeStr name of the sink type
    * @return a `NoneSink`, `BigQuerySink` or `EsSink` with all options left at their defaults
    * @throws Exception if the name resolves to any other sink type
    */
  def fromType(sinkTypeStr: String): Sink =
    SinkType.fromString(sinkTypeStr) match {
      case SinkType.None => NoneSink()
      case SinkType.BQ   => BigQuerySink()
      case SinkType.ES   => EsSink()
      case sinkType =>
        throw new Exception(s"Unsupported creation of SinkType from $sinkType")
    }
}