com.github.jparkie.spark.elasticsearch.conf.SparkEsTransportClientConf.scala
package com.github.jparkie.spark.elasticsearch.conf

import java.net.InetSocketAddress

import com.github.jparkie.spark.elasticsearch.util.SparkEsConfParam
import org.apache.spark.SparkConf

import scala.collection.mutable

/**
 * Configurations for EsNativeDataFrameWriter's TransportClient.
 *
 * @param transportAddresses The minimum set of hosts to contact when establishing a client.
 *                           With CONFIG_CLIENT_TRANSPORT_SNIFF enabled (the default), the
 *                           client discovers the remaining cluster nodes by sniffing.
 * @param transportPort The port to connect to when establishing a client.
 * @param transportSettings Miscellaneous settings for the TransportClient.
 *                          Empty by default.
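 *
 * An illustrative instantiation (addresses and settings are placeholder values):
 * {{{
 * SparkEsTransportClientConf(
 *   transportAddresses = Seq("10.0.0.1", "10.0.0.2:9301"),
 *   transportPort = 9300,
 *   transportSettings = Map("cluster.name" -> "my-cluster")
 * )
 * }}}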
 */
case class SparkEsTransportClientConf(
  transportAddresses: Seq[String],
  transportPort:      Int,
  transportSettings:  Map[String, String]
) extends Serializable

object SparkEsTransportClientConf {
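  // Setting keys recognized by Elasticsearch's TransportClient.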
  val CONFIG_CLUSTER_NAME = "cluster.name"
  val CONFIG_CLIENT_TRANSPORT_SNIFF = "client.transport.sniff"
  val CONFIG_CLIENT_TRANSPORT_IGNORE_CLUSTER_NAME = "client.transport.ignore_cluster_name"
  val CONFIG_CLIENT_TRANSPORT_PING_TIMEOUT = "client.transport.ping_timeout"
  val CONFIG_CLIENT_TRANSPORT_NODES_SAMPLER_INTERVAL = "client.transport.nodes_sampler_interval"

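  // SparkConf property names with their defaults. A null default means the
  // setting is left out of transportSettings unless explicitly configured.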
  val ES_NODES = SparkEsConfParam[Seq[String]](
    name = "es.nodes",
    default = Seq.empty[String]
  )
  val ES_PORT = SparkEsConfParam[Int](
    name = "es.port",
    default = 9300
  )
  val ES_CLUSTER_NAME = SparkEsConfParam[String](
    name = s"es.$CONFIG_CLUSTER_NAME",
    default = null
  )
  val ES_CLIENT_TRANSPORT_SNIFF = SparkEsConfParam[String](
    name = s"es.$CONFIG_CLIENT_TRANSPORT_SNIFF",
    default = null
  )
  val ES_CLIENT_TRANSPORT_IGNORE_CLUSTER_NAME = SparkEsConfParam[String](
    name = s"es.$CONFIG_CLIENT_TRANSPORT_IGNORE_CLUSTER_NAME",
    default = null
  )
  val ES_CLIENT_TRANSPORT_PING_TIMEOUT = SparkEsConfParam[String](
    name = s"es.$CONFIG_CLIENT_TRANSPORT_PING_TIMEOUT",
    default = null
  )
  val ES_CLIENT_TRANSPORT_NODES_SAMPLER_INTERVAL = SparkEsConfParam[String](
    name = s"es.$CONFIG_CLIENT_TRANSPORT_NODES_SAMPLER_INTERVAL",
    default = null
  )

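  /**
   * Resolves each host into an InetSocketAddress, using transportPort as the
   * fallback for hosts that lack an explicit ":port" suffix.
   *
   * For example (illustrative values):
   * {{{
   * getTransportAddresses(Seq("10.0.0.1:9301", "10.0.0.2"), 9300)
   * // Seq(new InetSocketAddress("10.0.0.1", 9301), new InetSocketAddress("10.0.0.2", 9300))
   * }}}
   *
   * @param transportAddresses Hosts in "host" or "host:port" form; must not be null or empty.
   * @param transportPort The fallback port for hosts declared without one.
   * @return One InetSocketAddress per configured host.
   */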
  def getTransportAddresses(transportAddresses: Seq[String], transportPort: Int): Seq[InetSocketAddress] = {
    transportAddresses match {
      case null | Nil => throw new IllegalArgumentException("A contact point list cannot be empty.")
      case hosts => hosts map {
        ipWithPort =>
          ipWithPort.split(":") match {
            case Array(actualHost, actualPort) =>
              new InetSocketAddress(actualHost, actualPort.toInt)
            case Array(actualHost) =>
              new InetSocketAddress(actualHost, transportPort)
            case _ =>
              throw new IllegalArgumentException(s"A contact point should have the form [host:port] or [host] but was: $ipWithPort.")
          }
      }
    }
  }

  /**
   * Extracts SparkEsTransportClientConf from a SparkConf.
   *
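   * Nodes and port are read from es.nodes and es.port. Each optional transport
   * setting is copied into transportSettings only when its es.* property, or
   * the "spark."-prefixed form of that name, is present in the SparkConf.
   *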
   * @param sparkConf A SparkConf.
   * @return A SparkEsTransportClientConf from a SparkConf.
   */
  def fromSparkConf(sparkConf: SparkConf): SparkEsTransportClientConf = {
    val tempEsNodes = ES_NODES.fromConf(sparkConf)((sc, name) => sc.get(name).split(","))
    val tempEsPort = ES_PORT.fromConf(sparkConf)((sc, name) => sc.getInt(name, ES_PORT.default))
    val tempSettings = mutable.HashMap.empty[String, String]

    require(
      tempEsNodes.nonEmpty,
      s"No nodes were defined in property ${ES_NODES.name} in the SparkConf."
    )

    // Copies each optional transport setting only when it was configured,
    // either under its es.* name or under the "spark."-prefixed variant.
    def putIfConfigured(param: SparkEsConfParam[String], settingKey: String): Unit = {
      if (sparkConf.contains(param.name) || sparkConf.contains(s"spark.${param.name}"))
        tempSettings.put(settingKey, param.fromConf(sparkConf)((sc, name) => sc.get(name)))
    }

    putIfConfigured(ES_CLUSTER_NAME, CONFIG_CLUSTER_NAME)
    putIfConfigured(ES_CLIENT_TRANSPORT_SNIFF, CONFIG_CLIENT_TRANSPORT_SNIFF)
    putIfConfigured(ES_CLIENT_TRANSPORT_IGNORE_CLUSTER_NAME, CONFIG_CLIENT_TRANSPORT_IGNORE_CLUSTER_NAME)
    putIfConfigured(ES_CLIENT_TRANSPORT_PING_TIMEOUT, CONFIG_CLIENT_TRANSPORT_PING_TIMEOUT)
    putIfConfigured(ES_CLIENT_TRANSPORT_NODES_SAMPLER_INTERVAL, CONFIG_CLIENT_TRANSPORT_NODES_SAMPLER_INTERVAL)

    SparkEsTransportClientConf(
      transportAddresses = tempEsNodes,
      transportPort = tempEsPort,
      transportSettings = tempSettings.toMap
    )
  }
}
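
A minimal usage sketch, assuming only what the file above defines; the node
addresses, port, and cluster name are placeholder values:

import org.apache.spark.SparkConf
import com.github.jparkie.spark.elasticsearch.conf.SparkEsTransportClientConf

object SparkEsTransportClientConfExample {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .set("es.nodes", "10.0.0.1:9301,10.0.0.2")
      .set("es.port", "9300")
      .set("es.cluster.name", "my-cluster")

    // Yields transportAddresses = Seq("10.0.0.1:9301", "10.0.0.2"),
    // transportPort = 9300, transportSettings = Map("cluster.name" -> "my-cluster").
    val clientConf = SparkEsTransportClientConf.fromSparkConf(sparkConf)

    // The explicit ":9301" wins; the bare host falls back to transportPort,
    // resolving to 10.0.0.1:9301 and 10.0.0.2:9300.
    val addresses = SparkEsTransportClientConf.getTransportAddresses(
      clientConf.transportAddresses,
      clientConf.transportPort
    )
    addresses.foreach(println)
  }
}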



