All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.spotify.scio.elasticsearch.package.scala Maven / Gradle / Ivy

/*
 * Copyright 2022 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio

import co.elastic.clients.elasticsearch.core.bulk.BulkOperation
import co.elastic.clients.json.jackson.JacksonJsonpMapper
import co.elastic.clients.json.JsonpMapper
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.spotify.scio.elasticsearch.ElasticsearchIO.{RetryConfig, WriteParam}
import com.spotify.scio.io.ClosedTap
import com.spotify.scio.values.SCollection
import org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write.BulkExecutionException
import org.apache.http.HttpHost
import org.joda.time.Duration

/**
 * Main package for Elasticsearch APIs. Import all.
 *
 * {{{
 * import com.spotify.scio.elasticsearch._
 * }}}
 */
package object elasticsearch extends CoderInstances {

  /**
   * Creates the [[JsonpMapper]] used by default for user JSON serialization.
   *
   * The mapper is Jackson-based; the Scala module and the `java.time` module are registered on
   * its underlying object mapper before it is returned.
   *
   * @return
   *   a newly constructed mapper (a fresh instance is built on every call)
   */
  def defaultMapper(): JsonpMapper = {
    val jacksonMapper = new JacksonJsonpMapper()
    val objectMapper = jacksonMapper.objectMapper()
    // Scala support first, then java.time support.
    objectMapper.registerModule(DefaultScalaModule)
    objectMapper.registerModule(new JavaTimeModule())
    jacksonMapper
  }

  /**
   * Options handed to the Elasticsearch IO.
   *
   * @param nodes
   *   HTTP hosts of the Elasticsearch nodes (presumably the cluster endpoints to connect to)
   * @param usernameAndPassword
   *   optional `(username, password)` pair; `None` by default
   * @param mapperFactory
   *   factory producing the [[JsonpMapper]] to use; defaults to [[defaultMapper]]
   */
  final case class ElasticsearchOptions(
    nodes: Seq[HttpHost],
    usernameAndPassword: Option[(String, String)] = None,
    mapperFactory: () => JsonpMapper = () => defaultMapper()
  )

  /** Adds Elasticsearch write methods to [[SCollection]]. */
  implicit class ElasticsearchSCollection[T](@transient private val self: SCollection[T])
      extends AnyVal {

    /**
     * Save this SCollection into Elasticsearch.
     *
     * All arguments are bundled into a `WriteParam` and passed, together with an
     * `ElasticsearchIO` built from `esOptions`, to `SCollection#write`.
     *
     * @param esOptions
     *   Elasticsearch options
     * @param flushInterval
     *   delay applied to Elasticsearch writes for rate limiting purposes
     * @param numOfShards
     *   number of parallel writes to be performed, recommended setting is the number of pipeline
     *   workers
     * @param maxBulkRequestOperations
     *   forwarded unchanged to `WriteParam`; presumably the maximum number of operations per bulk
     *   request (confirm against `ElasticsearchIO.WriteParam`)
     * @param maxBulkRequestBytes
     *   forwarded unchanged to `WriteParam`; presumably the maximum size in bytes of a bulk
     *   request (confirm against `ElasticsearchIO.WriteParam`)
     * @param errorFn
     *   function to handle errors raised when performing Elasticsearch bulk writes
     * @param retry
     *   retry configuration, forwarded unchanged to `WriteParam`
     * @param f
     *   function to transform an element of arbitrary type `T` into the Elasticsearch
     *   `BulkOperation`s to execute for it
     * @return
     *   the closed tap produced by the write
     */
    def saveAsElasticsearch(
      esOptions: ElasticsearchOptions,
      flushInterval: Duration = WriteParam.DefaultFlushInterval,
      numOfShards: Long = WriteParam.DefaultNumShards,
      maxBulkRequestOperations: Int = WriteParam.DefaultMaxBulkRequestOperations,
      maxBulkRequestBytes: Long = WriteParam.DefaultMaxBulkRequestBytes,
      errorFn: BulkExecutionException => Unit = WriteParam.DefaultErrorFn,
      retry: RetryConfig = WriteParam.DefaultRetryConfig
    )(f: T => Iterable[BulkOperation]): ClosedTap[Nothing] = {
      // NOTE: WriteParam takes its arguments positionally in a different order than this
      // method's signature (conversion function and error handler come first).
      val writeParam =
        WriteParam(
          f,
          errorFn,
          flushInterval,
          numOfShards,
          maxBulkRequestOperations,
          maxBulkRequestBytes,
          retry
        )

      self.write(ElasticsearchIO[T](esOptions))(writeParam)
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy