/*
* Copyright 2019 Spotify AB.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package com.spotify.scio.jdbc.syntax

import com.spotify.scio.io.ClosedTap
import com.spotify.scio.jdbc.{JdbcConnectionOptions, JdbcIO, JdbcWrite}
import com.spotify.scio.jdbc.JdbcIO.WriteParam
import com.spotify.scio.values.SCollection
import org.apache.beam.sdk.io.jdbc.JdbcIO.{RetryConfiguration, Write}
import java.sql.{PreparedStatement, SQLException}
import javax.sql.DataSource

/** Enhanced version of [[com.spotify.scio.values.SCollection SCollection]] with JDBC methods. */
final class JdbcSCollectionOps[T](private val self: SCollection[T]) extends AnyVal {
/**
* Save this SCollection to a JDBC database.
*
* NB: in case of transient failures, Beam runners may execute parts of the write multiple times
* for fault tolerance. Because of that, you should avoid plain INSERT statements, which risk
* duplicating records in the database or failing due to primary key conflicts. Consider using
* MERGE ("upsert") statements supported by your database instead.
*
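* ==Example==
*
* A minimal usage sketch; the case class, table, driver class, and connection URL below are
* hypothetical and only illustrate the shape of the call:
*
* {{{
* case class User(id: Long, name: String)
*
* val users: SCollection[User] = ???
* users.saveAsJdbc(
*   JdbcConnectionOptions(
*     username = "user",
*     password = Some("secret"),
*     connectionUrl = "jdbc:postgresql://localhost:5432/mydb",
*     driverClass = classOf[org.postgresql.Driver]
*   ),
*   statement = "INSERT INTO users (id, name) VALUES (?, ?) ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name"
* ) { (user, ps) =>
*   // bind each element's fields to the statement's ? placeholders
*   ps.setLong(1, user.id)
*   ps.setString(2, user.name)
* }
* }}}
*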
* @param connectionOptions
* connection options
* @param statement
* the parameterized SQL write statement to execute, with `?` placeholders filled per element by
* `preparedStatementSetter`
* @param preparedStatementSetter
* function to set values in a [[java.sql.PreparedStatement]]
* @param batchSize
* number of records to write per batch; the Apache Beam default batch size is used if the value
* is -1
* @param retryConfiguration
* [[org.apache.beam.sdk.io.jdbc.JdbcIO.RetryConfiguration]] for specifying retry behavior
* @param retryStrategy
* a predicate on [[java.sql.SQLException]] indicating whether a failed statement should be
* retried
* @param autoSharding
* if true, writes with a dynamically determined number of shards
* @param dataSourceProviderFn
* function to provide a custom [[javax.sql.DataSource]]
* @param configOverride
* function to override or replace a Write transform before applying it
*/
def saveAsJdbc(
connectionOptions: JdbcConnectionOptions,
statement: String,
batchSize: Long = WriteParam.BeamDefaultBatchSize,
retryConfiguration: RetryConfiguration = WriteParam.BeamDefaultRetryConfiguration,
retryStrategy: SQLException => Boolean = WriteParam.DefaultRetryStrategy,
autoSharding: Boolean = WriteParam.DefaultAutoSharding,
dataSourceProviderFn: () => DataSource = WriteParam.DefaultDataSourceProviderFn,
configOverride: Write[T] => Write[T] = WriteParam.defaultConfigOverride[T]
)(preparedStatementSetter: (T, PreparedStatement) => Unit): ClosedTap[Nothing] =
self.write(JdbcWrite[T](connectionOptions, statement))(
JdbcIO.WriteParam(
preparedStatementSetter,
batchSize,
retryConfiguration,
retryStrategy,
autoSharding,
dataSourceProviderFn,
configOverride
)
)
}
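
/**
* Trait providing the implicit conversion that enriches
* [[com.spotify.scio.values.SCollection SCollection]] with the JDBC write syntax above. In
* typical scio code a plain `import com.spotify.scio.jdbc._` brings `saveAsJdbc` into scope
* (assuming the usual scio package-object wiring of its syntax traits).
*/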
trait SCollectionSyntax {
implicit def jdbcSCollectionOps[T](sc: SCollection[T]): JdbcSCollectionOps[T] =
new JdbcSCollectionOps(sc)
}