com.spotify.scio.jdbc.sharded.JdbcShardedReadOptions.scala Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of scio-jdbc_2.12 Show documentation

Scio add-on for JDBC

There is a newer version: 0.14.9

/*
 * Copyright 2020 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.jdbc.sharded

import java.sql.ResultSet
import com.spotify.scio.jdbc.JdbcConnectionOptions

/**
 * A bag of options for the JDBC sharded read.
 *
 * @param connectionOptions
 *   Connection options
 * @param tableName
 *   Name of a table or materialized view to read from
 * @param shardColumn
 *   Column to shard by. Should ideally have evenly distributed values. Column type must have a
 *   corresponding [[com.spotify.scio.jdbc.sharded.Shard]] implementation.
 * @param rowMapper
 *   Function to map from a SQL [[java.sql.ResultSet]] to `T`
 * @param fetchSize
 *   Amount of rows fetched per [[java.sql.ResultSet]]. Default value is 100000. To apply an
 *   unbounded fetch size set this parameter to -1
 * @param numShards
 *   Number of shards to split the table into for reading. There is no guarantee that Beam will
 *   actually execute reads in parallel. It is up to Beam auto scaler to decide the level of
 *   parallelism to use (number of workers and threads per worker). But the behavior could be
 *   controlled with maxNumWorkers and numberOfWorkerHarnessThreads parameters (see more details
 *   about these parameters here). Defaults to 4
 * @param shard
 *   An implementation of the [[com.spotify.scio.jdbc.sharded.Shard]] trait which knows how to shard
 *   a column of a type S. Example of sharding by a column of type Long:
 *   {{{
 *               sc.jdbcShardedSelect(getShardedReadOptions(opts), Shard.range[Long])
 *   }}}
 */
final case class JdbcShardedReadOptions[T, S](
  connectionOptions: JdbcConnectionOptions,
  tableName: String,
  shardColumn: String,
  shard: Shard[S],
  rowMapper: ResultSet => T,
  fetchSize: Int = JdbcShardedReadOptions.DefaultFetchSize,
  numShards: Int = JdbcShardedReadOptions.DefaultNumShards
)

object JdbcShardedReadOptions {

  val DefaultFetchSize: Int = 100000
  val UnboundedFetchSize: Int = -1
  val DefaultNumShards: Int = 4

}