// org.apache.beam.sdk.io.jdbc.JdbcIO Maven / Gradle / Ivy
//
// Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.sdk.io.jdbc;

import static org.apache.beam.sdk.io.jdbc.SchemaUtil.checkNullabilityForFields;
import static org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull;
import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument;
import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull;
import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState;

import com.google.auto.value.AutoValue;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.io.IOException;
import java.io.Serializable;
import java.net.URLClassLoader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import javax.sql.DataSource;
import org.apache.beam.sdk.coders.CannotProvideCoderException;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.CoderRegistry;
import org.apache.beam.sdk.coders.RowCoder;
import org.apache.beam.sdk.coders.VoidCoder;
import org.apache.beam.sdk.io.jdbc.JdbcIO.WriteFn.WriteFnSpec;
import org.apache.beam.sdk.io.jdbc.JdbcUtil.PartitioningFn;
import org.apache.beam.sdk.io.jdbc.SchemaUtil.FieldWithIndex;
import org.apache.beam.sdk.metrics.Distribution;
import org.apache.beam.sdk.metrics.Metrics;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
import org.apache.beam.sdk.schemas.NoSuchSchemaException;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.SchemaRegistry;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.Filter;
import org.apache.beam.sdk.transforms.GroupIntoBatches;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.Reshuffle;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.SerializableFunctions;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.transforms.Values;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.transforms.Wait;
import org.apache.beam.sdk.transforms.WithKeys;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.display.HasDisplayData;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.util.BackOff;
import org.apache.beam.sdk.util.BackOffUtils;
import org.apache.beam.sdk.util.FluentBackoff;
import org.apache.beam.sdk.util.Sleeper;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollection.IsBounded;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.PDone;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TypeDescriptor;
import org.apache.beam.sdk.values.TypeDescriptors;
import org.apache.beam.sdk.values.TypeDescriptors.TypeVariableExtractor;
import org.apache.commons.dbcp2.BasicDataSource;
import org.apache.commons.dbcp2.DataSourceConnectionFactory;
import org.apache.commons.dbcp2.PoolableConnectionFactory;
import org.apache.commons.dbcp2.PoolingDataSource;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.checkerframework.dataflow.qual.Pure;
import org.joda.time.Duration;
import org.joda.time.Instant;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * IO to read and write data on JDBC.
 *
 * <h3>Reading from JDBC datasource</h3>
 *
 * <p>JdbcIO source returns a bounded collection of {@code T} as a {@code PCollection<T>}. T is the
 * type returned by the provided {@link RowMapper}.
 *
 * <p>To configure the JDBC source, you have to provide a {@link DataSourceConfiguration} using<br>
 * 1. {@link DataSourceConfiguration#create(DataSource)} (which must be {@link Serializable});<br>
 * 2. or {@link DataSourceConfiguration#create(String, String)} (driver class name and url).
 * Optionally, {@link DataSourceConfiguration#withUsername(String)} and {@link
 * DataSourceConfiguration#withPassword(String)} allow you to define username and password.
 *
 * <p>For example:
 *
 * <pre>{@code
 * pipeline.apply(JdbcIO.<KV<Integer, String>>read()
 *   .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(
 *          "com.mysql.jdbc.Driver", "jdbc:mysql://hostname:3306/mydb")
 *        .withUsername("username")
 *        .withPassword("password"))
 *   .withQuery("select id,name from Person")
 *   .withRowMapper(new JdbcIO.RowMapper<KV<Integer, String>>() {
 *     public KV<Integer, String> mapRow(ResultSet resultSet) throws Exception {
 *       return KV.of(resultSet.getInt(1), resultSet.getString(2));
 *     }
 *   })
 * );
 * }</pre>
 *
 * <p>Query parameters can be configured using a user-provided {@link StatementPreparator}. For
 * example:
 *
 * <pre>{@code
 * pipeline.apply(JdbcIO.<KV<Integer, String>>read()
 *   .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(
 *          "com.mysql.jdbc.Driver", "jdbc:mysql://hostname:3306/mydb")
 *        .withUsername("username")
 *        .withPassword("password"))
 *   .withQuery("select id,name from Person where name = ?")
 *   .withStatementPreparator(new JdbcIO.StatementPreparator() {
 *     public void setParameters(PreparedStatement preparedStatement) throws Exception {
 *       preparedStatement.setString(1, "Darwin");
 *     }
 *   })
 *   .withRowMapper(new JdbcIO.RowMapper<KV<Integer, String>>() {
 *     public KV<Integer, String> mapRow(ResultSet resultSet) throws Exception {
 *       return KV.of(resultSet.getInt(1), resultSet.getString(2));
 *     }
 *   })
 * );
 * }</pre>
 *
 * <p>To customize the building of the {@link DataSource} we can provide a {@link
 * SerializableFunction}. For example if you need to provide a {@link PoolingDataSource} from an
 * existing {@link DataSourceConfiguration}: you can use a {@link PoolableDataSourceProvider}:
 *
 * <pre>{@code
 * pipeline.apply(JdbcIO.<KV<Integer, String>>read()
 *   .withDataSourceProviderFn(JdbcIO.PoolableDataSourceProvider.of(
 *       JdbcIO.DataSourceConfiguration.create(
 *               "com.mysql.jdbc.Driver", "jdbc:mysql://hostname:3306/mydb")
 *           .withUsername("username")
 *           .withPassword("password")))
 *    // ...
 * );
 * }</pre>
 *
 * <p>By default, the provided function requests a DataSource per execution thread. In some
 * circumstances this can quickly overwhelm the database by requesting too many connections. In that
 * case you should look into sharing a single instance of a {@link PoolingDataSource} across all the
 * execution threads. For example:
 *
 * <pre><code>
 * private static class MyDataSourceProviderFn
 *     implements {@literal SerializableFunction<Void, DataSource>} {
 *   private static transient DataSource dataSource;
 *
 *   {@literal @Override}
 *   public synchronized DataSource apply(Void input) {
 *     if (dataSource == null) {
 *       dataSource = ... build data source ...
 *     }
 *     return dataSource;
 *   }
 * }
 * {@literal
 * pipeline.apply(JdbcIO.<KV<Integer, String>>read()
 *   .withDataSourceProviderFn(new MyDataSourceProviderFn())
 *   // ...
 * );
 * }</code></pre>
 *
 * <h3>Parallel reading from a JDBC datasource</h3>
 *
 * <p>Beam supports partitioned reading of all data from a table. Automatic partitioning is
 * supported for a few data types: {@link Long}, {@link org.joda.time.DateTime}, {@link String}. To
 * enable this, use {@link JdbcIO#readWithPartitions(TypeDescriptor)}.
 *
 * <p>The partitioning scheme depends on these parameters, which can be user-provided, or
 * automatically inferred by Beam (for the supported types):
 *
 * <ul>
 *   <li>Upper bound
 *   <li>Lower bound
 *   <li>Number of partitions - when auto-inferred, the number of partitions defaults to the square
 *       root of the number of rows divided by 5 (i.e.: {@code Math.floor(Math.sqrt(numRows) / 5)}).
 * </ul>
 *
 * <p>To trigger auto-inference of these parameters, the user just needs to not provide them. To
 * infer them automatically, Beam runs either of these statements:
 *
 * <ul>
 *   <li>{@code SELECT min(column), max(column), COUNT(*) from table} when none of the parameters is
 *       passed to the transform.
 *   <li>{@code SELECT min(column), max(column) from table} when only number of partitions is
 *       provided, but not upper or lower bounds.
 * </ul>
 *
 * <p><b>Should I use this transform?</b> Consider using this transform in the following
 * situations:
 *
 * <ul>
 *   <li>The partitioning column is indexed. This will help speed up the range queries.
 *   <li>Use auto-inference if the queries for bound and partition inference are efficient to
 *       execute in your DBMS.
 *   <li>The distribution of data over the partitioning column is <i>roughly uniform</i>. Uniformity
 *       is not mandatory, but this transform will work best in that situation.
 * </ul>
 *
 * <p>The following example shows usage of <b>auto-inferred ranges, number of partitions, and
 * schema</b>:
 *
 * <pre>{@code
 * pipeline.apply(JdbcIO.<Row>readWithPartitions()
 *  .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(
 *         "com.mysql.jdbc.Driver", "jdbc:mysql://hostname:3306/mydb")
 *       .withUsername("username")
 *       .withPassword("password"))
 *  .withTable("Person")
 *  .withPartitionColumn("id")
 *  .withRowOutput()
 * );
 * }</pre>
 *
 * <p>Instead of a full table you could also use a subquery in parentheses. The subquery can be
 * specified using Table option instead and partition columns can be qualified using the subquery
 * alias provided as part of Table. <b>Note</b> that a subquery may not perform as well with
 * auto-inferred ranges and partitions, because it may not rely on indices to speed up the
 * partitioning.
 *
 * <pre>{@code
 * pipeline.apply(JdbcIO.<KV<Integer, String>>readWithPartitions()
 *  .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(
 *         "com.mysql.jdbc.Driver", "jdbc:mysql://hostname:3306/mydb")
 *       .withUsername("username")
 *       .withPassword("password"))
 *  .withTable("(select id, name from Person) as subq")
 *  .withPartitionColumn("id")
 *  .withLowerBound(0)
 *  .withUpperBound(1000)
 *  .withNumPartitions(5)
 *  .withRowMapper(new JdbcIO.RowMapper<KV<Integer, String>>() {
 *    public KV<Integer, String> mapRow(ResultSet resultSet) throws Exception {
 *      return KV.of(resultSet.getInt(1), resultSet.getString(2));
 *    }
 *  })
 * );
 * }</pre>
 *
 * <h3>Writing to JDBC datasource</h3>
 *
 * <p>JDBC sink supports writing records into a database. It writes a {@link PCollection} to the
 * database by converting each T into a {@link PreparedStatement} via a user-provided {@link
 * PreparedStatementSetter}.
 *
 * <p>Like the source, to configure the sink, you have to provide a {@link DataSourceConfiguration}.
 *
 * <pre>{@code
 * pipeline
 *   .apply(...)
 *   .apply(JdbcIO.<KV<Integer, String>>write()
 *      .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(
 *            "com.mysql.jdbc.Driver", "jdbc:mysql://hostname:3306/mydb")
 *          .withUsername("username")
 *          .withPassword("password"))
 *      .withStatement("insert into Person values(?, ?)")
 *      .withPreparedStatementSetter(new JdbcIO.PreparedStatementSetter<KV<Integer, String>>() {
 *        public void setParameters(KV<Integer, String> element, PreparedStatement query)
 *          throws SQLException {
 *          query.setInt(1, element.getKey());
 *          query.setString(2, element.getValue());
 *        }
 *      })
 *    );
 * }</pre>
 *
 * <p>NB: in case of transient failures, Beam runners may execute parts of JdbcIO.Write multiple
 * times for fault tolerance. Because of that, you should avoid using {@code INSERT} statements,
 * since that risks duplicating records in the database, or failing due to primary key conflicts.
 * Consider using MERGE ("upsert") statements supported by your database instead.
 */
@SuppressWarnings({
  "rawtypes" // TODO(https://github.com/apache/beam/issues/20447)
})
public class JdbcIO {

  // SLF4J logger named after this class.
  private static final Logger LOG = LoggerFactory.getLogger(JdbcIO.class);

  /**
   * Read data from a JDBC datasource.
   *
   * <p>The returned transform is pre-configured with {@code DEFAULT_FETCH_SIZE} as its fetch size
   * and with output parallelization enabled.
   *
   * @param <T> Type of the data to be read.
   * @return a new {@link Read} transform carrying only the defaults above
   */
  public static <T> Read<T> read() {
    return new AutoValue_JdbcIO_Read.Builder<T>()
        .setFetchSize(DEFAULT_FETCH_SIZE)
        .setOutputParallelization(true)
        .build();
  }

  /**
   * Creates a transform that reads Beam {@link Row}s from a JDBC data source.
   *
   * <p>The returned {@link ReadRows} starts out with the default fetch size, output
   * parallelization switched on, and a statement preparator that sets no parameters.
   */
  public static ReadRows readRows() {
    return new AutoValue_JdbcIO_ReadRows.Builder()
        .setStatementPreparator(unusedStatement -> {})
        .setOutputParallelization(true)
        .setFetchSize(DEFAULT_FETCH_SIZE)
        .build();
  }

  /**
   * Like {@link #read}, but executes multiple instances of the query substituting each element of a
   * {@link PCollection} as query parameters.
   *
   * <p>The returned transform is pre-configured with {@code DEFAULT_FETCH_SIZE} as its fetch size
   * and with output parallelization enabled.
   *
   * @param <ParameterT> Type of the data representing query parameters.
   * @param <OutputT> Type of the data to be read.
   * @return a new {@link ReadAll} transform carrying only the defaults above
   */
  public static <ParameterT, OutputT> ReadAll<ParameterT, OutputT> readAll() {
    return new AutoValue_JdbcIO_ReadAll.Builder<ParameterT, OutputT>()
        .setFetchSize(DEFAULT_FETCH_SIZE)
        .setOutputParallelization(true)
        .build();
  }

  /**
   * Like {@link #readAll}, but executes multiple instances of the query on the same table
   * (subquery) using ranges.
   *
   * <p>The returned transform is pre-configured with {@code DEFAULT_NUM_PARTITIONS} partitions,
   * {@code DEFAULT_FETCH_SIZE} as its fetch size, and Beam-schema output disabled.
   *
   * @param <T> Type of the data to be read.
   * @param <PartitionColumnT> Type of the column used for partitioning.
   * @param partitioningColumnType descriptor of the partitioning column's type
   * @return a new {@link ReadWithPartitions} transform carrying only the defaults above
   */
  public static <T, PartitionColumnT> ReadWithPartitions<T, PartitionColumnT> readWithPartitions(
      TypeDescriptor<PartitionColumnT> partitioningColumnType) {
    return new AutoValue_JdbcIO_ReadWithPartitions.Builder<T, PartitionColumnT>()
        .setPartitionColumnType(partitioningColumnType)
        .setNumPartitions(DEFAULT_NUM_PARTITIONS)
        .setFetchSize(DEFAULT_FETCH_SIZE)
        .setUseBeamSchema(false)
        .build();
  }

  /**
   * Same as {@link #readWithPartitions(TypeDescriptor)} with the partitioning column type fixed to
   * {@link Long}.
   *
   * @param <T> Type of the data to be read.
   */
  public static <T> ReadWithPartitions<T, Long> readWithPartitions() {
    return JdbcIO.<T, Long>readWithPartitions(TypeDescriptors.longs());
  }

  // Batch size that writeVoid() puts on a freshly created transform.
  private static final long DEFAULT_BATCH_SIZE = 1000L;
  // Fetch size that the read factory methods put on a freshly created transform.
  private static final int DEFAULT_FETCH_SIZE = 50_000;
  // Default values used from fluent backoff.
  // NOTE(review): neither backoff constant is referenced in the visible part of the file.
  private static final Duration DEFAULT_INITIAL_BACKOFF = Duration.standardSeconds(1);
  private static final Duration DEFAULT_MAX_CUMULATIVE_BACKOFF = Duration.standardDays(1000);
  // Default value used for partitioning a table
  private static final int DEFAULT_NUM_PARTITIONS = 200;

  /**
   * Write data to a JDBC datasource.
   *
   * @param <T> Type of the data to be written.
   * @return a new, unconfigured {@link Write} transform
   */
  public static <T> Write<T> write() {
    return new Write<>();
  }

  /**
   * Creates a {@link WriteVoid} transform pre-configured with {@code DEFAULT_BATCH_SIZE}, a {@link
   * DefaultRetryStrategy}, and the retry configuration {@code RetryConfiguration.create(5, null,
   * Duration.standardSeconds(5))}.
   *
   * @param <T> Type of the data to be written.
   */
  public static <T> WriteVoid<T> writeVoid() {
    return new AutoValue_JdbcIO_WriteVoid.Builder<T>()
        .setBatchSize(DEFAULT_BATCH_SIZE)
        .setRetryStrategy(new DefaultRetryStrategy())
        .setRetryConfiguration(RetryConfiguration.create(5, null, Duration.standardSeconds(5)))
        .build();
  }

  /**
   * This is the default {@link RetryStrategy} we use to detect deadlocks. It tests whether {@link
   * SQLException#getSQLState()} equals 40001 or 40P01. 40001 is the SQL State used by most
   * databases to identify a deadlock, and 40P01 is specific to PostgreSQL (see the PostgreSQL
   * documentation).
   */
  public static class DefaultRetryStrategy implements RetryStrategy {
    /** SQL states for which a failed statement is considered retryable. */
    private static final Set<String> errorCodesToRetry =
        new HashSet<>(Arrays.asList("40001", "40P01"));

    /**
     * Decides whether the failure described by {@code e} should be retried.
     *
     * @param e the exception raised by the database
     * @return {@code true} only when {@code e} carries a non-null SQL state contained in the
     *     retryable set
     */
    @Override
    public boolean apply(SQLException e) {
      String sqlState = e.getSQLState();
      // Drivers may report no SQL state at all; such failures are never retried.
      return sqlState != null && errorCodesToRetry.contains(sqlState);
    }
  }

  // Private so the class cannot be instantiated from outside; transforms are obtained through the
  // static factory methods such as read() and write().
  private JdbcIO() {}

  /**
   * An interface used by {@link JdbcIO.Read} for converting each row of the {@link ResultSet} into
   * an element of the resulting {@link PCollection}.
   *
   * @param <T> Type of the element produced for each row.
   */
  @FunctionalInterface
  public interface RowMapper<T> extends Serializable {
    /**
     * Converts the row that {@code resultSet} is handed over with into one output element.
     *
     * @param resultSet the result set to read column values from
     * @return the element built from the row
     * @throws Exception if the row cannot be read or converted
     */
    T mapRow(ResultSet resultSet) throws Exception;
  }

  /**
   * A POJO describing a {@link DataSource}, either providing directly a {@link DataSource} or all
   * properties allowing to create a {@link DataSource}.
   */
  @AutoValue
  public abstract static class DataSourceConfiguration implements Serializable {

    @Pure
    abstract @Nullable ValueProvider getDriverClassName();

    @Pure
    abstract @Nullable ValueProvider getUrl();

    @Pure
    abstract @Nullable ValueProvider<@Nullable String> getUsername();

    @Pure
    abstract @Nullable ValueProvider<@Nullable String> getPassword();

    @Pure
    abstract @Nullable ValueProvider getConnectionProperties();

    @Pure
    abstract @Nullable ValueProvider> getConnectionInitSqls();

    @Pure
    abstract @Nullable ValueProvider getMaxConnections();

    @Pure
    abstract @Nullable ClassLoader getDriverClassLoader();

    @Pure
    abstract @Nullable ValueProvider getDriverJars();

    @Pure
    abstract @Nullable DataSource getDataSource();

    abstract Builder builder();

    @AutoValue.Builder
    abstract static class Builder {
      abstract Builder setDriverClassName(ValueProvider<@Nullable String> driverClassName);

      abstract Builder setUrl(ValueProvider<@Nullable String> url);

      abstract Builder setUsername(ValueProvider<@Nullable String> username);

      abstract Builder setPassword(ValueProvider<@Nullable String> password);

      abstract Builder setConnectionProperties(
          ValueProvider<@Nullable String> connectionProperties);

      abstract Builder setConnectionInitSqls(
          ValueProvider> connectionInitSqls);

      abstract Builder setMaxConnections(ValueProvider<@Nullable Integer> maxConnections);

      abstract Builder setDriverClassLoader(ClassLoader driverClassLoader);

      abstract Builder setDriverJars(ValueProvider driverJars);

      abstract Builder setDataSource(@Nullable DataSource dataSource);

      abstract DataSourceConfiguration build();
    }

    public static DataSourceConfiguration create(DataSource dataSource) {
      checkArgument(dataSource != null, "dataSource can not be null");
      checkArgument(dataSource instanceof Serializable, "dataSource must be Serializable");
      return new AutoValue_JdbcIO_DataSourceConfiguration.Builder()
          .setDataSource(dataSource)
          .build();
    }

    public static DataSourceConfiguration create(String driverClassName, String url) {
      checkArgument(driverClassName != null, "driverClassName can not be null");
      checkArgument(url != null, "url can not be null");
      return create(
          ValueProvider.StaticValueProvider.of(driverClassName),
          ValueProvider.StaticValueProvider.of(url));
    }

    public static DataSourceConfiguration create(
        ValueProvider<@Nullable String> driverClassName, ValueProvider<@Nullable String> url) {
      checkArgument(driverClassName != null, "driverClassName can not be null");
      checkArgument(url != null, "url can not be null");
      return new AutoValue_JdbcIO_DataSourceConfiguration.Builder()
          .setDriverClassName(driverClassName)
          .setUrl(url)
          .build();
    }

    public DataSourceConfiguration withUsername(@Nullable String username) {
      return withUsername(ValueProvider.StaticValueProvider.of(username));
    }

    public DataSourceConfiguration withUsername(ValueProvider<@Nullable String> username) {
      return builder().setUsername(username).build();
    }

    public DataSourceConfiguration withPassword(@Nullable String password) {
      return withPassword(ValueProvider.StaticValueProvider.of(password));
    }

    public DataSourceConfiguration withPassword(ValueProvider<@Nullable String> password) {
      return builder().setPassword(password).build();
    }

    /**
     * Sets the connection properties passed to driver.connect(...). Format of the string must be
     * [propertyName=property;]*
     *
     * 
NOTE - The "user" and "password" properties can be add via {@link #withUsername(String)},
     * {@link #withPassword(String)}, so they do not need to be included here.
     */
    public DataSourceConfiguration withConnectionProperties(String connectionProperties) {
      checkArgument(connectionProperties != null, "connectionProperties can not be null");
      return withConnectionProperties(ValueProvider.StaticValueProvider.of(connectionProperties));
    }

    /** Same as {@link #withConnectionProperties(String)} but accepting a ValueProvider. */
    public DataSourceConfiguration withConnectionProperties(
        ValueProvider<@Nullable String> connectionProperties) {
      checkArgument(connectionProperties != null, "connectionProperties can not be null");
      return builder().setConnectionProperties(connectionProperties).build();
    }

    /**
     * Sets the connection init sql statements to driver.connect(...).
     *
     * 
NOTE - This property is not applicable across databases. Only MySQL and MariaDB support
     * this. A Sql exception is thrown if your database does not support it.
     */
    public DataSourceConfiguration withConnectionInitSqls(
        Collection<@Nullable String> connectionInitSqls) {
      checkArgument(connectionInitSqls != null, "connectionInitSqls can not be null");
      return withConnectionInitSqls(ValueProvider.StaticValueProvider.of(connectionInitSqls));
    }

    /** Same as {@link #withConnectionInitSqls(Collection)} but accepting a ValueProvider. */
    public DataSourceConfiguration withConnectionInitSqls(
        ValueProvider> connectionInitSqls) {
      checkArgument(connectionInitSqls != null, "connectionInitSqls can not be null");
      checkArgument(!connectionInitSqls.get().isEmpty(), "connectionInitSqls can not be empty");
      return builder().setConnectionInitSqls(connectionInitSqls).build();
    }

    /** Sets the maximum total number of connections. Use a negative value for no limit. */
    public DataSourceConfiguration withMaxConnections(Integer maxConnections) {
      checkArgument(maxConnections != null, "maxConnections can not be null");
      return withMaxConnections(ValueProvider.StaticValueProvider.of(maxConnections));
    }

    /** Same as {@link #withMaxConnections(Integer)} but accepting a ValueProvider. */
    public DataSourceConfiguration withMaxConnections(
        ValueProvider<@Nullable Integer> maxConnections) {
      return builder().setMaxConnections(maxConnections).build();
    }

    /**
     * Sets the class loader instance to be used to load the JDBC driver. If not specified, the
     * default class loader is used.
     */
    public DataSourceConfiguration withDriverClassLoader(ClassLoader driverClassLoader) {
      checkArgument(driverClassLoader != null, "driverClassLoader can not be null");
      return builder().setDriverClassLoader(driverClassLoader).build();
    }

    /**
     * Comma separated paths for JDBC drivers. This method is filesystem agnostic and can be used
     * for all FileSystems supported by Beam If not specified, the default classloader is used to
     * load the jars.
     *
     * 
For example, gs://your-bucket/driver_jar1.jar,gs://your-bucket/driver_jar2.jar.
     */
    public DataSourceConfiguration withDriverJars(String driverJars) {
      checkArgument(driverJars != null, "driverJars can not be null");
      return withDriverJars(ValueProvider.StaticValueProvider.of(driverJars));
    }

    /** Same as {@link #withDriverJars(String)} but accepting a ValueProvider. */
    public DataSourceConfiguration withDriverJars(ValueProvider driverJars) {
      checkArgument(driverJars != null, "driverJars can not be null");
      return builder().setDriverJars(driverJars).build();
    }

    void populateDisplayData(DisplayData.Builder builder) {
      if (getDataSource() != null) {
        builder.addIfNotNull(DisplayData.item("dataSource", getDataSource().getClass().getName()));
      } else {
        builder.addIfNotNull(DisplayData.item("jdbcDriverClassName", getDriverClassName()));
        builder.addIfNotNull(DisplayData.item("jdbcUrl", getUrl()));
        builder.addIfNotNull(DisplayData.item("username", getUsername()));
        builder.addIfNotNull(DisplayData.item("driverJars", getDriverJars()));
      }
    }

    public DataSource buildDatasource() {
      if (getDataSource() == null) {
        BasicDataSource basicDataSource = new BasicDataSource();
        if (getDriverClassName() != null) {
          basicDataSource.setDriverClassName(getDriverClassName().get());
        }
        if (getUrl() != null) {
          basicDataSource.setUrl(getUrl().get());
        }
        if (getUsername() != null) {
          @SuppressWarnings(
              "nullness") // this is actually nullable, but apache commons dbcp2 not annotated
          @NonNull
          String username = getUsername().get();
          basicDataSource.setUsername(username);
        }
        if (getPassword() != null) {
          @SuppressWarnings(
              "nullness") // this is actually nullable, but apache commons dbcp2 not annotated
          @NonNull
          String password = getPassword().get();
          basicDataSource.setPassword(password);
        }
        if (getConnectionProperties() != null && getConnectionProperties().get() != null) {
          basicDataSource.setConnectionProperties(getConnectionProperties().get());
        }
        if (getConnectionInitSqls() != null
            && getConnectionInitSqls().get() != null
            && !getConnectionInitSqls().get().isEmpty()) {
          basicDataSource.setConnectionInitSqls(getConnectionInitSqls().get());
        }
        if (getMaxConnections() != null && getMaxConnections().get() != null) {
          basicDataSource.setMaxTotal(getMaxConnections().get());
        }
        if (getDriverClassLoader() != null) {
          basicDataSource.setDriverClassLoader(getDriverClassLoader());
        }
        if (getDriverJars() != null) {
          URLClassLoader classLoader =
              URLClassLoader.newInstance(JdbcUtil.saveFilesLocally(getDriverJars().get()));
          basicDataSource.setDriverClassLoader(classLoader);
        }

        return basicDataSource;
      }
      return getDataSource();
    }
  }

  /**
   * An interface used by the JdbcIO read transforms to set the query parameters of the {@link
   * PreparedStatement} before it is executed.
   */
  @FunctionalInterface
  public interface StatementPreparator extends Serializable {
    /** Sets the query parameters on {@code preparedStatement}. */
    void setParameters(PreparedStatement preparedStatement) throws Exception;
  }

  /** Implementation of {@link #readRows()}. */
  @AutoValue
  public abstract static class ReadRows extends PTransform> {

    @Pure
    abstract @Nullable SerializableFunction getDataSourceProviderFn();

    @Pure
    abstract @Nullable ValueProvider getQuery();

    @Pure
    abstract @Nullable StatementPreparator getStatementPreparator();

    @Pure
    abstract int getFetchSize();

    @Pure
    abstract boolean getOutputParallelization();

    abstract Builder toBuilder();

    @AutoValue.Builder
    abstract static class Builder {
      abstract Builder setDataSourceProviderFn(
          SerializableFunction dataSourceProviderFn);

      abstract Builder setQuery(ValueProvider query);

      abstract Builder setStatementPreparator(StatementPreparator statementPreparator);

      abstract Builder setFetchSize(int fetchSize);

      abstract Builder setOutputParallelization(boolean outputParallelization);

      abstract ReadRows build();
    }

    public ReadRows withDataSourceConfiguration(DataSourceConfiguration config) {
      return withDataSourceProviderFn(new DataSourceProviderFromDataSourceConfiguration(config));
    }

    public ReadRows withDataSourceProviderFn(
        SerializableFunction dataSourceProviderFn) {
      return toBuilder().setDataSourceProviderFn(dataSourceProviderFn).build();
    }

    public ReadRows withQuery(String query) {
      checkArgument(query != null, "query can not be null");
      return withQuery(ValueProvider.StaticValueProvider.of(query));
    }

    public ReadRows withQuery(ValueProvider query) {
      checkArgument(query != null, "query can not be null");
      return toBuilder().setQuery(query).build();
    }

    public ReadRows withStatementPreparator(StatementPreparator statementPreparator) {
      checkArgument(statementPreparator != null, "statementPreparator can not be null");
      return toBuilder().setStatementPreparator(statementPreparator).build();
    }

    /**
     * This method is used to set the size of the data that is going to be fetched and loaded in
     * memory per every database call. Please refer to: {@link java.sql.Statement#setFetchSize(int)}
     * It should ONLY be used if the default value throws memory errors.
     */
    public ReadRows withFetchSize(int fetchSize) {
      // Note that api.java.sql.Statement#setFetchSize says it only accepts values >= 0
      // and that MySQL supports using Integer.MIN_VALUE as a hint to stream the ResultSet instead
      // of loading it into memory. See
      // https://dev.mysql.com/doc/connector-j/8.0/en/connector-j-reference-implementation-notes.html for additional details.
      checkArgument(
          fetchSize >= 0 || fetchSize == Integer.MIN_VALUE,
          "fetch size must be >= 0 or equal to Integer.MIN_VALUE");
      return toBuilder().setFetchSize(fetchSize).build();
    }

    /**
     * Whether to reshuffle the resulting PCollection so results are distributed to all workers. The
     * default is to parallelize and should only be changed if this is known to be unnecessary.
     */
    public ReadRows withOutputParallelization(boolean outputParallelization) {
      return toBuilder().setOutputParallelization(outputParallelization).build();
    }

    @Override
    public PCollection expand(PBegin input) {
      ValueProvider query = checkStateNotNull(getQuery(), "withQuery() is required");
      SerializableFunction dataSourceProviderFn =
          checkStateNotNull(
              getDataSourceProviderFn(),
              "withDataSourceConfiguration() or withDataSourceProviderFn() is required");

      Schema schema = inferBeamSchema(dataSourceProviderFn.apply(null), query.get());
      PCollection rows =
          input.apply(
              JdbcIO.read()
                  .withDataSourceProviderFn(dataSourceProviderFn)
                  .withQuery(query)
                  .withCoder(RowCoder.of(schema))
                  .withRowMapper(SchemaUtil.BeamRowMapper.of(schema))
                  .withFetchSize(getFetchSize())
                  .withOutputParallelization(getOutputParallelization())
                  .withStatementPreparator(checkStateNotNull(getStatementPreparator())));
      rows.setRowSchema(schema);
      return rows;
    }

    /**
     * Derives a Beam {@link Schema} for a query by preparing it (without executing it) and
     * converting the prepared statement's result-set metadata.
     *
     * @param ds data source used to open a connection for the lookup
     * @param query SQL text whose result columns define the schema
     * @return the schema produced by {@code SchemaUtil.toBeamSchema} for the statement metadata
     * @throws BeamSchemaInferenceException if a {@link SQLException} is raised along the way
     */
    // Suppression carried over unchanged: Spotbugs was reported not to understand the
    // try-with-resources cleanup used here.
    @SuppressFBWarnings("OBL_UNSATISFIED_OBLIGATION")
    public static Schema inferBeamSchema(DataSource ds, String query) {
      try (Connection connection = ds.getConnection()) {
        // The statement is closed before the connection, exactly as resources unwind in reverse
        // order of acquisition.
        try (PreparedStatement prepared =
            connection.prepareStatement(
                query, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
          ResultSetMetaData resultMetadata =
              checkStateNotNull(prepared.getMetaData(), "could not get statement metadata");
          return SchemaUtil.toBeamSchema(resultMetadata);
        }
      } catch (SQLException e) {
        throw new BeamSchemaInferenceException("Failed to infer Beam schema", e);
      }
    }

    /**
     * Adds the query to the display data and lets the data source provider contribute its own
     * items when it implements {@link HasDisplayData}.
     *
     * @param builder the display data builder to populate
     */
    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      super.populateDisplayData(builder);
      builder.add(DisplayData.item("query", getQuery()));

      // Providers that do not implement HasDisplayData have nothing to add.
      Object providerFn = getDataSourceProviderFn();
      if (!(providerFn instanceof HasDisplayData)) {
        return;
      }
      ((HasDisplayData) providerFn).populateDisplayData(builder);
    }
  }

  /**
   * Implementation of {@link #read}.
   *
   * <p>Holds the settings of a single-query read. Every {@code with*} method returns a transform
   * rebuilt through {@link #toBuilder()} with one property replaced, and {@link #expand(PBegin)}
   * forwards the collected settings to {@code JdbcIO.readAll()}.
   */
  // NOTE(review): the generic type arguments in this class look like they were lost when the file
  // was extracted (see the dangling '>' after PTransform) — confirm against the canonical source.
  @AutoValue
  public abstract static class Read extends PTransform> {

    /** Function that supplies the data source; {@code null} until one is configured. */
    @Pure
    abstract @Nullable SerializableFunction getDataSourceProviderFn();

    /** The query to run; {@code null} until {@code withQuery} is called. */
    @Pure
    abstract @Nullable ValueProvider getQuery();

    /** Optional hook that binds parameters on the prepared statement. */
    @Pure
    abstract @Nullable StatementPreparator getStatementPreparator();

    /** Mapper applied to the result set; required by {@link #expand(PBegin)}. */
    @Pure
    abstract @Nullable RowMapper getRowMapper();

    /** Explicitly configured output coder, if any. */
    @Pure
    abstract @Nullable Coder getCoder();

    /** Fetch size forwarded to the underlying read. */
    @Pure
    abstract int getFetchSize();

    /** Whether the output should be reshuffled across workers. */
    @Pure
    abstract boolean getOutputParallelization();

    /** Returns a builder pre-populated with this transform's current settings. */
    @Pure
    abstract Builder toBuilder();

    /** AutoValue builder with one setter per property of {@link Read}. */
    @AutoValue.Builder
    abstract static class Builder {
      abstract Builder setDataSourceProviderFn(
          SerializableFunction dataSourceProviderFn);

      abstract Builder setQuery(ValueProvider query);

      abstract Builder setStatementPreparator(StatementPreparator statementPreparator);

      abstract Builder setRowMapper(RowMapper rowMapper);

      abstract Builder setCoder(Coder coder);

      abstract Builder setFetchSize(int fetchSize);

      abstract Builder setOutputParallelization(boolean outputParallelization);

      abstract Read build();
    }

    /**
     * Uses the given configuration as the data source by wrapping it in a {@code
     * DataSourceProviderFromDataSourceConfiguration}.
     */
    public Read withDataSourceConfiguration(final DataSourceConfiguration config) {
      return withDataSourceProviderFn(new DataSourceProviderFromDataSourceConfiguration(config));
    }

    /** Sets the function that supplies the data source. */
    public Read withDataSourceProviderFn(
        SerializableFunction dataSourceProviderFn) {
      return toBuilder().setDataSourceProviderFn(dataSourceProviderFn).build();
    }

    /** Sets a fixed query string; it is wrapped in a static {@link ValueProvider}. */
    public Read withQuery(String query) {
      checkArgument(query != null, "query can not be null");
      return withQuery(ValueProvider.StaticValueProvider.of(query));
    }

    /** Sets the query as a {@link ValueProvider}; the provider itself must not be null. */
    public Read withQuery(ValueProvider query) {
      checkArgument(query != null, "query can not be null");
      return toBuilder().setQuery(query).build();
    }

    /** Sets the hook used to bind parameters on the prepared statement; must not be null. */
    public Read withStatementPreparator(StatementPreparator statementPreparator) {
      checkArgumentNotNull(statementPreparator, "statementPreparator can not be null");
      return toBuilder().setStatementPreparator(statementPreparator).build();
    }

    /** Sets the mapper that turns each result-set row into an output element; must not be null. */
    public Read withRowMapper(RowMapper rowMapper) {
      checkArgumentNotNull(rowMapper, "rowMapper can not be null");
      return toBuilder().setRowMapper(rowMapper).build();
    }

    /**
     * Sets an explicit output coder.
     *
     * @deprecated {@link JdbcIO} is able to infer appropriate coders from other parameters.
     */
    @Deprecated
    public Read withCoder(Coder coder) {
      checkArgument(coder != null, "coder can not be null");
      return toBuilder().setCoder(coder).build();
    }

    /**
     * This method is used to set the size of the data that is going to be fetched and loaded in
     * memory per every database call. Please refer to: {@link java.sql.Statement#setFetchSize(int)}
     * It should ONLY be used if the default value throws memory errors.
     */
    public Read withFetchSize(int fetchSize) {
      // Note that api.java.sql.Statement#setFetchSize says it only accepts values >= 0
      // and that MySQL supports using Integer.MIN_VALUE as a hint to stream the ResultSet instead
      // of loading it into memory. See
      // https://dev.mysql.com/doc/connector-j/8.0/en/connector-j-reference-implementation-notes.html for additional details.
      checkArgument(
          fetchSize >= 0 || fetchSize == Integer.MIN_VALUE,
          "fetch size must be >= 0 or equal to Integer.MIN_VALUE");
      return toBuilder().setFetchSize(fetchSize).build();
    }

    /**
     * Whether to reshuffle the resulting PCollection so results are distributed to all workers. The
     * default is to parallelize and should only be changed if this is known to be unnecessary.
     */
    public Read withOutputParallelization(boolean outputParallelization) {
      return toBuilder().setOutputParallelization(outputParallelization).build();
    }

    /**
     * Expands into {@code JdbcIO.readAll()} applied to a collection created from a single {@code
     * null} element.
     *
     * <p>The query, row mapper and data source provider are verified with {@code
     * checkArgumentNotNull} before the delegate transform is assembled.
     *
     * @param input the pipeline root the read is applied to
     * @return the output of the delegated {@code readAll} transform
     */
    @Override
    public PCollection expand(PBegin input) {
      ValueProvider query = checkArgumentNotNull(getQuery(), "withQuery() is required");
      RowMapper rowMapper = checkArgumentNotNull(getRowMapper(), "withRowMapper() is required");
      SerializableFunction dataSourceProviderFn =
          checkArgumentNotNull(
              getDataSourceProviderFn(),
              "withDataSourceConfiguration() or withDataSourceProviderFn() is required");

      JdbcIO.ReadAll readAll =
          JdbcIO.readAll()
              .withDataSourceProviderFn(dataSourceProviderFn)
              .withQuery(query)
              .withRowMapper(rowMapper)
              .withFetchSize(getFetchSize())
              .withOutputParallelization(getOutputParallelization())
              .withParameterSetter(
                  // The element is ignored; only the optional statement preparator binds
                  // parameters on the statement.
                  (element, preparedStatement) -> {
                    if (getStatementPreparator() != null) {
                      getStatementPreparator().setParameters(preparedStatement);
                    }
                  });

      // An explicitly configured coder is copied straight into the ReadAll builder.
      @Nullable Coder coder = getCoder();
      if (coder != null) {
        readAll = readAll.toBuilder().setCoder(coder).build();
      }
      return input.apply(Create.of((Void) null)).apply(readAll);
    }

    /**
     * Registers the query, the row mapper and coder class names (when set), and any display data
     * contributed by the data source provider.
     */
    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      super.populateDisplayData(builder);
      builder.add(DisplayData.item("query", getQuery()));

      if (getRowMapper() != null) {
        builder.add(DisplayData.item("rowMapper", getRowMapper().getClass().getName()));
      }
      if (getCoder() != null) {
        builder.add(DisplayData.item("coder", getCoder().getClass().getName()));
      }
      // Only providers that implement HasDisplayData have anything to contribute.
      if (getDataSourceProviderFn() instanceof HasDisplayData) {
        ((HasDisplayData) getDataSourceProviderFn()).populateDisplayData(builder);
      }
    }
  }

  /**
   * Implementation of {@link #readAll}.
   *
   * <p>Runs the configured query once for every element of the input collection. The parameter
   * setter binds the element to the prepared statement and the row mapper converts each row of the
   * result set into an output element. Every {@code with*} method returns a transform rebuilt
   * through {@link #toBuilder()} with one property replaced.
   */
  // NOTE(review): the generic type arguments in this class look like they were lost when the file
  // was extracted (see the dangling ',' and '>' tokens) — confirm against the canonical source.
  @AutoValue
  public abstract static class ReadAll
      extends PTransform, PCollection> {

    /** Function that supplies the data source; {@code null} until one is configured. */
    @Pure
    abstract @Nullable SerializableFunction getDataSourceProviderFn();

    /** The query to run for each input element; {@code null} until {@code withQuery} is called. */
    @Pure
    abstract @Nullable ValueProvider getQuery();

    /** Binds an input element to the prepared statement; required by {@code expand}. */
    @Pure
    abstract @Nullable PreparedStatementSetter getParameterSetter();

    /** Mapper applied to the result set; required by {@code expand}. */
    @Pure
    abstract @Nullable RowMapper getRowMapper();

    /** Explicitly configured output coder, if any. */
    @Pure
    abstract @Nullable Coder getCoder();

    /** Fetch size applied to every prepared statement. */
    abstract int getFetchSize();

    /** Whether the output should be reshuffled across workers. */
    abstract boolean getOutputParallelization();

    /** Returns a builder pre-populated with this transform's current settings. */
    abstract Builder toBuilder();

    /** AutoValue builder with one setter per property of {@link ReadAll}. */
    @AutoValue.Builder
    abstract static class Builder {
      abstract Builder setDataSourceProviderFn(
          SerializableFunction dataSourceProviderFn);

      abstract Builder setQuery(ValueProvider query);

      abstract Builder setParameterSetter(
          PreparedStatementSetter parameterSetter);

      abstract Builder setRowMapper(RowMapper rowMapper);

      abstract Builder setCoder(Coder coder);

      abstract Builder setFetchSize(int fetchSize);

      abstract Builder setOutputParallelization(boolean outputParallelization);

      abstract ReadAll build();
    }

    /**
     * Uses the given configuration as the data source by wrapping it in a {@code
     * DataSourceProviderFromDataSourceConfiguration}.
     */
    public ReadAll withDataSourceConfiguration(
        DataSourceConfiguration config) {
      return withDataSourceProviderFn(new DataSourceProviderFromDataSourceConfiguration(config));
    }

    /**
     * Sets the function that supplies the data source.
     *
     * @throws IllegalArgumentException if a data source provider has already been configured
     */
    public ReadAll withDataSourceProviderFn(
        SerializableFunction dataSourceProviderFn) {
      if (getDataSourceProviderFn() != null) {
        throw new IllegalArgumentException(
            "A dataSourceConfiguration or dataSourceProviderFn has "
                + "already been provided, and does not need to be provided again.");
      }
      return toBuilder().setDataSourceProviderFn(dataSourceProviderFn).build();
    }

    /** Sets a fixed query string; it is wrapped in a static {@link ValueProvider}. */
    public ReadAll withQuery(String query) {
      checkArgument(query != null, "JdbcIO.readAll().withQuery(query) called with null query");
      return withQuery(ValueProvider.StaticValueProvider.of(query));
    }

    /** Sets the query as a {@link ValueProvider}; the provider itself must not be null. */
    public ReadAll withQuery(ValueProvider query) {
      checkArgument(query != null, "JdbcIO.readAll().withQuery(query) called with null query");
      return toBuilder().setQuery(query).build();
    }

    /** Sets the callback that binds each input element to the statement; must not be null. */
    public ReadAll withParameterSetter(
        PreparedStatementSetter parameterSetter) {
      // The message names the actual argument (it previously referred to a statementPreparator).
      checkArgumentNotNull(
          parameterSetter,
          "JdbcIO.readAll().withParameterSetter(parameterSetter) called "
              + "with null parameterSetter");
      return toBuilder().setParameterSetter(parameterSetter).build();
    }

    /** Sets the mapper that turns each result-set row into an output element; must not be null. */
    public ReadAll withRowMapper(RowMapper rowMapper) {
      checkArgument(
          rowMapper != null,
          "JdbcIO.readAll().withRowMapper(rowMapper) called with null rowMapper");
      return toBuilder().setRowMapper(rowMapper).build();
    }

    /**
     * Sets an explicit output coder.
     *
     * @deprecated {@link JdbcIO} is able to infer appropriate coders from other parameters.
     */
    @Deprecated
    public ReadAll withCoder(Coder coder) {
      checkArgument(coder != null, "JdbcIO.readAll().withCoder(coder) called with null coder");
      return toBuilder().setCoder(coder).build();
    }

    /**
     * This method is used to set the size of the data that is going to be fetched and loaded in
     * memory per every database call. Please refer to: {@link java.sql.Statement#setFetchSize(int)}
     * It should ONLY be used if the default value throws memory errors.
     */
    public ReadAll withFetchSize(int fetchSize) {
      // Note that api.java.sql.Statement#setFetchSize says it only accepts values >= 0
      // and that MySQL supports using Integer.MIN_VALUE as a hint to stream the ResultSet instead
      // of loading it into memory. See
      // https://dev.mysql.com/doc/connector-j/8.0/en/connector-j-reference-implementation-notes.html for additional details.
      checkArgument(
          fetchSize >= 0 || fetchSize == Integer.MIN_VALUE,
          "fetch size must be >= 0 or equal to Integer.MIN_VALUE");
      return toBuilder().setFetchSize(fetchSize).build();
    }

    /**
     * Whether to reshuffle the resulting PCollection so results are distributed to all workers. The
     * default is to parallelize and should only be changed if this is known to be unnecessary.
     */
    public ReadAll withOutputParallelization(boolean outputParallelization) {
      return toBuilder().setOutputParallelization(outputParallelization).build();
    }

    /**
     * Picks the output coder: the explicitly configured one if present, otherwise a schema coder
     * for the row mapper's output type, otherwise whatever the coder registry offers.
     *
     * @return the coder, or {@code null} when neither registry can provide one
     */
    private @Nullable Coder inferCoder(
        CoderRegistry registry, SchemaRegistry schemaRegistry) {
      if (getCoder() != null) {
        return getCoder();
      } else {
        // The output type is read off the RowMapper's type parameter.
        RowMapper rowMapper = getRowMapper();
        TypeDescriptor outputType =
            TypeDescriptors.extractFromTypeParameters(
                rowMapper,
                RowMapper.class,
                new TypeVariableExtractor, OutputT>() {});
        try {
          return schemaRegistry.getSchemaCoder(outputType);
        } catch (NoSuchSchemaException e) {
          LOG.warn(
              "Unable to infer a schema for type {}. Attempting to infer a coder without a schema.",
              outputType);
        }
        try {
          return registry.getCoder(outputType);
        } catch (CannotProvideCoderException e) {
          LOG.warn("Unable to infer a coder for type {}", outputType);
          return null;
        }
      }
    }

    /**
     * Applies the read to every input element, sets the output coder, optionally reparallelizes
     * the output and, when a schema is registered for the output type, attaches it.
     *
     * <p>All required settings are verified with descriptive messages. The row mapper is checked
     * first because coder inference passes it on to type extraction.
     *
     * @param input the elements to run the query for
     * @return the rows produced for all input elements
     */
    @Override
    public PCollection expand(PCollection input) {
      // Validate before inferCoder(), which hands the row mapper to type extraction unchecked.
      checkStateNotNull(getRowMapper(), "withRowMapper() is required");
      Coder coder =
          inferCoder(
              input.getPipeline().getCoderRegistry(), input.getPipeline().getSchemaRegistry());
      checkStateNotNull(
          coder,
          "Unable to infer a coder for JdbcIO.readAll() transform. "
              + "Provide a coder via withCoder, or ensure that one can be inferred from the"
              + " provided RowMapper.");
      PCollection output =
          input
              .apply(
                  ParDo.of(
                      new ReadFn<>(
                          checkStateNotNull(
                              getDataSourceProviderFn(),
                              "withDataSourceConfiguration() or withDataSourceProviderFn() "
                                  + "is required"),
                          checkStateNotNull(getQuery(), "withQuery() is required"),
                          checkStateNotNull(
                              getParameterSetter(), "withParameterSetter() is required"),
                          checkStateNotNull(getRowMapper(), "withRowMapper() is required"),
                          getFetchSize())))
              .setCoder(coder);

      if (getOutputParallelization()) {
        output = output.apply(new Reparallelize<>());
      }

      try {
        TypeDescriptor typeDesc = coder.getEncodedTypeDescriptor();
        SchemaRegistry registry = input.getPipeline().getSchemaRegistry();
        Schema schema = registry.getSchema(typeDesc);
        output.setSchema(
            schema,
            typeDesc,
            registry.getToRowFunction(typeDesc),
            registry.getFromRowFunction(typeDesc));
      } catch (NoSuchSchemaException ignored) {
        // No schema is registered for the output type; the output is returned without one.
      }

      return output;
    }

    /**
     * Registers the query, the row mapper and coder class names (when set), and any display data
     * contributed by the data source provider.
     */
    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      super.populateDisplayData(builder);
      builder.add(DisplayData.item("query", getQuery()));

      if (getRowMapper() != null) {
        builder.add(DisplayData.item("rowMapper", getRowMapper().getClass().getName()));
      }
      if (getCoder() != null) {
        builder.add(DisplayData.item("coder", getCoder().getClass().getName()));
      }
      // Only providers that implement HasDisplayData have anything to contribute.
      if (getDataSourceProviderFn() instanceof HasDisplayData) {
        ((HasDisplayData) getDataSourceProviderFn()).populateDisplayData(builder);
      }
    }
  }

  /**
   * Implementation of {@link #readWithPartitions}.
   *
   * <p>Reads a table in ranges of a partition column. The bounds of the column, and the number of
   * partitions, are either supplied by the caller or obtained from the database with a {@code
   * min}/{@code max} (and, if needed, {@code count(*)}) query. Each range is then read with {@code
   * JdbcIO.readAll()}.
   */
  // NOTE(review): the generic type arguments in this class look like they were lost when the file
  // was extracted (see the dangling '>' and ',' tokens) — confirm against the canonical source.
  @AutoValue
  public abstract static class ReadWithPartitions
      extends PTransform> {

    /** Function that supplies the data source; {@code null} until one is configured. */
    @Pure
    abstract @Nullable SerializableFunction getDataSourceProviderFn();

    /** Mapper applied to the result set; mutually exclusive with {@link #withRowOutput()}. */
    @Pure
    abstract @Nullable RowMapper getRowMapper();

    /** Explicitly configured output coder, if any. */
    @Pure
    abstract @Nullable Coder getCoder();

    /** Number of partitions, or {@code null} to derive it from the table's row count. */
    @Pure
    abstract @Nullable Integer getNumPartitions();

    /** Name of the column the ranges are built on. */
    @Pure
    abstract @Nullable String getPartitionColumn();

    /** Fetch size forwarded to the underlying reads. */
    @Pure
    abstract int getFetchSize();

    /** Whether the output is produced as Beam rows with a schema inferred from the table. */
    @Pure
    abstract boolean getUseBeamSchema();

    /** Lower bound of the partition column, or {@code null} to query it from the database. */
    @Pure
    abstract @Nullable PartitionColumnT getLowerBound();

    /** Upper bound of the partition column, or {@code null} to query it from the database. */
    @Pure
    abstract @Nullable PartitionColumnT getUpperBound();

    /** Table (or sub-query) that is read. */
    @Pure
    abstract @Nullable String getTable();

    /** Type of the partition column; selects the partitioning helper. */
    @Pure
    abstract TypeDescriptor getPartitionColumnType();

    /** Returns a builder pre-populated with this transform's current settings. */
    @Pure
    abstract Builder toBuilder();

    /** AutoValue builder with one setter per property of {@link ReadWithPartitions}. */
    @AutoValue.Builder
    abstract static class Builder {

      abstract Builder setDataSourceProviderFn(
          SerializableFunction dataSourceProviderFn);

      abstract Builder setRowMapper(RowMapper rowMapper);

      abstract Builder setCoder(Coder coder);

      abstract Builder setNumPartitions(int numPartitions);

      abstract Builder setPartitionColumn(String partitionColumn);

      abstract Builder setLowerBound(PartitionColumnT lowerBound);

      abstract Builder setUpperBound(PartitionColumnT upperBound);

      abstract Builder setUseBeamSchema(boolean useBeamSchema);

      abstract Builder setFetchSize(int fetchSize);

      abstract Builder setTable(String tableName);

      abstract Builder setPartitionColumnType(
          TypeDescriptor partitionColumnType);

      abstract ReadWithPartitions build();
    }

    /**
     * Uses the given configuration as the data source by wrapping it in a {@code
     * DataSourceProviderFromDataSourceConfiguration}.
     */
    public ReadWithPartitions withDataSourceConfiguration(
        final DataSourceConfiguration config) {
      return withDataSourceProviderFn(new DataSourceProviderFromDataSourceConfiguration(config));
    }

    /** Sets the function that supplies the data source. */
    public ReadWithPartitions withDataSourceProviderFn(
        SerializableFunction dataSourceProviderFn) {
      return toBuilder().setDataSourceProviderFn(dataSourceProviderFn).build();
    }

    /** Sets the mapper that turns each result-set row into an output element; must not be null. */
    public ReadWithPartitions withRowMapper(RowMapper rowMapper) {
      checkNotNull(rowMapper, "rowMapper can not be null");
      return toBuilder().setRowMapper(rowMapper).build();
    }

    /**
     * Sets an explicit output coder.
     *
     * @deprecated {@link JdbcIO} is able to infer appropriate coders from other parameters.
     */
    @Deprecated
    public ReadWithPartitions withCoder(Coder coder) {
      checkNotNull(coder, "coder can not be null");
      return toBuilder().setCoder(coder).build();
    }

    /**
     * The number of partitions. This, along with withLowerBound and withUpperBound, form partitions
     * strides for generated WHERE clause expressions used to split the column withPartitionColumn
     * evenly.
     *
     * @throws IllegalArgumentException if {@code numPartitions} is less than 1
     */
    public ReadWithPartitions withNumPartitions(int numPartitions) {
      checkArgument(numPartitions > 0, "numPartitions can not be less than 1");
      return toBuilder().setNumPartitions(numPartitions).build();
    }

    /** The name of a column of numeric type that will be used for partitioning. */
    public ReadWithPartitions withPartitionColumn(String partitionColumn) {
      checkNotNull(partitionColumn, "partitionColumn can not be null");
      return toBuilder().setPartitionColumn(partitionColumn).build();
    }

    /** The number of rows to fetch from the database in the same {@link ResultSet} round-trip. */
    public ReadWithPartitions withFetchSize(int fetchSize) {
      checkArgument(fetchSize > 0, "fetchSize can not be less than 1");
      return toBuilder().setFetchSize(fetchSize).build();
    }

    /** Data output type is {@link Row}, and schema is auto-inferred from the database. */
    public ReadWithPartitions withRowOutput() {
      return toBuilder().setUseBeamSchema(true).build();
    }

    /** Sets the lower bound of the partition column; must be provided together with the upper. */
    public ReadWithPartitions withLowerBound(PartitionColumnT lowerBound) {
      return toBuilder().setLowerBound(lowerBound).build();
    }

    /** Sets the upper bound of the partition column; must be provided together with the lower. */
    public ReadWithPartitions withUpperBound(PartitionColumnT upperBound) {
      return toBuilder().setUpperBound(upperBound).build();
    }

    /** Name of the table in the external database. Can be used to pass a user-defined subquery. */
    public ReadWithPartitions withTable(String tableName) {
      checkNotNull(tableName, "table can not be null");
      return toBuilder().setTable(tableName).build();
    }

    /** Result of {@code compareTo} for equal operands; used to compare the two bounds. */
    private static final int EQUAL = 0;

    /**
     * Validates the configuration, determines the partition bounds and count, splits them into
     * ranges and reads every range with {@code JdbcIO.readAll()}.
     *
     * <p>The table and partition column names are formatted verbatim into the generated SQL.
     *
     * @param input the pipeline root the read is applied to
     * @return the rows read from all ranges
     */
    @Override
    public PCollection expand(PBegin input) {
      SerializableFunction dataSourceProviderFn =
          checkStateNotNull(
              getDataSourceProviderFn(),
              "withDataSourceConfiguration() or withDataSourceProviderFn() is required");
      String partitionColumn =
          checkStateNotNull(getPartitionColumn(), "withPartitionColumn() is required");
      String table = checkStateNotNull(getTable(), "withTable() is required");
      checkArgument(
          // XOR: exactly one of the two output modes must be selected, not both and not neither.
          getUseBeamSchema() ^ (getRowMapper() != null),
          "Provide exactly one of withRowOutput() or withRowMapper() for "
              + "JdbcIO.readWithPartitions(). These are mutually exclusive.");
      checkArgument(
          (getUpperBound() != null) == (getLowerBound() != null),
          "When providing either lower or upper bound, both "
              + "parameters are mandatory for JdbcIO.readWithPartitions");
      if (getLowerBound() != null
          && getUpperBound() != null
          && getLowerBound() instanceof Comparable) {
        // Not all partition types are comparable. For example, LocalDateTime, which is a valid
        // partitioning type, is not Comparable, so we can't enforce this for all sorts of
        // partitioning.
        checkArgument(
            ((Comparable) getLowerBound()).compareTo(getUpperBound()) < EQUAL,
            "The lower bound of partitioning column is larger or equal than the upper bound");
      }
      checkNotNull(
          JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(getPartitionColumnType()),
          "readWithPartitions only supports the following types: %s",
          JdbcUtil.PRESET_HELPERS.keySet());

      PCollection>> params;

      if (getLowerBound() == null && getUpperBound() == null) {
        // No bounds supplied: ask the database for them, and also for the row count when the
        // number of partitions has to be derived.
        String query =
            String.format(
                "SELECT min(%s), max(%s) FROM %s", partitionColumn, partitionColumn, table);
        if (getNumPartitions() == null) {
          query =
              String.format(
                  "SELECT min(%s), max(%s), count(*) FROM %s",
                  partitionColumn, partitionColumn, table);
        }
        params =
            input
                .apply(
                    JdbcIO.>>read()
                        .withQuery(query)
                        .withDataSourceProviderFn(dataSourceProviderFn)
                        .withRowMapper(
                            checkStateNotNull(
                                JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(
                                    getPartitionColumnType())))
                        .withFetchSize(getFetchSize()))
                .apply(
                    MapElements.via(
                        new SimpleFunction<
                            KV>,
                            KV>>() {
                          @Override
                          public KV> apply(
                              KV> input) {
                            KV> result;
                            if (getNumPartitions() == null) {
                              // In this case, we use the table row count to infer a number of
                              // partitions.
                              // We take the square root of the number of rows, and divide it by 10
                              // to keep a relatively low number of partitions, given that an RDBMS
                              // cannot usually accept a very large number of connections.
                              long numPartitions =
                                  Math.max(
                                      1, Math.round(Math.floor(Math.sqrt(input.getKey()) / 10)));
                              result = KV.of(numPartitions, input.getValue());
                            } else {
                              result = KV.of(getNumPartitions().longValue(), input.getValue());
                            }
                            LOG.info(
                                "Inferred min: {} - max: {} - numPartitions: {}",
                                result.getValue().getKey(),
                                result.getValue().getValue(),
                                result.getKey());
                            return result;
                          }
                        }));
      } else {
        // Bounds supplied by the caller: nothing is queried, so the partition count cannot be
        // derived and has to be supplied as well.
        params =
            input.apply(
                Create.of(
                    KV.of(
                        checkStateNotNull(
                                getNumPartitions(),
                                "withNumPartitions() is required when withLowerBound() and "
                                    + "withUpperBound() are provided")
                            .longValue(),
                        KV.of(getLowerBound(), getUpperBound()))));
      }

      RowMapper rowMapper = null;
      Schema schema = null;
      if (getUseBeamSchema()) {
        // Row output: infer the schema from the table and map rows with it.
        schema =
            ReadRows.inferBeamSchema(
                dataSourceProviderFn.apply(null), String.format("SELECT * FROM %s", table));
        rowMapper = (RowMapper) SchemaUtil.BeamRowMapper.of(schema);
      } else {
        rowMapper = getRowMapper();
      }
      checkStateNotNull(rowMapper);

      // Turn (numPartitions, (lower, upper)) into individual ranges and spread them over workers.
      PCollection> ranges =
          params
              .apply("Partitioning", ParDo.of(new PartitioningFn<>(getPartitionColumnType())))
              .apply("Reshuffle partitions", Reshuffle.viaRandomKey());

      JdbcIO.ReadAll, T> readAll =
          JdbcIO., T>readAll()
              .withDataSourceProviderFn(dataSourceProviderFn)
              .withQuery(
                  String.format(
                      "select * from %1$s where %2$s >= ? and %2$s < ?", table, partitionColumn))
              .withRowMapper(rowMapper)
              .withFetchSize(getFetchSize())
              .withParameterSetter(
                  checkStateNotNull(
                          JdbcUtil.JdbcReadWithPartitionsHelper.getPartitionsHelper(
                              getPartitionColumnType()))
                      ::setParameters)
              .withOutputParallelization(false);

      if (getUseBeamSchema()) {
        checkStateNotNull(schema);
        readAll = readAll.withCoder((Coder) RowCoder.of(schema));
      } else if (getCoder() != null) {
        readAll = readAll.withCoder(getCoder());
      }

      return ranges.apply("Read ranges", readAll);
    }

    /**
     * Registers the row mapper, coder, partition column, table, partition count and bounds as
     * display data, using {@code "auto-infer"} for the values that were not configured, plus any
     * display data contributed by the data source provider.
     */
    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      super.populateDisplayData(builder);
      builder.add(
          DisplayData.item(
              "rowMapper",
              getRowMapper() == null
                  ? "auto-infer"
                  : getRowMapper().getClass().getCanonicalName()));
      if (getCoder() != null) {
        builder.add(DisplayData.item("coder", getCoder().getClass().getName()));
      }
      builder.add(DisplayData.item("partitionColumn", getPartitionColumn()));
      builder.add(DisplayData.item("table", getTable()));
      builder.add(
          DisplayData.item(
              "numPartitions",
              getNumPartitions() == null ? "auto-infer" : getNumPartitions().toString()));
      builder.add(
          DisplayData.item(
              "lowerBound", getLowerBound() == null ? "auto-infer" : getLowerBound().toString()));
      builder.add(
          DisplayData.item(
              "upperBound", getUpperBound() == null ? "auto-infer" : getUpperBound().toString()));
      // Only providers that implement HasDisplayData have anything to contribute.
      if (getDataSourceProviderFn() instanceof HasDisplayData) {
        ((HasDisplayData) getDataSourceProviderFn()).populateDisplayData(builder);
      }
    }
  }

  /**
   * {@link DoFn} that runs the configured SQL query for each input element and emits every mapped
   * row of the result set.
   *
   * <p>The data source is obtained in {@link #setup()}. A connection is opened on first use and
   * closed again when the bundle finishes or the function is torn down.
   */
  // NOTE(review): this class is instantiated with the diamond operator elsewhere in the file, so
  // its type parameters appear to have been lost in extraction — confirm.
  private static class ReadFn extends DoFn {

    private final SerializableFunction dataSourceProviderFn;
    private final ValueProvider query;
    private final PreparedStatementSetter parameterSetter;
    private final RowMapper rowMapper;
    private final int fetchSize;

    private @Nullable DataSource dataSource;
    private @Nullable Connection connection;

    /** Stores the collaborators; nothing is opened until {@link #setup()}. */
    private ReadFn(
        SerializableFunction dataSourceProviderFn,
        ValueProvider query,
        PreparedStatementSetter parameterSetter,
        RowMapper rowMapper,
        int fetchSize) {
      this.dataSourceProviderFn = dataSourceProviderFn;
      this.query = query;
      this.parameterSetter = parameterSetter;
      this.rowMapper = rowMapper;
      this.fetchSize = fetchSize;
    }

    /** Resolves the data source from the provider function. */
    @Setup
    public void setup() throws Exception {
      dataSource = dataSourceProviderFn.apply(null);
    }

    /** Returns the cached connection, opening one from the data source on first use. */
    private Connection getConnection() throws SQLException {
      Connection cached = this.connection;
      if (cached != null) {
        return cached;
      }
      Connection opened = checkStateNotNull(this.dataSource).getConnection();
      this.connection = opened;
      return opened;
    }

    /**
     * Executes the query with the current element bound as parameters and outputs each row mapped
     * by the row mapper.
     */
    @ProcessElement
    // Spotbugs seems to not understand the nested try-with-resources
    @SuppressFBWarnings({
      "OBL_UNSATISFIED_OBLIGATION",
      "ODR_OPEN_DATABASE_RESOURCE", // connection closed in finishbundle
    })
    public void processElement(ProcessContext context) throws Exception {
      // Only acquire the connection if we need to perform a read.
      Connection conn = getConnection();
      // PostgreSQL requires autocommit to be disabled to enable cursor streaming
      // see https://jdbc.postgresql.org/documentation/head/query.html#query-with-cursor
      LOG.info("Autocommit has been disabled");
      conn.setAutoCommit(false);
      try (PreparedStatement stmt =
          conn.prepareStatement(
              query.get(), ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
        stmt.setFetchSize(fetchSize);
        parameterSetter.setParameters(context.element(), stmt);
        try (ResultSet rows = stmt.executeQuery()) {
          for (boolean hasRow = rows.next(); hasRow; hasRow = rows.next()) {
            context.output(rowMapper.mapRow(rows));
          }
        }
      }
    }

    /** Releases the connection at the end of every bundle. */
    @FinishBundle
    public void finishBundle() throws Exception {
      cleanUpConnection();
    }

    /** Releases the connection when the function is torn down. */
    @Teardown
    public void tearDown() throws Exception {
      cleanUpConnection();
    }

    /** Closes the cached connection, if any, and always clears the field afterwards. */
    private void cleanUpConnection() throws Exception {
      Connection toClose = connection;
      if (toClose == null) {
        return;
      }
      try {
        toClose.close();
      } finally {
        // Forget the connection even when close() fails, so the next read opens a fresh one.
        connection = null;
      }
    }
  }

  /**
   * Retry settings for {@link JdbcIO}. The configuration carries the maximum number of attempts
   * together with the maximum and initial durations used for {@link FluentBackoff}.
   */
  @AutoValue
  public abstract static class RetryConfiguration implements Serializable {

    /** Maximum number of attempts; always greater than zero when built via {@code create}. */
    abstract int getMaxAttempts();

    /** Maximum duration of the retries. */
    abstract @Nullable Duration getMaxDuration();

    /** Initial duration of the retries. */
    abstract @Nullable Duration getInitialDuration();

    /** Returns a builder pre-populated with this configuration's values. */
    abstract RetryConfiguration.Builder builder();

    /** AutoValue builder with one setter per property of {@link RetryConfiguration}. */
    @AutoValue.Builder
    abstract static class Builder {
      abstract Builder setMaxAttempts(int maxAttempts);

      abstract Builder setMaxDuration(Duration maxDuration);

      abstract Builder setInitialDuration(Duration initialDuration);

      abstract RetryConfiguration build();
    }

    /**
     * Creates a retry configuration, substituting the JdbcIO defaults for any duration that is
     * {@code null} or zero.
     *
     * @param maxAttempts maximum number of attempts; must be greater than 0
     * @param maxDuration maximum duration, or {@code null}/zero for the default
     * @param initialDuration initial duration, or {@code null}/zero for the default
     * @return the resulting configuration
     * @throws IllegalArgumentException if {@code maxAttempts} is not greater than 0
     */
    public static RetryConfiguration create(
        int maxAttempts, @Nullable Duration maxDuration, @Nullable Duration initialDuration) {

      Duration effectiveMaxDuration =
          (maxDuration != null && !maxDuration.equals(Duration.ZERO))
              ? maxDuration
              : DEFAULT_MAX_CUMULATIVE_BACKOFF;

      Duration effectiveInitialDuration =
          (initialDuration != null && !initialDuration.equals(Duration.ZERO))
              ? initialDuration
              : DEFAULT_INITIAL_BACKOFF;

      checkArgument(maxAttempts > 0, "maxAttempts must be greater than 0");

      return new AutoValue_JdbcIO_RetryConfiguration.Builder()
          .setMaxAttempts(maxAttempts)
          .setInitialDuration(effectiveInitialDuration)
          .setMaxDuration(effectiveMaxDuration)
          .build();
    }
  }

  /**
   * Callback that binds the values of one element to the parameters of a {@link
   * PreparedStatement} before the statement is run.
   *
   * <p>It is accepted by {@code Write#withPreparedStatementSetter} and by {@code
   * ReadAll#withParameterSetter}. Implementations must be {@link Serializable}.
   */
  // NOTE(review): the element type T is used below but not declared on the interface here; the
  // type parameter list appears to have been lost in extraction — confirm.
  @FunctionalInterface
  public interface PreparedStatementSetter extends Serializable {
    /**
     * Sets the statement parameters for the given element.
     *
     * @param element the element whose values are bound
     * @param preparedStatement the statement to set the parameters on
     * @throws Exception if the parameters cannot be set
     */
    void setParameters(T element, PreparedStatement preparedStatement) throws Exception;
  }

  /**
   * An interface used to control if we retry the statements when a {@link SQLException} occurs. If
   * {@link RetryStrategy#apply(SQLException)} returns true, {@link Write} tries to replay the
   * statements. Implementations must be {@link Serializable}.
   */
  @FunctionalInterface
  public interface RetryStrategy extends Serializable {
    /**
     * Decides whether the statements should be retried after the given failure.
     *
     * @param sqlException the exception that was raised
     * @return {@code true} to retry, {@code false} to give up
     */
    boolean apply(SQLException sqlException);
  }

  /**
   * This class is used as the default return value of {@link JdbcIO#write()}.
   *
   * <p>All methods in this class delegate to the appropriate method of {@link JdbcIO.WriteVoid}.
   */
  public static class Write extends PTransform, PDone> {
    // The wrapped transform that holds the actual write configuration.
    WriteVoid inner;

    Write() {
      this(JdbcIO.writeVoid());
    }

    Write(WriteVoid inner) {
      this.inner = inner;
    }

    /** See {@link WriteVoid#withAutoSharding()}. */
    public Write withAutoSharding() {
      return new Write<>(inner.withAutoSharding());
    }

    /** See {@link WriteVoid#withDataSourceConfiguration(DataSourceConfiguration)}. */
    public Write withDataSourceConfiguration(DataSourceConfiguration config) {
      return new Write<>(inner.withDataSourceConfiguration(config));
    }

    /** See {@link WriteVoid#withDataSourceProviderFn(SerializableFunction)}. */
    public Write withDataSourceProviderFn(
        SerializableFunction dataSourceProviderFn) {
      return new Write<>(inner.withDataSourceProviderFn(dataSourceProviderFn));
    }

    /** See {@link WriteVoid#withStatement(String)}. */
    public Write withStatement(String statement) {
      return new Write<>(inner.withStatement(statement));
    }

    /** See {@link WriteVoid#withPreparedStatementSetter(PreparedStatementSetter)}. */
    public Write withPreparedStatementSetter(PreparedStatementSetter setter) {
      return new Write<>(inner.withPreparedStatementSetter(setter));
    }

    /** See {@link WriteVoid#withBatchSize(long)}. */
    public Write withBatchSize(long batchSize) {
      return new Write<>(inner.withBatchSize(batchSize));
    }

    /** See {@link WriteVoid#withRetryStrategy(RetryStrategy)}. */
    public Write withRetryStrategy(RetryStrategy retryStrategy) {
      return new Write<>(inner.withRetryStrategy(retryStrategy));
    }

    /** See {@link WriteVoid#withRetryConfiguration(RetryConfiguration)}. */
    public Write withRetryConfiguration(RetryConfiguration retryConfiguration) {
      return new Write<>(inner.withRetryConfiguration(retryConfiguration));
    }

    /** See {@link WriteVoid#withTable(String)}. */
    public Write withTable(String table) {
      return new Write<>(inner.withTable(table));
    }

    /**
     * Returns {@link WriteVoid} transform which can be used in {@link Wait#on(PCollection[])} to
     * wait until all data is written.
     *
     * <p>Example: write a {@link PCollection} to one database and then to another database, making
     * sure that writing a window of data to the second database starts only after the respective
     * window has been fully written to the first database.
     *
     * <pre>{@code
     * PCollection<Void> firstWriteResults = data.apply(JdbcIO.write()
     *     .withDataSourceConfiguration(CONF_DB_1).withResults());
     * data.apply(Wait.on(firstWriteResults))
     *     .apply(JdbcIO.write().withDataSourceConfiguration(CONF_DB_2));
     * }</pre>
     */
    public WriteVoid withResults() {
      return inner;
    }

    /**
     * Returns {@link WriteWithResults} transform that could return a specific result.
     *
     * <p>The retry strategy, retry configuration, data source provider, prepared statement setter,
     * statement, table and auto-sharding flag configured so far are copied to the returned
     * transform; the batch size is not.
     *
     * <p>See {@link WriteWithResults}
     */
    public  WriteWithResults withWriteResults(
        RowMapper rowMapper) {
      return new AutoValue_JdbcIO_WriteWithResults.Builder()
          .setRowMapper(rowMapper)
          .setRetryStrategy(inner.getRetryStrategy())
          .setRetryConfiguration(inner.getRetryConfiguration())
          .setDataSourceProviderFn(inner.getDataSourceProviderFn())
          .setPreparedStatementSetter(inner.getPreparedStatementSetter())
          .setStatement(inner.getStatement())
          .setTable(inner.getTable())
          .setAutoSharding(inner.getAutoSharding())
          .build();
    }

    /** Reports the display data of the wrapped {@link WriteVoid}. */
    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      inner.populateDisplayData(builder);
    }

    /** Applies the wrapped {@link WriteVoid} and discards its output, returning {@link PDone}. */
    @Override
    public PDone expand(PCollection input) {
      inner.expand(input);
      return PDone.in(input.getPipeline());
    }
  }

  /**
   * Groups the elements of {@code input} into batches of at most {@code batchSize} elements.
   *
   * <p>For an unbounded input every element is assigned the same empty-string key and batched with
   * {@link GroupIntoBatches}, using a maximum buffering duration of 200 milliseconds; when {@code
   * withAutoSharding} is {@code true} the grouping uses sharded keys. For a bounded input the
   * elements are accumulated per bundle and the remainder is flushed when the bundle finishes.
   *
   * @param input the elements to batch
   * @param withAutoSharding whether to use sharded keys for unbounded input; {@code null} is
   *     treated as {@code false}, and the flag is ignored for bounded input
   * @param batchSize the maximum number of elements that will be included in a batch
   * @return the batched elements
   */
  static  PCollection> batchElements(
      PCollection input, @Nullable Boolean withAutoSharding, long batchSize) {
    PCollection> iterables;
    if (input.isBounded() == IsBounded.UNBOUNDED) {
      PCollection> keyedInput = input.apply(WithKeys.of(""));
      GroupIntoBatches groupTransform =
          GroupIntoBatches.ofSize(batchSize)
              .withMaxBufferingDuration(Duration.millis(200));
      if (withAutoSharding != null && withAutoSharding) {
        // unbounded and withAutoSharding enabled, group into batches with shardedKey
        iterables = keyedInput.apply(groupTransform.withShardedKey()).apply(Values.create());
      } else {
        // unbounded and without auto sharding, group into batches of assigned max size
        iterables = keyedInput.apply(groupTransform).apply(Values.create());
      }
    } else {
      iterables =
          input.apply(
              ParDo.of(
                  new DoFn>() {
                    // Elements buffered for the current batch; null when nothing is buffered.
                    @Nullable List outputList;

                    @ProcessElement
                    public void process(ProcessContext c) {
                      if (outputList == null) {
                        outputList = new ArrayList<>();
                      }
                      outputList.add(c.element());
                      // Flush as soon as the batch is full, so that a batch never holds more than
                      // batchSize elements.
                      if (outputList.size() >= batchSize) {
                        c.output(outputList);
                        outputList = null;
                      }
                    }

                    @FinishBundle
                    public void finish(FinishBundleContext c) {
                      // Emit whatever is left over from the bundle as a final, smaller batch.
                      if (outputList != null && outputList.size() > 0) {
                        c.output(outputList, Instant.now(), GlobalWindow.INSTANCE);
                      }
                      outputList = null;
                    }
                  }));
    }
    return iterables;
  }

  /** Interface implemented by functions that sets prepared statement data. */
  @FunctionalInterface
  interface PreparedStatementSetCaller extends Serializable {
    /**
     * Copies one field of {@code element} into a parameter of {@code preparedStatement}.
     *
     * @param element the row holding the value to write
     * @param preparedStatement the statement whose parameter must be set
     * @param prepareStatementIndex position of the parameter; the caller in this file passes a
     *     zero-based position
     * @param schemaFieldWithIndex the schema field being written, with its index
     * @throws SQLException if the parameter cannot be set
     */
    void set(
        Row element,
        PreparedStatement preparedStatement,
        int prepareStatementIndex,
        SchemaUtil.FieldWithIndex schemaFieldWithIndex)
        throws SQLException;
  }

  /**
   * A {@link PTransform} to write to a JDBC datasource. Executes statements one by one.
   *
   * <p>The INSERT, UPDATE, and DELETE commands sometimes have an optional RETURNING clause that
   * supports obtaining data from modified rows while they are being manipulated. Output {@link
   * PCollection} of this transform is a collection of such returning results mapped by {@link
   * RowMapper}.
   */
  @AutoValue
  public abstract static class WriteWithResults
      extends PTransform, PCollection> {
    abstract @Nullable Boolean getAutoSharding();

    abstract @Nullable SerializableFunction getDataSourceProviderFn();

    abstract @Nullable ValueProvider getStatement();

    abstract @Nullable PreparedStatementSetter getPreparedStatementSetter();

    abstract @Nullable RetryStrategy getRetryStrategy();

    abstract @Nullable RetryConfiguration getRetryConfiguration();

    abstract @Nullable String getTable();

    abstract @Nullable RowMapper getRowMapper();

    abstract Builder toBuilder();

    @AutoValue.Builder
    abstract static class Builder {
      abstract Builder setDataSourceProviderFn(
          @Nullable SerializableFunction dataSourceProviderFn);

      abstract Builder setAutoSharding(@Nullable Boolean autoSharding);

      abstract Builder setStatement(@Nullable ValueProvider statement);

      abstract Builder setPreparedStatementSetter(
          @Nullable PreparedStatementSetter setter);

      abstract Builder setRetryStrategy(@Nullable RetryStrategy deadlockPredicate);

      abstract Builder setRetryConfiguration(@Nullable RetryConfiguration retryConfiguration);

      abstract Builder setTable(@Nullable String table);

      abstract Builder setRowMapper(RowMapper rowMapper);

      abstract WriteWithResults build();
    }

    /** Uses a data source built from the given configuration. */
    public WriteWithResults withDataSourceConfiguration(DataSourceConfiguration config) {
      return withDataSourceProviderFn(new DataSourceProviderFromDataSourceConfiguration(config));
    }

    /** Uses the given function to obtain the data source. */
    public WriteWithResults withDataSourceProviderFn(
        SerializableFunction dataSourceProviderFn) {
      return toBuilder().setDataSourceProviderFn(dataSourceProviderFn).build();
    }

    /** Sets the SQL statement to execute for each element. */
    public WriteWithResults withStatement(String statement) {
      return withStatement(ValueProvider.StaticValueProvider.of(statement));
    }

    /** Sets the SQL statement to execute for each element, supplied through a value provider. */
    public WriteWithResults withStatement(ValueProvider statement) {
      return toBuilder().setStatement(statement).build();
    }

    /** Sets the setter that binds each element to the parameters of the statement. */
    public WriteWithResults withPreparedStatementSetter(PreparedStatementSetter setter) {
      return toBuilder().setPreparedStatementSetter(setter).build();
    }

    /** If true, enables using a dynamically determined number of shards to write. */
    public WriteWithResults withAutoSharding() {
      return toBuilder().setAutoSharding(true).build();
    }

    /**
     * When a SQL exception occurs, {@link Write} uses this {@link RetryStrategy} to determine if it
     * will retry the statements. If {@link RetryStrategy#apply(SQLException)} returns {@code true},
     * then {@link Write} retries the statements.
     */
    public WriteWithResults withRetryStrategy(RetryStrategy retryStrategy) {
      checkArgument(retryStrategy != null, "retryStrategy can not be null");
      return toBuilder().setRetryStrategy(retryStrategy).build();
    }

    /**
     * When a SQL exception occurs, {@link Write} uses this {@link RetryConfiguration} to
     * exponentially back off and retry the statements based on the {@link RetryConfiguration}
     * mentioned.
     *
     * <p>Usage of RetryConfiguration -
     *
     * <pre>{@code
     * pipeline.apply(JdbcIO.write()
     *    .withWriteResults(...)
     *    .withDataSourceConfiguration(...)
     *    .withRetryStrategy(...)
     *    .withRetryConfiguration(JdbcIO.RetryConfiguration.
     *        create(5, Duration.standardSeconds(5), Duration.standardSeconds(1))));
     *
     * }</pre>
     *
     * <p>maxDuration and initialDuration are Nullable
     *
     * <pre>{@code
     * pipeline.apply(JdbcIO.write()
     *    .withWriteResults(...)
     *    .withDataSourceConfiguration(...)
     *    .withRetryStrategy(...)
     *    .withRetryConfiguration(JdbcIO.RetryConfiguration.
     *        create(5, null, null)));
     *
     * }</pre>
     */
    public WriteWithResults withRetryConfiguration(RetryConfiguration retryConfiguration) {
      checkArgument(retryConfiguration != null, "retryConfiguration can not be null");
      return toBuilder().setRetryConfiguration(retryConfiguration).build();
    }

    /** Sets the name of the destination table. */
    public WriteWithResults withTable(String table) {
      checkArgument(table != null, "table name can not be null");
      return toBuilder().setTable(table).build();
    }

    /** Sets the mapper that converts each returned result row into an output element. */
    public WriteWithResults withRowMapper(RowMapper rowMapper) {
      checkArgument(rowMapper != null, "result set getter can not be null");
      return toBuilder().setRowMapper(rowMapper).build();
    }

    /**
     * Validates the configuration, batches the input and writes it with a {@link WriteFn} that
     * returns results.
     *
     * @throws IllegalArgumentException if the statement, the prepared statement setter or the data
     *     source provider is missing, or if auto-sharding is enabled for a bounded input
     */
    @Override
    public PCollection expand(PCollection input) {
      checkArgument(getStatement() != null, "withStatement() is required");
      checkArgument(
          getPreparedStatementSetter() != null, "withPreparedStatementSetter() is required");
      checkArgument(
          (getDataSourceProviderFn() != null),
          "withDataSourceConfiguration() or withDataSourceProviderFn() is required");
      @Nullable Boolean autoSharding = getAutoSharding();
      // batchElements only applies auto-sharding to unbounded input, so reject it for bounded
      // input. A null or false flag means auto-sharding was not requested.
      checkArgument(
          autoSharding == null || !autoSharding || input.isBounded() == IsBounded.UNBOUNDED,
          "Autosharding is only supported for streaming pipelines.");

      PCollection> iterables =
          JdbcIO.batchElements(input, autoSharding, DEFAULT_BATCH_SIZE);
      return iterables.apply(
          ParDo.of(
              new WriteFn(
                  WriteFnSpec.builder()
                      .setRetryStrategy(getRetryStrategy())
                      .setDataSourceProviderFn(getDataSourceProviderFn())
                      .setPreparedStatementSetter(getPreparedStatementSetter())
                      .setRowMapper(getRowMapper())
                      .setStatement(getStatement())
                      .setRetryConfiguration(getRetryConfiguration())
                      .setReturnResults(true)
                      .setBatchSize(1L)
                      .build())));
    }
  }

  /**
   * A {@link PTransform} to write to a JDBC datasource. Executes statements in a batch, and returns
   * a trivial result.
   */
  @AutoValue
  public abstract static class WriteVoid extends PTransform, PCollection> {

    abstract @Nullable Boolean getAutoSharding();

    abstract @Nullable SerializableFunction getDataSourceProviderFn();

    abstract @Nullable ValueProvider getStatement();

    abstract long getBatchSize();

    abstract @Nullable PreparedStatementSetter getPreparedStatementSetter();

    abstract @Nullable RetryStrategy getRetryStrategy();

    abstract @Nullable RetryConfiguration getRetryConfiguration();

    abstract @Nullable String getTable();

    abstract Builder toBuilder();

    @AutoValue.Builder
    abstract static class Builder {
      abstract Builder setAutoSharding(Boolean autoSharding);

      abstract Builder setDataSourceProviderFn(
          SerializableFunction dataSourceProviderFn);

      abstract Builder setStatement(ValueProvider statement);

      abstract Builder setBatchSize(long batchSize);

      abstract Builder setPreparedStatementSetter(PreparedStatementSetter setter);

      abstract Builder setRetryStrategy(RetryStrategy deadlockPredicate);

      abstract Builder setRetryConfiguration(RetryConfiguration retryConfiguration);

      abstract Builder setTable(String table);

      abstract WriteVoid build();
    }

    /** If true, enables using a dynamically determined number of shards to write. */
    public WriteVoid withAutoSharding() {
      return toBuilder().setAutoSharding(true).build();
    }

    /** Uses a data source built from the given configuration. */
    public WriteVoid withDataSourceConfiguration(DataSourceConfiguration config) {
      return withDataSourceProviderFn(new DataSourceProviderFromDataSourceConfiguration(config));
    }

    /** Uses the given function to obtain the data source. */
    public WriteVoid withDataSourceProviderFn(
        SerializableFunction dataSourceProviderFn) {
      return toBuilder().setDataSourceProviderFn(dataSourceProviderFn).build();
    }

    /** Sets the SQL statement to execute for each element. */
    public WriteVoid withStatement(String statement) {
      return withStatement(ValueProvider.StaticValueProvider.of(statement));
    }

    /** Sets the SQL statement to execute for each element, supplied through a value provider. */
    public WriteVoid withStatement(ValueProvider statement) {
      return toBuilder().setStatement(statement).build();
    }

    /** Sets the setter that binds each element to the parameters of the statement. */
    public WriteVoid withPreparedStatementSetter(PreparedStatementSetter setter) {
      return toBuilder().setPreparedStatementSetter(setter).build();
    }

    /**
     * Provide a maximum size in number of SQL statement for the batch. Default is 1000.
     *
     * @param batchSize maximum batch size in number of statements
     */
    public WriteVoid withBatchSize(long batchSize) {
      checkArgument(batchSize > 0, "batchSize must be > 0, but was %s", batchSize);
      return toBuilder().setBatchSize(batchSize).build();
    }

    /**
     * When a SQL exception occurs, {@link Write} uses this {@link RetryStrategy} to determine if it
     * will retry the statements. If {@link RetryStrategy#apply(SQLException)} returns {@code true},
     * then {@link Write} retries the statements.
     */
    public WriteVoid withRetryStrategy(RetryStrategy retryStrategy) {
      checkArgument(retryStrategy != null, "retryStrategy can not be null");
      return toBuilder().setRetryStrategy(retryStrategy).build();
    }

    /**
     * When a SQL exception occurs, {@link Write} uses this {@link RetryConfiguration} to
     * exponentially back off and retry the statements based on the {@link RetryConfiguration}
     * mentioned.
     *
     * <p>Usage of RetryConfiguration -
     *
     * <pre>{@code
     * pipeline.apply(JdbcIO.write()
     *    .withDataSourceConfiguration(...)
     *    .withRetryStrategy(...)
     *    .withRetryConfiguration(JdbcIO.RetryConfiguration.
     *        create(5, Duration.standardSeconds(5), Duration.standardSeconds(1))));
     *
     * }</pre>
     *
     * <p>maxDuration and initialDuration are Nullable
     *
     * <pre>{@code
     * pipeline.apply(JdbcIO.write()
     *    .withDataSourceConfiguration(...)
     *    .withRetryStrategy(...)
     *    .withRetryConfiguration(JdbcIO.RetryConfiguration.
     *        create(5, null, null)));
     *
     * }</pre>
     */
    public WriteVoid withRetryConfiguration(RetryConfiguration retryConfiguration) {
      checkArgument(retryConfiguration != null, "retryConfiguration can not be null");
      return toBuilder().setRetryConfiguration(retryConfiguration).build();
    }

    /** Sets the name of the destination table. */
    public WriteVoid withTable(String table) {
      checkArgument(table != null, "table name can not be null");
      return toBuilder().setTable(table).build();
    }

    /**
     * Validates the configuration, batches the input and writes it with a {@link WriteFn} that does
     * not return results.
     *
     * <p>When the input has a schema and no statement/setter pair was configured, the statement and
     * the prepared statement setter are generated from the destination table, which requires a
     * table name and a connection to the database while the transform is being expanded.
     *
     * @throws IllegalArgumentException if a required setting is missing
     */
    @Override
    public PCollection expand(PCollection input) {
      WriteVoid spec = this;
      checkArgument(
          (spec.getDataSourceProviderFn() != null),
          "withDataSourceConfiguration() or withDataSourceProviderFn() is required");
      // fixme: validate invalid table input
      if (input.hasSchema() && !spec.hasStatementAndSetter()) {
        checkArgument(spec.getTable() != null, "table cannot be null if statement is not provided");
        List fields = spec.getFilteredFields(input.getSchema());
        spec =
            spec.toBuilder()
                .setStatement(spec.generateStatement(fields))
                .setPreparedStatementSetter(
                    new AutoGeneratedPreparedStatementSetter(fields, input.getToRowFunction()))
                .build();
      } else {
        checkArgument(spec.getStatement() != null, "withStatement() is required");
        checkArgument(
            spec.getPreparedStatementSetter() != null, "withPreparedStatementSetter() is required");
      }

      PCollection> iterables =
          JdbcIO.batchElements(input, getAutoSharding(), getBatchSize());

      return iterables
          .apply(
              ParDo.of(
                  new WriteFn(
                      WriteFnSpec.builder()
                          .setRetryConfiguration(spec.getRetryConfiguration())
                          .setRetryStrategy(spec.getRetryStrategy())
                          .setPreparedStatementSetter(spec.getPreparedStatementSetter())
                          .setDataSourceProviderFn(spec.getDataSourceProviderFn())
                          .setTable(spec.getTable())
                          .setStatement(spec.getStatement())
                          .setBatchSize(spec.getBatchSize())
                          .setReturnResults(false)
                          .build())))
          .setCoder(VoidCoder.of());
    }

    /** Builds the statement for the configured table from the given fields. */
    private StaticValueProvider generateStatement(List fields) {
      return StaticValueProvider.of(
          JdbcUtil.generateStatement(
              checkStateNotNull(getTable()),
              fields.stream().map(FieldWithIndex::getField).collect(Collectors.toList())));
    }

    /**
     * Matches the fields of the input schema against the columns of the destination table.
     *
     * <p>The table columns are read from the metadata of a {@code SELECT *} statement prepared
     * against the configured table.
     *
     * @param schema the schema of the input elements
     * @return the table fields that have a matching input field, each paired with the index of that
     *     input field
     * @throws IllegalStateException if the input schema has more fields than the table, if a table
     *     column without a matching input field fails the nullability check, or if not every input
     *     field could be matched
     * @throws RuntimeException if the table metadata cannot be read
     */
    // Spotbugs seems to not understand the multi-statement try-with-resources
    @SuppressFBWarnings("OBL_UNSATISFIED_OBLIGATION")
    private List getFilteredFields(Schema schema) {
      Schema tableSchema;

      // NOTE(review): the table name is interpolated into the statement as-is (see the fixme in
      // expand about validating the table input).
      try (Connection connection =
              checkStateNotNull(getDataSourceProviderFn()).apply(null).getConnection();
          PreparedStatement statement =
              connection.prepareStatement(String.format("SELECT * FROM %s", getTable()))) {
        ResultSetMetaData metadata =
            checkStateNotNull(statement.getMetaData(), "could not get statement metadata");
        tableSchema = SchemaUtil.toBeamSchema(metadata);
      } catch (SQLException e) {
        throw new RuntimeException("Error while determining columns from table: " + getTable(), e);
      }

      // The first placeholder is the input schema's field count, the second the table's.
      checkState(
          tableSchema.getFieldCount() >= schema.getFieldCount(),
          String.format(
              "Input schema has more fields (%s) than actual table (%s).%n\t"
                  + "Input schema fields: %s | Table fields: %s",
              schema.getFieldCount(),
              tableSchema.getFieldCount(),
              schema.getFields().stream()
                  .map(Schema.Field::getName)
                  .collect(Collectors.joining(", ")),
              tableSchema.getFields().stream()
                  .map(Schema.Field::getName)
                  .collect(Collectors.joining(", "))));

      // filter out missing fields from output table
      List missingFields =
          tableSchema.getFields().stream()
              .filter(
                  line ->
                      schema.getFields().stream()
                          .noneMatch(s -> s.getName().equalsIgnoreCase(line.getName())))
              .collect(Collectors.toList());

      // allow insert only if missing fields are nullable
      checkState(
          !checkNullabilityForFields(missingFields),
          "Non nullable fields are not allowed without a matching schema. "
              + "Fields %s were in the destination table but not in the input schema.",
          missingFields);

      List tableFilteredFields = new ArrayList<>();

      // Keep, in table order, every table field that matches an input field, remembering the
      // position of that input field in the input schema.
      for (Schema.Field tableField : tableSchema.getFields()) {
        for (Schema.Field f : schema.getFields()) {
          if (SchemaUtil.compareSchemaField(tableField, f)) {
            tableFilteredFields.add(FieldWithIndex.of(tableField, schema.getFields().indexOf(f)));
            break;
          }
        }
      }

      checkState(
          tableFilteredFields.size() == schema.getFieldCount(),
          "Provided schema doesn't match with database schema."
              + " Table has fields: %s"
              + " while provided schema has fields: %s",
          tableFilteredFields.stream()
              .map(f -> f.getIndex().toString() + "-" + f.getField().getName())
              .collect(Collectors.joining(",")),
          schema.getFieldNames().toString());

      return tableFilteredFields;
    }

    /**
     * A {@link org.apache.beam.sdk.io.jdbc.JdbcIO.PreparedStatementSetter} implementation that
     * calls related setters on prepared statement.
     */
    private class AutoGeneratedPreparedStatementSetter implements PreparedStatementSetter {

      private final List fields;
      private final SerializableFunction toRowFn;
      // One set-caller per entry of fields, at the same position.
      private final List preparedStatementFieldSetterList =
          new ArrayList<>();

      AutoGeneratedPreparedStatementSetter(
          List fieldsWithIndex, SerializableFunction toRowFn) {
        this.fields = fieldsWithIndex;
        this.toRowFn = toRowFn;
        IntStream.range(0, fields.size())
            .forEach(
                (index) -> {
                  Schema.FieldType fieldType = fields.get(index).getField().getType();
                  preparedStatementFieldSetterList.add(
                      JdbcUtil.getPreparedStatementSetCaller(fieldType));
                });
      }

      /**
       * Converts {@code element} to a {@link Row} when it is not one already and sets one
       * statement parameter per field.
       *
       * @throws RuntimeException if setting a parameter fails with a {@link SQLException} or a
       *     {@link NullPointerException}
       */
      @Override
      public void setParameters(T element, PreparedStatement preparedStatement) throws Exception {
        Row row = (element instanceof Row) ? (Row) element : toRowFn.apply(element);
        IntStream.range(0, fields.size())
            .forEach(
                (index) -> {
                  try {
                    preparedStatementFieldSetterList
                        .get(index)
                        .set(row, preparedStatement, index, fields.get(index));
                  } catch (SQLException | NullPointerException e) {
                    throw new RuntimeException("Error while setting data to preparedStatement", e);
                  }
                });
      }
    }

    /** Returns true when both a statement and a prepared statement setter were configured. */
    private boolean hasStatementAndSetter() {
      return getStatement() != null && getPreparedStatementSetter() != null;
    }
  }

  /**
   * A {@link PTransform} that returns the elements of its input unchanged, after forcing them to be
   * materialized and redistributed with a random-key reshuffle.
   */
  private static class Reparallelize extends PTransform, PCollection> {
    @Override
    public PCollection expand(PCollection input) {
      // See https://issues.apache.org/jira/browse/BEAM-2803
      // We use a combined approach to "break fusion" here:
      // (see https://cloud.google.com/dataflow/service/dataflow-service-desc#preventing-fusion)
      // 1) force the data to be materialized by passing it as a side input to an identity fn,
      // then 2) reshuffle it with a random key. Initial materialization provides some parallelism
      // and ensures that data to be shuffled can be generated in parallel, while reshuffling
      // provides perfect parallelism.
      // In most cases where a "fusion break" is needed, a simple reshuffle would be sufficient.
      // The current approach is necessary only to support the particular case of JdbcIO where
      // a single query may produce many gigabytes of query results.

      // The filter rejects every element, so the side input view is always empty.
      PCollectionView> empty =
          input
              .apply("Consume", Filter.by(SerializableFunctions.constant(false)))
              .apply(View.asIterable());
      PCollection materialized =
          input.apply(
              "Identity",
              ParDo.of(
                      new DoFn() {
                        @ProcessElement
                        public void process(ProcessContext c) {
                          c.output(c.element());
                        }
                      })
                  .withSideInputs(empty));
      return materialized.apply(Reshuffle.viaRandomKey());
    }
  }

  /**
   * Wraps a {@link DataSourceConfiguration} to provide a {@link PoolingDataSource}.
   *
   * <p>At most a single {@link DataSource} instance will be constructed during pipeline execution
   * for each unique {@link DataSourceConfiguration} within the pipeline.
   */
  public static class PoolableDataSourceProvider
      implements SerializableFunction, HasDisplayData {
    // Pooled data sources already built, keyed by the configuration they were built from.
    private static final ConcurrentHashMap instances =
        new ConcurrentHashMap<>();
    private final DataSourceProviderFromDataSourceConfiguration config;

    private PoolableDataSourceProvider(DataSourceConfiguration config) {
      this.config = new DataSourceProviderFromDataSourceConfiguration(config);
    }

    /** Creates a provider of pooled data sources for the given configuration. */
    public static SerializableFunction of(DataSourceConfiguration config) {
      return new PoolableDataSourceProvider(config);
    }

    /**
     * Returns the pooled data source for this provider's configuration, building it on first use.
     *
     * @param input unused, other than being forwarded to the wrapped provider
     */
    @Override
    public DataSource apply(Void input) {
      return instances.computeIfAbsent(
          config.config,
          ignored -> {
            // Wrap the plain data source of the configuration in a connection pool.
            DataSource basicSource = config.apply(input);
            DataSourceConnectionFactory connectionFactory =
                new DataSourceConnectionFactory(basicSource);
            @SuppressWarnings("nullness") // apache.commons.dbcp2 not annotated
            PoolableConnectionFactory poolableConnectionFactory =
                new PoolableConnectionFactory(connectionFactory, null);
            GenericObjectPoolConfig poolConfig = new GenericObjectPoolConfig();
            poolConfig.setMinIdle(0);
            poolConfig.setMinEvictableIdleTimeMillis(10000);
            poolConfig.setSoftMinEvictableIdleTimeMillis(30000);
            GenericObjectPool connectionPool =
                new GenericObjectPool(poolableConnectionFactory, poolConfig);
            poolableConnectionFactory.setPool(connectionPool);
            // Pooled connections default to manual commit and read-write mode.
            poolableConnectionFactory.setDefaultAutoCommit(false);
            poolableConnectionFactory.setDefaultReadOnly(false);
            return new PoolingDataSource(connectionPool);
          });
    }

    /** Reports the display data of the wrapped configuration provider. */
    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      config.populateDisplayData(builder);
    }
  }

  /**
   * Wraps a {@link DataSourceConfiguration} to provide a {@link DataSource}.
   *
   * <p>At most a single {@link DataSource} instance will be constructed during pipeline execution
   * for each unique {@link DataSourceConfiguration} within the pipeline.
   */
  public static class DataSourceProviderFromDataSourceConfiguration
      implements SerializableFunction, HasDisplayData {
    // Data sources already built, keyed by the configuration they were built from.
    private static final ConcurrentHashMap instances =
        new ConcurrentHashMap<>();
    private final DataSourceConfiguration config;

    private DataSourceProviderFromDataSourceConfiguration(DataSourceConfiguration config) {
      this.config = config;
    }

    /** Creates a provider of data sources for the given configuration. */
    public static SerializableFunction of(DataSourceConfiguration config) {
      return new DataSourceProviderFromDataSourceConfiguration(config);
    }

    /**
     * Returns the data source for this provider's configuration, building it on first use.
     *
     * @param input unused
     */
    @Override
    public DataSource apply(Void input) {
      return instances.computeIfAbsent(config, DataSourceConfiguration::buildDatasource);
    }

    /** Reports the display data of the wrapped configuration. */
    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      config.populateDisplayData(builder);
    }
  }

  /**
   * {@link DoFn} class to write results data to a JDBC sink. It supports writing rows one by one
   * (and returning individual results) - or by batch.
   *
   * @param <T> type of the elements to write
   * @param <V> type of the values output by this {@link DoFn}
   */
  static class WriteFn extends DoFn, V> {

    /** Immutable, serializable configuration of a {@link WriteFn}. */
    @AutoValue
    abstract static class WriteFnSpec implements Serializable, HasDisplayData {
      /**
       * Adds the configured values to the display data. Function-like settings are reported by
       * class name, or as the string "null" when unset.
       */
      @Override
      public void populateDisplayData(DisplayData.Builder builder) {
        builder
            .addIfNotNull(
                DisplayData.item(
                    "dataSourceProviderFn",
                    getDataSourceProviderFn() == null
                        ? "null"
                        : getDataSourceProviderFn().getClass().getName()))
            .addIfNotNull(DisplayData.item("statement", getStatement()))
            .addIfNotNull(
                DisplayData.item(
                    "preparedStatementSetter",
                    getPreparedStatementSetter() == null
                        ? "null"
                        : getPreparedStatementSetter().getClass().getName()))
            .addIfNotNull(
                DisplayData.item(
                    "retryConfiguration",
                    getRetryConfiguration() == null
                        ? "null"
                        : getRetryConfiguration().getClass().getName()))
            .addIfNotNull(DisplayData.item("table", getTable()))
            .addIfNotNull(
                DisplayData.item(
                    "rowMapper",
                    getRowMapper() == null ? "null" : getRowMapper().getClass().toString()))
            .addIfNotNull(DisplayData.item("batchSize", getBatchSize()));
      }

      @Pure
      abstract @Nullable SerializableFunction getDataSourceProviderFn();

      @Pure
      abstract @Nullable ValueProvider getStatement();

      @Pure
      abstract @Nullable PreparedStatementSetter getPreparedStatementSetter();

      @Pure
      abstract @Nullable RetryStrategy getRetryStrategy();

      @Pure
      abstract @Nullable RetryConfiguration getRetryConfiguration();

      @Pure
      abstract @Nullable String getTable();

      @Pure
      abstract @Nullable RowMapper getRowMapper();

      @Pure
      abstract @Nullable Long getBatchSize();

      // Whether the DoFn returns results for the statements it executes.
      @Pure
      abstract Boolean getReturnResults();

      /** Returns a new, empty builder. */
      @Pure
      static Builder builder() {
        return new AutoValue_JdbcIO_WriteFn_WriteFnSpec.Builder();
      }

      @AutoValue.Builder
      abstract static class Builder {
        abstract Builder setDataSourceProviderFn(
            @Nullable SerializableFunction fn);

        abstract Builder setStatement(@Nullable ValueProvider statement);

        abstract Builder setPreparedStatementSetter(
            @Nullable PreparedStatementSetter setter);

        abstract Builder setRetryStrategy(@Nullable RetryStrategy retryStrategy);

        abstract Builder setRetryConfiguration(
            @Nullable RetryConfiguration retryConfiguration);

        abstract Builder setTable(@Nullable String table);

        abstract Builder setRowMapper(@Nullable RowMapper rowMapper);

        abstract Builder setBatchSize(@Nullable Long batchSize);

        abstract Builder setReturnResults(Boolean returnResults);

        abstract WriteFnSpec build();
      }
    }

    // Metrics reported under the WriteFn namespace.
    private static final Distribution RECORDS_PER_BATCH =
        Metrics.distribution(WriteFn.class, "records_per_jdbc_batch");
    private static final Distribution MS_PER_BATCH =
        Metrics.distribution(WriteFn.class, "milliseconds_per_batch");

    private final WriteFnSpec spec;
    // Obtained from the spec's data source provider in setup().
    private @Nullable DataSource dataSource;
    // Opened lazily by getConnection() together with the prepared statement; both are released by
    // cleanUpStatementAndConnection().
    private @Nullable Connection connection;
    private @Nullable PreparedStatement preparedStatement;
    // NOTE(review): this field is static but is assigned from the instance-level setup(), so
    // WriteFn instances with different retry configurations in the same JVM overwrite each
    // other's value - confirm whether it should be a per-instance field.
    private static @Nullable FluentBackoff retryBackOff;

    /** Creates a write function driven by the given configuration. */
    public WriteFn(WriteFnSpec spec) {
      this.spec = spec;
    }

    /**
     * Adds the display data of the spec, followed by the textual form of the current prepared
     * statement, data source and spec (the string "null" for a missing value).
     */
    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      spec.populateDisplayData(builder);

      // String.valueOf yields "null" for a null reference and toString() otherwise.
      String queryText = String.valueOf(preparedStatement);
      builder.add(DisplayData.item("query", queryText));

      String dataSourceText = String.valueOf(dataSource);
      builder.add(DisplayData.item("dataSource", dataSourceText));

      String specText = String.valueOf(spec);
      builder.add(DisplayData.item("spec", specText));
    }

    /**
     * Resolves the data source from the spec's provider function and derives the retry backoff
     * (initial backoff, maximum cumulative backoff, maximum retries) from the spec's retry
     * configuration, storing it in {@code retryBackOff}.
     *
     * @throws IllegalStateException if the provider function, the retry configuration, or one of
     *     its durations is missing
     */
    @Setup
    public void setup() {
      dataSource = checkStateNotNull(spec.getDataSourceProviderFn()).apply(null);

      RetryConfiguration retryConfiguration = checkStateNotNull(spec.getRetryConfiguration());

      // Build the backoff one setting at a time; it is published only once fully configured.
      FluentBackoff configured =
          FluentBackoff.DEFAULT.withInitialBackoff(
              checkStateNotNull(retryConfiguration.getInitialDuration()));
      configured =
          configured.withMaxCumulativeBackoff(
              checkStateNotNull(retryConfiguration.getMaxDuration()));
      retryBackOff = configured.withMaxRetries(retryConfiguration.getMaxAttempts());
    }

    /**
     * Returns the current connection, opening it on first use.
     *
     * <p>Opening a connection turns auto-commit off and also prepares the spec's statement,
     * keeping it in the {@code preparedStatement} field.
     *
     * @return the open connection held by this function
     * @throws SQLException if the connection cannot be obtained or configured, or the statement
     *     cannot be prepared
     */
    private Connection getConnection() throws SQLException {
      if (connection != null) {
        return connection;
      }

      DataSource source = checkStateNotNull(dataSource);
      // Stored in the field right away so that clean-up can close it even if a later step fails.
      connection = source.getConnection();
      connection.setAutoCommit(false);
      preparedStatement =
          connection.prepareStatement(checkStateNotNull(spec.getStatement()).get());
      return connection;
    }

    /**
     * Writes the incoming element, which is a group of records, as a single unit of work.
     *
     * @param context supplies the group of records and receives any emitted results
     * @throws Exception if the write fails and is not (or no longer) retried
     */
    @ProcessElement
    public void processElement(ProcessContext context) throws Exception {
      executeBatch(context, context.element());
    }

    /**
     * Releases the statement and connection at the end of every bundle.
     *
     * @throws Exception if closing the statement or the connection fails
     */
    @FinishBundle
    public void finishBundle() throws Exception {
      // Nothing is written here: every element is executed and committed while it is processed,
      // so the end of a bundle only has to release the JDBC resources. A new connection is
      // opened lazily the next time one is needed.
      cleanUpStatementAndConnection();
    }

    /**
     * Releases any statement and connection this function instance still holds.
     *
     * @throws Exception if closing the statement or the connection fails
     */
    @Teardown
    public void tearDown() throws Exception {
      cleanUpStatementAndConnection();
    }

    /**
     * Closes the held statement and then the held connection, leaving both fields {@code null}
     * whether or not closing succeeds.
     *
     * <p>The connection is closed even when closing the statement fails; if both fail, the
     * connection's failure is the one that propagates.
     *
     * @throws Exception if closing either resource fails
     */
    private void cleanUpStatementAndConnection() throws Exception {
      // Detach both handles from the fields first, so the fields end up null on every path.
      PreparedStatement statementToClose = preparedStatement;
      Connection connectionToClose = connection;
      preparedStatement = null;
      connection = null;

      try {
        if (statementToClose != null) {
          statementToClose.close();
        }
      } finally {
        if (connectionToClose != null) {
          connectionToClose.close();
        }
      }
    }

    /**
     * Writes one group of records, retrying as allowed by the spec's retry strategy and the
     * configured backoff.
     *
     * <p>Every attempt prepares a fresh statement on the shared connection. When
     * {@code returnResults} is false, the records are added to a JDBC batch that is executed and
     * committed once. When it is true, {@code processRecord} executes and commits each record
     * itself and nothing further is executed here.
     *
     * <p>Only a {@link SQLException} raised directly in this method (from executing the batch or
     * committing) is offered to the retry strategy. {@code processRecord} rethrows its failures
     * as {@link RuntimeException}, so those bypass the retry logic entirely.
     *
     * @param context passed through to {@code processRecord}, which may emit results on it
     * @param records the records to write as one group
     * @throws SQLException if the write fails and the retry strategy rejects the failure, or the
     *     backoff is exhausted; also if preparing the statement or rolling back fails
     * @throws IOException propagated from advancing the backoff
     * @throws InterruptedException propagated from sleeping between attempts
     */
    private void executeBatch(ProcessContext context, Iterable records)
        throws SQLException, IOException, InterruptedException {
      // Primitive on purpose: a boxed Long would only be unboxed again for the elapsed-time math.
      long startTimeNs = System.nanoTime();
      Sleeper sleeper = Sleeper.DEFAULT;
      BackOff backoff = checkStateNotNull(retryBackOff).backoff();
      RetryStrategy retryStrategy = checkStateNotNull(spec.getRetryStrategy());
      while (true) {
        // Named "statement" so it does not shadow the preparedStatement field.
        try (PreparedStatement statement =
            getConnection().prepareStatement(checkStateNotNull(spec.getStatement()).get())) {
          try {
            // add each record in the statement batch
            int recordsInBatch = 0;
            for (T record : records) {
              processRecord(record, statement, context);
              recordsInBatch += 1;
            }
            if (!spec.getReturnResults()) {
              // execute the batch
              statement.executeBatch();
              // commit the changes
              getConnection().commit();
            }
            RECORDS_PER_BATCH.update(recordsInBatch);
            // Measured from the first attempt, so time spent on retries is included.
            MS_PER_BATCH.update(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs));
            break;
          } catch (SQLException exception) {
            LOG.trace(
                "SQL exception thrown while writing to JDBC database: {}", exception.getMessage());
            if (!retryStrategy.apply(exception)) {
              throw exception;
            }
            LOG.warn("Deadlock detected, retrying", exception);
            // clean up the statement batch and the connection state
            statement.clearBatch();
            if (connection != null) {
              connection.rollback();
            }
            if (!BackOffUtils.next(sleeper, backoff)) {
              // we tried the max number of times
              throw exception;
            }
          }
        }
      }
    }

    /**
     * Binds one record to the statement and either queues it or executes it immediately.
     *
     * <p>In batch mode ({@code returnResults} false) the record is only added to the statement's
     * batch. Otherwise the statement is executed, the connection is committed, and the row
     * mapper's view of the result set is emitted on {@code c}.
     *
     * @param record the record to bind
     * @param preparedStatement the statement to bind the record to
     * @param c the context results are emitted on when {@code returnResults} is true
     * @throws RuntimeException wrapping any exception raised while binding, executing or mapping
     */
    private void processRecord(T record, PreparedStatement preparedStatement, ProcessContext c) {
      try {
        preparedStatement.clearParameters();
        checkStateNotNull(spec.getPreparedStatementSetter())
            .setParameters(record, preparedStatement);

        if (!spec.getReturnResults()) {
          // Batch mode: just queue the record; the caller executes the whole batch.
          preparedStatement.addBatch();
          return;
        }

        // Result mode: the mapper is validated before anything is executed.
        RowMapper rowMapper = checkStateNotNull(spec.getRowMapper());
        preparedStatement.execute();
        getConnection().commit();
        c.output(rowMapper.mapRow(preparedStatement.getResultSet()));
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
  }
}