/*
 * Copyright (c) 2008-2023, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.pipeline;

import com.hazelcast.function.BiConsumerEx;
import com.hazelcast.function.SupplierEx;
import com.hazelcast.jet.core.processor.SinkProcessors;
import com.hazelcast.jet.impl.connector.DataSourceFromJdbcUrl;
import com.hazelcast.spi.annotation.Beta;

import javax.annotation.Nonnull;
import javax.sql.CommonDataSource;
import java.sql.PreparedStatement;

import static com.hazelcast.internal.util.Preconditions.checkPositive;

/**
 * See {@link Sinks#jdbcBuilder()}.
 *
 * @param <T> type of the items the sink accepts
 *
 * @since Jet 4.1
 */
public class JdbcSinkBuilder<T> {
    /**
     * The default setting for whether exactly-once is allowed for the sink.
     */
    public static final boolean DEFAULT_EXACTLY_ONCE = true;

    /**
     * The default batch size limit to use for the sink if batching is supported.
     */
    public static final int DEFAULT_BATCH_LIMIT = 50;

    private String updateQuery;
    private String jdbcUrl;
    private BiConsumerEx<PreparedStatement, T> bindFn;
    private SupplierEx<? extends CommonDataSource> dataSourceSupplier;
    private boolean exactlyOnce = DEFAULT_EXACTLY_ONCE;
    private int batchLimit = DEFAULT_BATCH_LIMIT;
    private DataConnectionRef dataConnectionRef;

    JdbcSinkBuilder() {
    }

    /**
     * The query to execute for each item. It should contain a parametrized
     * query to which the {@linkplain #bindFn(BiConsumerEx) bind function} will
     * bind values.
     * <p>
     * Which type of statement to use?
     * <p>
     * In exactly-once mode we recommend using an {@code INSERT} statement.
     * Each parallel processor uses a separate transaction: if two processors
     * try to update the same record, the later one will be deadlocked: it will
     * be blocked waiting for a record lock but will never get it because the
     * snapshot will not be able to complete and the lock owner will never
     * commit.
     * <p>
     * On the other hand, in at-least-once mode we recommend using a {@code
     * MERGE} statement. If a unique key is derived from the items, this can
     * give you exactly-once behavior through idempotence without using XA
     * transactions: the MERGE statement will insert a record initially and
     * overwrite the same record, if the job restarted and the same item is
     * written again. If you don't have a unique key in the item, use INSERT
     * with auto-generated key.
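     * <p>
     * For illustration, a sketch of an idempotent query for a hypothetical
     * {@code person(id, name)} table, written in H2's {@code MERGE ... KEY}
     * syntax (upsert syntax differs between databases), together with a
     * matching bind function; the {@code Person} type is assumed and is not
     * part of this API:
     * <pre>{@code
     * Sinks.<Person>jdbcBuilder()
     *      .updateQuery("MERGE INTO person (id, name) KEY (id) VALUES (?, ?)")
     *      .bindFn((stmt, person) -> {
     *          stmt.setLong(1, person.getId());
     *          stmt.setString(2, person.getName());
     *      })
     *      // ...connection setup and build() omitted
     * }</pre>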
     *
     * @param updateQuery the SQL statement to execute for each item
     * @return this instance for fluent API
     */
    @Nonnull
    public JdbcSinkBuilder<T> updateQuery(@Nonnull String updateQuery) {
        this.updateQuery = updateQuery;
        return this;
    }

    /**
     * Set the function to bind values to a {@code PreparedStatement} created
     * with the query set with {@link #updateQuery(String)}. The function
     * should not execute the query, nor call {@code commit()} or any other
     * method.
     *
     * @param bindFn the bind function. The function must be stateless.
     * @return this instance for fluent API
     */
    @Nonnull
    public JdbcSinkBuilder<T> bindFn(@Nonnull BiConsumerEx<PreparedStatement, T> bindFn) {
        this.bindFn = bindFn;
        return this;
    }

    /**
     * Sets the connection URL for the target database.
     * <p>
     * If your job runs in exactly-once mode, don't use this method, but
     * provide an {@link javax.sql.XADataSource} using {@link
     * #dataSourceSupplier(SupplierEx)} method, otherwise the job will fail.
     * If your driver doesn't have an XADataSource implementation, also call
     * {@link #exactlyOnce(boolean) exactlyOnce(false)}.
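     * <p>
     * For illustration, a minimal sketch of a sink that writes over a plain
     * connection URL and therefore opts out of exactly-once (the URL is just
     * a placeholder and {@code builder} stands for this builder instance):
     * <pre>{@code
     * builder.jdbcUrl("jdbc:h2:mem:example")
     *        .exactlyOnce(false);
     * }</pre>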
     * <p>
     * See also {@link #dataSourceSupplier(SupplierEx)}.
     *
     * @param connectionUrl the connection URL
     * @return this instance for fluent API
     */
    @Nonnull
    public JdbcSinkBuilder<T> jdbcUrl(String connectionUrl) {
        this.jdbcUrl = connectionUrl;
        this.dataSourceSupplier = null;
        this.dataConnectionRef = null;
        return this;
    }

    /**
     * Sets the supplier of {@link javax.sql.DataSource} or {@link
     * javax.sql.XADataSource}. One dataSource instance will be created on each
     * member. For exactly-once guarantee an XADataSource must be used or the
     * job will fail. If your driver doesn't have an XADataSource
     * implementation, also call {@link #exactlyOnce(boolean) exactlyOnce(false)}.
     * <p>
     * There's no need to use {@link javax.sql.ConnectionPoolDataSource}. One
     * connection is given to each processor and that connection is held during
     * the entire job execution.
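     * <p>
     * For illustration, a supplier sketch that assumes the PostgreSQL driver
     * is on the classpath; any other {@code XADataSource} implementation is
     * wired up the same way ({@code builder} stands for this builder
     * instance):
     * <pre>{@code
     * builder.dataSourceSupplier(() -> {
     *     PGXADataSource xaDs = new PGXADataSource();
     *     xaDs.setUrl("jdbc:postgresql://localhost:5432/exampledb");
     *     return xaDs;
     * });
     * }</pre>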
     *
     * @param dataSourceSupplier the supplier of data source. The function must
     *                           be stateless.
     * @return this instance for fluent API
     */
    @Nonnull
    public JdbcSinkBuilder<T> dataSourceSupplier(SupplierEx<? extends CommonDataSource> dataSourceSupplier) {
        this.dataSourceSupplier = dataSourceSupplier;
        this.jdbcUrl = null;
        this.dataConnectionRef = null;
        return this;
    }

    /**
     * Sets whether the exactly-once mode is enabled for the sink. If
     * exactly-once is enabled, the job must also be in exactly-once mode for
     * that mode to be used, otherwise the sink will use the job's guarantee.
     * <p>
     * Set exactly-once to false if you want your job to run in exactly-once,
     * but want to reduce the guarantee just for the sink.
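     * <p>
     * For illustration, a sketch of a job submitted with the exactly-once
     * guarantee while this sink alone is relaxed to at-least-once
     * ({@code builder} stands for this builder instance):
     * <pre>{@code
     * JobConfig jobConfig = new JobConfig()
     *         .setProcessingGuarantee(ProcessingGuarantee.EXACTLY_ONCE);
     * builder.exactlyOnce(false);
     * }</pre>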
     * <p>
     * The default is exactly-once set to {@value DEFAULT_EXACTLY_ONCE}.
     *
     * @param enabled whether exactly-once is allowed for the sink
     * @return this instance for fluent API
     */
    @Nonnull
    public JdbcSinkBuilder<T> exactlyOnce(boolean enabled) {
        this.exactlyOnce = enabled;
        return this;
    }

    /**
     * Sets the batch size limit for the sink. If the JDBC driver supports
     * batched updates, this defines the upper bound to the batch size.
     * <p>
     * The default batch size limit is {@value DEFAULT_BATCH_LIMIT}.
     *
     * @param batchLimit the batch size limit for the sink
     * @return this instance for fluent API
     * @since Jet 4.5
     */
    @Nonnull
    public JdbcSinkBuilder<T> batchLimit(int batchLimit) {
        checkPositive(batchLimit, "batch size limit must be positive");
        this.batchLimit = batchLimit;
        return this;
    }

    /**
     * Sets the reference to the configured data connection of {@link DataConnectionRef} from which
     * the instance of the {@link javax.sql.DataSource} will be retrieved.
     * <p>
     * Example:
     * <p>
     * (Prerequisite) Data connection configuration:
     * <pre>{@code
     *      Config config = smallInstanceConfig();
     *      Properties properties = new Properties();
     *      properties.put("jdbcUrl", jdbcUrl);
     *      properties.put("username", username);
     *      properties.put("password", password);
     *      DataConnectionConfig dataConnectionConfig = new DataConnectionConfig()
     *              .setName("my-jdbc-data-connection")
     *              .setClassName(JdbcDataConnection.class.getName())
     *              .setProperties(properties);
     *      config.getDataConnectionConfigs().put(name, dataConnectionConfig);
     * }</pre>
     * <p>
     * Pipeline configuration
     * <pre>{@code
     *
     *         p.readFrom(TestSources.items(IntStream.range(0, PERSON_COUNT).boxed().toArray(Integer[]::new)))
     *                 .map(item -> entry(item, item.toString()))
     *                 .writeTo(Sinks.<Map.Entry<Integer, String>>jdbcBuilder()
     *                         .updateQuery("INSERT INTO " + tableName + " VALUES(?, ?)")
     *                         .dataConnectionRef(dataConnectionRef("my-jdbc-data-connection"))
     *                         .bindFn((stmt, item1) -> {
     *                             stmt.setInt(1, item1.getKey());
     *                             stmt.setString(2, item1.getValue());
     *                         })
     *                         .build());
     * }</pre>
     *
     * @param dataConnectionRef the reference to the configured data connection
     * @return this instance for fluent API
     * @since 5.2
     */
    @Nonnull
    @Beta
    public JdbcSinkBuilder<T> dataConnectionRef(DataConnectionRef dataConnectionRef) {
        this.dataSourceSupplier = null;
        this.jdbcUrl = null;
        this.dataConnectionRef = dataConnectionRef;
        return this;
    }

    /**
     * Creates and returns the JDBC {@link Sink} with the supplied components.
     */
    @Nonnull
    public Sink<T> build() {
        if (dataSourceSupplier == null && jdbcUrl == null && dataConnectionRef == null) {
            throw new IllegalStateException("Neither jdbcUrl() nor dataSourceSupplier() nor dataConnectionRef() set");
        }
        if (jdbcUrl != null) {
            String connectionUrl = jdbcUrl;
            dataSourceSupplier = () -> new DataSourceFromJdbcUrl(connectionUrl);
        }
        if (dataSourceSupplier != null) {
            return Sinks.fromProcessor("jdbcSink",
                    SinkProcessors.writeJdbcP(jdbcUrl, updateQuery, dataSourceSupplier, bindFn, exactlyOnce, batchLimit));
        }
        return Sinks.fromProcessor("jdbcSink",
                SinkProcessors.writeJdbcP(updateQuery, dataConnectionRef, bindFn, exactlyOnce, batchLimit));
    }
}