/*
 * com.hazelcast.jet.pipeline.Sinks Maven / Gradle / Ivy
 *
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of hazelcast-jdbc Show documentation
 * Hazelcast JDBC Driver
 * The newest version!
 */
/*
 * Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.pipeline;

import com.hazelcast.client.config.ClientConfig;
import com.hazelcast.collection.IList;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.core.Offloadable;
import com.hazelcast.function.BiConsumerEx;
import com.hazelcast.function.BiFunctionEx;
import com.hazelcast.function.BinaryOperatorEx;
import com.hazelcast.function.FunctionEx;
import com.hazelcast.function.SupplierEx;
import com.hazelcast.jet.JetService;
import com.hazelcast.jet.Observable;
import com.hazelcast.jet.core.Processor;
import com.hazelcast.jet.core.ProcessorMetaSupplier;
import com.hazelcast.jet.core.processor.SinkProcessors;
import com.hazelcast.jet.impl.pipeline.SinkImpl;
import com.hazelcast.jet.json.JsonUtil;
import com.hazelcast.map.EntryProcessor;
import com.hazelcast.map.IMap;
import com.hazelcast.security.impl.function.SecuredFunctions;
import com.hazelcast.security.permission.ReliableTopicPermission;
import com.hazelcast.spi.annotation.Beta;
import com.hazelcast.topic.ITopic;
import jakarta.jms.ConnectionFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.sql.CommonDataSource;
import java.nio.charset.Charset;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.SQLNonTransientException;
import java.util.Map.Entry;

import static com.hazelcast.client.HazelcastClient.newHazelcastClient;
import static com.hazelcast.function.Functions.entryKey;
import static com.hazelcast.function.Functions.entryValue;
import static com.hazelcast.jet.core.ProcessorMetaSupplier.preferLocalParallelismOne;
import static com.hazelcast.jet.core.processor.DiagnosticProcessors.writeLoggerP;
import static com.hazelcast.jet.core.processor.Processors.noopP;
import static com.hazelcast.jet.core.processor.SinkProcessors.writeCacheP;
import static com.hazelcast.jet.core.processor.SinkProcessors.writeListP;
import static com.hazelcast.jet.core.processor.SinkProcessors.writeRemoteCacheP;
import static com.hazelcast.jet.core.processor.SinkProcessors.writeRemoteListP;
import static com.hazelcast.jet.core.processor.SinkProcessors.writeSocketP;
import static com.hazelcast.jet.datamodel.Tuple2.tuple2;
import static com.hazelcast.jet.impl.util.ImdgUtil.asClientConfig;
import static com.hazelcast.jet.impl.util.ImdgUtil.asXmlString;
import static com.hazelcast.security.permission.ActionConstants.ACTION_CREATE;
import static com.hazelcast.security.permission.ActionConstants.ACTION_PUBLISH;
import static java.nio.charset.StandardCharsets.UTF_8;

/**
 * Contains factory methods for various types of pipeline sinks. Formally,
 * a sink transform is one which has no output. A pipeline stage with a sink
 * transform has the type {@link SinkStage} and accepts no downstream stages.
 * <p>
 * The default local parallelism for the sinks in this class is typically 1;
 * check the documentation of the individual methods.
 *
 * @since Jet 3.0
 */
public final class Sinks {

    // Private constructor: this class only exposes static factory methods
    // and is not meant to be instantiated.
    private Sinks() {
    }

    /**
     * Returns a sink constructed directly from the given Core API processor
     * meta-supplier.
     * <p>
     * The default local parallelism for this sink is specified inside the
     * {@link ProcessorMetaSupplier#preferredLocalParallelism() metaSupplier}.
     *
     * @param sinkName     user-friendly sink name
     * @param metaSupplier the processor meta-supplier
     * @param <T>          type of the items the sink accepts
     * @return a sink backed by the given meta-supplier
     */
    @Nonnull
    public static <T> Sink<T> fromProcessor(
            @Nonnull String sinkName,
            @Nonnull ProcessorMetaSupplier metaSupplier
    ) {
        return new SinkImpl<>(sinkName, metaSupplier);
    }

    /**
     * Returns a sink constructed directly from the given Core API processor
     * meta-supplier.
     * <p>
     * The default local parallelism for this sink is specified inside the
     * {@link ProcessorMetaSupplier#preferredLocalParallelism() metaSupplier}.
     *
     * @param sinkName       user-friendly sink name
     * @param metaSupplier   the processor meta-supplier
     * @param partitionKeyFn key extractor function for partitioning edges to
     *                       sink. It must be stateless and {@linkplain Processor#isCooperative()
     *                       cooperative}. May be {@code null}.
     * @param <T>            type of the items the sink accepts
     * @return a sink backed by the given meta-supplier
     */
    @Nonnull
    public static <T> Sink<T> fromProcessor(
            @Nonnull String sinkName,
            @Nonnull ProcessorMetaSupplier metaSupplier,
            @Nullable FunctionEx<? super T, ?> partitionKeyFn
    ) {
        return new SinkImpl<>(sinkName, metaSupplier, partitionKeyFn);
    }

    /**
     * Returns a sink that puts {@code Map.Entry}s it receives into a Hazelcast
     * {@code IMap} with the specified name.
     * 

     * This sink provides the exactly-once guarantee thanks to idempotent
     * updates. It means that the value with the same key is not appended,
     * but overwritten. After the job is restarted from snapshot, duplicate
     * items will not change the state in the target map.
     * 

     * The default local parallelism for this sink is 1.
     */
    @Nonnull
    public static  Sink> map(@Nonnull String mapName) {
        return map(mapName, Entry::getKey, Entry::getValue);
    }

    /**
     * Returns a sink that puts {@code Map.Entry}s it receives into the given
     * Hazelcast {@code IMap}.
     * 

     * NOTE: Jet only remembers the name of the map you supply
     * and acquires a map with that name on the local cluster. If you supply a
     * map instance from another cluster, no error will be thrown to indicate
     * this.
     * 

     * This sink provides the exactly-once guarantee thanks to idempotent
     * updates. It means that the value with the same key is not appended,
     * but overwritten. After the job is restarted from snapshot, duplicate
     * items will not change the state in the target map.
     * 

     * The default local parallelism for this sink is 1.
     */
    @Nonnull
    public static  Sink> map(@Nonnull IMap map) {
        return map(map.getName());
    }


    /**
     * Returns a sink that uses the supplied functions to extract the key
     * and value with which to put to a Hazelcast {@code IMap} with the
     * specified name.
     * <p>
     * This sink provides the exactly-once guarantee thanks to idempotent
     * updates. It means that the value with the same key is not appended,
     * but overwritten. After the job is restarted from snapshot, duplicate
     * items will not change the state in the target map.
     * <p>
     * The default local parallelism for this sink is 1.
     * <p>
     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param mapName   name of the map
     * @param toKeyFn   function that extracts the key from the input item
     * @param toValueFn function that extracts the value from the input item
     * @param <T>       input item type
     * @param <K>       key type
     * @param <V>       value type
     * @return a sink built by {@link MapSinkBuilder} from the given functions
     * @since Jet 4.2
     */
    @Nonnull
    public static <T, K, V> Sink<T> map(
            @Nonnull String mapName,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull FunctionEx<? super T, ? extends V> toValueFn

    ) {
        // The explicit type witness pins T, K and V on the builder so that
        // build() yields a Sink<T>.
        return Sinks.<T, K, V>mapBuilder(mapName)
                .toKeyFn(toKeyFn)
                .toValueFn(toValueFn)
                .build();
    }

    /**
     * Returns a sink that uses the supplied functions to extract the key
     * and value with which to put to given Hazelcast {@code IMap}.
     * <p>
     * <strong>NOTE:</strong> Jet only remembers the name of the map you supply
     * and acquires a map with that name on the local cluster. If you supply a
     * map instance from another cluster, no error will be thrown to indicate
     * this.
     * <p>
     * This sink provides the exactly-once guarantee thanks to idempotent
     * updates. It means that the value with the same key is not appended,
     * but overwritten. After the job is restarted from snapshot, duplicate
     * items will not change the state in the target map.
     * <p>
     * The default local parallelism for this sink is 1.
     * <p>
     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param map       the map to put to; only its name is used
     * @param toKeyFn   function that extracts the key from the input item
     * @param toValueFn function that extracts the value from the input item
     * @param <T>       input item type
     * @param <K>       key type
     * @param <V>       value type
     * @return a sink writing to the map with the same name as {@code map}
     * @since Jet 4.2
     */
    @Nonnull
    public static <T, K, V> Sink<T> map(
            @Nonnull IMap<? super K, ? super V> map,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull FunctionEx<? super T, ? extends V> toValueFn

    ) {
        // Only the name is retained; see the NOTE above.
        return map(map.getName(), toKeyFn, toValueFn);
    }

    /**
     * Returns a sink that puts {@code Map.Entry}s it receives into a Hazelcast
     * {@code IMap} with the specified name in a remote cluster identified by
     * the supplied {@code ClientConfig}.
     * 

     * This sink provides the exactly-once guarantee thanks to idempotent
     * updates. It means that the value with the same key is not appended,
     * but overwritten. After the job is restarted from snapshot, duplicate
     * items will not change the state in the target map.
     * 

     * The default local parallelism for this sink is 1.
     */
    @Nonnull
    public static  Sink> remoteMap(@Nonnull String mapName, @Nonnull ClientConfig clientConfig) {
        return remoteMap(mapName, clientConfig, Entry::getKey, Entry::getValue);
    }

    /**
     * The same as the {@link #remoteMap(String, ClientConfig)}
     * method. The only difference is instead of a ClientConfig parameter that
     * is used to connect to remote cluster, this method receives a
     * DataConnectionConfig.
     * 

     * The DataConnectionConfig caches the connection to remote cluster, so that it
     * can be re-used
     *
     * @param mapName           the name of the map
     * @param dataConnectionRef the reference to DataConnectionConfig
     * @since 5.4
     */
    @Nonnull
    public static  Sink> remoteMap(@Nonnull String mapName,
                                                     @Nonnull DataConnectionRef dataConnectionRef) {
        return remoteMap(mapName, dataConnectionRef, Entry::getKey, Entry::getValue);
    }

    /**
     * Returns a sink that uses the supplied functions to extract the key
     * and value with which to put to a Hazelcast {@code IMap} in a remote
     * cluster identified by the supplied {@code ClientConfig}.
     * <p>
     * This sink provides the exactly-once guarantee thanks to idempotent
     * updates. It means that the value with the same key is not appended,
     * but overwritten. After the job is restarted from snapshot, duplicate
     * items will not change the state in the target map.
     * <p>
     * The default local parallelism for this sink is 1.
     * <p>
     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param mapName      the name of the map
     * @param clientConfig configuration used to connect to the remote cluster
     * @param toKeyFn      function that extracts the key from the input item
     * @param toValueFn    function that extracts the value from the input item
     * @param <T>          input item type
     * @param <K>          key type
     * @param <V>          value type
     * @return a sink built by {@link MapSinkBuilder} for the remote map
     * @since Jet 4.2
     */
    @Nonnull
    public static <T, K, V> Sink<T> remoteMap(
            @Nonnull String mapName,
            @Nonnull ClientConfig clientConfig,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull FunctionEx<? super T, ? extends V> toValueFn
    ) {
        return Sinks.<T, K, V>mapBuilder(mapName)
                .clientConfig(clientConfig)
                .toKeyFn(toKeyFn)
                .toValueFn(toValueFn)
                .build();
    }

    /**
     * The same as the {@link #remoteMap(String, ClientConfig, FunctionEx, FunctionEx)}
     * method. The only difference is instead of a ClientConfig parameter that
     * is used to connect to remote cluster, this method receives a
     * reference to a DataConnectionConfig.
     * <p>
     * The DataConnectionConfig caches the connection to remote cluster, so that it
     * can be re-used.
     * <p>
     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param mapName           the name of the map
     * @param dataConnectionRef the reference to DataConnectionConfig
     * @param toKeyFn           function that extracts the key from the input item
     * @param toValueFn         function that extracts the value from the input item
     * @param <T>               input item type
     * @param <K>               key type
     * @param <V>               value type
     * @return a sink built by {@link MapSinkBuilder} for the remote map
     * @since 5.4
     */
    @Nonnull
    public static <T, K, V> Sink<T> remoteMap(
            @Nonnull String mapName,
            @Nonnull DataConnectionRef dataConnectionRef,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull FunctionEx<? super T, ? extends V> toValueFn
    ) {
        return Sinks.<T, K, V>mapBuilder(mapName)
                .dataConnectionRef(dataConnectionRef)
                .toKeyFn(toKeyFn)
                .toValueFn(toValueFn)
                .build();
    }

    /**
     * Returns a builder object that offers a step-by-step fluent API to build
     * a custom map sink for the Pipeline API. See javadoc of methods in
     * {@link MapSinkBuilder} for more details.
     *
     * @param mapName name of the map to sink into, must not be null
     * @param <T> type of the incoming items
     * @param <K> type of the key extracted from each item
     * @param <V> type of the value extracted from each item
     * @return a new {@link MapSinkBuilder} for the named map
     * @since 5.4
     */
    @Nonnull
    public static <T, K, V> MapSinkBuilder<T, K, V> mapBuilder(String mapName) {
        return new MapSinkBuilder<>(mapName);
    }

    /**
     * Returns a builder object that offers a step-by-step fluent API to build
     * a custom map sink that submits entry processors, for the Pipeline API.
     * See javadoc of methods in {@link MapSinkEntryProcessorBuilder} for more
     * details.
     *
     * @param mapName name of the map to sink into, must not be null
     * @param <E> type of the incoming items
     * @param <K> type of the key extracted from each item
     * @param <V> type of the value stored in the map
     * @param <R> third type argument of the {@link EntryProcessor} created
     *            for each item
     * @return a new {@link MapSinkEntryProcessorBuilder} for the named map
     * @since 5.4
     */
    @Nonnull
    public static <E, K, V, R> MapSinkEntryProcessorBuilder<E, K, V, R> mapEntryProcessorBuilder(String mapName) {
        return new MapSinkEntryProcessorBuilder<>(mapName);
    }

    /**
     * Returns a sink that uses the supplied functions to extract the key
     * and value with which to update a Hazelcast {@code IMap}. If the map
     * already contains the key, it applies the given {@code mergeFn} to
     * resolve the existing and the proposed value into the value to use. If
     * the value comes out as {@code null}, it removes the key from the map.
     * Expressed as code, the sink performs the equivalent of the following for
     * each item:
     * <pre>{@code
     * K key = toKeyFn.apply(item);
     * V oldValue = map.get(key);
     * V newValue = toValueFn.apply(item);
     * V resolved = (oldValue == null)
     *            ? newValue
     *            : mergeFn.apply(oldValue, newValue);
     * if (resolved == null)
     *     map.remove(key);
     * else
     *     map.put(key, resolved);
     * }</pre>
     * <p>
     * This sink supports exactly-once processing only if the
     * supplied merge function performs <i>idempotent updates</i>, i.e.,
     * it satisfies the rule
     * {@code mergeFn.apply(oldValue, toValueFn.apply(e)).equals(oldValue)}
     * for any {@code e} that was already observed.
     * <p>
     * <b>Note:</b> This operation is <b>NOT</b> lock-aware, it will process the
     * entries no matter if they are locked or not. Use {@link #mapWithEntryProcessor}
     * if you need locking.
     * <p>
     * The default local parallelism for this sink is 1.
     * <p>
     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param mapName   name of the map
     * @param toKeyFn   function that extracts the key from the input item
     * @param toValueFn function that extracts the value from the input item
     * @param mergeFn   function that merges the existing value with the value acquired from the
     *                  received item
     * @param <T>       input item type
     * @param <K>       key type
     * @param <V>       value type
     * @return a sink built by {@link MapSinkBuilder} with the given merge function
     */
    @Nonnull
    public static <T, K, V> Sink<T> mapWithMerging(
            @Nonnull String mapName,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull FunctionEx<? super T, ? extends V> toValueFn,
            @Nonnull BinaryOperatorEx<V> mergeFn
    ) {
        return Sinks.<T, K, V>mapBuilder(mapName)
                .toKeyFn(toKeyFn)
                .toValueFn(toValueFn)
                .mergeFn(mergeFn)
                .build();
    }

    /**
     * Returns a sink that uses the supplied functions to extract the key
     * and value with which to update a Hazelcast {@code IMap}. If the map
     * already contains the key, it applies the given {@code mergeFn} to
     * resolve the existing and the proposed value into the value to use. If
     * the value comes out as {@code null}, it removes the key from the map.
     * Expressed as code, the sink performs the equivalent of the following for
     * each item:
     * <pre>{@code
     * K key = toKeyFn.apply(item);
     * V oldValue = map.get(key);
     * V newValue = toValueFn.apply(item);
     * V resolved = (oldValue == null)
     *            ? newValue
     *            : mergeFn.apply(oldValue, newValue);
     * if (resolved == null)
     *     map.remove(key);
     * else
     *     map.put(key, resolved);
     * }</pre>
     * <p>
     * <strong>NOTE:</strong> Jet only remembers the name of the map you supply
     * and acquires a map with that name on the local cluster. If you supply a
     * map instance from another cluster, no error will be thrown to indicate
     * this.
     * <p>
     * This sink supports exactly-once processing only if the
     * supplied merge function performs <i>idempotent updates</i>, i.e.,
     * it satisfies the rule
     * {@code mergeFn.apply(oldValue, toValueFn.apply(e)).equals(oldValue)}
     * for any {@code e} that was already observed.
     * <p>
     * <b>Note:</b> This operation is <b>NOT</b> lock-aware, it will process the
     * entries no matter if they are locked or not. Use {@link #mapWithEntryProcessor}
     * if you need locking.
     * <p>
     * The default local parallelism for this sink is 1.
     * <p>
     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param map       the map to drain to; only its name is used
     * @param toKeyFn   function that extracts the key from the input item
     * @param toValueFn function that extracts the value from the input item
     * @param mergeFn   function that merges the existing value with the value acquired from the
     *                  received item
     * @param <T>       input item type
     * @param <K>       key type
     * @param <V>       value type
     * @return a sink updating the map with the same name as {@code map}
     */
    @Nonnull
    public static <T, K, V> Sink<T> mapWithMerging(
            @Nonnull IMap<? super K, ? super V> map,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull FunctionEx<? super T, ? extends V> toValueFn,
            @Nonnull BinaryOperatorEx<V> mergeFn
    ) {
        // Only the name is retained; see the NOTE above.
        return mapWithMerging(map.getName(), toKeyFn, toValueFn, mergeFn);
    }

    /**
     * Convenience for {@link #remoteMapWithMerging} with {@link Entry} as
     * input item.
     */
    @Nonnull
    public static  Sink> remoteMapWithMerging(
            @Nonnull String mapName,
            @Nonnull ClientConfig clientConfig,
            @Nonnull BinaryOperatorEx mergeFn
    ) {
        return remoteMapWithMerging(mapName, clientConfig, Entry::getKey, entryValue(), mergeFn);
    }

    /**
     * The same as the {@link #remoteMapWithMerging(String, ClientConfig, BinaryOperatorEx)}
     * method. The only difference is instead of a ClientConfig parameter that
     * is used to connect to remote cluster, this method receives a
     * DataConnectionConfig.
     * 

     * The DataConnectionConfig caches the connection to remote cluster, so that it
     * can be re-used
     *
     * @since 5.4
     */
    @Nonnull
    public static  Sink> remoteMapWithMerging(
            @Nonnull String mapName,
            @Nonnull DataConnectionRef dataConnectionRef,
            @Nonnull BinaryOperatorEx mergeFn
    ) {
        return remoteMapWithMerging(mapName, dataConnectionRef, Entry::getKey, entryValue(), mergeFn);
    }

    /**
     * Returns a sink equivalent to {@link #mapWithMerging(String, FunctionEx,
     * FunctionEx, BinaryOperatorEx)}, but for a map in a remote Hazelcast
     * cluster identified by the supplied {@code ClientConfig}.
     * <p>
     * Due to the used API, the remote cluster must be at least version 4.0.
     *
     * @param mapName      the name of the map
     * @param clientConfig configuration used to connect to the remote cluster
     * @param toKeyFn      function that extracts the key from the input item
     * @param toValueFn    function that extracts the value from the input item
     * @param mergeFn      function that merges the existing value with the
     *                     value acquired from the received item
     * @param <T>          input item type
     * @param <K>          key type
     * @param <V>          value type
     * @return a sink built by {@link MapSinkBuilder} for the remote map
     */
    @Nonnull
    public static <T, K, V> Sink<T> remoteMapWithMerging(
            @Nonnull String mapName,
            @Nonnull ClientConfig clientConfig,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull FunctionEx<? super T, ? extends V> toValueFn,
            @Nonnull BinaryOperatorEx<V> mergeFn
    ) {
        return Sinks.<T, K, V>mapBuilder(mapName)
                .clientConfig(clientConfig)
                .toKeyFn(toKeyFn)
                .toValueFn(toValueFn)
                .mergeFn(mergeFn)
                .build();
    }

    /**
     * The same as the {@link #remoteMapWithMerging(String, ClientConfig, FunctionEx, FunctionEx, BinaryOperatorEx)}
     * method. The only difference is instead of a ClientConfig parameter that
     * is used to connect to remote cluster, this method receives a
     * reference to a DataConnectionConfig.
     * <p>
     * The DataConnectionConfig caches the connection to remote cluster, so that it
     * can be re-used.
     *
     * @param mapName           the name of the map
     * @param dataConnectionRef the reference to DataConnectionConfig
     * @param toKeyFn           function that extracts the key from the input item
     * @param toValueFn         function that extracts the value from the input item
     * @param mergeFn           function that merges the existing value with
     *                          the value acquired from the received item
     * @param <T>               input item type
     * @param <K>               key type
     * @param <V>               value type
     * @return a sink built by {@link MapSinkBuilder} for the remote map
     * @since 5.4
     */
    @Nonnull
    public static <T, K, V> Sink<T> remoteMapWithMerging(
            @Nonnull String mapName,
            @Nonnull DataConnectionRef dataConnectionRef,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull FunctionEx<? super T, ? extends V> toValueFn,
            @Nonnull BinaryOperatorEx<V> mergeFn
    ) {
        return Sinks.<T, K, V>mapBuilder(mapName)
                .dataConnectionRef(dataConnectionRef)
                .toKeyFn(toKeyFn)
                .toValueFn(toValueFn)
                .mergeFn(mergeFn)
                .build();
    }

    /**
     * Convenience for {@link #mapWithMerging(String, FunctionEx, FunctionEx,
     * BinaryOperatorEx)} with {@link Entry} as input item.
     */
    @Nonnull
    public static  Sink> mapWithMerging(
            @Nonnull String mapName,
            @Nonnull BinaryOperatorEx mergeFn
    ) {
        return Sinks., K, V>mapWithMerging(mapName, entryKey(), entryValue(), mergeFn);
    }

    /**
     * Convenience for {@link #mapWithMerging(IMap, FunctionEx, FunctionEx,
     * BinaryOperatorEx)} with {@link Entry} as input item.
     */
    @Nonnull
    public static  Sink> mapWithMerging(
            @Nonnull IMap map,
            @Nonnull BinaryOperatorEx mergeFn
    ) {
        return mapWithMerging(map.getName(), mergeFn);
    }


    /**
     * Returns a sink that uses the supplied key-extracting and value-updating
     * functions to update a Hazelcast {@code IMap}. For each item it receives, it
     * applies {@code toKeyFn} to get the key and then applies {@code updateFn} to
     * the existing value in the map and the received item to acquire the new
     * value to associate with the key. If the new value is {@code null}, it
     * removes the key from the map. Expressed as code, the sink performs the
     * equivalent of the following for each item:
     * <pre>{@code
     * K key = toKeyFn.apply(item);
     * V oldValue = map.get(key);
     * V newValue = updateFn.apply(oldValue, item);
     * if (newValue == null)
     *     map.remove(key);
     * else
     *     map.put(key, newValue);
     * }</pre>
     * <p>
     * This sink supports exactly-once processing only if the
     * supplied update function performs <i>idempotent updates</i>, i.e., it
     * satisfies the rule {@code updateFn.apply(v, e).equals(v)} for any
     * {@code e} that was already observed.
     * <p>
     * <b>Note:</b> This operation is <b>NOT</b> lock-aware, it will process the entries
     * no matter if they are locked or not.
     * Use {@link #mapWithEntryProcessor} if you need locking.
     * <p>
     * The default local parallelism for this sink is 1.
     * <p>
     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param mapName  name of the map
     * @param toKeyFn  function that extracts the key from the input item
     * @param updateFn function that receives the existing map value and the item
     *                 and returns the new map value
     * @param <T>      input item type
     * @param <K>      key type
     * @param <V>      value type
     * @return a sink built by {@link MapSinkBuilder} with the given update function
     */
    @Nonnull
    public static <T, K, V> Sink<T> mapWithUpdating(
            @Nonnull String mapName,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull BiFunctionEx<? super V, ? super T, ? extends V> updateFn
    ) {
        return Sinks.<T, K, V>mapBuilder(mapName)
                .toKeyFn(toKeyFn)
                .updateFn(updateFn)
                .build();
    }

    /**
     * Returns a sink that uses the supplied key-extracting and value-updating
     * functions to update a Hazelcast {@code IMap}. For each item it receives, it
     * applies {@code toKeyFn} to get the key and then applies {@code updateFn} to
     * the existing value in the map and the received item to acquire the new
     * value to associate with the key. If the new value is {@code null}, it
     * removes the key from the map. Expressed as code, the sink performs the
     * equivalent of the following for each item:
     * <pre>{@code
     * K key = toKeyFn.apply(item);
     * V oldValue = map.get(key);
     * V newValue = updateFn.apply(oldValue, item);
     * if (newValue == null)
     *     map.remove(key);
     * else
     *     map.put(key, newValue);
     * }</pre>
     * <p>
     * <strong>NOTE:</strong> Jet only remembers the name of the map you supply
     * and acquires a map with that name on the local cluster. If you supply a
     * map instance from another cluster, no error will be thrown to indicate
     * this.
     * <p>
     * This sink supports exactly-once processing only if the supplied update
     * function performs <i>idempotent updates</i>, i.e., it satisfies the rule
     * {@code updateFn.apply(v, e).equals(v)} for any {@code e} that was
     * already observed.
     * <p>
     * <b>Note:</b> This operation is not lock-aware, it will process the entries
     * even if they are locked. Use {@link #mapWithEntryProcessor} if you need
     * locking.
     * <p>
     * The default local parallelism for this sink is 1.
     * <p>
     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param map      map to drain to; only its name is used
     * @param toKeyFn  function that extracts the key from the input item
     * @param updateFn function that receives the existing map value and the item
     *                 and returns the new map value
     * @param <T>      input item type
     * @param <K>      key type
     * @param <V>      value type
     * @return a sink updating the map with the same name as {@code map}
     */
    @Nonnull
    public static <T, K, V> Sink<T> mapWithUpdating(
            @Nonnull IMap<? super K, ? super V> map,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull BiFunctionEx<? super V, ? super T, ? extends V> updateFn
    ) {
        // Only the name is retained; see the NOTE above.
        return mapWithUpdating(map.getName(), toKeyFn, updateFn);
    }

    /**
     * Returns a sink equivalent to {@link #mapWithUpdating(String, FunctionEx,
     * BiFunctionEx)}, but for a map in a remote Hazelcast cluster identified
     * by the supplied {@code ClientConfig}.
     * <p>
     * Due to the used API, the remote cluster must be at least version 4.0.
     *
     * @param mapName      the name of the map
     * @param clientConfig configuration used to connect to the remote cluster
     * @param toKeyFn      function that extracts the key from the input item
     * @param updateFn     function that receives the existing map value and
     *                     the item and returns the new map value
     * @param <T>          input item type
     * @param <K>          key type
     * @param <V>          value type
     * @return a sink built by {@link MapSinkBuilder} for the remote map
     */
    @Nonnull
    public static <T, K, V> Sink<T> remoteMapWithUpdating(
            @Nonnull String mapName,
            @Nonnull ClientConfig clientConfig,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull BiFunctionEx<? super V, ? super T, ? extends V> updateFn
    ) {
        return Sinks.<T, K, V>mapBuilder(mapName)
                .clientConfig(clientConfig)
                .toKeyFn(toKeyFn)
                .updateFn(updateFn)
                .build();
    }

    /**
     * The same as the {@link #remoteMapWithUpdating(String, ClientConfig, FunctionEx, BiFunctionEx)}
     * method. The only difference is instead of a ClientConfig parameter that
     * is used to connect to remote cluster, this method receives a
     * reference to a DataConnectionConfig.
     * <p>
     * The DataConnectionConfig caches the connection to remote cluster, so that it
     * can be re-used.
     *
     * @param mapName           the name of the map
     * @param dataConnectionRef the reference to DataConnectionConfig
     * @param toKeyFn           function that extracts the key from the input item
     * @param updateFn          function that receives the existing map value
     *                          and the item and returns the new map value
     * @param <T>               input item type
     * @param <K>               key type
     * @param <V>               value type
     * @return a sink built by {@link MapSinkBuilder} for the remote map
     * @since 5.4
     */
    @Nonnull
    public static <T, K, V> Sink<T> remoteMapWithUpdating(
            @Nonnull String mapName,
            @Nonnull DataConnectionRef dataConnectionRef,
            @Nonnull FunctionEx<? super T, ? extends K> toKeyFn,
            @Nonnull BiFunctionEx<? super V, ? super T, ? extends V> updateFn
    ) {
        return Sinks.<T, K, V>mapBuilder(mapName)
                .dataConnectionRef(dataConnectionRef)
                .toKeyFn(toKeyFn)
                .updateFn(updateFn)
                .build();
    }

    /**
     * Convenience for {@link #mapWithUpdating(String, FunctionEx,
     * BiFunctionEx)} with {@link Entry} as the input item.
     */
    @Nonnull
    public static > Sink mapWithUpdating(
            @Nonnull String mapName,
            @Nonnull BiFunctionEx updateFn
    ) {
        return mapWithUpdating(mapName, entryKey(), updateFn);
    }

    /**
     * Convenience for {@link #mapWithUpdating(IMap, FunctionEx,
     * BiFunctionEx)} with {@link Entry} as the input item.
     */
    @Nonnull
    public static > Sink mapWithUpdating(
            @Nonnull IMap map,
            @Nonnull BiFunctionEx updateFn
    ) {
        return mapWithUpdating(map.getName(), updateFn);
    }

    /**
     * Convenience for {@link #remoteMapWithUpdating} with {@link Entry} as
     * input item.
     */
    @Nonnull
    public static > Sink remoteMapWithUpdating(
            @Nonnull String mapName,
            @Nonnull ClientConfig clientConfig,
            @Nonnull BiFunctionEx updateFn
    ) {
        return Sinks.mapBuilder(mapName)
                .clientConfig(clientConfig)
                .toKeyFn(Entry::getKey)
                .updateFn(updateFn)
                .build();
    }

    /**
     * The same as the {@link #remoteMapWithUpdating(String, ClientConfig, BiFunctionEx)}
     * method. The only difference is instead of a ClientConfig parameter that
     * is used to connect to remote cluster, this method receives a
     * DataConnectionConfig.
     * 

     * The DataConnectionConfig caches the connection to remote cluster, so that it
     * can be re-used
     *
     * @since 5.4
     */
    @Nonnull
    public static > Sink remoteMapWithUpdating(
            @Nonnull String mapName,
            @Nonnull DataConnectionRef dataConnectionRef,
            @Nonnull BiFunctionEx updateFn
    ) {
        return Sinks.mapBuilder(mapName)
                .dataConnectionRef(dataConnectionRef)
                .toKeyFn(Entry::getKey)
                .updateFn(updateFn)
                .build();
    }

    /**
     * Convenience for {@link #mapWithEntryProcessor(int, String, FunctionEx, FunctionEx)}
     * when the maximum number of async operations is not specified.
     */
    @Nonnull
    public static  Sink mapWithEntryProcessor(
            @Nonnull String mapName,
            @Nonnull FunctionEx toKeyFn,
            @Nonnull FunctionEx> toEntryProcessorFn
    ) {
        return Sinks.mapEntryProcessorBuilder(mapName)
                .toKeyFn(toKeyFn)
                .toEntryProcessorFn(toEntryProcessorFn)
                .build();
    }

    /**
     * Returns a sink that uses the items it receives to create {@code
     * EntryProcessor}s it submits to a Hazelcast {@code IMap} with the
     * specified name. For each received item it applies {@code toKeyFn} to
     * get the key and {@code toEntryProcessorFn} to get the entry processor,
     * and then submits the key and the entry processor to the Hazelcast
     * cluster, which will internally apply the entry processor to the key.
     * 

     * As opposed to {@link #mapWithUpdating} and {@link #mapWithMerging},
     * this sink does not use batching and submits a separate entry processor
     * for each received item. For use cases that are efficiently solvable
     * using those sinks, this one will perform worse. It should be used only
     * when they are not applicable.
     * 

     * If your entry processors take a long time to update a value, consider
     * using entry processors that implement {@link Offloadable}. This will
     * avoid blocking the Hazelcast partition thread during large update
     * operations.
     * 

     * This sink supports exactly-once processing only if the
     * supplied entry processor performs idempotent updates, i.e.,
     * the resulting value would be the same if an entry processor
     * was run on the same entry more than once.
     * 

     * Note: Unlike {@link #mapWithUpdating} and {@link #mapWithMerging},
     * this operation is lock-aware. If the key is locked,
     * the EntryProcessor will wait until it acquires the lock.
     * 

     * The default local parallelism for this sink is 1.
     * 

     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param maxParallelAsyncOps maximum number of simultaneous entry
     *                            processors affecting the map
     * @param mapName             name of the map
     * @param toKeyFn             function that extracts the key from the input item
     * @param toEntryProcessorFn  function that returns the {@code EntryProcessor}
     *                            to apply to the key
     * @param                  input item type
     * @param                  key type
     * @param                  value type
     */
    @Nonnull
    public static  Sink mapWithEntryProcessor(
            int maxParallelAsyncOps,
            @Nonnull String mapName,
            @Nonnull FunctionEx toKeyFn,
            @Nonnull FunctionEx> toEntryProcessorFn
    ) {
        return Sinks.mapEntryProcessorBuilder(mapName)
                .maxParallelAsyncOps(maxParallelAsyncOps)
                .toKeyFn(toKeyFn)
                .toEntryProcessorFn(toEntryProcessorFn)
                .build();
    }

    /**
     * Returns a sink that uses the items it receives to create {@code
     * EntryProcessor}s it submits to a Hazelcast {@code IMap} with the
     * specified name. For each received item it applies {@code toKeyFn} to
     * get the key and {@code toEntryProcessorFn} to get the entry processor,
     * and then submits the key and the entry processor to the Hazelcast
     * cluster, which will internally apply the entry processor to the key.
     * 

     * NOTE: Jet only remembers the name of the map you supply
     * and acquires a map with that name on the local cluster. If you supply a
     * map instance from another cluster, no error will be thrown to indicate
     * this.
     * 

     * As opposed to {@link #mapWithUpdating} and {@link #mapWithMerging},
     * this sink does not use batching and submits a separate entry processor
     * for each received item. For use cases that are efficiently solvable
     * using those sinks, this one will perform worse. It should be used only
     * when they are not applicable.
     * 

     * If your entry processors take a long time to update a value, consider
     * using entry processors that implement {@link Offloadable}. This will
     * avoid blocking the Hazelcast partition thread during large update
     * operations.
     * 

     * This sink supports exactly-once processing only if the supplied entry
     * processor performs idempotent updates, i.e., the resulting value
     * would be the same if an entry processor was run on the same entry more
     * than once.
     * 

     * Note: Unlike {@link #mapWithUpdating} and {@link #mapWithMerging},
     * this operation is lock-aware. If the key is locked,
     * the EntryProcessor will wait until it acquires the lock.
     * 

     * The default local parallelism for this sink is 1.
     * 

     * The given functions must be stateless and {@linkplain
     * Processor#isCooperative() cooperative}.
     *
     * @param map                map to drain to
     * @param toKeyFn            function that extracts the key from the input item
     * @param toEntryProcessorFn function that returns the {@code EntryProcessor} to apply to the key
     * @param                 input item type
     * @param                 key type
     * @param                 value type
     */
    @Nonnull
    public static  Sink mapWithEntryProcessor(
            @Nonnull IMap map,
            @Nonnull FunctionEx toKeyFn,
            @Nonnull FunctionEx> toEntryProcessorFn
    ) {
        return mapWithEntryProcessor(map.getName(), toKeyFn, toEntryProcessorFn);
    }

    /**
     * Returns a sink equivalent to {@link #mapWithEntryProcessor}, but for a map
     * in a remote Hazelcast cluster identified by the supplied {@code
     * ClientConfig}.
     */
    @Nonnull
    public static  Sink remoteMapWithEntryProcessor(
            @Nonnull String mapName,
            @Nonnull ClientConfig clientConfig,
            @Nonnull FunctionEx toKeyFn,
            @Nonnull FunctionEx> toEntryProcessorFn
    ) {
        return Sinks.mapEntryProcessorBuilder(mapName)
                .clientConfig(clientConfig)
                .toKeyFn(toKeyFn)
                .toEntryProcessorFn(toEntryProcessorFn)
                .build();
    }

    /**
     * The same as the {@link #remoteMapWithEntryProcessor(String, ClientConfig, FunctionEx, FunctionEx)}
     * method. The only difference is instead of a ClientConfig parameter that
     * is used to connect to remote cluster, this method receives a
     * DataConnectionConfig.
     * 

     * The DataConnectionConfig caches the connection to remote cluster, so that it
     * can be re-used
     *
     * @since 5.4
     */
    public static  Sink remoteMapWithEntryProcessor(
            @Nonnull String mapName,
            @Nonnull DataConnectionRef dataConnectionRef,
            @Nonnull FunctionEx toKeyFn,
            @Nonnull FunctionEx> toEntryProcessorFn
    ) {
        return Sinks.mapEntryProcessorBuilder(mapName)
                .dataConnectionName(dataConnectionRef)
                .toKeyFn(toKeyFn)
                .toEntryProcessorFn(toEntryProcessorFn)
                .build();
    }

    /**
     * Returns a sink that puts {@code Map.Entry}s it receives into a Hazelcast
     * {@code ICache} with the specified name.
     * <p>
     * This sink provides the exactly-once guarantee thanks to <i>idempotent
     * updates</i>. It means that the value with the same key is not appended,
     * but overwritten. After the job is restarted from snapshot, duplicate
     * items will not change the state in the target cache.
     * <p>
     * The default local parallelism for this sink is 2.
     *
     * @param cacheName name of the cache
     * @param <T>       type of the entries the sink accepts
     * @return a sink named {@code cacheSink(<cacheName>)} that uses the entry
     *         key as its partition key
     */
    @Nonnull
    public static <T extends Entry> Sink<T> cache(@Nonnull String cacheName) {
        // The third argument is the partition-key function: items are keyed
        // by their entry key.
        return new SinkImpl<>("cacheSink(" + cacheName + ')', writeCacheP(cacheName), Entry::getKey);
    }

    /**
     * Returns a sink that puts {@code Map.Entry}s it receives into a Hazelcast
     * {@code ICache} with the specified name in a remote cluster identified by
     * the supplied {@code ClientConfig}.
     * <p>
     * This sink provides the exactly-once guarantee thanks to <i>idempotent
     * updates</i>. It means that the value with the same key is not appended,
     * but overwritten. After the job is restarted from snapshot, duplicate
     * items will not change the state in the target cache.
     * <p>
     * The default local parallelism for this sink is 2.
     *
     * @param cacheName    name of the cache
     * @param clientConfig client configuration identifying the remote cluster
     * @param <T>          type of the entries the sink accepts
     * @return a sink named {@code remoteCacheSink(<cacheName>)}
     */
    @Nonnull
    public static <T extends Entry> Sink<T> remoteCache(
            @Nonnull String cacheName,
            @Nonnull ClientConfig clientConfig
    ) {
        return fromProcessor("remoteCacheSink(" + cacheName + ')', writeRemoteCacheP(cacheName, clientConfig));
    }

    /**
     * Returns a sink that adds the items it receives to a Hazelcast {@code
     * IList} with the specified name.
     * <p>
     * No state is saved to snapshot for this sink. After the job is restarted,
     * the items will likely be duplicated, providing an <i>at-least-once</i>
     * guarantee.
     * <p>
     * The default local parallelism for this sink is 1.
     *
     * @param listName name of the list
     * @param <T>      type of the items the sink accepts
     * @return a sink named {@code listSink(<listName>)}
     */
    @Nonnull
    public static <T> Sink<T> list(@Nonnull String listName) {
        return fromProcessor("listSink(" + listName + ')', writeListP(listName));
    }

    /**
     * Returns a sink that adds the items it receives to the specified
     * Hazelcast {@code IList}.
     * <p>
     * <strong>NOTE:</strong> Jet only remembers the name of the list you
     * supply and acquires a list with that name on the local cluster. If you
     * supply a list instance from another cluster, no error will be thrown to
     * indicate this.
     * <p>
     * No state is saved to snapshot for this sink. After the job is restarted,
     * the items will likely be duplicated, providing an <i>at-least-once</i>
     * guarantee.
     * <p>
     * The default local parallelism for this sink is 1.
     *
     * @param list the list whose name identifies the target list
     * @param <T>  type of the items the sink accepts
     * @return the sink returned by the name-based overload for {@code list.getName()}
     */
    @Nonnull
    public static <T> Sink<T> list(@Nonnull IList<? super T> list) {
        // Only the list's name is carried over; the instance itself is not retained.
        return list(list.getName());
    }

    /**
     * Returns a sink that adds the items it receives to a Hazelcast {@code
     * IList} with the specified name in a remote cluster identified by the
     * supplied {@code ClientConfig}.
     * <p>
     * No state is saved to snapshot for this sink. After the job is restarted,
     * the items will likely be duplicated, providing an <i>at-least-once</i>
     * guarantee.
     * <p>
     * The default local parallelism for this sink is 1.
     *
     * @param listName     name of the list
     * @param clientConfig client configuration identifying the remote cluster
     * @param <T>          type of the items the sink accepts
     * @return a sink named {@code remoteListSink(<listName>)}
     */
    @Nonnull
    public static <T> Sink<T> remoteList(@Nonnull String listName, @Nonnull ClientConfig clientConfig) {
        return fromProcessor("remoteListSink(" + listName + ')', writeRemoteListP(listName, clientConfig));
    }

    /**
     * Returns a sink which publishes the items it receives to a distributed
     * reliable topic with the specified name.
     * 

     * No state is saved to snapshot for this sink. After the job is restarted,
     * the items will likely be duplicated, providing an at-least-once
     * guarantee.
     * 

     * Local parallelism for this sink is 1.
     *
     * @since Jet 4.0
     */
    @Nonnull
    public static  Sink reliableTopic(@Nonnull String reliableTopicName) {
        return SinkBuilder.>sinkBuilder("reliableTopicSink(" + reliableTopicName + "))",
                        SecuredFunctions.reliableTopicFn(reliableTopicName))
                .receiveFn(ITopic::publish)
                .permission(new ReliableTopicPermission(reliableTopicName, ACTION_CREATE, ACTION_PUBLISH))
                .build();
    }

    /**
     * Returns a sink which publishes the items it receives to the provided
     * distributed reliable topic. More precisely, it takes the name
     * of the given {@code ITopic} and then independently retrieves the {@code
     * ITopic} with the same name from the cluster where the job is running. To
     * prevent surprising behavior, make sure you have obtained the {@code
     * ITopic} from the same cluster to which you will submit the pipeline.
     * <p>
     * No state is saved to snapshot for this sink. After the job is restarted,
     * the items will likely be duplicated, providing an at-least-once
     * guarantee.
     * <p>
     * Local parallelism for this sink is 1.
     *
     * @since Jet 4.0
     */
    @Nonnull
    public static  Sink reliableTopic(@Nonnull ITopic
com.hazelcast.jet.pipeline.Sinks Maven / Gradle / Ivy

Fault tolerance

File name structure

Notes