All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.jet.cdc.CdcSinks Maven / Gradle / Ivy

There is a newer version: 5.5.0
Show newest version
/*
 * Copyright (c) 2008-2020, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.cdc;

import com.hazelcast.client.config.ClientConfig;
import com.hazelcast.function.FunctionEx;
import com.hazelcast.jet.cdc.impl.WriteCdcP;
import com.hazelcast.jet.core.ProcessorMetaSupplier;
import com.hazelcast.jet.core.ProcessorSupplier;
import com.hazelcast.jet.impl.connector.AbstractHazelcastConnectorSupplier;
import com.hazelcast.jet.impl.pipeline.SinkImpl;
import com.hazelcast.jet.pipeline.Sink;
import com.hazelcast.map.IMap;
import com.hazelcast.spi.properties.HazelcastProperty;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import static com.hazelcast.jet.cdc.Operation.DELETE;
import static com.hazelcast.jet.impl.pipeline.SinkImpl.Type.DISTRIBUTED_PARTITIONED;
import static com.hazelcast.jet.impl.util.ImdgUtil.asXmlString;
import static java.util.concurrent.TimeUnit.SECONDS;

/**
 * Contains factory methods for change data capture specific pipeline
 * sinks. As a consequence these sinks take {@link ChangeRecord} items
 * as their input.
 * <p>
 * These sinks can detect any reordering that might happen in
 * the {@code ChangeRecord} stream (Jet pipelines use parallel
 * execution, so item reordering can and does happen). Reordering
 * detection is based on implementation-specific sequence numbers
 * provided by CDC event sources. The sink reacts to reordering by
 * dropping obsolete input items. The exact behavior is as follows. For
 * each input item, the sink:
 * <ol>
 *  <li>applies the {@code keyFn} to the input item to extract its key</li>
 *  <li>extracts the item's sequence number</li>
 *  <li>compares the sequence number with the previously seen sequence
 *      number for the same key, if any</li>
 *  <li>if the previous sequence number is more recent than the one
 *      observed in the input item, it drops (ignores) the input item</li>
 * </ol>
 * <p>
 * About the implementation-specific sequence numbers provided by the
 * CDC sources. They consist of two parts:
 * <ul>
 *  <li>numeric sequence for which a monotonically increasing
 *      value is emitted by the source and which allows ordering of the
 *      events</li>
 *  <li>source descriptor which allows us to identify situations
 *      when the numeric sequence gets reset or any other events when
 *      comparing new numeric values with previous ones no longer makes
 *      sense</li>
 * </ul>
 * <p>
 * The sequence source is made up of information like ID of the database
 * instance the connector is connected to, name of the binlog file being
 * monitored and so on. So whenever the source reconnects to a new server
 * or switches to a new binlog file or other such event, the source field
 * of sequence numbers will change.
 * <p>
 * The logic of determining which events are more recent takes the sequence
 * source into consideration. Whenever the source field changes, the event
 * carrying it will be considered more recent than ones with the old
 * source value. Numeric sequence numbers are compared to establish order
 * only when their sources match.
 * <p>
 * Restarting the CDC Jet source will not change sequence number sources,
 * only significant changes on the database side will.
 *
 * @since 4.2
 */
public final class CdcSinks {

    /**
     * Number of seconds for which the sink will remember the last seen
     * sequence number for an input key (used to detect reordering). After
     * this time the last-seen sequence number values will eventually be
     * evicted, in order to save space.
     * <p>
     * The default value is 10 seconds.
     *
     * @since 4.2
     */
    public static final HazelcastProperty SEQUENCE_CACHE_EXPIRATION_SECONDS
            = new HazelcastProperty("jet.cdc.sink.sequence.cache.expiration.seconds", 10, SECONDS);

    /** Utility class holding only static factories; not meant to be instantiated. */
    private CdcSinks() {
    }

    /**
     * Returns a sink that applies the changes described by a Change Data
     * Capture (CDC) stream to an {@code IMap}. The main usage is to have
     * the {@code IMap} mirror the contents of the data table that is the
     * source of the CDC stream, but since it accepts arbitrary key and
     * value functions, other behaviors are possible as well.
     * <p>
     * NOTE: in order for the sink behavior to be predictable,
     * the map should be non-existent or empty at the time the sink starts
     * using it.
     * <p>
     * For each item the sink receives, it uses the {@code keyFn} to
     * determine which map key the change event applies to. Then, based
     * on the {@code ChangeRecord}'s {@code Operation} it decides to
     * either:
     * <ul>
     *  <li>delete the key from the map ({@link Operation#DELETE})</li>
     *  <li>insert a new value for the key
     *      ({@link Operation#SYNC} and {@link Operation#INSERT})</li>
     *  <li>update the current value for the key ({@link Operation#UPDATE})</li>
     * </ul>
     * For insert and update operations, the sink determines the new value
     * by applying the provided {@code valueFn} to the change record.
     * <p>
     * NOTE: if {@code valueFn} returns {@code null},
     * then the key will be deleted no matter the operation (i.e. even for
     * update and insert records).
     *
     * @param mapName name of the {@code IMap} the changes are applied to
     * @param keyFn   function that extracts the map key from a change record
     * @param valueFn function that extracts the new map value from a change
     *                record; a {@code null} result deletes the key
     * @param <K>     type of the keys produced by {@code keyFn}
     * @param <V>     type of the values produced by {@code valueFn}
     * @return the sink, named {@code "mapCdcSink(" + mapName + ')'}
     * @since 4.2
     */
    @Nonnull
    public static <K, V> Sink<ChangeRecord> map(
            @Nonnull String mapName,
            @Nonnull FunctionEx<? super ChangeRecord, ? extends K> keyFn,
            @Nonnull FunctionEx<? super ChangeRecord, ? extends V> valueFn
    ) {
        String name = "mapCdcSink(" + mapName + ')';
        // A null client config selects the non-remote variant of the sink.
        return sink(name, mapName, null, keyFn, valueFn);
    }

    /**
     * Returns a sink that applies the changes described by a Change Data
     * Capture (CDC) stream to an {@code IMap}. The main usage is to have
     * the {@code IMap} mirror the contents of the data table that is the
     * source of the CDC stream, but since it accepts arbitrary key and
     * value functions, other behaviors are possible as well.
     * <p>
     * NOTE: in order for the sink behavior to be predictable,
     * the map should be non-existent or empty at the time the sink starts
     * using it.
     * <p>
     * For each item the sink receives it uses the {@code keyFn} to
     * determine which map key the change event applies to. Then, based
     * on the {@code ChangeRecord}'s {@code Operation} it decides to
     * either:
     * <ul>
     *  <li>delete the key from the map ({@link Operation#DELETE})</li>
     *  <li>insert a new value for the key
     *      ({@link Operation#SYNC} and {@link Operation#INSERT})</li>
     *  <li>update the current value for the key ({@link Operation#UPDATE})</li>
     * </ul>
     * For insert and update operations, the sink determines the new value
     * by applying the provided {@code valueFn} to the change record.
     * <p>
     * NOTE: if {@code valueFn} returns {@code null},
     * then the key will be deleted no matter the operation (i.e. even for
     * update and insert records).
     *
     * @param map     the {@code IMap} the changes are applied to; only its
     *                name is used to build the sink
     * @param keyFn   function that extracts the map key from a change record
     * @param valueFn function that extracts the new map value from a change
     *                record; a {@code null} result deletes the key
     * @param <K>     type of the keys produced by {@code keyFn}
     * @param <V>     type of the values produced by {@code valueFn}
     * @return the same sink as {@link #map(String, FunctionEx, FunctionEx)}
     *         called with {@code map.getName()}
     * @since 4.2
     */
    @Nonnull
    public static <K, V> Sink<ChangeRecord> map(
            @Nonnull IMap<? super K, ? super V> map,
            @Nonnull FunctionEx<? super ChangeRecord, ? extends K> keyFn,
            @Nonnull FunctionEx<? super ChangeRecord, ? extends V> valueFn
    ) {
        return map(map.getName(), keyFn, valueFn);
    }

    /**
     * Returns a sink equivalent to {@link #map}, but for a map in a remote
     * Hazelcast cluster identified by the supplied {@code ClientConfig}.
     * <p>
     * NOTE 1: in order for the sink behavior to be
     * predictable, the map should be non-existent or empty at the time the
     * sink starts using it.
     * <p>
     * NOTE 2: if {@code valueFn} returns {@code null},
     * then the key will be deleted no matter the operation (i.e. even for
     * update and insert records).
     * <p>
     * Due to the used API, the remote cluster must be at least version 4.0.
     *
     * @param mapName      name of the {@code IMap} in the remote cluster
     * @param clientConfig configuration identifying the remote cluster
     * @param keyFn        function that extracts the map key from a change
     *                     record
     * @param valueFn      function that extracts the new map value from a
     *                     change record; a {@code null} result deletes the key
     * @param <K>          type of the keys produced by {@code keyFn}
     * @param <V>          type of the values produced by {@code valueFn}
     * @return the sink, named {@code "remoteMapCdcSink(" + mapName + ')'}
     * @since 4.2
     */
    @Nonnull
    public static <K, V> Sink<ChangeRecord> remoteMap(
            @Nonnull String mapName,
            @Nonnull ClientConfig clientConfig,
            @Nonnull FunctionEx<? super ChangeRecord, ? extends K> keyFn,
            @Nonnull FunctionEx<? super ChangeRecord, ? extends V> valueFn
    ) {
        String name = "remoteMapCdcSink(" + mapName + ')';
        return sink(name, mapName, clientConfig, keyFn, valueFn);
    }

    /**
     * Shared implementation behind {@link #map} and {@link #remoteMap}:
     * wires a {@link WriteCdcP} processor into a {@link SinkImpl}.
     *
     * @param name         name given to the resulting sink
     * @param map          name of the target {@code IMap}
     * @param clientConfig client configuration of the remote cluster, or
     *                     {@code null} when called from the {@link #map}
     *                     variants
     * @param keyFn        function that extracts the map key from a change
     *                     record
     * @param valueFn      function that extracts the new map value from a
     *                     change record
     * @param <K>          type of the keys produced by {@code keyFn}
     * @param <V>          type of the values produced by {@code valueFn}
     * @return the assembled sink
     */
    @Nonnull
    private static <K, V> Sink<ChangeRecord> sink(
            @Nonnull String name,
            @Nonnull String map,
            @Nullable ClientConfig clientConfig,
            @Nonnull FunctionEx<? super ChangeRecord, ? extends K> keyFn,
            @Nonnull FunctionEx<? super ChangeRecord, ? extends V> valueFn
    ) {
        // DELETE records never reach valueFn: they map straight to null,
        // the same marker a null valueFn result uses to request deletion.
        FunctionEx<? super ChangeRecord, ? extends V> toValueFn =
                record -> DELETE.equals(record.operation()) ? null : valueFn.apply(record);
        ProcessorSupplier supplier = AbstractHazelcastConnectorSupplier.of(asXmlString(clientConfig),
                instance -> new WriteCdcP<>(instance, map, keyFn, toValueFn));
        ProcessorMetaSupplier metaSupplier = ProcessorMetaSupplier.of(supplier);
        // keyFn is handed to SinkImpl as well, together with the
        // DISTRIBUTED_PARTITIONED sink type.
        return new SinkImpl<>(name, metaSupplier, DISTRIBUTED_PARTITIONED, keyFn);
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy