All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.alibaba.ververica.cdc.connectors.postgres.PostgreSQLSource Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.ververica.cdc.connectors.postgres;

import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.alibaba.ververica.cdc.debezium.DebeziumSourceFunction;
import io.debezium.connector.postgresql.PostgresConnector;

import java.time.Duration;
import java.util.Properties;

import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * A builder to build a SourceFunction which can read snapshot and continue to consume binlog for
 * PostgreSQL.
 */
public class PostgreSQLSource {

    private static final long DEFAULT_HEARTBEAT_MS = Duration.ofMinutes(5).toMillis();

    public static  Builder builder() {
        return new Builder<>();
    }

    /** Builder class of {@link PostgreSQLSource}. */
    public static class Builder {

        private String pluginName = "decoderbufs";
        private String slotName = "flink";
        private int port = 5432; // default 5432 port
        private String hostname;
        private String database;
        private String username;
        private String password;
        private String[] schemaList;
        private String[] tableList;
        private Properties dbzProperties;
        private DebeziumDeserializationSchema deserializer;

        /**
         * The name of the Postgres logical decoding plug-in installed on the server. Supported
         * values are decoderbufs, wal2json, wal2json_rds, wal2json_streaming,
         * wal2json_rds_streaming and pgoutput.
         */
        public Builder decodingPluginName(String name) {
            this.pluginName = name;
            return this;
        }

        public Builder hostname(String hostname) {
            this.hostname = hostname;
            return this;
        }

        /** Integer port number of the PostgreSQL database server. */
        public Builder port(int port) {
            this.port = port;
            return this;
        }

        /** The name of the PostgreSQL database from which to stream the changes. */
        public Builder database(String database) {
            this.database = database;
            return this;
        }

        /**
         * An optional list of regular expressions that match schema names to be monitored; any
         * schema name not included in the whitelist will be excluded from monitoring. By default
         * all non-system schemas will be monitored.
         */
        public Builder schemaList(String... schemaList) {
            this.schemaList = schemaList;
            return this;
        }

        /**
         * An optional list of regular expressions that match fully-qualified table identifiers for
         * tables to be monitored; any table not included in the whitelist will be excluded from
         * monitoring. Each identifier is of the form schemaName.tableName. By default the connector
         * will monitor every non-system table in each monitored schema.
         */
        public Builder tableList(String... tableList) {
            this.tableList = tableList;
            return this;
        }

        /**
         * Name of the PostgreSQL database to use when connecting to the PostgreSQL database server.
         */
        public Builder username(String username) {
            this.username = username;
            return this;
        }

        /** Password to use when connecting to the PostgreSQL database server. */
        public Builder password(String password) {
            this.password = password;
            return this;
        }

        /**
         * The name of the PostgreSQL logical decoding slot that was created for streaming changes
         * from a particular plug-in for a particular database/schema. The server uses this slot to
         * stream events to the connector that you are configuring. Default is "flink".
         *
         * 

Slot names must conform to PostgreSQL * replication slot naming rules, which state: "Each replication slot has a name, which * can contain lower-case letters, numbers, and the underscore character." */ public Builder slotName(String slotName) { this.slotName = slotName; return this; } /** The Debezium Postgres connector properties. */ public Builder debeziumProperties(Properties properties) { this.dbzProperties = properties; return this; } /** * The deserializer used to convert from consumed {@link * org.apache.kafka.connect.source.SourceRecord}. */ public Builder deserializer(DebeziumDeserializationSchema deserializer) { this.deserializer = deserializer; return this; } public DebeziumSourceFunction build() { Properties props = new Properties(); props.setProperty("connector.class", PostgresConnector.class.getCanonicalName()); props.setProperty("plugin.name", pluginName); // hard code server name, because we don't need to distinguish it, docs: // Logical name that identifies and provides a namespace for the particular PostgreSQL // database server/cluster being monitored. The logical name should be unique across // all other connectors, since it is used as a prefix for all Kafka topic names coming // from this connector. Only alphanumeric characters and underscores should be used. props.setProperty("database.server.name", "postgres_cdc_source"); props.setProperty("database.hostname", checkNotNull(hostname)); props.setProperty("database.dbname", checkNotNull(database)); props.setProperty("database.user", checkNotNull(username)); props.setProperty("database.password", checkNotNull(password)); props.setProperty("database.port", String.valueOf(port)); props.setProperty("slot.name", slotName); // we have to enable heartbeat for PG to make sure DebeziumChangeConsumer#handleBatch // is invoked after job restart props.setProperty("heartbeat.interval.ms", String.valueOf(DEFAULT_HEARTBEAT_MS)); if (schemaList != null) { props.setProperty("schema.whitelist", String.join(",", schemaList)); } if (tableList != null) { props.setProperty("table.whitelist", String.join(",", tableList)); } if (dbzProperties != null) { dbzProperties.forEach(props::put); } return new DebeziumSourceFunction<>(deserializer, props, null); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy