com.alibaba.ververica.cdc.connectors.mysql.MySQLSource Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.ververica.cdc.connectors.mysql;
import com.alibaba.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.alibaba.ververica.cdc.debezium.DebeziumSourceFunction;
import com.alibaba.ververica.cdc.debezium.internal.DebeziumOffset;
import io.debezium.connector.mysql.MySqlConnector;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import static org.apache.flink.util.Preconditions.checkNotNull;
/** A builder to build a SourceFunction which can read snapshot and continue to consume binlog. */
public class MySQLSource {

    /**
     * Logical server name handed to Debezium. It is hard coded because this source does not need
     * to distinguish between servers; the same value is used as the {@code "server"} entry of the
     * source partition when starting from a specific offset.
     */
    private static final String DATABASE_SERVER_NAME = "mysql_binlog_source";

    /**
     * Creates a new, empty {@link Builder}.
     *
     * @param <T> type parameter shared by the configured {@link DebeziumDeserializationSchema}
     *     and the resulting {@link DebeziumSourceFunction}
     * @return a builder with port 3306 and {@link StartupOptions#initial()} preset
     */
    public static <T> Builder<T> builder() {
        return new Builder<>();
    }

    /**
     * Builder class of {@link MySQLSource}.
     *
     * @param <T> type parameter shared by the configured {@link DebeziumDeserializationSchema}
     *     and the resulting {@link DebeziumSourceFunction}
     */
    public static class Builder<T> {

        private int port = 3306; // default 3306 port
        private String hostname;
        private String[] databaseList;
        private String username;
        private String password;
        private Integer serverId;
        private String serverTimeZone;
        private String[] tableList;
        private Properties dbzProperties;
        private StartupOptions startupOptions = StartupOptions.initial();
        private DebeziumDeserializationSchema<T> deserializer;

        /**
         * Hostname of the MySQL database server (passed to Debezium as {@code
         * database.hostname}). Required: {@link #build()} fails if it is not set.
         */
        public Builder<T> hostname(String hostname) {
            this.hostname = hostname;
            return this;
        }

        /** Integer port number of the MySQL database server. */
        public Builder<T> port(int port) {
            this.port = port;
            return this;
        }

        /**
         * An optional list of regular expressions that match database names to be monitored; any
         * database name not included in the whitelist will be excluded from monitoring. By default
         * all databases will be monitored.
         */
        public Builder<T> databaseList(String... databaseList) {
            this.databaseList = databaseList;
            return this;
        }

        /**
         * An optional list of regular expressions that match fully-qualified table identifiers for
         * tables to be monitored; any table not included in the list will be excluded from
         * monitoring. Each identifier is of the form databaseName.tableName. By default the
         * connector will monitor every non-system table in each monitored database.
         */
        public Builder<T> tableList(String... tableList) {
            this.tableList = tableList;
            return this;
        }

        /**
         * Name of the MySQL user to use when connecting to the MySQL database server. Required:
         * {@link #build()} fails if it is not set.
         */
        public Builder<T> username(String username) {
            this.username = username;
            return this;
        }

        /**
         * Password to use when connecting to the MySQL database server. Required: {@link
         * #build()} fails if it is not set.
         */
        public Builder<T> password(String password) {
            this.password = password;
            return this;
        }

        /**
         * The session time zone in database server, e.g. "America/Los_Angeles". It controls how the
         * TIMESTAMP type in MYSQL converted to STRING. See more
         * https://debezium.io/documentation/reference/1.2/connectors/mysql.html#_temporal_values
         */
        public Builder<T> serverTimeZone(String timeZone) {
            this.serverTimeZone = timeZone;
            return this;
        }

        /**
         * A numeric ID of this database client, which must be unique across all currently-running
         * database processes in the MySQL cluster. This connector joins the MySQL database cluster
         * as another server (with this unique ID) so it can read the binlog. By default, a random
         * number is generated between 5400 and 6400, though we recommend setting an explicit value.
         */
        public Builder<T> serverId(int serverId) {
            this.serverId = serverId;
            return this;
        }

        /**
         * The Debezium MySQL connector properties. For example, "snapshot.mode". These are applied
         * last in {@link #build()}, so they override any property derived from the other options.
         */
        public Builder<T> debeziumProperties(Properties properties) {
            this.dbzProperties = properties;
            return this;
        }

        /**
         * The deserializer used to convert from consumed {@link
         * org.apache.kafka.connect.source.SourceRecord}.
         */
        public Builder<T> deserializer(DebeziumDeserializationSchema<T> deserializer) {
            this.deserializer = deserializer;
            return this;
        }

        /** Specifies the startup options. */
        public Builder<T> startupOptions(StartupOptions startupOptions) {
            this.startupOptions = startupOptions;
            return this;
        }

        /**
         * Translates the configured options into Debezium connector properties and creates the
         * source function.
         *
         * <p>This method does not modify the builder, so it may be called more than once.
         *
         * @return a {@link DebeziumSourceFunction} configured from this builder
         * @throws NullPointerException if hostname, username, password or startup options are
         *     missing, or if the startup mode is {@code TIMESTAMP} and no deserializer was set
         * @throws UnsupportedOperationException if the startup mode is not one of the handled modes
         */
        public DebeziumSourceFunction<T> build() {
            checkNotNull(startupOptions, "startupOptions must not be null");

            Properties props = new Properties();
            props.setProperty("connector.class", MySqlConnector.class.getCanonicalName());
            // Hard code the server name, because we don't need to distinguish it. From the docs:
            // logical name that identifies and provides a namespace for the particular MySQL
            // database server/cluster being monitored. The logical name should be unique across
            // all other connectors, since it is used as a prefix for all Kafka topic names
            // emanating from this connector. Only alphanumeric characters and underscores should
            // be used.
            props.setProperty("database.server.name", DATABASE_SERVER_NAME);
            props.setProperty(
                    "database.hostname", checkNotNull(hostname, "hostname must not be null"));
            props.setProperty("database.user", checkNotNull(username, "username must not be null"));
            props.setProperty(
                    "database.password", checkNotNull(password, "password must not be null"));
            props.setProperty("database.port", String.valueOf(port));
            props.setProperty("database.history.skip.unparseable.ddl", String.valueOf(true));

            if (serverId != null) {
                props.setProperty("database.server.id", String.valueOf(serverId));
            }
            if (databaseList != null) {
                props.setProperty("database.whitelist", String.join(",", databaseList));
            }
            if (tableList != null) {
                props.setProperty("table.whitelist", String.join(",", tableList));
            }
            if (serverTimeZone != null) {
                props.setProperty("database.serverTimezone", serverTimeZone);
            }

            // Work on a local copy of the reference: wrapping the field itself would stack another
            // timestamp filter on top of the previous one every time build() is called.
            DebeziumDeserializationSchema<T> effectiveDeserializer = deserializer;
            DebeziumOffset specificOffset = null;
            switch (startupOptions.startupMode) {
                case INITIAL:
                    props.setProperty("snapshot.mode", "initial");
                    break;

                case EARLIEST_OFFSET:
                    props.setProperty("snapshot.mode", "never");
                    break;

                case LATEST_OFFSET:
                    props.setProperty("snapshot.mode", "schema_only");
                    break;

                case SPECIFIC_OFFSETS:
                    // if binlog offset is specified, 'snapshot.mode=schema_only_recovery' must
                    // be configured. It only snapshots the schemas, not the data,
                    // and continue binlog reading from the specified offset
                    props.setProperty("snapshot.mode", "schema_only_recovery");
                    specificOffset = createSpecificOffset();
                    break;

                case TIMESTAMP:
                    checkNotNull(deserializer, "deserializer must not be null");
                    props.setProperty("snapshot.mode", "never");
                    effectiveDeserializer =
                            new SeekBinlogToTimestampFilter<>(
                                    startupOptions.startupTimestampMillis, deserializer);
                    break;

                default:
                    throw new UnsupportedOperationException();
            }

            // User supplied Debezium properties are applied last so that they take precedence.
            if (dbzProperties != null) {
                dbzProperties.forEach(props::put);
            }

            return new DebeziumSourceFunction<>(effectiveDeserializer, props, specificOffset);
        }

        /** Builds the Debezium offset pointing at the binlog file/position of the startup options. */
        private DebeziumOffset createSpecificOffset() {
            DebeziumOffset offset = new DebeziumOffset();

            Map<String, String> sourcePartition = new HashMap<>();
            sourcePartition.put("server", DATABASE_SERVER_NAME);
            offset.setSourcePartition(sourcePartition);

            Map<String, Object> sourceOffset = new HashMap<>();
            sourceOffset.put("file", startupOptions.specificOffsetFile);
            sourceOffset.put("pos", startupOptions.specificOffsetPos);
            offset.setSourceOffset(sourceOffset);

            return offset;
        }
    }
}