All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.huaweicloud.dws.client.binlog.collector.BinlogApi Maven / Gradle / Ivy

/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved.
 */

package com.huaweicloud.dws.client.binlog.collector;

import com.huaweicloud.dws.client.TableConfig;
import com.huaweicloud.dws.client.binlog.model.BinlogRecord;
import com.huaweicloud.dws.client.binlog.model.BinlogRecordType;
import com.huaweicloud.dws.client.binlog.model.Slot;
import com.huaweicloud.dws.client.model.Constants;
import lombok.extern.slf4j.Slf4j;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;

/**
 * Static helpers that wrap the kernel system functions used for binlog synchronization:
 * reading and registering sync points, reading incremental and full binlog data, and
 * discovering data node (DN) ids.
 *
 * @ProjectName: dws-connectors
 * @Description: API for interacting with the kernel; pending joint debugging with the kernel
 * @Date: 2023/09/19 11:09
 * @Version: 1.0
 **/
@Slf4j
public class BinlogApi {

    // Number of data nodes recorded for the current thread; if the node count changes it must be fetched again.
    public static final ThreadLocal<Integer> NODE_COUNT = ThreadLocal.withInitial(() -> 0);

    // Node lookup through the system function that, per the getNodeIds documentation, is redistribution-aware.
    private static final String EXEC_NODE_ID_SQL = "select * from pg_catalog.pgxc_get_exec_node_id(?);";

    // Node lookup straight from the pgxc_node catalog (all data nodes, ordered by name).
    private static final String ALL_DATA_NODES_SQL =
        "select node_id,node_name from pgxc_node where node_type = 'D' order by node_name;";

    /**
     * Fetches sync point information (node_id, last_sync_point, latest_sync_point, xmin) for a slot.
     * <ol>
     * <li>GTM and GTM-free scenarios must be distinguished: in the GTM scenario every DN has the same
     * startCsn and endCsn.</li>
     * <li>If the returned startCsn is -1, a full synchronization is required.</li>
     * </ol>
     * When {@code isNeedRedistribution} is set, the current thread's {@link #NODE_COUNT} value is passed
     * to the kernel function as an additional fifth argument.
     *
     * @param connection JDBC connection
     * @param tableName table name
     * @param slotName slot name
     * @param nodeId id of the DN whose sync point is requested
     * @param isCheckpoint whether this call is part of the checkpoint flow
     * @param isNeedRedistribution whether the scale-out redistribution flow must be supported
     * @return the slots built from the result rows, sorted by DN node id
     * @throws SQLException if the statement cannot be prepared or executed
     */
    public static List<Slot> getSyncPoint(Connection connection, String tableName, String slotName, int nodeId,
        boolean isCheckpoint, boolean isNeedRedistribution) throws SQLException {
        String sql;
        if (isNeedRedistribution) {
            // The node count has to be passed in as well.
            sql = "select * from pg_catalog.pgxc_get_binlog_sync_point(?, ?, ?, ?, ?);";
        } else {
            sql = "select * from pg_catalog.pgxc_get_binlog_sync_point(?, ?, ?, ?);";
        }
        try (PreparedStatement statement = connection.prepareStatement(sql)) {
            statement.setString(1, tableName);
            statement.setString(2, slotName);
            statement.setBoolean(3, isCheckpoint);
            statement.setInt(4, nodeId);
            if (isNeedRedistribution) {
                statement.setInt(5, NODE_COUNT.get());
            }
            try (ResultSet resultSet = statement.executeQuery()) {
                List<Slot> slots = new ArrayList<>();
                while (resultSet.next()) {
                    int dnNodeId = resultSet.getInt("node_id");
                    long startCsn = resultSet.getLong("last_sync_point");
                    long endCsn = resultSet.getLong("latest_sync_point");
                    long xmin = resultSet.getLong("xmin");

                    // Consumption starts at the last registered sync point, so currentStartCsn == startCsn.
                    Slot slot = new Slot();
                    slot.slotDNNodeId(dnNodeId)
                        .startCsn(startCsn)
                        .currentStartCsn(startCsn)
                        .endCsn(endCsn)
                        .setXmin(xmin);
                    slots.add(slot);
                }
                // Return the slots ordered by dnNodeId.
                return slots.stream().sorted(Comparator.comparing(Slot::getDnNodeId)).collect(Collectors.toList());
            }
        }
    }

    /**
     * Builds the select list used when reading binlog changes.
     * <p>
     * If the table itself contains the binlog system columns, {@link Constants#SELECT_ALL} is returned,
     * because listing the columns explicitly would be ambiguous. Otherwise the three system columns
     * (sync point, event sequence, type; new or old naming depending on the table configuration) are
     * placed in front of the requested column names.
     *
     * @param columnNames names of the user columns to select
     * @param tableConfig table configuration
     * @return the comma-separated select list
     */
    public static String getSelectValues(List<String> columnNames, TableConfig tableConfig) {
        // Whether the new system column names are used (must match the kernel version).
        boolean newSystemValue = tableConfig.isNewSystemValue();
        // Whether the table contains the system columns.
        boolean containBinlogSysValue = tableConfig.isContainBinlogSysValue();
        if (containBinlogSysValue) {
            // The table contains the system columns: return "select *" directly to avoid ambiguity.
            return Constants.SELECT_ALL;
        }
        String defaultColumns;
        if (newSystemValue) {
            defaultColumns = Constants.NEW_SYNC_POINT + ", " + Constants.NEW_EVENT_SEQUENCE + ", " + Constants.NEW_TYPE + ", ";
        } else {
            defaultColumns = Constants.OLD_SYNC_POINT + ", " + Constants.OLD_EVENT_SEQUENCE + ", " +  Constants.OLD_TYPE + ", ";
        }
        defaultColumns += String.join(", ", columnNames);
        return defaultColumns;
    }

    /**
     * Reads incremental binlog changes for one slot and puts every decoded record into {@code queue}.
     * <p>
     * Auto-commit is switched off while the result set is read and switched back on afterwards.
     * Reading stops early once {@code running} becomes {@code false}.
     *
     * @param connection JDBC connection
     * @param slot slot describing the DN and the CSN range to consume
     * @param columnNames names of the columns to read
     * @param tableConfig table configuration
     * @param queue queue that receives the records
     * @param running whether the outer program is still running (used for a fast stop)
     * @return the number of records put into the queue
     * @throws Exception if the query fails or the thread is interrupted while waiting on the queue
     */
    public static int getBinlogRecords(Connection connection, Slot slot, List<String> columnNames,
        TableConfig tableConfig, BlockingQueue<? super BinlogRecord> queue, AtomicBoolean running) throws Exception {
        // Properties taken from the TableConfig.
        int fetchSize = tableConfig.getBinlogBatchReadSize();
        String tableName = tableConfig.getTableName();
        // Properties taken from the slot.
        long startCsn = slot.getCurrentStartCsn();
        long endCsn = slot.getConsumeEndScn(fetchSize);
        int dnNodeId = slot.getDnNodeId();
        long xmin = slot.getXmin();
        // Build the statement text before touching the connection state, so that a failure here
        // cannot leave auto-commit disabled.
        String selectValues = getSelectValues(columnNames, tableConfig);
        // Arguments: tableName, dnNodeId, startCsn, endCsn.
        // NOTE(review): the table name is inlined as a literal rather than bound as a parameter;
        // callers must not pass untrusted table names.
        String sql = String.format(Locale.ROOT, "select %s from pg_catalog.pgxc_get_binlog_changes('%s', ?, ?, ?) order by 1,2;",
            selectValues, tableName);
        // Using a fetch size requires auto-commit to be switched off.
        connection.setAutoCommit(false);
        long start = System.currentTimeMillis();
        try (PreparedStatement statement = connection.prepareStatement(sql)) {
            statement.setFetchSize(fetchSize);
            statement.setInt(1, dnNodeId);
            statement.setLong(2, startCsn);
            statement.setLong(3, endCsn);
            try (ResultSet resultSet = statement.executeQuery()) {
                long end = System.currentTimeMillis();
                int size = 0;
                // Read in a loop, fetchSize rows at a time.
                while (resultSet.next() && running.get()) {
                    long syncPoint = resultSet.getLong(Constants.SYNC_POINT_IDX);
                    int typeIdx = Constants.TYPE_IDX;
                    String type = resultSet.getString(typeIdx);

                    // Each record carries its own slot snapshot whose current position is this row's sync point.
                    Slot newSlot = new Slot().slotDNNodeId(dnNodeId)
                        .startCsn(startCsn)
                        .endCsn(endCsn)
                        .currentStartCsn(syncPoint)
                        .xmin(xmin);
                    // The values are read from the columns that directly follow the type column.
                    List<Object> columnValues = new ArrayList<>();
                    for (int idx = typeIdx; idx < columnNames.size() + typeIdx; idx++) {
                        columnValues.add(resultSet.getObject(idx + 1));
                    }
                    BinlogRecord binlogRecord = new BinlogRecord();
                    binlogRecord.setColumnValues(columnValues);
                    binlogRecord.setSlot(newSlot);
                    binlogRecord.setType(BinlogRecordType.getBinlogRecordType(type).getIndex());
                    size++;
                    queue.put(binlogRecord);
                    if (fetchSize != 0 && size % fetchSize == 0) {
                        log.info("incremental sync {} data in cycle, cost: {}", size, System.currentTimeMillis() - start);
                    }
                }
                if (size > 0) {
                    log.info(
                        "incremental sync binlog info: dnNodeId: {}, consumeStartCsn: {}, consumeEndCsn: {}, size: {}, " +
                                "pgxc_get_binlog_changes cost time: {}, total cost time: {}",
                        dnNodeId, startCsn, endCsn, size, end - start, System.currentTimeMillis() - start);
                }
                return size;
            }
        } finally {
            connection.setAutoCommit(true);
        }
    }

    /**
     * Performs a full synchronization of one DN and puts every row into {@code queue} as an INSERT record.
     * <p>
     * Auto-commit is switched off while the result set is read and switched back on afterwards.
     * Reading stops early once {@code running} becomes {@code false}.
     *
     * @param connection JDBC connection
     * @param tableName table name
     * @param dnNodeId node id of the DN to read from
     * @param columnNames names of the columns to read
     * @param fetchSize fetch size
     * @param queue queue that receives the records
     * @param running whether the outer program is still running (used for a fast stop)
     * @return the number of records put into the queue
     * @throws Exception if the query fails or the thread is interrupted while waiting on the queue
     */
    public static int fullSyncBinlog(Connection connection, String tableName, int dnNodeId, List<String> columnNames,
        int fetchSize, BlockingQueue<? super BinlogRecord> queue, AtomicBoolean running) throws Exception {
        int size = 0;
        // NOTE(review): the table name is inlined as a literal rather than bound as a parameter;
        // callers must not pass untrusted table names.
        String sql = String.format(Locale.ROOT, "select %s from pg_catalog.pgxc_full_sync_binlog_records('%s', ?);",
            String.join(", ", columnNames), tableName);
        log.info("start full sync binlog..., sql: {}", sql);
        // Using a fetch size requires auto-commit to be switched off.
        connection.setAutoCommit(false);
        long start = System.currentTimeMillis();
        try (PreparedStatement statement = connection.prepareStatement(sql)) {
            statement.setFetchSize(fetchSize);
            statement.setInt(1, dnNodeId);
            try (ResultSet resultSet = statement.executeQuery()) {
                log.info("pgxc_full_sync_binlog_records cost time: {}, fetchSize: {}",
                    System.currentTimeMillis() - start, fetchSize);
                // Read in a loop, fetchSize rows at a time.
                while (resultSet.next() && running.get()) {
                    List<Object> columnValues = new ArrayList<>();
                    for (String columnName : columnNames) {
                        columnValues.add(resultSet.getObject(columnName));
                    }
                    BinlogRecord binlogRecord = new BinlogRecord();
                    binlogRecord.setColumnValues(columnValues);
                    binlogRecord.setType(BinlogRecordType.INSERT.getIndex());
                    size++;
                    queue.put(binlogRecord);
                    if (fetchSize != 0 && size % fetchSize == 0) {
                        log.info("full sync {} data in cycle, cost: {}", size, System.currentTimeMillis() - start);
                    }
                }
                log.info("full sync binlog info: tableName: {}, dnNodeId: {}, size: {}, cost time: {}", tableName,
                    dnNodeId, size, System.currentTimeMillis() - start);
                return size;
            }
        } finally {
            connection.setAutoCommit(true);
        }
    }

    /**
     * Registers (updates) the sync point of a slot on one DN.
     *
     * @param connection JDBC connection
     * @param tableName table name
     * @param slotName slot name
     * @param endCsn end position of the sync point
     * @param dnNodeId id of the DN to update
     * @param xmin xmin
     * @param isCheckPoint whether the call is made during a checkpoint
     * @param queryTimeout statement timeout, applied only when {@code isCheckPoint} is {@code true}
     * @throws SQLException if the statement cannot be prepared or executed
     */
    public static void updateSyncPoint(Connection connection, String tableName, String slotName, long endCsn,
        int dnNodeId, long xmin, boolean isCheckPoint, int queryTimeout) throws SQLException {
        String sql = "select * from pg_catalog.pgxc_register_binlog_sync_point(?, ?, ?, ?, ?, ?);";
        try (PreparedStatement ps = connection.prepareStatement(sql)) {
            if (isCheckPoint) {
                // If the Flink parallelism is lower than the number of DNs, one task reads several DNs;
                // bounding the checkpoint execution time isolates the DNs from each other.
                ps.setQueryTimeout(queryTimeout);
            }
            ps.setString(1, tableName);
            ps.setString(2, slotName);
            ps.setInt(3, dnNodeId);
            ps.setLong(4, endCsn);
            ps.setBoolean(5, isCheckPoint);
            ps.setLong(6, xmin);
            ps.executeQuery();
        }
    }

    /**
     * Returns the node ids of the data nodes.
     * <p>
     * If {@code tableName} is non-null, the system function pgxc_get_exec_node_id is used, which is
     * adapted to the scale-out redistribution logic, and the resulting node count is stored in
     * {@link #NODE_COUNT} for the current thread. Otherwise the ids are read from pgxc_node.
     * <p>
     * Scenario: a cluster starts with 3 DNs and is scaled out to 6. OM first adds 3 new DNs and then
     * redistributes. pgxc_node returns 6 nodes whether or not the redistribution succeeded, whereas
     * pgxc_get_exec_node_id still returns the previous 3 DNs if the redistribution failed.
     *
     * @param connection JDBC connection
     * @param tableName table name, or {@code null} to read all data nodes from pgxc_node
     * @return the node ids, taken from the first column of each result row
     * @throws SQLException if the statement cannot be prepared or executed
     */
    public static List<Integer> getNodeIds(Connection connection, String tableName) throws SQLException {
        boolean hasTableName = Objects.nonNull(tableName);
        String sql = hasTableName ? EXEC_NODE_ID_SQL : ALL_DATA_NODES_SQL;
        List<Integer> nodeIds = queryNodeIds(connection, sql, hasTableName, tableName);
        if (hasTableName) {
            NODE_COUNT.set(nodeIds.size());
        }
        log.info("execute sql: {}, node count: {}, thread info: {}", sql, nodeIds.size(),
            Thread.currentThread().getName());
        return nodeIds;
    }

    /**
     * Returns the node ids reported by pgxc_get_exec_node_id for the given table.
     * Unlike {@link #getNodeIds(Connection, String)} this neither updates {@link #NODE_COUNT} nor logs.
     *
     * @param connection JDBC connection
     * @param tableName table name, always bound as the function argument
     * @return the node ids, taken from the first column of each result row
     * @throws SQLException if the statement cannot be prepared or executed
     */
    public static List<Integer> getNodeIdsWithTableName(Connection connection, String tableName) throws SQLException {
        return queryNodeIds(connection, EXEC_NODE_ID_SQL, true, tableName);
    }

    // Runs a node-id query and collects the first column of every row; binds tableName as parameter 1 on request.
    private static List<Integer> queryNodeIds(Connection connection, String sql, boolean bindTableName,
        String tableName) throws SQLException {
        List<Integer> nodeIds = new ArrayList<>();
        try (PreparedStatement statement = connection.prepareStatement(sql)) {
            if (bindTableName) {
                statement.setString(1, tableName);
            }
            try (ResultSet resultSet = statement.executeQuery()) {
                while (resultSet.next()) {
                    nodeIds.add(resultSet.getInt(1));
                }
            }
        }
        return nodeIds;
    }
}