All downloads are FREE. Search and download functionality uses the official Maven repository.

com.huaweicloud.dws.client.binlog.reader.ParallelBinlogReader Maven / Gradle / Ivy

/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved.
 */

package com.huaweicloud.dws.client.binlog.reader;

import com.huaweicloud.dws.client.DwsConfig;
import com.huaweicloud.dws.client.binlog.collector.BinlogApi;
import com.huaweicloud.dws.client.binlog.model.BinlogRecord;
import com.huaweicloud.dws.client.binlog.model.Slot;
import com.huaweicloud.dws.client.worker.DwsConnectionPool;

import lombok.Getter;
import lombok.extern.slf4j.Slf4j;

import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * Parallel binlog reader that can use Flink's multi-node parallelism to consume binlog.
 * Differences from {@link BinlogReader}:
 * <ol>
 * <li>BinlogReader reads with multiple threads inside a single Flink node; sync points are fetched and
 * committed in the main thread, so one batch of sync points is consumed and committed together and the
 * consumption progress of the individual DNs does not drift far apart.</li>
 * <li>ParallelBinlogReader relies on Flink's parallelism: every node fetches, consumes and commits its
 * sync points on its own. With skewed data the sync points of different nodes may drift far apart.</li>
 * </ol>
 *
 * @ProjectName: dws-connector
 * @Date: 2023/12/11 15:40
 * @Version 1.0
 **/
@Slf4j
public class ParallelBinlogReader extends BinlogReader {
    /** Sync points (slots) this reader currently has to consume; also handed to the base class. */
    private final List<Slot> currentSlots = new ArrayList<>();

    /** Index of this task; used to pick this reader's entry from the node-id list. */
    private final int taskId;

    /** Node ids whose sync points this reader queries. */
    @Getter
    private final List<Integer> pendingNodeIds = new ArrayList<>();

    /**
     * Creates a reader for one parallel task.
     *
     * NOTE(review): the element types of {@code queue}, {@code specifySlots} and {@code columnNames} are
     * dictated by the {@code BinlogReader} constructor, which is not visible from this file, so the
     * parameters are left raw here; parameterize them to match the super constructor.
     *
     * @param config client configuration, forwarded to the base reader
     * @param queue queue forwarded to the base reader
     * @param specifySlots slots forwarded to the base reader; the five-argument constructor passes
     *        {@code null}
     * @param columnNames column names forwarded to the base reader
     * @param dwsConnectionPool connection pool forwarded to the base reader
     * @param taskId index of this task, used to select the node id this reader handles
     */
    public ParallelBinlogReader(DwsConfig config, BlockingQueue queue, List specifySlots,
        List columnNames, DwsConnectionPool dwsConnectionPool, int taskId) {
        super(config, queue, specifySlots, columnNames, dwsConnectionPool);
        this.taskId = taskId;
        setCurrentSlots(currentSlots);
        // Put the task id into the thread names so threads of different tasks can be told apart.
        setBinlogWorkerThreadPrefix("[taskId=" + taskId + "]:binlog-worker");
        setBinlogGetRecordThreadPrefix("[taskId=" + taskId + "]:binlog-get-record");
        log.info("init ParallelBinlogReader: taskId: {}", taskId);
    }

    /**
     * Creates a reader for one parallel task without specified slots ({@code specifySlots} is
     * {@code null}).
     *
     * @param dwsConfig client configuration, forwarded to the base reader
     * @param queue queue forwarded to the base reader
     * @param columnNames column names forwarded to the base reader
     * @param dwsConnectionPool connection pool forwarded to the base reader
     * @param taskId index of this task, used to select the node id this reader handles
     */
    public ParallelBinlogReader(DwsConfig dwsConfig, BlockingQueue queue, List columnNames,
        DwsConnectionPool dwsConnectionPool, int taskId) {
        this(dwsConfig, queue, null, columnNames, dwsConnectionPool, taskId);
    }

    /**
     * Initializes, or refreshes if already initialized, the slots this reader consumes.
     *
     * @throws SQLException if querying node ids or sync points fails
     */
    @Override
    public void initCurrentSlots() throws SQLException {
        initOrUpdateCurrentSlots();
    }

    /**
     * Fills {@link #currentSlots} on first use; afterwards copies the CSN positions of the freshly
     * queried slots into the existing slot objects, matched by DN node id.
     *
     * @throws SQLException if querying node ids or sync points fails
     */
    private void initOrUpdateCurrentSlots() throws SQLException {
        initPendingNodeIds();
        // Query the sync points that belong to the pending node ids.
        List<Slot> slotsInDb = getSlotsInDb();
        if (currentSlots.isEmpty()) {
            currentSlots.addAll(slotsInDb);
            log.info("init currentSlots: slotsInDb: {}, currentSlots: {}", slotsInDb, currentSlots);
            return;
        }
        Map<Integer, Slot> slotMap = slotsInDb.stream()
            .collect(Collectors.toMap(Slot::getDnNodeId, Function.identity()));
        // Update the existing slot objects in place rather than replacing them.
        for (Slot slot : currentSlots) {
            int dnNodeId = slot.getDnNodeId();
            // toMap never stores null values, so a null result means the key is absent.
            Slot latest = slotMap.get(dnNodeId);
            if (latest == null) {
                // The slot we hold was not returned by the latest query; keep its old positions.
                log.warn("dnNodeId: {} not in slotsInDb: [{}]", dnNodeId, slotsInDb);
                continue;
            }
            // Take over the consumption positions of the matching slot.
            slot.setStartCsn(latest.getStartCsn());
            slot.setCurrentStartCsn(latest.getCurrentStartCsn());
            slot.setEndCsn(latest.getEndCsn());
        }
    }

    /**
     * Loads the node ids if none are cached yet and selects the node id handled by this task.
     * If {@code taskId} has no corresponding node id, {@code stop()} is called and
     * {@link #pendingNodeIds} is left untouched.
     *
     * @throws SQLException if obtaining a connection or querying the node ids fails
     */
    public void initPendingNodeIds() throws SQLException {
        if (getNodeIds().isEmpty()) {
            try (Connection connection = getDwsConnectionPool().getConnection()) {
                if (isNeedRedistribution()) {
                    getNodeIds().addAll(BinlogApi.getNodeIds(connection, getTableName()));
                } else {
                    getNodeIds().addAll(BinlogApi.getNodeIds(connection, null));
                }
            }
        }
        // The node-id list decides which slot information this task fetches.
        int size = getNodeIds().size();
        if (taskId + 1 > size) {
            // There are more tasks than DNs; a surplus task has nothing to read and stops.
            log.warn("task number is larger than dn number, stop...");
            stop();
            return;
        }
        if (isNeedRedistribution()) {
            BinlogApi.NODE_COUNT.set(size);
        }
        if (pendingNodeIds.isEmpty()) {
            // Readers are fully parallel now, so each reader handles exactly one node.
            pendingNodeIds.add(getNodeIds().get(taskId));
            log.info("taskId: {}, init pendingNodeIds: {}, nodeIds: {}", taskId, pendingNodeIds, getNodeIds());
        }
    }

    /**
     * Queries the sync points of every pending node id, using a separate pooled connection per node.
     *
     * @return a new list of the queried slots, sorted ascending by DN node id
     * @throws SQLException if obtaining a connection or querying a sync point fails
     */
    @Override
    public List<Slot> getSlotsInDb() throws SQLException {
        List<Slot> slots = new ArrayList<>();
        for (int nodeId : pendingNodeIds) {
            try (Connection connection = getDwsConnectionPool().getConnection()) {
                slots.addAll(BinlogApi.getSyncPoint(connection, getTableName(), getSlotName(), nodeId, false,
                    isNeedRedistribution()));
            }
        }
        return slots.stream().sorted(Comparator.comparing(Slot::getDnNodeId)).collect(Collectors.toList());
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy