
// com.huaweicloud.dws.client.binlog.reader.ParallelBinlogReader Maven / Gradle / Ivy
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved.
*/
package com.huaweicloud.dws.client.binlog.reader;
import com.huaweicloud.dws.client.DwsConfig;
import com.huaweicloud.dws.client.binlog.collector.BinlogApi;
import com.huaweicloud.dws.client.binlog.model.BinlogRecord;
import com.huaweicloud.dws.client.binlog.model.Slot;
import com.huaweicloud.dws.client.worker.DwsConnectionPool;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
 * Parallel binlog reader that can use Flink's multi-node capability to consume binlog.
 * Differences from {@code BinlogReader}:
 * <ol>
 * <li>{@code BinlogReader} reads with multiple threads on a single Flink node. Sync points are
 * fetched and committed in the main thread, so one batch of sync points is consumed and committed
 * together, which avoids large gaps in consumption progress between DNs.</li>
 * <li>{@code ParallelBinlogReader} relies on Flink's parallelism: each node fetches, consumes and
 * commits its sync points on its own. With skewed data the sync points of the individual nodes
 * may drift far apart.</li>
 * </ol>
 *
 * @ProjectName: dws-connector
 * @Date: 2023/12/11 15:40
 * @Version 1.0
 **/
@Slf4j
public class ParallelBinlogReader extends BinlogReader {
    // Sync points (slots) this reader currently has to consume.
    private final List<Slot> currentSlots = new ArrayList<>();

    // Index into the node id list that selects the single node this reader handles.
    private final int taskId;

    // Node ids assigned to this reader; filled lazily by initPendingNodeIds().
    @Getter
    private final List<Integer> pendingNodeIds = new ArrayList<>();

    /**
     * Creates a reader for the given task and forwards the shared arguments to the parent reader.
     *
     * <p>NOTE(review): the generic type arguments of {@code queue}, {@code specifySlots} and
     * {@code columnNames} are not visible from this class; restore them to match the
     * {@code BinlogReader} constructor.
     *
     * @param config            client configuration, passed to the parent
     * @param queue             queue passed to the parent
     * @param specifySlots      explicitly specified slots, may be {@code null} (see the other constructor)
     * @param columnNames       column names, passed to the parent
     * @param dwsConnectionPool connection pool, passed to the parent
     * @param taskId            index of this task; selects which node id this reader consumes
     */
    public ParallelBinlogReader(DwsConfig config, BlockingQueue queue, List specifySlots,
            List columnNames, DwsConnectionPool dwsConnectionPool, int taskId) {
        super(config, queue, specifySlots, columnNames, dwsConnectionPool);
        this.taskId = taskId;
        setCurrentSlots(currentSlots);
        // Put the task id into the thread names so they are easy to tell apart.
        setBinlogWorkerThreadPrefix("[taskId=" + taskId + "]:binlog-worker");
        setBinlogGetRecordThreadPrefix("[taskId=" + taskId + "]:binlog-get-record");
        log.info("init ParallelBinlogReader: taskId: {}", taskId);
    }

    /**
     * Creates a reader without explicitly specified slots ({@code specifySlots} is {@code null}).
     *
     * @param dwsConfig         client configuration
     * @param queue             queue passed to the parent
     * @param columnNames       column names, passed to the parent
     * @param dwsConnectionPool connection pool, passed to the parent
     * @param taskId            index of this task
     */
    public ParallelBinlogReader(DwsConfig dwsConfig, BlockingQueue queue, List columnNames,
            DwsConnectionPool dwsConnectionPool, int taskId) {
        this(dwsConfig, queue, null, columnNames, dwsConnectionPool, taskId);
    }

    /**
     * Initializes {@code currentSlots} on the first call and refreshes their CSN values on later calls.
     *
     * @throws SQLException if obtaining a connection or querying the database fails
     */
    @Override
    public void initCurrentSlots() throws SQLException {
        initOrUpdateCurrentSlots();
    }

    /**
     * Loads the slots of the pending node ids from the database. If {@code currentSlots} is empty
     * they are simply adopted; otherwise the start/current-start/end CSN of every existing slot is
     * overwritten with the values of the database slot that has the same DN node id.
     */
    private void initOrUpdateCurrentSlots() throws SQLException {
        initPendingNodeIds();
        // Fetch the sync points that belong to the pending node ids. When no node id has been
        // assigned (surplus task), getSlotsInDb() iterates over nothing and returns an empty list.
        List<Slot> slotsInDb = getSlotsInDb();
        if (currentSlots.isEmpty()) {
            currentSlots.addAll(slotsInDb);
            log.info("init currentSlots: slotsInDb: {}, currentSlots: {}", slotsInDb, currentSlots);
            return;
        }

        Map<Integer, Slot> slotMap =
                slotsInDb.stream().collect(Collectors.toMap(Slot::getDnNodeId, Function.identity()));
        // Refresh currentSlots in place so the list instance handed to the parent stays the same.
        for (Slot slot : currentSlots) {
            int dnNodeId = slot.getDnNodeId();
            Slot latest = slotMap.get(dnNodeId);
            if (latest == null) {
                // The database returned no slot for this node id; keep the previous values.
                log.warn("dnNodeId: {} not in slotsInDb: [{}]", dnNodeId, slotsInDb);
                continue;
            }
            // Take over the consumption points from the database.
            slot.setStartCsn(latest.getStartCsn());
            slot.setCurrentStartCsn(latest.getCurrentStartCsn());
            slot.setEndCsn(latest.getEndCsn());
        }
    }

    /**
     * Loads the node ids if they are not loaded yet and assigns the node id at index
     * {@code taskId} to this reader. If {@code taskId} is not a valid index (more tasks than
     * nodes), {@code stop()} is called and no node id is assigned.
     *
     * @throws SQLException if obtaining a connection or querying the node ids fails
     */
    public void initPendingNodeIds() throws SQLException {
        if (getNodeIds().isEmpty()) {
            try (Connection connection = getDwsConnectionPool().getConnection()) {
                // With redistribution the node ids are looked up for the table, otherwise
                // without a table name.
                String tableName = isNeedRedistribution() ? getTableName() : null;
                getNodeIds().addAll(BinlogApi.getNodeIds(connection, tableName));
            }
        }

        int size = getNodeIds().size();
        // ">=" instead of "taskId + 1 > size" so the comparison cannot overflow.
        if (taskId >= size) {
            // More tasks than DNs: surplus tasks have nothing to read and stop right away.
            log.warn("task number is larger than dn number, stop...");
            stop();
            return;
        }
        if (isNeedRedistribution()) {
            BinlogApi.NODE_COUNT.set(size);
        }
        if (pendingNodeIds.isEmpty()) {
            // Reading is fully parallel, so each reader handles exactly one node.
            pendingNodeIds.add(getNodeIds().get(taskId));
            log.info("taskId: {}, init pendingNodeIds: {}, nodeIds: {}", taskId, pendingNodeIds, getNodeIds());
        }
    }

    /**
     * Queries the sync points of every pending node id, using a fresh pooled connection per node.
     *
     * @return the slots of all pending node ids, sorted by DN node id
     * @throws SQLException if obtaining a connection or querying the sync points fails
     */
    @Override
    public List<Slot> getSlotsInDb() throws SQLException {
        List<Slot> slots = new ArrayList<>();
        for (int nodeId : pendingNodeIds) {
            try (Connection connection = getDwsConnectionPool().getConnection()) {
                slots.addAll(BinlogApi.getSyncPoint(connection, getTableName(), getSlotName(), nodeId, false,
                        isNeedRedistribution()));
            }
        }
        slots.sort(Comparator.comparing(Slot::getDnNodeId));
        return slots;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy