
// com.huaweicloud.dws.client.collector.TableActionCollector - Maven / Gradle / Ivy
package com.huaweicloud.dws.client.collector;
import com.huaweicloud.dws.client.DwsConfig;
import com.huaweicloud.dws.client.TableConfig;
import com.huaweicloud.dws.client.action.AbstractAction;
import com.huaweicloud.dws.client.action.PutAction;
import com.huaweicloud.dws.client.exception.DwsClientException;
import com.huaweicloud.dws.client.exception.DwsClientRecordException;
import com.huaweicloud.dws.client.model.ColumnKey;
import com.huaweicloud.dws.client.model.Record;
import com.huaweicloud.dws.client.model.TableSchema;
import com.huaweicloud.dws.client.util.AssertUtil;
import com.huaweicloud.dws.client.util.LogUtil;
import com.huaweicloud.dws.client.worker.ExecutionPool;
import lombok.extern.slf4j.Slf4j;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
/**
* @ProjectName: dws-connector
* @ClassName: TableActionCollector
* @Description: 表事件收集, 用于按表维处理事件
* @Date: 2023/1/10 15:48
* @Version: 1.0
*/
@Slf4j
public class TableActionCollector {
/**
* 缓存最大超过系数
*/
public static final int MAX_OUT_WEIGH_RATIO = 2;
/**
* 记录当前收集器正在处理的表定义,如果表结构改变这里会被体现
*/
private TableSchema currentTableSchema;
/**
* 表中操作缓存
*/
private final RecordBuffer buffer;
private final ExecutionPool pool;
private final DwsConfig config;
private final List> backgroundTasks = new LinkedList<>();
public TableActionCollector(DwsConfig config, ExecutionPool pool) {
this.config = config;
this.pool = pool;
this.buffer = new RecordBuffer(config);
}
public synchronized void collector(Record record) throws DwsClientException {
// 收集操作是,判断表结构有没变更,如果变更直接入库后再收集新的
if (currentTableSchema != null && !currentTableSchema.equals(record.getTableSchema())) {
log.info("schema change. table = {}", currentTableSchema.getTableName());
commit();
}
// 刷库后就会将字段清空,重新缓存
if (currentTableSchema == null) {
currentTableSchema = record.getTableSchema();
}
// 将数据入缓存
buffer.write(record);
LogUtil.withLogSwitch(config, () -> log.info("write record successful. current buffer size = {}", buffer.getSize()));
TableConfig tableConfig = config.getTableConfig(currentTableSchema.getTableName());
if (buffer.flush() && buffer.getSize() > tableConfig.getAutoFlushBatchSize() * tableConfig.getBatchOutWeighRatio()) {
// 只有在数量超过容量规定倍数后 才强制业务线程提交刷库,否则等待定时任务刷库即可
synchronized (backgroundTasks) {
backgroundTasks.addAll(flush());
}
}
}
private void commit() throws DwsClientException {
try {
DwsClientException exception = null;
DwsClientRecordException recordException = null;
for (AbstractAction> action : flush()) {
try {
if (recordException == null) {
recordException = DwsClientRecordException.fromAction(action);
continue;
}
DwsClientRecordException finalRecordException = recordException;
Optional.ofNullable(DwsClientRecordException.fromAction(action)).ifPresent(finalRecordException::merge);
} catch (DwsClientException e) {
log.error("action error.", e);
exception = e;
}
}
AssertUtil.isNull(recordException, recordException);
AssertUtil.isNull(exception, exception);
} catch (Exception e) {
throw DwsClientException.fromException(e);
}
}
public synchronized List> flush() throws DwsClientException {
// 同一张表中的数据,按列宽分类对齐后,同列宽批量入库
try {
List> tasks = new LinkedList<>();
List records = buffer.getRecords();
List delRecords = buffer.getDelRecords();
if (!delRecords.isEmpty()) {
long startTime = System.currentTimeMillis();
log.info("will flush buffer to dws, buffer size {}, ", records.size());
PutAction action = new PutAction(records, config);
while (!pool.submit(action)) {
LogUtil.withLogSwitch(config, () -> log.info("try submit action."));
}
log.info("submit action successful. use time = {}", System.currentTimeMillis() - startTime);
tasks.add(action);
return tasks;
}
writeNoDelete(tasks);
synchronized (backgroundTasks) {
if (!backgroundTasks.isEmpty()) {
tasks.addAll(backgroundTasks);
backgroundTasks.clear();
}
}
return tasks;
} finally {
// 清空缓存
buffer.clear();
currentTableSchema = null;
}
}
private void writeNoDelete(List> tasks) throws DwsClientRecordException {
DwsClientRecordException exception = null;
Map> cache = new HashMap<>();
for (Record record : buffer.getRecords()) {
cache.computeIfAbsent(new ColumnKey(record.getColumnBit(), record.getIgnoreUpdate()),
columnKey -> new ArrayList<>(buffer.getSize())).add(record);
}
for (Map.Entry> entry : cache.entrySet()) {
try {
long startTime = System.currentTimeMillis();
log.info("will flush buffer to dws, buffer size {}, ", entry.getValue().size());
PutAction action = new PutAction(entry.getValue(), config);
while (!pool.submit(action)) {
LogUtil.withLogSwitch(config, () -> log.info("try submit action."));
}
log.info("submit action successful. use time = {}", System.currentTimeMillis() - startTime);
tasks.add(action);
} catch (DwsClientRecordException e) {
if (exception == null) {
exception = e;
} else {
exception.merge(e);
}
} catch (Exception unknown) {
DwsClientRecordException clientRecordException = new DwsClientRecordException(DwsClientException.fromException(unknown), entry.getValue());
if (exception == null) {
exception = clientRecordException;
} else {
exception.merge(clientRecordException);
}
}
}
if (exception != null) {
throw exception;
}
}
/**
* 根据 缓存是否达到刷库要求决定是否需要刷库
* @return
*/
public synchronized List> tryFlush() throws DwsClientException {
if (buffer.flush() || !backgroundTasks.isEmpty()) {
List> flush = buffer.flush() ? flush() : new LinkedList<>();
synchronized (backgroundTasks) {
if (!backgroundTasks.isEmpty()) {
flush.addAll(backgroundTasks);
backgroundTasks.clear();
}
}
return flush;
}
return null;
}
public TableSchema getTableSchema() {
return currentTableSchema;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy