// com.aliyun.datahub.client.example.examples.OffsetConsumptionExample (Maven / Gradle / Ivy)
package com.aliyun.datahub.client.example.examples;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.exception.*;
import com.aliyun.datahub.client.model.*;
import java.util.*;
//点位消费示例,以消费tuple类型为例
public class OffsetConsumptionExample {
public static void main(String[] args) {
Consumption cs = new Consumption();
cs.offset_consumption();
}
}
/**
 * Drives offset-based consumption of a DataHub topic: opens a subscription session for
 * every shard of the topic and starts one {@link ConsumerThread} per shard.
 */
class Consumption {
    private final DatahubClient datahubClient;
    private final RecordSchema schema;
    // Subscription id used for the session and for committing offsets.
    // It is an empty placeholder here and must be filled in before running.
    private final String subId;

    /**
     * Creates the DataHub client and loads the record schema of the configured topic.
     *
     * @throws DatahubClientException if the topic schema cannot be fetched
     */
    public Consumption() {
        // Create the DatahubClient instance.
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig(Constant.endpoint,
                                new AliyunAccount(Constant.accessId, Constant.accessKey),
                                // Whether to enable binary transport; supported by the server since version 2.12.
                                true))
                .build();
        try {
            // Fetch the schema.
            schema = datahubClient.getTopic(Constant.projectName, Constant.topicName).getRecordSchema();
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        }
        subId = "";
    }

    /**
     * Offset consumption example: offsets are committed while records are being consumed.
     * If an offset is not committed after data has been consumed, the same data may be
     * consumed again.
     *
     * <p>Starts one consumer thread per shard and blocks until all of them have finished.
     *
     * @throws DatahubClientException if listing shards or opening the subscription session fails
     */
    public void offset_consumption() {
        try {
            ListShardResult listShardResult = datahubClient.listShard(Constant.projectName, Constant.topicName);
            List<String> shardIdList = new ArrayList<>();
            for (ShardEntry entry : listShardResult.getShards()) {
                shardIdList.add(entry.getShardId());
            }
            // Initialise the offsets of all shards in a single call.
            OpenSubscriptionSessionResult openSubscriptionSessionResult =
                    datahubClient.openSubscriptionSession(Constant.projectName, Constant.topicName, subId, shardIdList);
            // One thread per shard reads the data.
            List<Thread> threadList = new ArrayList<>();
            for (String shardId : shardIdList) {
                Thread thread = new ConsumerThread(3, shardId, subId, schema,
                        openSubscriptionSessionResult.getOffsets().get(shardId), datahubClient);
                threadList.add(thread);
                thread.start();
                System.out.println(shardId);
            }
            for (Thread thread : threadList) {
                thread.join();
            }
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }
}
/**
 * Reads records from a single shard, prints them, and commits the subscription offset
 * every fixed number of consumed records.
 */
class ConsumerThread extends Thread {
    private final int maxRetry;
    private final String shardId;
    private final String subId;
    private final RecordSchema schema;
    private final SubscriptionOffset subscriptionOffset;
    private final DatahubClient datahubClient;
    // Cursor of the next read; set by init() and advanced after each successful fetch.
    private String cursor;

    /**
     * Resolves the initial cursor from the current subscription offset.
     * If nothing has been consumed yet (sequence &lt; 0), or the offset's data has expired,
     * the cursor of the oldest record still within the lifecycle is used.
     */
    private void init() {
        // sequence < 0 means nothing has been consumed yet.
        if (subscriptionOffset.getSequence() < 0) {
            // Use the cursor of the first record within the lifecycle.
            cursor = datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.OLDEST).getCursor();
        } else {
            // Use the cursor of the record following the committed one.
            long nextSequence = subscriptionOffset.getSequence() + 1;
            try {
                // getCursor by SEQUENCE may fail with SeekOutOfRange, meaning the data at that cursor has expired.
                cursor = datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.SEQUENCE, nextSequence).getCursor();
            } catch (SeekOutOfRangeException e) {
                // Fall back to the first record within the lifecycle.
                cursor = datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.OLDEST).getCursor();
            } catch (DatahubClientException e) {
                // Any other error.
                e.printStackTrace();
                throw e;
            }
        }
    }

    /**
     * Creates a consumer for one shard and resolves its starting cursor.
     *
     * @param maxRetry           number of consecutive failed fetches after which the thread stops
     * @param shardId            shard to read from
     * @param subId              subscription id used when committing offsets
     * @param schema             record schema passed to getRecords
     * @param subscriptionOffset offset object for this shard; updated and committed while consuming
     * @param datahubClient      client used for all DataHub calls
     */
    public ConsumerThread(int maxRetry, String shardId, String subId, RecordSchema schema, SubscriptionOffset subscriptionOffset,
                          DatahubClient datahubClient) {
        this.maxRetry = maxRetry;
        this.shardId = shardId;
        this.subId = subId;
        this.schema = schema;
        this.subscriptionOffset = subscriptionOffset;
        this.datahubClient = datahubClient;
        init();
    }

    /**
     * Fetch loop: reads batches from the shard, prints each record, and commits the offset
     * every {@code commitNum} records. The loop ends after {@code maxRetry} consecutive
     * failed fetches, when the subscription goes offline or its session becomes invalid
     * (the exception is rethrown), or when the thread is interrupted while sleeping.
     */
    @Override
    public void run() {
        long recordCount = 0L;
        // Maximum number of records read per call.
        int fetchNum = 1000;
        int retryNum = 0;
        // Commit the offset once every commitNum consumed records.
        int commitNum = 2000;
        int sleepTimeMs = 5000;
        while (retryNum < maxRetry) {
            try {
                GetRecordsResult result = datahubClient.getRecords(Constant.projectName, Constant.topicName, shardId, schema, cursor, fetchNum);
                if (result.getRecordCount() <= 0) {
                    // No data; sleep and read again.
                    System.out.printf("no data, sleep %d second\n", sleepTimeMs / 1000);
                    Thread.sleep(sleepTimeMs);
                    continue;
                }
                for (RecordEntry recordEntry : result.getRecords()) {
                    // Consume the record.
                    TupleRecordData data = (TupleRecordData) recordEntry.getRecordData();
                    String res = "string_field:" + data.getField("string_field") + "\t"
                            + "string_field:" + data.getField("string_field");
                    System.out.println(Thread.currentThread() + "\t" + res);
                    recordCount++;
                    // Update the offset.
                    if (recordCount % commitNum == 0) {
                        commitOffset(recordEntry);
                    }
                }
                // When reading continuously there is no need to call getCursor again;
                // the next cursor comes directly from the getRecords result.
                cursor = result.getNextCursor();
                retryNum = 0;
            } catch (SubscriptionOfflineException | SubscriptionSessionInvalidException e) {
                // Exit. Offline: the subscription is offline; SessionInvalid: the subscription
                // is being consumed by another client at the same time.
                e.printStackTrace();
                throw e;
            } catch (SubscriptionOffsetResetException e) {
                recoverFromOffsetReset();
            } catch (LimitExceededException e) {
                // Limit exceeded, retry.
                e.printStackTrace();
                retryNum++;
            } catch (DatahubClientException e) {
                // Other error, retry.
                e.printStackTrace();
                retryNum++;
            } catch (InterruptedException e) {
                // Interrupted while sleeping: restore the flag and stop this consumer
                // instead of terminating the whole JVM.
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(-1);
            }
        }
    }

    /** Records the given entry's sequence and system time in the offset and commits it. */
    private void commitOffset(RecordEntry recordEntry) {
        // Set the offset.
        subscriptionOffset.setSequence(recordEntry.getSequence());
        subscriptionOffset.setTimestamp(recordEntry.getSystemTime());
        // Commit the offset.
        Map<String, SubscriptionOffset> offsetMap = new HashMap<>();
        offsetMap.put(shardId, subscriptionOffset);
        datahubClient.commitSubscriptionOffset(Constant.projectName, Constant.topicName, subId, offsetMap);
        System.out.println(Thread.currentThread() + " commit offset successful");
    }

    /**
     * Handles an offset reset: refreshes the offset's version id and re-resolves the cursor.
     *
     * <p>After a reset the cursor must be obtained the same way the offset was reset:
     * if both sequence and timestamp were set, either SEQUENCE or SYSTEM_TIME works;
     * if only sequence was set, only SEQUENCE works; if only timestamp was set, only
     * SYSTEM_TIME works. SEQUENCE is tried first, then SYSTEM_TIME, then OLDEST.
     * If OLDEST also fails the JVM exits with status 1.
     */
    private void recoverFromOffsetReset() {
        // The offset was reset, so the SubscriptionOffset version information must be re-read.
        List<String> shardIds = Arrays.asList(shardId);
        SubscriptionOffset offset = datahubClient.getSubscriptionOffset(Constant.projectName, Constant.topicName, subId, shardIds).getOffsets().get(shardId);
        subscriptionOffset.setVersionId(offset.getVersionId());
        cursor = null;
        CursorType type = CursorType.SEQUENCE;
        while (cursor == null) {
            if (type == CursorType.SEQUENCE) {
                try {
                    long nextSequence = offset.getSequence() + 1;
                    cursor = datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.SEQUENCE, nextSequence).getCursor();
                } catch (DatahubClientException exception) {
                    // SEQUENCE lookup failed; fall back to SYSTEM_TIME.
                    type = CursorType.SYSTEM_TIME;
                }
            } else if (type == CursorType.SYSTEM_TIME) {
                try {
                    cursor = datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.SYSTEM_TIME, offset.getTimestamp()).getCursor();
                } catch (DatahubClientException exception) {
                    // SYSTEM_TIME lookup failed; fall back to OLDEST.
                    type = CursorType.OLDEST;
                }
            } else {
                try {
                    cursor = datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.OLDEST).getCursor();
                } catch (DatahubClientException exception) {
                    // No way left to obtain a cursor.
                    System.exit(1);
                }
            }
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy