// com.aliyun.datahub.client.example.examples.ReadExample Maven / Gradle / Ivy
package com.aliyun.datahub.client.example.examples;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.exception.*;
import com.aliyun.datahub.client.model.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class ReadExample {
public static void main(String[] args) {
ReaderTuple rt = new ReaderTuple();
rt.getRecords();
// ReaderBlob rb = new ReaderBlob();
// rb.getRecords();
}
}
/**
 * Reads topic {@code Constant.topicName} of project {@code Constant.projectName}
 * with one {@link ReadThread} per shard and prints every record to standard output.
 *
 * <p>Record payloads are cast to {@code TupleRecordData}, so the topic is expected
 * to be a TUPLE topic.
 */
class ReaderTuple {
    private final DatahubClient datahubClient;
    // Schema of the topic; fetched in getRecords() before any reader is started.
    private RecordSchema schema;
    // shardId -> reader thread. Reader threads call freshThread() concurrently, so every
    // access goes through synchronized (mThread).
    private final Map<String, Thread> mThread;

    /** Creates the DataHub client from the settings in {@code Constant}. */
    public ReaderTuple() {
        // Create the DataHubClient instance.
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig(Constant.endpoint,
                                new AliyunAccount(Constant.accessId, Constant.accessKey),
                                // Whether to enable binary transport; supported by the
                                // server since version 2.12.
                                true))
                .build();
        mThread = new HashMap<>();
    }

    /**
     * Re-lists the shards and starts a reader for every shard that has none yet, then
     * blocks until those new readers finish. The shard list may change over time, so
     * the thread list has to be refreshed.
     *
     * @throws DatahubClientException if listing the shards or creating a reader fails
     */
    public void freshThread() {
        try {
            joinAll(startReaders(true));
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Keep the interrupt visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Fetches the topic schema, starts one reader thread per shard and blocks until
     * all of them have finished.
     *
     * @throws DatahubClientException if the topic or its shards cannot be queried
     */
    public void getRecords() {
        try {
            schema = datahubClient.getTopic(Constant.projectName, Constant.topicName).getRecordSchema();
            // Only the readers started here are joined. Readers started later by
            // freshThread() are joined by the reader that triggered the refresh, so this
            // call still returns only after every reader is done. This also avoids
            // iterating mThread while other threads are adding to it.
            joinAll(startReaders(false));
        } catch (DatahubClientException e) {
            // other error
            e.printStackTrace();
            throw e;
        } catch (InterruptedException e) {
            e.printStackTrace();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Lists the shards and starts one reader per shard, registering each in {@code mThread}.
     *
     * @param onlyUnregistered when {@code true}, shards that already have an entry in
     *                         {@code mThread} are skipped
     * @return the threads started by this call
     */
    private List<Thread> startReaders(boolean onlyUnregistered) {
        ListShardResult listShardResult = datahubClient.listShard(Constant.projectName, Constant.topicName);
        List<Thread> started = new ArrayList<>();
        // The lock makes "check, register, start" atomic, so two readers that hit a sealed
        // shard at the same time cannot both start a reader for the same new shard.
        synchronized (mThread) {
            for (ShardEntry entry : listShardResult.getShards()) {
                String id = entry.getShardId();
                if (onlyUnregistered && mThread.containsKey(id)) {
                    continue;
                }
                Thread thread = new ReadThread(3, id, schema);
                mThread.put(id, thread);
                started.add(thread);
                thread.start();
            }
        }
        return started;
    }

    /** Waits for every given thread to terminate. Never called while holding the lock. */
    private static void joinAll(List<Thread> threads) throws InterruptedException {
        for (Thread thread : threads) {
            thread.join();
        }
    }

    /** Reads one shard from its oldest cursor onwards and prints every record. */
    class ReadThread extends Thread {
        // Number of consecutive failed requests after which the reader gives up.
        private final int maxRetry;
        // Maximum number of records requested per getRecords call.
        private final int recordLimit = 1000;
        // Pause, in milliseconds, before polling again after an empty result.
        private final int noDataSleepTimeMs = 5000;
        private final String shardId;
        private String cursor;
        private final RecordSchema schema;

        /** Positions {@code cursor} at the start of the shard. */
        private void init() {
            try {
                // Start from the cursor of the first valid record (OLDEST); other cursor
                // types can be chosen instead.
                cursor = datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.OLDEST).getCursor();
            } catch (DatahubClientException e) {
                e.printStackTrace();
                throw e;
            }
        }

        /**
         * Consumes a batch: prints one line per record in the form
         * {@code shardId<TAB>name:value<TAB>...}.
         *
         * @param records records returned by one getRecords call
         */
        private void handleRecords(List<RecordEntry> records) {
            for (RecordEntry re : records) {
                TupleRecordData data = (TupleRecordData) re.getRecordData();
                StringBuilder res = new StringBuilder(shardId).append("\t");
                for (Field field : schema.getFields()) {
                    res.append(field.getName()).append(":").append(data.getField(field.getName())).append("\t");
                }
                System.out.println(res);
            }
        }

        /**
         * @param maxRetry number of consecutive failures tolerated before the reader stops
         * @param shardId  shard to read
         * @param schema   record schema passed to getRecords and used to print fields
         * @throws DatahubClientException if the initial cursor cannot be obtained
         */
        public ReadThread(int maxRetry, String shardId, RecordSchema schema) {
            this.shardId = shardId;
            this.maxRetry = maxRetry;
            this.schema = schema;
            init();
        }

        /**
         * Polls the shard until it is sealed, the thread is interrupted, a non-retryable
         * error is thrown, or {@code maxRetry} consecutive requests have failed.
         */
        @Override
        public void run() {
            int retryNum = 0;
            while (retryNum < maxRetry) {
                try {
                    GetRecordsResult result = datahubClient.getRecords(Constant.projectName, Constant.topicName, shardId, schema, cursor, recordLimit);
                    // The request succeeded, so the failure streak is over, even when
                    // the result turns out to be empty.
                    retryNum = 0;
                    if (result.getRecordCount() <= 0) {
                        // No data: sleep, then poll again with the same cursor.
                        System.out.printf("%s no data, sleep %d ms\n", shardId, noDataSleepTimeMs);
                        Thread.sleep(noDataSleepTimeMs);
                        continue;
                    }
                    // Consume the data.
                    handleRecords(result.getRecords());
                    // When reading continuously there is no need to call getCursor again;
                    // the next cursor comes with the getRecords result.
                    cursor = result.getNextCursor();
                } catch (InvalidParameterException e) {
                    // invalid parameter, please check your parameter
                    e.printStackTrace();
                    throw e;
                } catch (AuthorizationFailureException e) {
                    // AK error, please check your accessId and accessKey
                    e.printStackTrace();
                    throw e;
                } catch (ResourceNotFoundException e) {
                    // project or topic not found
                    e.printStackTrace();
                    throw e;
                } catch (SeekOutOfRangeException e) {
                    // offset invalid
                    e.printStackTrace();
                    throw e;
                } catch (ShardSealedException e) {
                    // throw ShardSealedException when shard status is CLOSED and all data has been read
                    System.out.printf("shard %s all data has been read\n", shardId);
                    // A ShardSealedException usually means the shard was split or merged,
                    // so the list of reader threads has to be refreshed.
                    freshThread();
                    break;
                } catch (LimitExceededException e) {
                    // limit exceed, retry
                    e.printStackTrace();
                    retryNum++;
                } catch (DatahubClientException e) {
                    // other error, retry
                    e.printStackTrace();
                    retryNum++;
                } catch (InterruptedException e) {
                    // Stop only this reader and keep the interrupt flag set, rather than
                    // terminating the whole JVM from a worker thread.
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }
    }
}
/**
 * Reads topic {@code Constant.blobTopicName} of project {@code Constant.projectName}
 * with one {@link ReadThread} per shard and prints every record to standard output.
 *
 * <p>Record payloads are cast to {@code BlobRecordData}, so the topic is expected
 * to be a BLOB topic.
 */
class ReaderBlob {
    private final DatahubClient datahubClient;
    // shardId -> reader thread. Reader threads call freshThread() concurrently, so every
    // access goes through synchronized (mThread).
    private final Map<String, Thread> mThread;

    /** Creates the DataHub client from the settings in {@code Constant}. */
    public ReaderBlob() {
        // Create the DataHubClient instance.
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig(Constant.endpoint,
                                new AliyunAccount(Constant.accessId, Constant.accessKey),
                                // Whether to enable binary transport; supported by the
                                // server since version 2.12.
                                true))
                .build();
        mThread = new HashMap<>();
    }

    /**
     * Re-lists the shards and starts a reader for every shard that has none yet, then
     * blocks until those new readers finish. The shard list may change over time, so
     * the thread list has to be refreshed.
     *
     * @throws DatahubClientException if listing the shards or creating a reader fails
     */
    public void freshThread() {
        try {
            joinAll(startReaders(true));
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Keep the interrupt visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Starts one reader thread per shard and blocks until all of them have finished.
     *
     * @throws DatahubClientException if the shards cannot be listed or a reader cannot be created
     */
    public void getRecords() {
        try {
            // The initial readers are registered in mThread as well. Without that, the
            // first freshThread() would treat every shard as new and start a second
            // reader for shards that are already being read.
            joinAll(startReaders(false));
        } catch (DatahubClientException e) {
            // other error
            e.printStackTrace();
            throw e;
        } catch (InterruptedException e) {
            e.printStackTrace();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Lists the shards and starts one reader per shard, registering each in {@code mThread}.
     *
     * @param onlyUnregistered when {@code true}, shards that already have an entry in
     *                         {@code mThread} are skipped
     * @return the threads started by this call
     */
    private List<Thread> startReaders(boolean onlyUnregistered) {
        ListShardResult listShardResult = datahubClient.listShard(Constant.projectName, Constant.blobTopicName);
        List<Thread> started = new ArrayList<>();
        // The lock makes "check, register, start" atomic, so two readers that hit a sealed
        // shard at the same time cannot both start a reader for the same new shard.
        synchronized (mThread) {
            for (ShardEntry entry : listShardResult.getShards()) {
                String id = entry.getShardId();
                if (onlyUnregistered && mThread.containsKey(id)) {
                    continue;
                }
                Thread thread = new ReadThread(3, id);
                mThread.put(id, thread);
                started.add(thread);
                thread.start();
            }
        }
        return started;
    }

    /** Waits for every given thread to terminate. Never called while holding the lock. */
    private static void joinAll(List<Thread> threads) throws InterruptedException {
        for (Thread thread : threads) {
            thread.join();
        }
    }

    /** Reads one shard from its oldest cursor onwards and prints every record. */
    class ReadThread extends Thread {
        // Number of consecutive failed requests after which the reader gives up.
        private final int maxRetry;
        // Maximum number of records requested per getRecords call.
        private final int recordLimit = 1000;
        // Pause, in milliseconds, before polling again after an empty result.
        private final int noDataSleepTimeMs = 5000;
        private final String shardId;
        private String cursor;

        /** Positions {@code cursor} at the start of the shard. */
        private void init() {
            try {
                // Start from the cursor of the first valid record (OLDEST); other cursor
                // types can be chosen instead.
                cursor = datahubClient.getCursor(Constant.projectName, Constant.blobTopicName, shardId, CursorType.OLDEST).getCursor();
            } catch (DatahubClientException e) {
                e.printStackTrace();
                throw e;
            }
        }

        /**
         * Consumes a batch: prints one line per record in the form
         * {@code shardId<TAB>payload}.
         *
         * @param records records returned by one getRecords call
         */
        private void handleRecords(List<RecordEntry> records) {
            for (RecordEntry re : records) {
                BlobRecordData data = (BlobRecordData) re.getRecordData();
                // NOTE(review): decodes with the platform default charset; pass an explicit
                // charset once the payload encoding is confirmed.
                String sData = new String(data.getData());
                System.out.println(shardId + "\t" + sData);
            }
        }

        /**
         * @param maxRetry number of consecutive failures tolerated before the reader stops
         * @param shardId  shard to read
         * @throws DatahubClientException if the initial cursor cannot be obtained
         */
        public ReadThread(int maxRetry, String shardId) {
            this.shardId = shardId;
            this.maxRetry = maxRetry;
            init();
        }

        /**
         * Polls the shard until it is sealed, the thread is interrupted, a non-retryable
         * error is thrown, or {@code maxRetry} consecutive requests have failed.
         */
        @Override
        public void run() {
            int retryNum = 0;
            while (retryNum < maxRetry) {
                try {
                    GetRecordsResult result = datahubClient.getRecords(Constant.projectName, Constant.blobTopicName, shardId, cursor, recordLimit);
                    // The request succeeded, so the failure streak is over, even when
                    // the result turns out to be empty.
                    retryNum = 0;
                    if (result.getRecordCount() <= 0) {
                        // No data: sleep, then poll again with the same cursor.
                        System.out.printf("%s no data, sleep %d ms\n", shardId, noDataSleepTimeMs);
                        Thread.sleep(noDataSleepTimeMs);
                        continue;
                    }
                    // Consume the data.
                    handleRecords(result.getRecords());
                    // When reading continuously there is no need to call getCursor again;
                    // the next cursor comes with the getRecords result.
                    cursor = result.getNextCursor();
                } catch (InvalidParameterException e) {
                    // invalid parameter, please check your parameter
                    e.printStackTrace();
                    throw e;
                } catch (AuthorizationFailureException e) {
                    // AK error, please check your accessId and accessKey
                    e.printStackTrace();
                    throw e;
                } catch (ResourceNotFoundException e) {
                    // project or topic not found
                    e.printStackTrace();
                    throw e;
                } catch (SeekOutOfRangeException e) {
                    // offset invalid
                    e.printStackTrace();
                    throw e;
                } catch (ShardSealedException e) {
                    // throw ShardSealedException when shard status is CLOSED and all data has been read
                    System.out.printf("shard %s all data has been read\n", shardId);
                    // A ShardSealedException usually means the shard was split or merged,
                    // so the list of reader threads has to be refreshed.
                    freshThread();
                    break;
                } catch (LimitExceededException e) {
                    // limit exceed, retry
                    e.printStackTrace();
                    retryNum++;
                } catch (DatahubClientException e) {
                    // other error, retry
                    e.printStackTrace();
                    retryNum++;
                } catch (InterruptedException e) {
                    // Stop only this reader and keep the interrupt flag set, rather than
                    // terminating the whole JVM from a worker thread.
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy