com.aliyun.datahub.client.example.examples.WriteExample Maven / Gradle / Ivy
package com.aliyun.datahub.client.example.examples;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.exception.*;
import com.aliyun.datahub.client.model.*;
import org.apache.commons.codec.Charsets;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
// Example: writing data to DataHub
/** Entry point demonstrating how to write records to DataHub via {@link Writer}. */
public class WriteExample {
    public static void main(String[] args) {
        // Write tuple records, allowing up to 3 attempts per batch on transient errors.
        Writer writer = new Writer();
        writer.writeTupleTopic(3);

        // Further examples (uncomment to run):
        // new Writer().writeBlobTopic(3);
        // new Writer().writeByDifferentWay();
    }
}
// Writes Tuple and Blob records to DataHub topics with bounded retry on
// transient failures. Endpoint, credentials and topic names come from the
// project-level Constant class.
class Writer {
    private DatahubClient datahubClient;
    private RecordSchema recordSchema;

    public Writer() {
        // Build the DataHub client. The trailing boolean toggles binary
        // transport (supported by servers >= 2.12); disabled here.
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig(Constant.endpoint,
                                new AliyunAccount(Constant.accessId, Constant.accessKey), false))
                .build();
        recordSchema = datahubClient.getTopic(Constant.projectName, Constant.topicName).getRecordSchema();
    }

    // Returns the ids of all shards of topicName that are ACTIVE (writable).
    // Any client exception is logged and rethrown.
    private List<String> listActiveShardIds(String topicName) {
        // Typed list fixes the original raw `List`, which made the callers'
        // `for (String shardId : ...)` loops fail to compile.
        List<String> activeShardIds = new ArrayList<>();
        try {
            ListShardResult listShardResult = datahubClient.listShard(Constant.projectName, topicName);
            for (ShardEntry entry : listShardResult.getShards()) {
                if (entry.getState() == ShardState.ACTIVE) {
                    activeShardIds.add(entry.getShardId());
                }
            }
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        }
        return activeShardIds;
    }

    // Writes recordEntries to topicName, retrying up to maxRetry times on
    // retriable errors (rate limiting or other transient client errors).
    // Non-retriable errors (bad parameter, auth failure, missing resource,
    // sealed shard) are logged and rethrown immediately.
    private void putWithRetry(String topicName, List<RecordEntry> recordEntries, int maxRetry) {
        int retryNum = 0;
        while (retryNum < maxRetry) {
            try {
                // Servers >= 2.12 also support putRecordsByShard, which throws on any
                // write failure; putRecords instead reports failed records in its
                // result, which must be inspected as done below:
                // datahubClient.putRecordsByShard(Constant.projectName, topicName, shardId, recordEntries);
                PutRecordsResult res = datahubClient.putRecords(Constant.projectName, topicName, recordEntries);
                System.out.printf("failed data num is %d\n", res.getFailedRecordCount());
                for (PutErrorEntry entry : res.getPutErrorEntries()) {
                    System.out.println(entry.getIndex() + "\t" + entry.getErrorcode() + "\t" + entry.getMessage());
                }
                break;
            } catch (InvalidParameterException e) {
                // invalid parameter - not retriable
                e.printStackTrace();
                throw e;
            } catch (AuthorizationFailureException e) {
                // AK error - not retriable
                e.printStackTrace();
                throw e;
            } catch (ResourceNotFoundException e) {
                // project or topic not found - not retriable
                e.printStackTrace();
                throw e;
            } catch (ShardSealedException e) {
                // shard status is CLOSED, read only - not retriable
                e.printStackTrace();
                throw e;
            } catch (LimitExceededException e) {
                // limit exceeded - retry
                e.printStackTrace();
                retryNum++;
            } catch (DatahubClientException e) {
                // other (possibly transient) error - retry
                e.printStackTrace();
                retryNum++;
            }
        }
    }

    // Writes ten Tuple records to every ACTIVE shard of the tuple topic,
    // retrying each shard's batch up to maxRetry times.
    public void writeTupleTopic(int maxRetry) {
        for (String shardId : listActiveShardIds(Constant.topicName)) {
            // Build ten records addressed to this shard.
            List<RecordEntry> recordEntries = new ArrayList<>();
            for (int i = 0; i < 10; ++i) {
                RecordEntry recordEntry = new RecordEntry();
                // Optional per-record attribute.
                recordEntry.addAttribute("key1", "value1");
                TupleRecordData data = new TupleRecordData(recordSchema);
                data.setField("bigint_field", i);
                data.setField("timestamp_field", 1234);
                data.setField("string_field", "hello world");
                data.setField("double_field", 3.14);
                data.setField("boolean_field", true);
                // Use the String constructor: new BigDecimal(3.14159265359) would
                // capture the double literal's binary representation error.
                data.setField("decimal_field", new BigDecimal("3.14159265359"));
                recordEntry.setRecordData(data);
                recordEntry.setShardId(shardId);
                recordEntries.add(recordEntry);
            }
            putWithRetry(Constant.topicName, recordEntries, maxRetry);
        }
    }

    // Writes ten Blob records to every ACTIVE shard of the blob topic,
    // retrying each shard's batch up to maxRetry times.
    public void writeBlobTopic(int maxRetry) {
        for (String shardId : listActiveShardIds(Constant.blobTopicName)) {
            // Build ten records addressed to this shard.
            List<RecordEntry> recordEntries = new ArrayList<>();
            for (int i = 0; i < 10; ++i) {
                RecordEntry recordEntry = new RecordEntry();
                // Optional per-record attribute.
                recordEntry.addAttribute("key2", "value2");
                BlobRecordData data = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
                recordEntry.setRecordData(data);
                recordEntry.setShardId(shardId);
                recordEntries.add(recordEntry);
            }
            putWithRetry(Constant.blobTopicName, recordEntries, maxRetry);
        }
    }

    // Besides targeting an explicit shardId, DataHub offers several other ways
    // to route a record. Shown here with Blob data; Tuple data works the same.
    public void writeByDifferentWay() {
        List<RecordEntry> recordEntries = new ArrayList<>();

        /** Route by shardId. **/
        String shardId = "0";
        RecordEntry entry1 = new RecordEntry();
        BlobRecordData data1 = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
        entry1.setRecordData(data1);
        entry1.setShardId(shardId);
        // recordEntries.add(entry1);

        /** Route by hashKey. **/
        RecordEntry entry2 = new RecordEntry();
        BlobRecordData data2 = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
        entry2.setRecordData(data2);
        // hashKey is a 128-bit MD5 value; the service picks the shard whose
        // [BeginHashKey, EndHashKey) range contains it.
        String hashKey = "7FFFFFFFFFFFFFFD7FFFFFFFFFFFFFFD";
        entry2.setHashKey(hashKey);
        // recordEntries.add(entry2);

        /** Route by partitionKey. **/
        RecordEntry entry3 = new RecordEntry();
        BlobRecordData data3 = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
        entry3.setRecordData(data3);
        // partitionKey is an arbitrary string; the service hashes it with MD5 and
        // picks the shard whose hash-key range contains the digest.
        String partitionKey = "testPartitionKey";
        entry3.setPartitionKey(partitionKey);
        // recordEntries.add(entry3);

        /** Random routing: when none of the above is set, the service picks a
         *  random ACTIVE shard for the record. **/
        RecordEntry entry4 = new RecordEntry();
        BlobRecordData data4 = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
        entry4.setRecordData(data4);
        recordEntries.add(entry4);

        try {
            // putRecordsByShard ignores per-record routing and uses its shardId
            // argument. Otherwise the priority is shardId > hashKey > partitionKey.
            PutRecordsResult result = datahubClient.putRecords(Constant.projectName, Constant.blobTopicName, recordEntries);
            System.out.printf("failed data num is %d\n", result.getFailedRecordCount());
            for (PutErrorEntry entry : result.getPutErrorEntries()) {
                System.out.println(entry.getIndex() + "\t" + entry.getErrorcode() + "\t" + entry.getMessage());
            }
        } catch (DatahubClientException e) {
            // Production code should add retry logic here; omitted to keep the
            // routing examples focused.
            e.printStackTrace();
            throw e;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy