package com.aliyun.datahub.client.example.examples;

import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.exception.*;
import com.aliyun.datahub.client.model.*;
import org.apache.commons.codec.Charsets;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;

// Example of writing data to DataHub
public class WriteExample {

    public static void main(String[] args) {
        Writer tupleWriter = new Writer();
        tupleWriter.writeTupleTopic(3);

//        Writer blobWriter = new Writer();
//        blobWriter.writeBlobTopic(3);
//
//        Writer diffWriter = new Writer();
//        diffWriter.writeByDifferentWay();
    }

}

class Writer {
    private DatahubClient datahubClient;
    private RecordSchema recordSchema;

    public Writer() {
        // Create the DatahubClient instance
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig(Constant.endpoint,
                                // The third argument controls binary transfer, supported by server version 2.12 and later
                                new AliyunAccount(Constant.accessId, Constant.accessKey), false))
                .build();
                .build();
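        // A minimal sketch (not part of the original example): for server versions 2.12 and later,
        // binary transfer can be enabled by passing true as the third DatahubConfig argument.
        // datahubClient = DatahubClientBuilder.newBuilder()
        //         .setDatahubConfig(new DatahubConfig(Constant.endpoint,
        //                 new AliyunAccount(Constant.accessId, Constant.accessKey), true))
        //         .build();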
        recordSchema = datahubClient.getTopic(Constant.projectName, Constant.topicName).getRecordSchema();
    }

    public void writeTupleTopic(int maxRetry) {
        // Get the shards of the topic that are in ACTIVE state
        List<String> activeShardIds = new ArrayList<>();
        try {
            ListShardResult listShardResult = datahubClient.listShard(Constant.projectName, Constant.topicName);
            for (ShardEntry entry : listShardResult.getShards()) {
                if (entry.getState() == ShardState.ACTIVE) {
                    activeShardIds.add(entry.getShardId());
                }
            }
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        }

        // Write to each active shard in turn
        for (String shardId : activeShardIds) {
            // Generate ten records to write for each shard
            List<RecordEntry> recordEntries = new ArrayList<>();
            for (int i = 0; i < 10; ++i) {
                RecordEntry recordEntry = new RecordEntry();
                // Set an extra attribute on each record
                recordEntry.addAttribute("key1", "value1");

                TupleRecordData data = new TupleRecordData(recordSchema);
                data.setField("bigint_field", i);
                data.setField("timestamp_field", 1234);
                data.setField("string_field", "hello world");
                data.setField("double_field", 3.14);
                data.setField("boolean_field", true);
                data.setField("decimal_field", new BigDecimal(3.14159265359));
                recordEntry.setRecordData(data);
                recordEntry.setShardId(shardId);
                recordEntries.add(recordEntry);
            }

            // If the write fails, retry up to maxRetry times
            int retryNum = 0;
            while (retryNum < maxRetry) {
                try {
                    // putRecordsByShard is supported by server version 2.12 and later; for earlier versions use putRecords
                    // With putRecords the caller must inspect the result for records that failed to write;
                    // putRecordsByShard throws an exception directly when a write fails
                    //datahubClient.putRecordsByShard(Constant.projectName, Constant.topicName, shardId, recordEntries);

                    PutRecordsResult res = datahubClient.putRecords(Constant.projectName, Constant.topicName, recordEntries);
                    System.out.printf("failed data num is %d\n", res.getFailedRecordCount());
                    for (PutErrorEntry entry : res.getPutErrorEntries()) {
                        System.out.println(entry.getIndex() + "\t" + entry.getErrorcode() + "\t" + entry.getMessage());
                    }
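                    // A minimal sketch (assumption, not part of the original example): when putRecords
                    // reports failures, only the failed subset needs to be resubmitted. The indexes in
                    // getPutErrorEntries() point back into the submitted list:
                    // if (res.getFailedRecordCount() > 0) {
                    //     List<RecordEntry> failedRecords = new ArrayList<>();
                    //     for (PutErrorEntry err : res.getPutErrorEntries()) {
                    //         failedRecords.add(recordEntries.get(err.getIndex()));
                    //     }
                    //     recordEntries = failedRecords; // the next attempt resubmits only these
                    // }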
                    break;
                } catch (InvalidParameterException e) {
                    // invalid parameter
                    e.printStackTrace();
                    throw e;
                } catch (AuthorizationFailureException e) {
                    // AK error
                    e.printStackTrace();
                    throw e;
                } catch (ResourceNotFoundException e) {
                    // project or topic not found
                    e.printStackTrace();
                    throw e;
                } catch (ShardSealedException e) {
                    // shard status is CLOSED, read only
                    e.printStackTrace();
                    throw e;
                } catch (LimitExceededException e) {
                    // limit exceed, retry
                    e.printStackTrace();
                    retryNum++;
                } catch (DatahubClientException e) {
                    // other error
                    e.printStackTrace();
                    retryNum++;
                }
            }
        }
    }


    public void writeBlobTopic(int maxRetry) {
        // Get the shards of the topic that are in ACTIVE state
        List<String> activeShardIds = new ArrayList<>();
        try {
            ListShardResult listShardResult = datahubClient.listShard(Constant.projectName, Constant.blobTopicName);
            for (ShardEntry entry : listShardResult.getShards()) {
                if (entry.getState() == ShardState.ACTIVE) {
                    activeShardIds.add(entry.getShardId());
                }
            }
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        }

        // Write to each active shard in turn
        for (String shardId : activeShardIds) {
            // Generate ten records for each shard
            List<RecordEntry> recordEntries = new ArrayList<>();
            for (int i = 0; i < 10; ++i) {
                RecordEntry recordEntry = new RecordEntry();

                // Set an extra attribute on each record
                recordEntry.addAttribute("key2", "value2");

                BlobRecordData data = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
                recordEntry.setRecordData(data);
                recordEntry.setShardId(shardId);
                recordEntries.add(recordEntry);
            }

            // If the write fails, retry up to maxRetry times
            int retryNum = 0;
            while (retryNum < maxRetry) {
                try {
                    // putRecordsByShard is supported by server version 2.12 and later; for earlier versions use putRecords
                    // With putRecords the caller must inspect the result for records that failed to write;
                    // putRecordsByShard throws an exception directly when a write fails
                    //datahubClient.putRecordsByShard(Constant.projectName, Constant.blobTopicName, shardId, recordEntries);

                    PutRecordsResult res = datahubClient.putRecords(Constant.projectName, Constant.blobTopicName, recordEntries);
                    System.out.printf("failed data num is %d\n", res.getFailedRecordCount());
                    for (PutErrorEntry entry : res.getPutErrorEntries()) {
                        System.out.println(entry.getIndex() + "\t" + entry.getErrorcode() + "\t" + entry.getMessage());
                    }
                    break;
                } catch (InvalidParameterException e) {
                    // invalid parameter
                    e.printStackTrace();
                    throw e;
                } catch (AuthorizationFailureException e) {
                    // AK error
                    e.printStackTrace();
                    throw e;
                } catch (ResourceNotFoundException e) {
                    // project or topic not found
                    e.printStackTrace();
                    throw e;
                } catch (ShardSealedException e) {
                    // shard status is CLOSED, read only
                    e.printStackTrace();
                    throw e;
                } catch (LimitExceededException e) {
                    // limit exceed, retry
                    e.printStackTrace();
                    retryNum++;
                } catch (DatahubClientException e) {
                    // other error
                    e.printStackTrace();
                    retryNum++;
                }
            }

        }
    }

    // Besides specifying a shardId directly, DataHub supports several other ways of choosing the target shard.
    // The examples below use blob records; tuple records work the same way.
    public void writeByDifferentWay() {
        // Build the list of records to write
        List<RecordEntry> recordEntries = new ArrayList<>();

        /** Write by shardId **/
        String shardId = "0";
        RecordEntry entry1 = new RecordEntry();
        BlobRecordData data1 = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
        entry1.setRecordData(data1);
        entry1.setShardId(shardId);
//        recordEntries.add(entry1);


        /** Write by hashKey **/
        RecordEntry entry2 = new RecordEntry();
        BlobRecordData data2 = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
        entry2.setRecordData(data2);
        //hashKey is a 128-bit MD5 value; the system chooses the target shard based on each shard's BeginHashKey and EndHashKey
        String hashKey = "7FFFFFFFFFFFFFFD7FFFFFFFFFFFFFFD";
        entry2.setHashKey(hashKey);
//        recordEntries.add(entry2);
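        // A minimal sketch (assumption, not part of the original example): a hashKey can be derived
        // from an arbitrary business key by MD5-hashing it with the JDK's MessageDigest and
        // hex-encoding the 128-bit digest ("myBusinessKey" is a hypothetical key):
        // java.security.MessageDigest md5 = java.security.MessageDigest.getInstance("MD5");
        // byte[] digest = md5.digest("myBusinessKey".getBytes(Charsets.UTF_8));
        // StringBuilder hex = new StringBuilder();
        // for (byte b : digest) { hex.append(String.format("%02X", b)); }
        // entry2.setHashKey(hex.toString()); // note: getInstance("MD5") declares NoSuchAlgorithmException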

        /** Write by partitionKey **/
        RecordEntry entry3 = new RecordEntry();
        BlobRecordData data3 = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
        entry3.setRecordData(data3);
        //partitionKey is a string; the system chooses the target shard based on the MD5 of this string and each shard's BeginHashKey and EndHashKey
        String partitionKey = "testPartitionKey";
        entry3.setPartitionKey(partitionKey);
//        recordEntries.add(entry3);

        /** Random write: if none of the three options above is specified, the system picks an ACTIVE shard at random **/
        RecordEntry entry4 = new RecordEntry();
        BlobRecordData data4 = new BlobRecordData("123456".getBytes(Charsets.UTF_8));
        entry4.setRecordData(data4);
        recordEntries.add(entry4);

        try {
            // If data is written with putRecordsByShard, the other routing options are ignored and the shardId parameter of putRecordsByShard is used
            // When different routing options are combined, the priority is shardId > hashKey > partitionKey
            PutRecordsResult result = datahubClient.putRecords(Constant.projectName, Constant.blobTopicName, recordEntries);
            System.out.printf("failed data num is %d\n", result.getFailedRecordCount());
            for (PutErrorEntry entry : result.getPutErrorEntries()) {
                System.out.println(entry.getIndex() + "\t" + entry.getErrorcode() + "\t" + entry.getMessage());
            }
        } catch (DatahubClientException e) {
            // In production, retry logic should be added here; this method only demonstrates the different write options
            e.printStackTrace();
            throw e;
        }

    }
}



