All Downloads are FREE. The search and download functionality uses the official Maven repository.

com.aliyun.datahub.client.example.examples.ReadExample Maven / Gradle / Ivy

There is a newer version: 2.25.6
Show newest version
package com.aliyun.datahub.client.example.examples;

import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.exception.*;
import com.aliyun.datahub.client.model.*;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReadExample {
    public static void main(String[] args) {
        ReaderTuple rt = new ReaderTuple();
        rt.getRecords();

//        ReaderBlob rb = new ReaderBlob();
//        rb.getRecords();
    }
}


/**
 * Reads records from every shard of {@code Constant.topicName}, running one
 * {@link ReadThread} per shard and printing each record to standard output.
 */
class ReaderTuple {
    private final DatahubClient datahubClient;
    // Assigned in getRecords() before any reader thread is started.
    private RecordSchema schema;
    // shardId -> reader thread. Reader threads call freshThread() concurrently,
    // so every access is guarded by synchronized (mThread).
    private final Map<String, Thread> mThread;

    public ReaderTuple() {
        // Create the DataHubClient instance.
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig(Constant.endpoint,
                                // The trailing boolean enables binary transport
                                // (supported by the server since version 2.12).
                                new AliyunAccount(Constant.accessId, Constant.accessKey), true))
                .build();
        mThread = new HashMap<>();
    }

    /**
     * Re-lists the shards (the shard list may have changed), starts a reader
     * for every shard that has none yet, and waits for those new readers.
     *
     * @throws DatahubClientException if listing shards or creating a reader fails
     */
    public void freshThread() {
        try {
            joinAll(startReadersForNewShards());
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        }
    }

    /**
     * Fetches the topic schema, starts one reader thread per shard that has no
     * registered reader yet, and blocks until those readers terminate.
     *
     * @throws DatahubClientException if fetching the topic, listing shards or
     *                                creating a reader fails
     */
    public void getRecords() {
        try {
            schema = datahubClient.getTopic(Constant.projectName, Constant.topicName).getRecordSchema();
            joinAll(startReadersForNewShards());
        } catch (DatahubClientException e) {
            // other error
            e.printStackTrace();
            throw e;
        }
    }

    /** Starts and registers a reader for each shard not yet in {@code mThread}; returns the started threads. */
    private List<Thread> startReadersForNewShards() {
        ListShardResult listShardResult = datahubClient.listShard(Constant.projectName, Constant.topicName);
        List<Thread> started = new ArrayList<>();
        // Check-then-put must be atomic: several sealed readers may refresh at once.
        synchronized (mThread) {
            for (ShardEntry entry : listShardResult.getShards()) {
                String shardId = entry.getShardId();
                if (!mThread.containsKey(shardId)) {
                    Thread thread = new ReadThread(3, shardId, schema);
                    mThread.put(shardId, thread);
                    started.add(thread);
                    thread.start();
                }
            }
        }
        return started;
    }

    /** Waits for every given thread; joins outside any lock so readers can still call freshThread(). */
    private void joinAll(List<Thread> threads) {
        try {
            for (Thread thread : threads) {
                thread.join();
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Keep the interrupt visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
    }

    /** Reader for a single shard; loops until the shard is sealed or retries are exhausted. */
    class ReadThread extends Thread {
        private final int maxRetry;
        private final int recordLimit = 1000;
        private final int noDataSleepTimeMs = 5000;
        private final String shardId;
        private String cursor;
        private final RecordSchema schema;

        private void init() {
            try {
                // Start from the cursor of the oldest valid record; other cursor types can be chosen instead.
                cursor = datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.OLDEST).getCursor();
            } catch (DatahubClientException e) {
                e.printStackTrace();
                throw e;
            }
        }

        /** Prints each record as {@code shardId<TAB>name:value<TAB>...}. */
        private void handleRecords(List<RecordEntry> records) {
            // Consume the data.
            for (RecordEntry re : records) {
                TupleRecordData data = (TupleRecordData) re.getRecordData();
                StringBuilder line = new StringBuilder().append(shardId).append('\t');
                for (Field field : schema.getFields()) {
                    String name = field.getName();
                    line.append(name).append(':').append(data.getField(name)).append('\t');
                }
                System.out.println(line);
            }
        }

        /**
         * @param maxRetry number of consecutive failed requests after which the reader stops
         * @param shardId  shard to read from
         * @param schema   record schema passed to every getRecords request
         * @throws DatahubClientException if the initial cursor cannot be obtained
         */
        public ReadThread(int maxRetry, String shardId, RecordSchema schema) {
            this.shardId = shardId;
            this.maxRetry = maxRetry;
            this.schema = schema;
            init();
        }

        @Override
        public void run() {
            int retryNum = 0;
            while (retryNum < maxRetry) {
                try {
                    GetRecordsResult result = datahubClient.getRecords(Constant.projectName, Constant.topicName, shardId, schema, cursor, recordLimit);
                    // The request succeeded (even if it returned nothing), so earlier
                    // failures no longer count as consecutive.
                    retryNum = 0;
                    if (result.getRecordCount() <= 0) {
                        // No data: sleep, then poll again with the same cursor.
                        System.out.printf("%s no data, sleep %d ms\n", shardId, noDataSleepTimeMs);
                        Thread.sleep(noDataSleepTimeMs);
                        continue;
                    }

                    // Consume the data.
                    handleRecords(result.getRecords());

                    // When reading continuously there is no need to call getCursor again;
                    // the next cursor comes from the getRecords result.
                    cursor = result.getNextCursor();
                } catch (InvalidParameterException e) {
                    // invalid parameter, please check your parameter
                    e.printStackTrace();
                    throw e;
                } catch (AuthorizationFailureException e) {
                    // AK error, please check your accessId and accessKey
                    e.printStackTrace();
                    throw e;
                } catch (ResourceNotFoundException e) {
                    // project or topic not found
                    e.printStackTrace();
                    throw e;
                } catch (SeekOutOfRangeException e) {
                    // offset invalid
                    e.printStackTrace();
                    throw e;
                } catch (ShardSealedException e) {
                    // throw ShardSealedException when shard status is CLOSED and all data has been read
                    System.out.printf("shard %s all data has been read\n", shardId);

                    // A sealed shard usually means a shard split or merge happened,
                    // so refresh the set of reader threads.
                    freshThread();
                    break;
                } catch (LimitExceededException e) {
                    // limit exceed, retry
                    e.printStackTrace();
                    retryNum++;

                } catch (DatahubClientException e) {
                    // other error, retry
                    e.printStackTrace();
                    retryNum++;
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    System.exit(-1);
                }
            }
        }
    }
}


/**
 * Reads records from every shard of {@code Constant.blobTopicName}, running
 * one {@link ReadThread} per shard and printing each record to standard output.
 */
class ReaderBlob {
    private final DatahubClient datahubClient;
    // shardId -> reader thread. Reader threads call freshThread() concurrently,
    // so every access is guarded by synchronized (mThread).
    private final Map<String, Thread> mThread;

    public ReaderBlob() {
        // Create the DataHubClient instance.
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig(Constant.endpoint,
                                // The trailing boolean enables binary transport
                                // (supported by the server since version 2.12).
                                new AliyunAccount(Constant.accessId, Constant.accessKey), true))
                .build();

        mThread = new HashMap<>();
    }

    /**
     * Re-lists the shards (the shard list may have changed), starts a reader
     * for every shard that has none yet, and waits for those new readers.
     *
     * @throws DatahubClientException if listing shards or creating a reader fails
     */
    public void freshThread() {
        try {
            joinAll(startReadersForNewShards());
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        }
    }

    /**
     * Starts one reader thread per shard that has no registered reader yet and
     * blocks until those readers terminate. The readers are registered in
     * {@code mThread} so that a later {@link #freshThread()} does not start a
     * second reader for a shard that is already being read.
     *
     * @throws DatahubClientException if listing shards or creating a reader fails
     */
    public void getRecords() {
        try {
            joinAll(startReadersForNewShards());
        } catch (DatahubClientException e) {
            // other error
            e.printStackTrace();
            throw e;
        }
    }

    /** Starts and registers a reader for each shard not yet in {@code mThread}; returns the started threads. */
    private List<Thread> startReadersForNewShards() {
        ListShardResult listShardResult = datahubClient.listShard(Constant.projectName, Constant.blobTopicName);
        List<Thread> started = new ArrayList<>();
        // Check-then-put must be atomic: several sealed readers may refresh at once.
        synchronized (mThread) {
            for (ShardEntry entry : listShardResult.getShards()) {
                String shardId = entry.getShardId();
                if (!mThread.containsKey(shardId)) {
                    Thread thread = new ReadThread(3, shardId);
                    mThread.put(shardId, thread);
                    started.add(thread);
                    thread.start();
                }
            }
        }
        return started;
    }

    /** Waits for every given thread; joins outside any lock so readers can still call freshThread(). */
    private void joinAll(List<Thread> threads) {
        try {
            for (Thread thread : threads) {
                thread.join();
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Keep the interrupt visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
    }

    /** Reader for a single shard; loops until the shard is sealed or retries are exhausted. */
    class ReadThread extends Thread {
        private final int maxRetry;
        private final int recordLimit = 1000;
        private final int noDataSleepTimeMs = 5000;
        private final String shardId;
        private String cursor;

        private void init() {
            try {
                // Start from the cursor of the oldest valid record; other cursor types can be chosen instead.
                cursor = datahubClient.getCursor(Constant.projectName, Constant.blobTopicName, shardId, CursorType.OLDEST).getCursor();
            } catch (DatahubClientException e) {
                e.printStackTrace();
                throw e;
            }
        }

        /** Prints each record as {@code shardId<TAB>payload}. */
        private void handleRecords(List<RecordEntry> records) {
            // Consume the data.
            for (RecordEntry re : records) {
                BlobRecordData data = (BlobRecordData) re.getRecordData();
                // NOTE(review): decodes with the platform default charset; pass an explicit
                // charset here once the encoding used by the writer is confirmed.
                String payload = new String(data.getData());
                System.out.println(shardId + "\t" + payload);
            }
        }

        /**
         * @param maxRetry number of consecutive failed requests after which the reader stops
         * @param shardId  shard to read from
         * @throws DatahubClientException if the initial cursor cannot be obtained
         */
        public ReadThread(int maxRetry, String shardId) {
            this.shardId = shardId;
            this.maxRetry = maxRetry;
            init();
        }

        @Override
        public void run() {
            int retryNum = 0;
            while (retryNum < maxRetry) {
                try {
                    GetRecordsResult result = datahubClient.getRecords(Constant.projectName, Constant.blobTopicName, shardId, cursor, recordLimit);
                    // The request succeeded (even if it returned nothing), so earlier
                    // failures no longer count as consecutive.
                    retryNum = 0;
                    if (result.getRecordCount() <= 0) {
                        // No data: sleep, then poll again with the same cursor.
                        System.out.printf("%s no data, sleep %d ms\n", shardId, noDataSleepTimeMs);
                        Thread.sleep(noDataSleepTimeMs);
                        continue;
                    }

                    // Consume the data.
                    handleRecords(result.getRecords());

                    // When reading continuously there is no need to call getCursor again;
                    // the next cursor comes from the getRecords result.
                    cursor = result.getNextCursor();
                } catch (InvalidParameterException e) {
                    // invalid parameter, please check your parameter
                    e.printStackTrace();
                    throw e;
                } catch (AuthorizationFailureException e) {
                    // AK error, please check your accessId and accessKey
                    e.printStackTrace();
                    throw e;
                } catch (ResourceNotFoundException e) {
                    // project or topic not found
                    e.printStackTrace();
                    throw e;
                } catch (SeekOutOfRangeException e) {
                    // offset invalid
                    e.printStackTrace();
                    throw e;
                } catch (ShardSealedException e) {
                    // throw ShardSealedException when shard status is CLOSED and all data has been read
                    System.out.printf("shard %s all data has been read\n", shardId);

                    // A sealed shard usually means a shard split or merge happened,
                    // so refresh the set of reader threads.
                    freshThread();
                    break;
                } catch (LimitExceededException e) {
                    // limit exceed, retry
                    e.printStackTrace();
                    retryNum++;

                } catch (DatahubClientException e) {
                    // other error, retry
                    e.printStackTrace();
                    retryNum++;
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    System.exit(-1);
                }
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy