All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.aliyun.datahub.client.example.examples.OffsetConsumptionExample Maven / Gradle / Ivy

There is a newer version: 2.25.6
Show newest version
package com.aliyun.datahub.client.example.examples;

import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.exception.*;
import com.aliyun.datahub.client.model.*;

import java.util.*;

//点位消费示例,以消费tuple类型为例
public class OffsetConsumptionExample {

    public static void main(String[] args) {
        Consumption cs = new Consumption();
        cs.offset_consumption();
    }
}


/**
 * Builds a DataHub client for the topic named in {@code Constant} and drives
 * offset-based consumption by running one {@code ConsumerThread} per shard.
 */
class Consumption {
    private final DatahubClient datahubClient;
    private final RecordSchema schema;
    private final String subId;

    /**
     * Creates the client and loads the record schema of the configured topic.
     *
     * @throws DatahubClientException if the topic lookup fails; the stack trace is
     *                                printed before the exception is rethrown
     */
    public Consumption() {
        // Create the DatahubClient instance.
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig(Constant.endpoint,
                                // The trailing flag enables binary transfer,
                                // supported by the server since version 2.12.
                                new AliyunAccount(Constant.accessId, Constant.accessKey), true))
                .build();

        try {
            // Fetch the topic schema once; it is handed to every consumer thread.
            schema = datahubClient.getTopic(Constant.projectName, Constant.topicName).getRecordSchema();
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        }
        // NOTE(review): the subscription ID is left empty here; presumably it must be
        // replaced with a real subscription ID before running — confirm.
        subId = "";
    }


    /**
     * Offset-based consumption: offsets are committed while records are being consumed.
     * If records are consumed but the offset is not committed, the same records may be
     * consumed again.
     *
     * <p>Lists the topic's shards, opens one subscription session covering all of them,
     * starts a {@code ConsumerThread} for each shard and waits for all threads to end.
     *
     * @throws DatahubClientException if listing shards, opening the session or creating
     *                                a consumer thread fails; the stack trace is printed
     *                                before the exception is rethrown
     */
    public void offset_consumption() {
        try {
            ListShardResult listShardResult = datahubClient.listShard(Constant.projectName, Constant.topicName);
            List<String> shardIdList = new ArrayList<>();
            for (ShardEntry entry : listShardResult.getShards()) {
                shardIdList.add(entry.getShardId());
            }

            // Initialise the offsets of all shards in a single call.
            OpenSubscriptionSessionResult openSubscriptionSessionResult =
                    datahubClient.openSubscriptionSession(Constant.projectName, Constant.topicName, subId, shardIdList);

            // One reader thread per shard.
            List<Thread> threadList = new ArrayList<>();
            for (String shardId : shardIdList) {
                Thread thread = new ConsumerThread(3, shardId, subId, schema,
                        openSubscriptionSessionResult.getOffsets().get(shardId), datahubClient);
                threadList.add(thread);
                thread.start();
                System.out.println(shardId);
            }

            for (Thread thread : threadList) {
                thread.join();
            }
        } catch (DatahubClientException e) {
            e.printStackTrace();
            throw e;
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }

    }
}

/**
 * Reads TUPLE records from a single shard, starting at the subscription offset it is
 * given, and commits the offset back to the subscription every 2000 consumed records.
 *
 * <p>The starting cursor is resolved in the constructor, so constructing an instance
 * already calls the DataHub service.
 */
class ConsumerThread extends Thread {
    private final int maxRetry;
    private final String shardId;
    private final String subId;
    private final RecordSchema schema;
    private final SubscriptionOffset subscriptionOffset;
    private final DatahubClient datahubClient;
    // Cursor of the next read; advanced after every successfully processed batch.
    private String cursor;

    /**
     * @param maxRetry           number of failed read attempts tolerated before the
     *                           thread stops; the counter is reset after each
     *                           successfully processed batch
     * @param shardId            shard this thread reads from
     * @param subId              subscription the offsets are committed to
     * @param schema             record schema passed to every {@code getRecords} call
     * @param subscriptionOffset offset of this shard as returned when the subscription
     *                           session was opened; it is updated in place on commit
     * @param datahubClient      client used for all service calls
     * @throws DatahubClientException if the starting cursor cannot be resolved
     */
    public ConsumerThread(int maxRetry, String shardId, String subId, RecordSchema schema, SubscriptionOffset subscriptionOffset,
                          DatahubClient datahubClient) {
        this.maxRetry = maxRetry;
        this.shardId = shardId;
        this.subId = subId;
        this.schema = schema;
        this.subscriptionOffset = subscriptionOffset;
        this.datahubClient = datahubClient;
        init();
    }

    /**
     * Resolves the starting cursor from the current offset. If nothing has been consumed
     * yet, or the offset has expired, the cursor of the first record within the
     * lifecycle (OLDEST) is used instead.
     */
    private void init() {
        // sequence < 0 means nothing has been consumed yet.
        if (subscriptionOffset.getSequence() < 0) {
            cursor = oldestCursor();
            return;
        }

        // Otherwise continue with the record that follows the committed one.
        long nextSequence = subscriptionOffset.getSequence() + 1;
        try {
            // A SEQUENCE lookup may fail with SeekOutOfRange, meaning the data at that
            // position has expired.
            cursor = sequenceCursor(nextSequence);
        } catch (SeekOutOfRangeException e) {
            cursor = oldestCursor();
        } catch (DatahubClientException e) {
            // other error
            e.printStackTrace();
            throw e;
        }
    }

    /** Returns the cursor of the first record within the lifecycle of this shard. */
    private String oldestCursor() {
        return datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.OLDEST).getCursor();
    }

    /** Returns the cursor that points at the given sequence of this shard. */
    private String sequenceCursor(long sequence) {
        return datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId, CursorType.SEQUENCE, sequence).getCursor();
    }

    /**
     * Resolves the cursor to resume from after the subscription offset was reset.
     *
     * <p>The lookup should match how the offset was reset: if both sequence and
     * timestamp were set, either SEQUENCE or SYSTEM_TIME works; if only one of them was
     * set, only the matching lookup works. SEQUENCE is therefore tried first, then
     * SYSTEM_TIME, then OLDEST. A lookup that throws or yields a {@code null} cursor
     * moves on to the next strategy. If OLDEST fails as well the JVM is terminated with
     * exit status 1.
     *
     * @param offset the offset re-read from the service after the reset
     * @return the cursor to continue reading from
     */
    private String cursorAfterReset(SubscriptionOffset offset) {
        try {
            String bySequence = sequenceCursor(offset.getSequence() + 1);
            if (bySequence != null) {
                return bySequence;
            }
        } catch (DatahubClientException ignored) {
            // SEQUENCE lookup failed; fall back to SYSTEM_TIME.
        }

        try {
            String byTime = datahubClient.getCursor(Constant.projectName, Constant.topicName, shardId,
                    CursorType.SYSTEM_TIME, offset.getTimestamp()).getCursor();
            if (byTime != null) {
                return byTime;
            }
        } catch (DatahubClientException ignored) {
            // SYSTEM_TIME lookup failed; fall back to OLDEST.
        }

        try {
            return oldestCursor();
        } catch (DatahubClientException fatal) {
            // No strategy worked: report the cause before terminating.
            fatal.printStackTrace();
            System.exit(1);
            // Not reached when exit succeeds; the throw only satisfies the compiler.
            throw fatal;
        }
    }

    /**
     * Stores the position of the given record in the subscription offset and commits it.
     *
     * @param recordEntry the last record that has been consumed
     */
    private void commitOffset(RecordEntry recordEntry) {
        subscriptionOffset.setSequence(recordEntry.getSequence());
        subscriptionOffset.setTimestamp(recordEntry.getSystemTime());

        Map<String, SubscriptionOffset> offsetMap = new HashMap<>();
        offsetMap.put(shardId, subscriptionOffset);
        datahubClient.commitSubscriptionOffset(Constant.projectName, Constant.topicName, subId, offsetMap);
        System.out.println(Thread.currentThread() + " commit offset successful");
    }

    /**
     * Read loop: fetches up to 1000 records per call, prints them, and commits the
     * offset every 2000 records. Sleeps 5 seconds when a fetch returns no records.
     * The loop ends once {@code maxRetry} failed attempts have accumulated.
     */
    @Override
    public void run() {
        long recordCount = 0L;
        // Maximum number of records fetched per call.
        int fetchNum = 1000;
        int retryNum = 0;
        // The offset is committed once every commitNum consumed records.
        int commitNum = 2000;
        int sleepTimeMs = 5000;
        while (retryNum < maxRetry) {
            try {
                GetRecordsResult result = datahubClient.getRecords(Constant.projectName, Constant.topicName, shardId, schema, cursor, fetchNum);
                if (result.getRecordCount() <= 0) {
                    // No data: sleep, then read again from the same cursor.
                    System.out.printf("no data, sleep %d second\n", sleepTimeMs / 1000);
                    Thread.sleep(sleepTimeMs);
                    continue;
                }
                for (RecordEntry recordEntry : result.getRecords()) {

                    // Consume the record.
                    // NOTE(review): both halves of the message print "string_field";
                    // the second was possibly meant to be another field — confirm.
                    TupleRecordData data = (TupleRecordData) recordEntry.getRecordData();
                    String res = "string_field:" + data.getField("string_field") + "\t"
                            + "string_field:" + data.getField("string_field");
                    System.out.println(Thread.currentThread() + "\t" + res);

                    recordCount++;

                    if (recordCount % commitNum == 0) {
                        commitOffset(recordEntry);
                    }
                }

                // When reading continuously there is no need to call getCursor again:
                // the next cursor comes with the getRecords result.
                cursor = result.getNextCursor();
                retryNum = 0;
            } catch (SubscriptionOfflineException | SubscriptionSessionInvalidException e) {
                // Quit. Offline: the subscription is offline; session invalid: the
                // subscription is being consumed by another client at the same time.
                e.printStackTrace();
                throw e;
            } catch (SubscriptionOffsetResetException e) {
                // The offset was reset: re-read it to pick up the new version ID.
                List<String> shardIds = Arrays.asList(shardId);
                SubscriptionOffset offset = datahubClient.getSubscriptionOffset(Constant.projectName, Constant.topicName, subId, shardIds).getOffsets().get(shardId);
                subscriptionOffset.setVersionId(offset.getVersionId());

                cursor = cursorAfterReset(offset);
            } catch (LimitExceededException e) {
                // limit exceed, retry
                e.printStackTrace();
                retryNum++;
            } catch (DatahubClientException e) {
                // other error, retry
                e.printStackTrace();
                retryNum++;
            } catch (Exception e) {
                // Anything else (including an interrupted sleep) terminates the JVM.
                e.printStackTrace();
                System.exit(-1);
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy