package com.aliyun.datahub.example;

import com.aliyun.datahub.DatahubClient;
import com.aliyun.datahub.DatahubConfiguration;
import com.aliyun.datahub.auth.AliyunAccount;
import com.aliyun.datahub.common.data.Field;
import com.aliyun.datahub.common.data.FieldType;
import com.aliyun.datahub.common.data.RecordSchema;
import com.aliyun.datahub.common.data.RecordType;
import com.aliyun.datahub.exception.DatahubClientException;
import com.aliyun.datahub.exception.InvalidCursorException;
import com.aliyun.datahub.model.*;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DatahubExample {
    private String accessId = "";           // your Aliyun AccessKey ID
    private String accessKey = "";          // your Aliyun AccessKey secret
    private String endpoint = "http://";    // the DataHub service endpoint
    private String projectName = "test_project";
    private String topicName = "topic_test_example";
    private RecordSchema schema = null;
    private DatahubConfiguration conf;
    private DatahubClient client;

    public DatahubExample() {
        this.conf = new DatahubConfiguration(new AliyunAccount(accessId, accessKey), endpoint);
        this.client = new DatahubClient(conf);
    }

    public void init() {
        schema = new RecordSchema();
        schema.addField(new Field("f1", FieldType.STRING));
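        // Illustrative only: a topic schema may carry additional fields of
        // other types, e.g.:
        // schema.addField(new Field("f2", FieldType.BIGINT));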
        // 3 shards, 3-day lifecycle, description "topic"
        client.createTopic(projectName, topicName, 3, 3, RecordType.TUPLE, schema, "topic");
        // re-read the topic so the local schema matches the server-side definition
        GetTopicResult topic = client.getTopic(projectName, topicName);
        schema = topic.getRecordSchema();
    }

    public void putRecords() {
        ListShardResult shards = client.listShard(projectName, topicName);

        List<RecordEntry> recordEntries = new ArrayList<RecordEntry>();

        int recordNum = 10;

        for (int n = 0; n < recordNum; n++) {
            // build one record matching the topic schema
            RecordEntry entry = new RecordEntry(schema);

            for (int i = 0; i < entry.getFieldCount(); i++) {
                entry.setString(i, "test");
            }

            // route every record to the first shard
            String shardId = shards.getShards().get(0).getShardId();

            entry.setShardId(shardId);

            recordEntries.add(entry);
        }
        PutRecordsResult result = client.putRecords(projectName, topicName, recordEntries);

        // handle failed records: a put may partially fail, in which case the
        // result reports the failed subset; a minimal sketch is to retry it once
        if (result.getFailedRecordCount() > 0) {
            client.putRecords(projectName, topicName, result.getFailedRecords());
        }
    }

    public void getRecords() {
        ListShardResult shards = client.listShard(projectName, topicName);
        String shardId = shards.getShards().get(0).getShardId();

        // start reading from records written in the last 24 hours
        GetCursorResult cursorRs = client.getCursor(projectName, topicName, shardId, System.currentTimeMillis() - 24 * 3600 * 1000 /* ms */);
        String cursor = cursorRs.getCursor();
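
        // Illustrative only: a cursor can also be requested by position rather
        // than by timestamp, e.g. the oldest record still in the shard:
        // client.getCursor(projectName, topicName, shardId, GetCursorRequest.CursorType.OLDEST);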

        int limit = 10;
        // tail the shard indefinitely
        while (true) {
            try {
                GetRecordsResult recordRs = client.getRecords(projectName, topicName, shardId, cursor, limit, schema);

                List<RecordEntry> recordEntries = recordRs.getRecords();
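
                // Illustrative only: consume the batch by reading each field
                // back out (getString by field name mirrors the indexed
                // setString used in putRecords)
                for (RecordEntry entry : recordEntries) {
                    System.out.println(entry.getString("f1"));
                }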

                // nextCursor unchanged means no new records yet: back off briefly
                if (cursor.equals(recordRs.getNextCursor())) {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                }
                cursor = recordRs.getNextCursor();
            } catch (InvalidCursorException ex) {
                // the cursor is no longer valid (e.g. the data has expired):
                // restart from the oldest record still in the shard
                cursorRs = client.getCursor(projectName, topicName, shardId, GetCursorRequest.CursorType.OLDEST);
                cursor = cursorRs.getCursor();
            }
        }
    }
    public void createOdpsDataConnector() {
        // Create a SinkOdps DataConnector
        // ODPS-related configuration
        String odpsProject = "datahub_test";
        String odpsTable = "test_table";
        String odpsEndpoint = "http://test-endpoint";
        String tunnelEndpoint = "http://test-tunnel-endpoint";
        OdpsDesc odpsDesc = new OdpsDesc();
        odpsDesc.setProject(odpsProject);
        odpsDesc.setTable(odpsTable);
        odpsDesc.setOdpsEndpoint(odpsEndpoint);
        odpsDesc.setTunnelEndpoint(tunnelEndpoint);
        odpsDesc.setAccessId(accessId);
        odpsDesc.setAccessKey(accessKey);
        odpsDesc.setPartitionMode(OdpsDesc.PartitionMode.USER_DEFINE);

        // Select, in order, some or all topic columns to sync to ODPS;
        // unselected columns will not be synchronized
        List<String> columnFields = new ArrayList<String>();
        columnFields.add("f1");
        // USER_DEFINE is the default partition mode; see the documentation:
        // https://help.aliyun.com/document_detail/47453.html?spm=5176.product53345.6.555.MpixiB
        // To use SYSTEM_TIME or EVENT_TIME mode instead, configure as follows:
        // begin
        int timeRange = 15;  // partition time interval in minutes, 15 at minimum
        odpsDesc.setPartitionMode(OdpsDesc.PartitionMode.SYSTEM_TIME);
        odpsDesc.setTimeRange(timeRange);
        Map<String, String> partitionConfig = new HashMap<String, String>();
        // currently only combinations of %Y%m%d%H%M are supported, with any
        // number of partition levels
        partitionConfig.put("pt", "%Y%m%d");
        partitionConfig.put("ct", "%H%M");
        odpsDesc.setPartitionConfig(partitionConfig);
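        // Illustrative only: with the config above (timeRange = 15), a record
        // arriving at 2018-06-01 12:37 would land in a partition such as
        // pt=20180601,ct=1230, i.e. timestamps truncated to 15-minute windows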
        // end

        client.createDataConnector(projectName, topicName, ConnectorType.SINK_ODPS, columnFields, odpsDesc);

        // If needed, periodically (e.g. every 15 minutes) fetch the connector
        // status for every shard to check for errors
        String shard = "0";
        GetDataConnectorShardStatusResult getDataConnectorShardStatusResult =
            client.getDataConnectorShardStatus(projectName, topicName, ConnectorType.SINK_ODPS, shard);
        System.out.println(getDataConnectorShardStatusResult.getCurSequence());
        System.out.println(getDataConnectorShardStatusResult.getLastErrorMessage());
    }

    public void createADSDataConnector() {
        // Create a SinkAds/SinkMysql DataConnector
        // database-related configuration
        String dbHost = "127.0.0.1";
        int dbPort = 3306;
        String dbName = "db";
        String user = "123";
        String password = "123";
        String tableName = "table";
        DatabaseDesc desc = new DatabaseDesc();
        desc.setHost(dbHost);
        desc.setPort(dbPort);
        desc.setDatabase(dbName);
        desc.setUser(user);
        desc.setPassword(password);
        desc.setTable(tableName);
        // batch commit size in KB
        desc.setMaxCommitSize(512L);
        // whether to ignore errors by writing with INSERT IGNORE
        desc.setIgnore(true);
        // Select, in order, some or all topic columns to sync to ADS/MySQL;
        // unselected columns will not be synchronized, and selected columns
        // must exist in the ADS/MySQL table
        List<String> columnFields = new ArrayList<String>();
        columnFields.add("f1");

        client.createDataConnector(projectName, topicName, ConnectorType.SINK_ADS, columnFields, desc);
        // or: client.createDataConnector(projectName, topicName, ConnectorType.SINK_MYSQL, columnFields, desc);
        // If needed, periodically (e.g. every 15 minutes) fetch the connector
        // status for every shard to check for errors
        String shard = "0";
        GetDataConnectorShardStatusResult getDataConnectorShardStatusResult =
            client.getDataConnectorShardStatus(projectName, topicName, ConnectorType.SINK_ADS, shard);
        System.out.println(getDataConnectorShardStatusResult.getCurSequence());
        System.out.println(getDataConnectorShardStatusResult.getLastErrorMessage());
    }

    public void createESDataConnector() {
        // Create a SinkES DataConnector
        // Elasticsearch-related configuration
        String esEndpoint = "127.0.0.1";
        String index = "index";
        String user = "123";
        String password = "123";
        List<String> ids = new ArrayList<String>();
        ids.add("f1");
        List<String> types = new ArrayList<String>();
        types.add("f1");
        ElasticSearchDesc desc = new ElasticSearchDesc();
        desc.setEndpoint(esEndpoint);
        desc.setIndex(index);
        desc.setUser(user);
        desc.setPassword(password);
        // the ES document id and type are derived from the values of the given
        // DataHub fields; records with different values are written to
        // different ES types
        desc.setIdFields(ids);
        desc.setTypeFields(types);
        // batch commit size in KB
        desc.setMaxCommitSize(512L);
        // whether to use proxy mode: if true, writes go through the proxy
        // instead of scanning all ES nodes; required for ES inside a VPC
        desc.setProxyMode(true);
        // Select, in order, some or all topic columns to sync to ES;
        // unselected columns will not be synchronized
        List<String> columnFields = new ArrayList<String>();
        columnFields.add("f1");

        client.createDataConnector(projectName, topicName, ConnectorType.SINK_ES, columnFields, desc);
        // If needed, periodically (e.g. every 15 minutes) fetch the connector
        // status for every shard to check for errors
        String shard = "0";
        GetDataConnectorShardStatusResult getDataConnectorShardStatusResult =
            client.getDataConnectorShardStatus(projectName, topicName, ConnectorType.SINK_ES, shard);
        System.out.println(getDataConnectorShardStatusResult.getCurSequence());
        System.out.println(getDataConnectorShardStatusResult.getLastErrorMessage());
    }

    public void appendField() {
        // append a new field to the topic schema, then register it with the
        // ODPS connector so the new column is included in the sync
        client.appendField(new AppendFieldRequest(projectName, topicName, new Field("fieldName", FieldType.STRING)));
        client.appendDataConnectorField(new AppendDataConnectorFieldRequest(projectName, topicName, ConnectorType.SINK_ODPS, "fieldName"));
    }

    public static void main(String[] args) {
        DatahubExample example = new DatahubExample();
        try {
            example.init();
            example.putRecords();
            example.getRecords();
            example.createADSDataConnector();
        } catch (DatahubClientException e) {
            e.printStackTrace();
        }
    }
}